@ (removed: HTML code-viewer navigation chrome captured during extraction;
@  not part of the original source file)
1@ SPDX-License-Identifier: GPL-2.0
2
3@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
4@ has relicensed it under the GPLv2. Therefore this program is free software;
5@ you can redistribute it and/or modify it under the terms of the GNU General
6@ Public License version 2 as published by the Free Software Foundation.
7@
8@ The original headers, including the original license headers, are
9@ included below for completeness.
10
11@ ====================================================================
12@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
13@ project. The module is, however, dual licensed under OpenSSL and
14@ CRYPTOGAMS licenses depending on where you obtain it. For further
15@ details see http://www.openssl.org/~appro/cryptogams/.
16@ ====================================================================
17
18@ SHA256 block procedure for ARMv4. May 2007.
19
20@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
21@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
22@ byte [on single-issue Xscale PXA250 core].
23
24@ July 2010.
25@
26@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27@ Cortex A8 core and ~20 cycles per processed byte.
28
29@ February 2011.
30@
31@ Profiler-assisted and platform-specific optimization resulted in 16%
32@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
33
34@ September 2013.
35@
36@ Add NEON implementation. On Cortex A8 it was measured to process one
37@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39@ code (meaning that latter performs sub-optimally, nothing was done
40@ about it).
41
42@ May 2014.
43@
44@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
45
46#ifndef __KERNEL__
47# include "arm_arch.h"
48#else
49# define __ARM_ARCH__ __LINUX_ARM_ARCH__
50# define __ARM_MAX_ARCH__ 7
51#endif
52
53.text
54#if __ARM_ARCH__<7
55.code	32
56#else
57.syntax unified
58# ifdef __thumb2__
59#  define adrl adr
60.thumb
61# else
62.code   32
63# endif
64#endif
65
66.type	K256,%object
67.align	5
68K256:
69.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
70.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
71.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
72.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
73.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
74.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
75.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
76.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
77.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
78.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
79.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
80.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
81.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
82.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
83.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
84.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
85.size	K256,.-K256
86.word	0				@ terminator
87#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
88.LOPENSSL_armcap:
89.word	OPENSSL_armcap_P-sha256_block_data_order
90#endif
91.align	5
92
93.global	sha256_block_data_order
94.type	sha256_block_data_order,%function
95sha256_block_data_order:
96.Lsha256_block_data_order:
97#if __ARM_ARCH__<7
98	sub	r3,pc,#8		@ sha256_block_data_order
99#else
100	adr	r3,.Lsha256_block_data_order
101#endif
102#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
103	ldr	r12,.LOPENSSL_armcap
104	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
105	tst	r12,#ARMV8_SHA256
106	bne	.LARMv8
107	tst	r12,#ARMV7_NEON
108	bne	.LNEON
109#endif
110	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
111	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
112	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
113	sub	r14,r3,#256+32	@ K256
114	sub	sp,sp,#16*4		@ alloca(X[16])
115.Loop:
116# if __ARM_ARCH__>=7
117	ldr	r2,[r1],#4
118# else
119	ldrb	r2,[r1,#3]
120# endif
121	eor	r3,r5,r6		@ magic
122	eor	r12,r12,r12
123#if __ARM_ARCH__>=7
124	@ ldr	r2,[r1],#4			@ 0
125# if 0==15
126	str	r1,[sp,#17*4]			@ make room for r1
127# endif
128	eor	r0,r8,r8,ror#5
129	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
130	eor	r0,r0,r8,ror#19	@ Sigma1(e)
131# ifndef __ARMEB__
132	rev	r2,r2
133# endif
134#else
135	@ ldrb	r2,[r1,#3]			@ 0
136	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
137	ldrb	r12,[r1,#2]
138	ldrb	r0,[r1,#1]
139	orr	r2,r2,r12,lsl#8
140	ldrb	r12,[r1],#4
141	orr	r2,r2,r0,lsl#16
142# if 0==15
143	str	r1,[sp,#17*4]			@ make room for r1
144# endif
145	eor	r0,r8,r8,ror#5
146	orr	r2,r2,r12,lsl#24
147	eor	r0,r0,r8,ror#19	@ Sigma1(e)
148#endif
149	ldr	r12,[r14],#4			@ *K256++
150	add	r11,r11,r2			@ h+=X[i]
151	str	r2,[sp,#0*4]
152	eor	r2,r9,r10
153	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
154	and	r2,r2,r8
155	add	r11,r11,r12			@ h+=K256[i]
156	eor	r2,r2,r10			@ Ch(e,f,g)
157	eor	r0,r4,r4,ror#11
158	add	r11,r11,r2			@ h+=Ch(e,f,g)
159#if 0==31
160	and	r12,r12,#0xff
161	cmp	r12,#0xf2			@ done?
162#endif
163#if 0<15
164# if __ARM_ARCH__>=7
165	ldr	r2,[r1],#4			@ prefetch
166# else
167	ldrb	r2,[r1,#3]
168# endif
169	eor	r12,r4,r5			@ a^b, b^c in next round
170#else
171	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
172	eor	r12,r4,r5			@ a^b, b^c in next round
173	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
174#endif
175	eor	r0,r0,r4,ror#20	@ Sigma0(a)
176	and	r3,r3,r12			@ (b^c)&=(a^b)
177	add	r7,r7,r11			@ d+=h
178	eor	r3,r3,r5			@ Maj(a,b,c)
179	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
180	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
181#if __ARM_ARCH__>=7
182	@ ldr	r2,[r1],#4			@ 1
183# if 1==15
184	str	r1,[sp,#17*4]			@ make room for r1
185# endif
186	eor	r0,r7,r7,ror#5
187	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
188	eor	r0,r0,r7,ror#19	@ Sigma1(e)
189# ifndef __ARMEB__
190	rev	r2,r2
191# endif
192#else
193	@ ldrb	r2,[r1,#3]			@ 1
194	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
195	ldrb	r3,[r1,#2]
196	ldrb	r0,[r1,#1]
197	orr	r2,r2,r3,lsl#8
198	ldrb	r3,[r1],#4
199	orr	r2,r2,r0,lsl#16
200# if 1==15
201	str	r1,[sp,#17*4]			@ make room for r1
202# endif
203	eor	r0,r7,r7,ror#5
204	orr	r2,r2,r3,lsl#24
205	eor	r0,r0,r7,ror#19	@ Sigma1(e)
206#endif
207	ldr	r3,[r14],#4			@ *K256++
208	add	r10,r10,r2			@ h+=X[i]
209	str	r2,[sp,#1*4]
210	eor	r2,r8,r9
211	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
212	and	r2,r2,r7
213	add	r10,r10,r3			@ h+=K256[i]
214	eor	r2,r2,r9			@ Ch(e,f,g)
215	eor	r0,r11,r11,ror#11
216	add	r10,r10,r2			@ h+=Ch(e,f,g)
217#if 1==31
218	and	r3,r3,#0xff
219	cmp	r3,#0xf2			@ done?
220#endif
221#if 1<15
222# if __ARM_ARCH__>=7
223	ldr	r2,[r1],#4			@ prefetch
224# else
225	ldrb	r2,[r1,#3]
226# endif
227	eor	r3,r11,r4			@ a^b, b^c in next round
228#else
229	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
230	eor	r3,r11,r4			@ a^b, b^c in next round
231	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
232#endif
233	eor	r0,r0,r11,ror#20	@ Sigma0(a)
234	and	r12,r12,r3			@ (b^c)&=(a^b)
235	add	r6,r6,r10			@ d+=h
236	eor	r12,r12,r4			@ Maj(a,b,c)
237	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
238	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
239#if __ARM_ARCH__>=7
240	@ ldr	r2,[r1],#4			@ 2
241# if 2==15
242	str	r1,[sp,#17*4]			@ make room for r1
243# endif
244	eor	r0,r6,r6,ror#5
245	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
246	eor	r0,r0,r6,ror#19	@ Sigma1(e)
247# ifndef __ARMEB__
248	rev	r2,r2
249# endif
250#else
251	@ ldrb	r2,[r1,#3]			@ 2
252	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
253	ldrb	r12,[r1,#2]
254	ldrb	r0,[r1,#1]
255	orr	r2,r2,r12,lsl#8
256	ldrb	r12,[r1],#4
257	orr	r2,r2,r0,lsl#16
258# if 2==15
259	str	r1,[sp,#17*4]			@ make room for r1
260# endif
261	eor	r0,r6,r6,ror#5
262	orr	r2,r2,r12,lsl#24
263	eor	r0,r0,r6,ror#19	@ Sigma1(e)
264#endif
265	ldr	r12,[r14],#4			@ *K256++
266	add	r9,r9,r2			@ h+=X[i]
267	str	r2,[sp,#2*4]
268	eor	r2,r7,r8
269	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
270	and	r2,r2,r6
271	add	r9,r9,r12			@ h+=K256[i]
272	eor	r2,r2,r8			@ Ch(e,f,g)
273	eor	r0,r10,r10,ror#11
274	add	r9,r9,r2			@ h+=Ch(e,f,g)
275#if 2==31
276	and	r12,r12,#0xff
277	cmp	r12,#0xf2			@ done?
278#endif
279#if 2<15
280# if __ARM_ARCH__>=7
281	ldr	r2,[r1],#4			@ prefetch
282# else
283	ldrb	r2,[r1,#3]
284# endif
285	eor	r12,r10,r11			@ a^b, b^c in next round
286#else
287	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
288	eor	r12,r10,r11			@ a^b, b^c in next round
289	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
290#endif
291	eor	r0,r0,r10,ror#20	@ Sigma0(a)
292	and	r3,r3,r12			@ (b^c)&=(a^b)
293	add	r5,r5,r9			@ d+=h
294	eor	r3,r3,r11			@ Maj(a,b,c)
295	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
296	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
297#if __ARM_ARCH__>=7
298	@ ldr	r2,[r1],#4			@ 3
299# if 3==15
300	str	r1,[sp,#17*4]			@ make room for r1
301# endif
302	eor	r0,r5,r5,ror#5
303	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
304	eor	r0,r0,r5,ror#19	@ Sigma1(e)
305# ifndef __ARMEB__
306	rev	r2,r2
307# endif
308#else
309	@ ldrb	r2,[r1,#3]			@ 3
310	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
311	ldrb	r3,[r1,#2]
312	ldrb	r0,[r1,#1]
313	orr	r2,r2,r3,lsl#8
314	ldrb	r3,[r1],#4
315	orr	r2,r2,r0,lsl#16
316# if 3==15
317	str	r1,[sp,#17*4]			@ make room for r1
318# endif
319	eor	r0,r5,r5,ror#5
320	orr	r2,r2,r3,lsl#24
321	eor	r0,r0,r5,ror#19	@ Sigma1(e)
322#endif
323	ldr	r3,[r14],#4			@ *K256++
324	add	r8,r8,r2			@ h+=X[i]
325	str	r2,[sp,#3*4]
326	eor	r2,r6,r7
327	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
328	and	r2,r2,r5
329	add	r8,r8,r3			@ h+=K256[i]
330	eor	r2,r2,r7			@ Ch(e,f,g)
331	eor	r0,r9,r9,ror#11
332	add	r8,r8,r2			@ h+=Ch(e,f,g)
333#if 3==31
334	and	r3,r3,#0xff
335	cmp	r3,#0xf2			@ done?
336#endif
337#if 3<15
338# if __ARM_ARCH__>=7
339	ldr	r2,[r1],#4			@ prefetch
340# else
341	ldrb	r2,[r1,#3]
342# endif
343	eor	r3,r9,r10			@ a^b, b^c in next round
344#else
345	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
346	eor	r3,r9,r10			@ a^b, b^c in next round
347	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
348#endif
349	eor	r0,r0,r9,ror#20	@ Sigma0(a)
350	and	r12,r12,r3			@ (b^c)&=(a^b)
351	add	r4,r4,r8			@ d+=h
352	eor	r12,r12,r10			@ Maj(a,b,c)
353	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
354	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
355#if __ARM_ARCH__>=7
356	@ ldr	r2,[r1],#4			@ 4
357# if 4==15
358	str	r1,[sp,#17*4]			@ make room for r1
359# endif
360	eor	r0,r4,r4,ror#5
361	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
362	eor	r0,r0,r4,ror#19	@ Sigma1(e)
363# ifndef __ARMEB__
364	rev	r2,r2
365# endif
366#else
367	@ ldrb	r2,[r1,#3]			@ 4
368	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
369	ldrb	r12,[r1,#2]
370	ldrb	r0,[r1,#1]
371	orr	r2,r2,r12,lsl#8
372	ldrb	r12,[r1],#4
373	orr	r2,r2,r0,lsl#16
374# if 4==15
375	str	r1,[sp,#17*4]			@ make room for r1
376# endif
377	eor	r0,r4,r4,ror#5
378	orr	r2,r2,r12,lsl#24
379	eor	r0,r0,r4,ror#19	@ Sigma1(e)
380#endif
381	ldr	r12,[r14],#4			@ *K256++
382	add	r7,r7,r2			@ h+=X[i]
383	str	r2,[sp,#4*4]
384	eor	r2,r5,r6
385	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
386	and	r2,r2,r4
387	add	r7,r7,r12			@ h+=K256[i]
388	eor	r2,r2,r6			@ Ch(e,f,g)
389	eor	r0,r8,r8,ror#11
390	add	r7,r7,r2			@ h+=Ch(e,f,g)
391#if 4==31
392	and	r12,r12,#0xff
393	cmp	r12,#0xf2			@ done?
394#endif
395#if 4<15
396# if __ARM_ARCH__>=7
397	ldr	r2,[r1],#4			@ prefetch
398# else
399	ldrb	r2,[r1,#3]
400# endif
401	eor	r12,r8,r9			@ a^b, b^c in next round
402#else
403	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
404	eor	r12,r8,r9			@ a^b, b^c in next round
405	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
406#endif
407	eor	r0,r0,r8,ror#20	@ Sigma0(a)
408	and	r3,r3,r12			@ (b^c)&=(a^b)
409	add	r11,r11,r7			@ d+=h
410	eor	r3,r3,r9			@ Maj(a,b,c)
411	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
412	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
413#if __ARM_ARCH__>=7
414	@ ldr	r2,[r1],#4			@ 5
415# if 5==15
416	str	r1,[sp,#17*4]			@ make room for r1
417# endif
418	eor	r0,r11,r11,ror#5
419	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
420	eor	r0,r0,r11,ror#19	@ Sigma1(e)
421# ifndef __ARMEB__
422	rev	r2,r2
423# endif
424#else
425	@ ldrb	r2,[r1,#3]			@ 5
426	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
427	ldrb	r3,[r1,#2]
428	ldrb	r0,[r1,#1]
429	orr	r2,r2,r3,lsl#8
430	ldrb	r3,[r1],#4
431	orr	r2,r2,r0,lsl#16
432# if 5==15
433	str	r1,[sp,#17*4]			@ make room for r1
434# endif
435	eor	r0,r11,r11,ror#5
436	orr	r2,r2,r3,lsl#24
437	eor	r0,r0,r11,ror#19	@ Sigma1(e)
438#endif
439	ldr	r3,[r14],#4			@ *K256++
440	add	r6,r6,r2			@ h+=X[i]
441	str	r2,[sp,#5*4]
442	eor	r2,r4,r5
443	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
444	and	r2,r2,r11
445	add	r6,r6,r3			@ h+=K256[i]
446	eor	r2,r2,r5			@ Ch(e,f,g)
447	eor	r0,r7,r7,ror#11
448	add	r6,r6,r2			@ h+=Ch(e,f,g)
449#if 5==31
450	and	r3,r3,#0xff
451	cmp	r3,#0xf2			@ done?
452#endif
453#if 5<15
454# if __ARM_ARCH__>=7
455	ldr	r2,[r1],#4			@ prefetch
456# else
457	ldrb	r2,[r1,#3]
458# endif
459	eor	r3,r7,r8			@ a^b, b^c in next round
460#else
461	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
462	eor	r3,r7,r8			@ a^b, b^c in next round
463	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
464#endif
465	eor	r0,r0,r7,ror#20	@ Sigma0(a)
466	and	r12,r12,r3			@ (b^c)&=(a^b)
467	add	r10,r10,r6			@ d+=h
468	eor	r12,r12,r8			@ Maj(a,b,c)
469	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
470	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
471#if __ARM_ARCH__>=7
472	@ ldr	r2,[r1],#4			@ 6
473# if 6==15
474	str	r1,[sp,#17*4]			@ make room for r1
475# endif
476	eor	r0,r10,r10,ror#5
477	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
478	eor	r0,r0,r10,ror#19	@ Sigma1(e)
479# ifndef __ARMEB__
480	rev	r2,r2
481# endif
482#else
483	@ ldrb	r2,[r1,#3]			@ 6
484	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
485	ldrb	r12,[r1,#2]
486	ldrb	r0,[r1,#1]
487	orr	r2,r2,r12,lsl#8
488	ldrb	r12,[r1],#4
489	orr	r2,r2,r0,lsl#16
490# if 6==15
491	str	r1,[sp,#17*4]			@ make room for r1
492# endif
493	eor	r0,r10,r10,ror#5
494	orr	r2,r2,r12,lsl#24
495	eor	r0,r0,r10,ror#19	@ Sigma1(e)
496#endif
497	ldr	r12,[r14],#4			@ *K256++
498	add	r5,r5,r2			@ h+=X[i]
499	str	r2,[sp,#6*4]
500	eor	r2,r11,r4
501	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
502	and	r2,r2,r10
503	add	r5,r5,r12			@ h+=K256[i]
504	eor	r2,r2,r4			@ Ch(e,f,g)
505	eor	r0,r6,r6,ror#11
506	add	r5,r5,r2			@ h+=Ch(e,f,g)
507#if 6==31
508	and	r12,r12,#0xff
509	cmp	r12,#0xf2			@ done?
510#endif
511#if 6<15
512# if __ARM_ARCH__>=7
513	ldr	r2,[r1],#4			@ prefetch
514# else
515	ldrb	r2,[r1,#3]
516# endif
517	eor	r12,r6,r7			@ a^b, b^c in next round
518#else
519	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
520	eor	r12,r6,r7			@ a^b, b^c in next round
521	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
522#endif
523	eor	r0,r0,r6,ror#20	@ Sigma0(a)
524	and	r3,r3,r12			@ (b^c)&=(a^b)
525	add	r9,r9,r5			@ d+=h
526	eor	r3,r3,r7			@ Maj(a,b,c)
527	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
528	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
529#if __ARM_ARCH__>=7
530	@ ldr	r2,[r1],#4			@ 7
531# if 7==15
532	str	r1,[sp,#17*4]			@ make room for r1
533# endif
534	eor	r0,r9,r9,ror#5
535	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
536	eor	r0,r0,r9,ror#19	@ Sigma1(e)
537# ifndef __ARMEB__
538	rev	r2,r2
539# endif
540#else
541	@ ldrb	r2,[r1,#3]			@ 7
542	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
543	ldrb	r3,[r1,#2]
544	ldrb	r0,[r1,#1]
545	orr	r2,r2,r3,lsl#8
546	ldrb	r3,[r1],#4
547	orr	r2,r2,r0,lsl#16
548# if 7==15
549	str	r1,[sp,#17*4]			@ make room for r1
550# endif
551	eor	r0,r9,r9,ror#5
552	orr	r2,r2,r3,lsl#24
553	eor	r0,r0,r9,ror#19	@ Sigma1(e)
554#endif
555	ldr	r3,[r14],#4			@ *K256++
556	add	r4,r4,r2			@ h+=X[i]
557	str	r2,[sp,#7*4]
558	eor	r2,r10,r11
559	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
560	and	r2,r2,r9
561	add	r4,r4,r3			@ h+=K256[i]
562	eor	r2,r2,r11			@ Ch(e,f,g)
563	eor	r0,r5,r5,ror#11
564	add	r4,r4,r2			@ h+=Ch(e,f,g)
565#if 7==31
566	and	r3,r3,#0xff
567	cmp	r3,#0xf2			@ done?
568#endif
569#if 7<15
570# if __ARM_ARCH__>=7
571	ldr	r2,[r1],#4			@ prefetch
572# else
573	ldrb	r2,[r1,#3]
574# endif
575	eor	r3,r5,r6			@ a^b, b^c in next round
576#else
577	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
578	eor	r3,r5,r6			@ a^b, b^c in next round
579	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
580#endif
581	eor	r0,r0,r5,ror#20	@ Sigma0(a)
582	and	r12,r12,r3			@ (b^c)&=(a^b)
583	add	r8,r8,r4			@ d+=h
584	eor	r12,r12,r6			@ Maj(a,b,c)
585	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
586	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
587#if __ARM_ARCH__>=7
588	@ ldr	r2,[r1],#4			@ 8
589# if 8==15
590	str	r1,[sp,#17*4]			@ make room for r1
591# endif
592	eor	r0,r8,r8,ror#5
593	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
594	eor	r0,r0,r8,ror#19	@ Sigma1(e)
595# ifndef __ARMEB__
596	rev	r2,r2
597# endif
598#else
599	@ ldrb	r2,[r1,#3]			@ 8
600	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
601	ldrb	r12,[r1,#2]
602	ldrb	r0,[r1,#1]
603	orr	r2,r2,r12,lsl#8
604	ldrb	r12,[r1],#4
605	orr	r2,r2,r0,lsl#16
606# if 8==15
607	str	r1,[sp,#17*4]			@ make room for r1
608# endif
609	eor	r0,r8,r8,ror#5
610	orr	r2,r2,r12,lsl#24
611	eor	r0,r0,r8,ror#19	@ Sigma1(e)
612#endif
613	ldr	r12,[r14],#4			@ *K256++
614	add	r11,r11,r2			@ h+=X[i]
615	str	r2,[sp,#8*4]
616	eor	r2,r9,r10
617	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
618	and	r2,r2,r8
619	add	r11,r11,r12			@ h+=K256[i]
620	eor	r2,r2,r10			@ Ch(e,f,g)
621	eor	r0,r4,r4,ror#11
622	add	r11,r11,r2			@ h+=Ch(e,f,g)
623#if 8==31
624	and	r12,r12,#0xff
625	cmp	r12,#0xf2			@ done?
626#endif
627#if 8<15
628# if __ARM_ARCH__>=7
629	ldr	r2,[r1],#4			@ prefetch
630# else
631	ldrb	r2,[r1,#3]
632# endif
633	eor	r12,r4,r5			@ a^b, b^c in next round
634#else
635	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
636	eor	r12,r4,r5			@ a^b, b^c in next round
637	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
638#endif
639	eor	r0,r0,r4,ror#20	@ Sigma0(a)
640	and	r3,r3,r12			@ (b^c)&=(a^b)
641	add	r7,r7,r11			@ d+=h
642	eor	r3,r3,r5			@ Maj(a,b,c)
643	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
644	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
645#if __ARM_ARCH__>=7
646	@ ldr	r2,[r1],#4			@ 9
647# if 9==15
648	str	r1,[sp,#17*4]			@ make room for r1
649# endif
650	eor	r0,r7,r7,ror#5
651	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
652	eor	r0,r0,r7,ror#19	@ Sigma1(e)
653# ifndef __ARMEB__
654	rev	r2,r2
655# endif
656#else
657	@ ldrb	r2,[r1,#3]			@ 9
658	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
659	ldrb	r3,[r1,#2]
660	ldrb	r0,[r1,#1]
661	orr	r2,r2,r3,lsl#8
662	ldrb	r3,[r1],#4
663	orr	r2,r2,r0,lsl#16
664# if 9==15
665	str	r1,[sp,#17*4]			@ make room for r1
666# endif
667	eor	r0,r7,r7,ror#5
668	orr	r2,r2,r3,lsl#24
669	eor	r0,r0,r7,ror#19	@ Sigma1(e)
670#endif
671	ldr	r3,[r14],#4			@ *K256++
672	add	r10,r10,r2			@ h+=X[i]
673	str	r2,[sp,#9*4]
674	eor	r2,r8,r9
675	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
676	and	r2,r2,r7
677	add	r10,r10,r3			@ h+=K256[i]
678	eor	r2,r2,r9			@ Ch(e,f,g)
679	eor	r0,r11,r11,ror#11
680	add	r10,r10,r2			@ h+=Ch(e,f,g)
681#if 9==31
682	and	r3,r3,#0xff
683	cmp	r3,#0xf2			@ done?
684#endif
685#if 9<15
686# if __ARM_ARCH__>=7
687	ldr	r2,[r1],#4			@ prefetch
688# else
689	ldrb	r2,[r1,#3]
690# endif
691	eor	r3,r11,r4			@ a^b, b^c in next round
692#else
693	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
694	eor	r3,r11,r4			@ a^b, b^c in next round
695	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
696#endif
697	eor	r0,r0,r11,ror#20	@ Sigma0(a)
698	and	r12,r12,r3			@ (b^c)&=(a^b)
699	add	r6,r6,r10			@ d+=h
700	eor	r12,r12,r4			@ Maj(a,b,c)
701	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
702	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
703#if __ARM_ARCH__>=7
704	@ ldr	r2,[r1],#4			@ 10
705# if 10==15
706	str	r1,[sp,#17*4]			@ make room for r1
707# endif
708	eor	r0,r6,r6,ror#5
709	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
710	eor	r0,r0,r6,ror#19	@ Sigma1(e)
711# ifndef __ARMEB__
712	rev	r2,r2
713# endif
714#else
715	@ ldrb	r2,[r1,#3]			@ 10
716	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
717	ldrb	r12,[r1,#2]
718	ldrb	r0,[r1,#1]
719	orr	r2,r2,r12,lsl#8
720	ldrb	r12,[r1],#4
721	orr	r2,r2,r0,lsl#16
722# if 10==15
723	str	r1,[sp,#17*4]			@ make room for r1
724# endif
725	eor	r0,r6,r6,ror#5
726	orr	r2,r2,r12,lsl#24
727	eor	r0,r0,r6,ror#19	@ Sigma1(e)
728#endif
729	ldr	r12,[r14],#4			@ *K256++
730	add	r9,r9,r2			@ h+=X[i]
731	str	r2,[sp,#10*4]
732	eor	r2,r7,r8
733	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
734	and	r2,r2,r6
735	add	r9,r9,r12			@ h+=K256[i]
736	eor	r2,r2,r8			@ Ch(e,f,g)
737	eor	r0,r10,r10,ror#11
738	add	r9,r9,r2			@ h+=Ch(e,f,g)
739#if 10==31
740	and	r12,r12,#0xff
741	cmp	r12,#0xf2			@ done?
742#endif
743#if 10<15
744# if __ARM_ARCH__>=7
745	ldr	r2,[r1],#4			@ prefetch
746# else
747	ldrb	r2,[r1,#3]
748# endif
749	eor	r12,r10,r11			@ a^b, b^c in next round
750#else
751	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
752	eor	r12,r10,r11			@ a^b, b^c in next round
753	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
754#endif
755	eor	r0,r0,r10,ror#20	@ Sigma0(a)
756	and	r3,r3,r12			@ (b^c)&=(a^b)
757	add	r5,r5,r9			@ d+=h
758	eor	r3,r3,r11			@ Maj(a,b,c)
759	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
760	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
761#if __ARM_ARCH__>=7
762	@ ldr	r2,[r1],#4			@ 11
763# if 11==15
764	str	r1,[sp,#17*4]			@ make room for r1
765# endif
766	eor	r0,r5,r5,ror#5
767	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
768	eor	r0,r0,r5,ror#19	@ Sigma1(e)
769# ifndef __ARMEB__
770	rev	r2,r2
771# endif
772#else
773	@ ldrb	r2,[r1,#3]			@ 11
774	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
775	ldrb	r3,[r1,#2]
776	ldrb	r0,[r1,#1]
777	orr	r2,r2,r3,lsl#8
778	ldrb	r3,[r1],#4
779	orr	r2,r2,r0,lsl#16
780# if 11==15
781	str	r1,[sp,#17*4]			@ make room for r1
782# endif
783	eor	r0,r5,r5,ror#5
784	orr	r2,r2,r3,lsl#24
785	eor	r0,r0,r5,ror#19	@ Sigma1(e)
786#endif
787	ldr	r3,[r14],#4			@ *K256++
788	add	r8,r8,r2			@ h+=X[i]
789	str	r2,[sp,#11*4]
790	eor	r2,r6,r7
791	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
792	and	r2,r2,r5
793	add	r8,r8,r3			@ h+=K256[i]
794	eor	r2,r2,r7			@ Ch(e,f,g)
795	eor	r0,r9,r9,ror#11
796	add	r8,r8,r2			@ h+=Ch(e,f,g)
797#if 11==31
798	and	r3,r3,#0xff
799	cmp	r3,#0xf2			@ done?
800#endif
801#if 11<15
802# if __ARM_ARCH__>=7
803	ldr	r2,[r1],#4			@ prefetch
804# else
805	ldrb	r2,[r1,#3]
806# endif
807	eor	r3,r9,r10			@ a^b, b^c in next round
808#else
809	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
810	eor	r3,r9,r10			@ a^b, b^c in next round
811	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
812#endif
813	eor	r0,r0,r9,ror#20	@ Sigma0(a)
814	and	r12,r12,r3			@ (b^c)&=(a^b)
815	add	r4,r4,r8			@ d+=h
816	eor	r12,r12,r10			@ Maj(a,b,c)
817	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
818	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
819#if __ARM_ARCH__>=7
820	@ ldr	r2,[r1],#4			@ 12
821# if 12==15
822	str	r1,[sp,#17*4]			@ make room for r1
823# endif
824	eor	r0,r4,r4,ror#5
825	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
826	eor	r0,r0,r4,ror#19	@ Sigma1(e)
827# ifndef __ARMEB__
828	rev	r2,r2
829# endif
830#else
831	@ ldrb	r2,[r1,#3]			@ 12
832	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
833	ldrb	r12,[r1,#2]
834	ldrb	r0,[r1,#1]
835	orr	r2,r2,r12,lsl#8
836	ldrb	r12,[r1],#4
837	orr	r2,r2,r0,lsl#16
838# if 12==15
839	str	r1,[sp,#17*4]			@ make room for r1
840# endif
841	eor	r0,r4,r4,ror#5
842	orr	r2,r2,r12,lsl#24
843	eor	r0,r0,r4,ror#19	@ Sigma1(e)
844#endif
845	ldr	r12,[r14],#4			@ *K256++
846	add	r7,r7,r2			@ h+=X[i]
847	str	r2,[sp,#12*4]
848	eor	r2,r5,r6
849	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
850	and	r2,r2,r4
851	add	r7,r7,r12			@ h+=K256[i]
852	eor	r2,r2,r6			@ Ch(e,f,g)
853	eor	r0,r8,r8,ror#11
854	add	r7,r7,r2			@ h+=Ch(e,f,g)
855#if 12==31
856	and	r12,r12,#0xff
857	cmp	r12,#0xf2			@ done?
858#endif
859#if 12<15
860# if __ARM_ARCH__>=7
861	ldr	r2,[r1],#4			@ prefetch
862# else
863	ldrb	r2,[r1,#3]
864# endif
865	eor	r12,r8,r9			@ a^b, b^c in next round
866#else
867	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
868	eor	r12,r8,r9			@ a^b, b^c in next round
869	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
870#endif
871	eor	r0,r0,r8,ror#20	@ Sigma0(a)
872	and	r3,r3,r12			@ (b^c)&=(a^b)
873	add	r11,r11,r7			@ d+=h
874	eor	r3,r3,r9			@ Maj(a,b,c)
875	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
876	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
877#if __ARM_ARCH__>=7
878	@ ldr	r2,[r1],#4			@ 13
879# if 13==15
880	str	r1,[sp,#17*4]			@ make room for r1
881# endif
882	eor	r0,r11,r11,ror#5
883	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
884	eor	r0,r0,r11,ror#19	@ Sigma1(e)
885# ifndef __ARMEB__
886	rev	r2,r2
887# endif
888#else
889	@ ldrb	r2,[r1,#3]			@ 13
890	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
891	ldrb	r3,[r1,#2]
892	ldrb	r0,[r1,#1]
893	orr	r2,r2,r3,lsl#8
894	ldrb	r3,[r1],#4
895	orr	r2,r2,r0,lsl#16
896# if 13==15
897	str	r1,[sp,#17*4]			@ make room for r1
898# endif
899	eor	r0,r11,r11,ror#5
900	orr	r2,r2,r3,lsl#24
901	eor	r0,r0,r11,ror#19	@ Sigma1(e)
902#endif
903	ldr	r3,[r14],#4			@ *K256++
904	add	r6,r6,r2			@ h+=X[i]
905	str	r2,[sp,#13*4]
906	eor	r2,r4,r5
907	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
908	and	r2,r2,r11
909	add	r6,r6,r3			@ h+=K256[i]
910	eor	r2,r2,r5			@ Ch(e,f,g)
911	eor	r0,r7,r7,ror#11
912	add	r6,r6,r2			@ h+=Ch(e,f,g)
913#if 13==31
914	and	r3,r3,#0xff
915	cmp	r3,#0xf2			@ done?
916#endif
917#if 13<15
918# if __ARM_ARCH__>=7
919	ldr	r2,[r1],#4			@ prefetch
920# else
921	ldrb	r2,[r1,#3]
922# endif
923	eor	r3,r7,r8			@ a^b, b^c in next round
924#else
925	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
926	eor	r3,r7,r8			@ a^b, b^c in next round
927	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
928#endif
929	eor	r0,r0,r7,ror#20	@ Sigma0(a)
930	and	r12,r12,r3			@ (b^c)&=(a^b)
931	add	r10,r10,r6			@ d+=h
932	eor	r12,r12,r8			@ Maj(a,b,c)
933	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
934	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
935#if __ARM_ARCH__>=7
936	@ ldr	r2,[r1],#4			@ 14
937# if 14==15
938	str	r1,[sp,#17*4]			@ make room for r1
939# endif
940	eor	r0,r10,r10,ror#5
941	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
942	eor	r0,r0,r10,ror#19	@ Sigma1(e)
943# ifndef __ARMEB__
944	rev	r2,r2
945# endif
946#else
947	@ ldrb	r2,[r1,#3]			@ 14
948	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
949	ldrb	r12,[r1,#2]
950	ldrb	r0,[r1,#1]
951	orr	r2,r2,r12,lsl#8
952	ldrb	r12,[r1],#4
953	orr	r2,r2,r0,lsl#16
954# if 14==15
955	str	r1,[sp,#17*4]			@ make room for r1
956# endif
957	eor	r0,r10,r10,ror#5
958	orr	r2,r2,r12,lsl#24
959	eor	r0,r0,r10,ror#19	@ Sigma1(e)
960#endif
961	ldr	r12,[r14],#4			@ *K256++
962	add	r5,r5,r2			@ h+=X[i]
963	str	r2,[sp,#14*4]
964	eor	r2,r11,r4
965	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
966	and	r2,r2,r10
967	add	r5,r5,r12			@ h+=K256[i]
968	eor	r2,r2,r4			@ Ch(e,f,g)
969	eor	r0,r6,r6,ror#11
970	add	r5,r5,r2			@ h+=Ch(e,f,g)
971#if 14==31
972	and	r12,r12,#0xff
973	cmp	r12,#0xf2			@ done?
974#endif
975#if 14<15
976# if __ARM_ARCH__>=7
977	ldr	r2,[r1],#4			@ prefetch
978# else
979	ldrb	r2,[r1,#3]
980# endif
981	eor	r12,r6,r7			@ a^b, b^c in next round
982#else
983	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
984	eor	r12,r6,r7			@ a^b, b^c in next round
985	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
986#endif
987	eor	r0,r0,r6,ror#20	@ Sigma0(a)
988	and	r3,r3,r12			@ (b^c)&=(a^b)
989	add	r9,r9,r5			@ d+=h
990	eor	r3,r3,r7			@ Maj(a,b,c)
991	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
992	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
993#if __ARM_ARCH__>=7
994	@ ldr	r2,[r1],#4			@ 15
995# if 15==15
996	str	r1,[sp,#17*4]			@ make room for r1
997# endif
998	eor	r0,r9,r9,ror#5
999	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1000	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1001# ifndef __ARMEB__
1002	rev	r2,r2
1003# endif
1004#else
1005	@ ldrb	r2,[r1,#3]			@ 15
1006	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1007	ldrb	r3,[r1,#2]
1008	ldrb	r0,[r1,#1]
1009	orr	r2,r2,r3,lsl#8
1010	ldrb	r3,[r1],#4
1011	orr	r2,r2,r0,lsl#16
1012# if 15==15
1013	str	r1,[sp,#17*4]			@ make room for r1
1014# endif
1015	eor	r0,r9,r9,ror#5
1016	orr	r2,r2,r3,lsl#24
1017	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1018#endif
1019	ldr	r3,[r14],#4			@ *K256++
1020	add	r4,r4,r2			@ h+=X[i]
1021	str	r2,[sp,#15*4]
1022	eor	r2,r10,r11
1023	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1024	and	r2,r2,r9
1025	add	r4,r4,r3			@ h+=K256[i]
1026	eor	r2,r2,r11			@ Ch(e,f,g)
1027	eor	r0,r5,r5,ror#11
1028	add	r4,r4,r2			@ h+=Ch(e,f,g)
1029#if 15==31
1030	and	r3,r3,#0xff
1031	cmp	r3,#0xf2			@ done?
1032#endif
1033#if 15<15
1034# if __ARM_ARCH__>=7
1035	ldr	r2,[r1],#4			@ prefetch
1036# else
1037	ldrb	r2,[r1,#3]
1038# endif
1039	eor	r3,r5,r6			@ a^b, b^c in next round
1040#else
1041	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1042	eor	r3,r5,r6			@ a^b, b^c in next round
1043	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1044#endif
1045	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1046	and	r12,r12,r3			@ (b^c)&=(a^b)
1047	add	r8,r8,r4			@ d+=h
1048	eor	r12,r12,r6			@ Maj(a,b,c)
1049	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1050	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1051.Lrounds_16_xx:
1052	@ ldr	r2,[sp,#1*4]		@ 16
1053	@ ldr	r1,[sp,#14*4]
1054	mov	r0,r2,ror#7
1055	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1056	mov	r12,r1,ror#17
1057	eor	r0,r0,r2,ror#18
1058	eor	r12,r12,r1,ror#19
1059	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1060	ldr	r2,[sp,#0*4]
1061	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1062	ldr	r1,[sp,#9*4]
1063
1064	add	r12,r12,r0
1065	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1066	add	r2,r2,r12
1067	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1068	add	r2,r2,r1			@ X[i]
1069	ldr	r12,[r14],#4			@ *K256++
1070	add	r11,r11,r2			@ h+=X[i]
1071	str	r2,[sp,#0*4]
1072	eor	r2,r9,r10
1073	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1074	and	r2,r2,r8
1075	add	r11,r11,r12			@ h+=K256[i]
1076	eor	r2,r2,r10			@ Ch(e,f,g)
1077	eor	r0,r4,r4,ror#11
1078	add	r11,r11,r2			@ h+=Ch(e,f,g)
1079#if 16==31
1080	and	r12,r12,#0xff
1081	cmp	r12,#0xf2			@ done?
1082#endif
1083#if 16<15
1084# if __ARM_ARCH__>=7
1085	ldr	r2,[r1],#4			@ prefetch
1086# else
1087	ldrb	r2,[r1,#3]
1088# endif
1089	eor	r12,r4,r5			@ a^b, b^c in next round
1090#else
1091	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1092	eor	r12,r4,r5			@ a^b, b^c in next round
1093	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1094#endif
1095	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1096	and	r3,r3,r12			@ (b^c)&=(a^b)
1097	add	r7,r7,r11			@ d+=h
1098	eor	r3,r3,r5			@ Maj(a,b,c)
1099	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1100	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1101	@ ldr	r2,[sp,#2*4]		@ 17
1102	@ ldr	r1,[sp,#15*4]
1103	mov	r0,r2,ror#7
1104	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1105	mov	r3,r1,ror#17
1106	eor	r0,r0,r2,ror#18
1107	eor	r3,r3,r1,ror#19
1108	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1109	ldr	r2,[sp,#1*4]
1110	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1111	ldr	r1,[sp,#10*4]
1112
1113	add	r3,r3,r0
1114	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1115	add	r2,r2,r3
1116	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1117	add	r2,r2,r1			@ X[i]
1118	ldr	r3,[r14],#4			@ *K256++
1119	add	r10,r10,r2			@ h+=X[i]
1120	str	r2,[sp,#1*4]
1121	eor	r2,r8,r9
1122	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1123	and	r2,r2,r7
1124	add	r10,r10,r3			@ h+=K256[i]
1125	eor	r2,r2,r9			@ Ch(e,f,g)
1126	eor	r0,r11,r11,ror#11
1127	add	r10,r10,r2			@ h+=Ch(e,f,g)
1128#if 17==31
1129	and	r3,r3,#0xff
1130	cmp	r3,#0xf2			@ done?
1131#endif
1132#if 17<15
1133# if __ARM_ARCH__>=7
1134	ldr	r2,[r1],#4			@ prefetch
1135# else
1136	ldrb	r2,[r1,#3]
1137# endif
1138	eor	r3,r11,r4			@ a^b, b^c in next round
1139#else
1140	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1141	eor	r3,r11,r4			@ a^b, b^c in next round
1142	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1143#endif
1144	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1145	and	r12,r12,r3			@ (b^c)&=(a^b)
1146	add	r6,r6,r10			@ d+=h
1147	eor	r12,r12,r4			@ Maj(a,b,c)
1148	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1149	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1150	@ ldr	r2,[sp,#3*4]		@ 18
1151	@ ldr	r1,[sp,#0*4]
1152	mov	r0,r2,ror#7
1153	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1154	mov	r12,r1,ror#17
1155	eor	r0,r0,r2,ror#18
1156	eor	r12,r12,r1,ror#19
1157	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1158	ldr	r2,[sp,#2*4]
1159	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1160	ldr	r1,[sp,#11*4]
1161
1162	add	r12,r12,r0
1163	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1164	add	r2,r2,r12
1165	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1166	add	r2,r2,r1			@ X[i]
1167	ldr	r12,[r14],#4			@ *K256++
1168	add	r9,r9,r2			@ h+=X[i]
1169	str	r2,[sp,#2*4]
1170	eor	r2,r7,r8
1171	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1172	and	r2,r2,r6
1173	add	r9,r9,r12			@ h+=K256[i]
1174	eor	r2,r2,r8			@ Ch(e,f,g)
1175	eor	r0,r10,r10,ror#11
1176	add	r9,r9,r2			@ h+=Ch(e,f,g)
1177#if 18==31
1178	and	r12,r12,#0xff
1179	cmp	r12,#0xf2			@ done?
1180#endif
1181#if 18<15
1182# if __ARM_ARCH__>=7
1183	ldr	r2,[r1],#4			@ prefetch
1184# else
1185	ldrb	r2,[r1,#3]
1186# endif
1187	eor	r12,r10,r11			@ a^b, b^c in next round
1188#else
1189	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1190	eor	r12,r10,r11			@ a^b, b^c in next round
1191	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1192#endif
1193	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1194	and	r3,r3,r12			@ (b^c)&=(a^b)
1195	add	r5,r5,r9			@ d+=h
1196	eor	r3,r3,r11			@ Maj(a,b,c)
1197	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1198	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1199	@ ldr	r2,[sp,#4*4]		@ 19
1200	@ ldr	r1,[sp,#1*4]
1201	mov	r0,r2,ror#7
1202	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1203	mov	r3,r1,ror#17
1204	eor	r0,r0,r2,ror#18
1205	eor	r3,r3,r1,ror#19
1206	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1207	ldr	r2,[sp,#3*4]
1208	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1209	ldr	r1,[sp,#12*4]
1210
1211	add	r3,r3,r0
1212	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1213	add	r2,r2,r3
1214	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1215	add	r2,r2,r1			@ X[i]
1216	ldr	r3,[r14],#4			@ *K256++
1217	add	r8,r8,r2			@ h+=X[i]
1218	str	r2,[sp,#3*4]
1219	eor	r2,r6,r7
1220	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1221	and	r2,r2,r5
1222	add	r8,r8,r3			@ h+=K256[i]
1223	eor	r2,r2,r7			@ Ch(e,f,g)
1224	eor	r0,r9,r9,ror#11
1225	add	r8,r8,r2			@ h+=Ch(e,f,g)
1226#if 19==31
1227	and	r3,r3,#0xff
1228	cmp	r3,#0xf2			@ done?
1229#endif
1230#if 19<15
1231# if __ARM_ARCH__>=7
1232	ldr	r2,[r1],#4			@ prefetch
1233# else
1234	ldrb	r2,[r1,#3]
1235# endif
1236	eor	r3,r9,r10			@ a^b, b^c in next round
1237#else
1238	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1239	eor	r3,r9,r10			@ a^b, b^c in next round
1240	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1241#endif
1242	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1243	and	r12,r12,r3			@ (b^c)&=(a^b)
1244	add	r4,r4,r8			@ d+=h
1245	eor	r12,r12,r10			@ Maj(a,b,c)
1246	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1247	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1248	@ ldr	r2,[sp,#5*4]		@ 20
1249	@ ldr	r1,[sp,#2*4]
1250	mov	r0,r2,ror#7
1251	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1252	mov	r12,r1,ror#17
1253	eor	r0,r0,r2,ror#18
1254	eor	r12,r12,r1,ror#19
1255	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1256	ldr	r2,[sp,#4*4]
1257	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1258	ldr	r1,[sp,#13*4]
1259
1260	add	r12,r12,r0
1261	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1262	add	r2,r2,r12
1263	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1264	add	r2,r2,r1			@ X[i]
1265	ldr	r12,[r14],#4			@ *K256++
1266	add	r7,r7,r2			@ h+=X[i]
1267	str	r2,[sp,#4*4]
1268	eor	r2,r5,r6
1269	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1270	and	r2,r2,r4
1271	add	r7,r7,r12			@ h+=K256[i]
1272	eor	r2,r2,r6			@ Ch(e,f,g)
1273	eor	r0,r8,r8,ror#11
1274	add	r7,r7,r2			@ h+=Ch(e,f,g)
1275#if 20==31
1276	and	r12,r12,#0xff
1277	cmp	r12,#0xf2			@ done?
1278#endif
1279#if 20<15
1280# if __ARM_ARCH__>=7
1281	ldr	r2,[r1],#4			@ prefetch
1282# else
1283	ldrb	r2,[r1,#3]
1284# endif
1285	eor	r12,r8,r9			@ a^b, b^c in next round
1286#else
1287	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1288	eor	r12,r8,r9			@ a^b, b^c in next round
1289	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1290#endif
1291	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1292	and	r3,r3,r12			@ (b^c)&=(a^b)
1293	add	r11,r11,r7			@ d+=h
1294	eor	r3,r3,r9			@ Maj(a,b,c)
1295	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1296	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1297	@ ldr	r2,[sp,#6*4]		@ 21
1298	@ ldr	r1,[sp,#3*4]
1299	mov	r0,r2,ror#7
1300	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1301	mov	r3,r1,ror#17
1302	eor	r0,r0,r2,ror#18
1303	eor	r3,r3,r1,ror#19
1304	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1305	ldr	r2,[sp,#5*4]
1306	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1307	ldr	r1,[sp,#14*4]
1308
1309	add	r3,r3,r0
1310	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1311	add	r2,r2,r3
1312	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1313	add	r2,r2,r1			@ X[i]
1314	ldr	r3,[r14],#4			@ *K256++
1315	add	r6,r6,r2			@ h+=X[i]
1316	str	r2,[sp,#5*4]
1317	eor	r2,r4,r5
1318	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1319	and	r2,r2,r11
1320	add	r6,r6,r3			@ h+=K256[i]
1321	eor	r2,r2,r5			@ Ch(e,f,g)
1322	eor	r0,r7,r7,ror#11
1323	add	r6,r6,r2			@ h+=Ch(e,f,g)
1324#if 21==31
1325	and	r3,r3,#0xff
1326	cmp	r3,#0xf2			@ done?
1327#endif
1328#if 21<15
1329# if __ARM_ARCH__>=7
1330	ldr	r2,[r1],#4			@ prefetch
1331# else
1332	ldrb	r2,[r1,#3]
1333# endif
1334	eor	r3,r7,r8			@ a^b, b^c in next round
1335#else
1336	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1337	eor	r3,r7,r8			@ a^b, b^c in next round
1338	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1339#endif
1340	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1341	and	r12,r12,r3			@ (b^c)&=(a^b)
1342	add	r10,r10,r6			@ d+=h
1343	eor	r12,r12,r8			@ Maj(a,b,c)
1344	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1345	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1346	@ ldr	r2,[sp,#7*4]		@ 22
1347	@ ldr	r1,[sp,#4*4]
1348	mov	r0,r2,ror#7
1349	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1350	mov	r12,r1,ror#17
1351	eor	r0,r0,r2,ror#18
1352	eor	r12,r12,r1,ror#19
1353	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1354	ldr	r2,[sp,#6*4]
1355	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1356	ldr	r1,[sp,#15*4]
1357
1358	add	r12,r12,r0
1359	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1360	add	r2,r2,r12
1361	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1362	add	r2,r2,r1			@ X[i]
1363	ldr	r12,[r14],#4			@ *K256++
1364	add	r5,r5,r2			@ h+=X[i]
1365	str	r2,[sp,#6*4]
1366	eor	r2,r11,r4
1367	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1368	and	r2,r2,r10
1369	add	r5,r5,r12			@ h+=K256[i]
1370	eor	r2,r2,r4			@ Ch(e,f,g)
1371	eor	r0,r6,r6,ror#11
1372	add	r5,r5,r2			@ h+=Ch(e,f,g)
1373#if 22==31
1374	and	r12,r12,#0xff
1375	cmp	r12,#0xf2			@ done?
1376#endif
1377#if 22<15
1378# if __ARM_ARCH__>=7
1379	ldr	r2,[r1],#4			@ prefetch
1380# else
1381	ldrb	r2,[r1,#3]
1382# endif
1383	eor	r12,r6,r7			@ a^b, b^c in next round
1384#else
1385	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1386	eor	r12,r6,r7			@ a^b, b^c in next round
1387	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1388#endif
1389	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1390	and	r3,r3,r12			@ (b^c)&=(a^b)
1391	add	r9,r9,r5			@ d+=h
1392	eor	r3,r3,r7			@ Maj(a,b,c)
1393	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1394	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1395	@ ldr	r2,[sp,#8*4]		@ 23
1396	@ ldr	r1,[sp,#5*4]
1397	mov	r0,r2,ror#7
1398	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1399	mov	r3,r1,ror#17
1400	eor	r0,r0,r2,ror#18
1401	eor	r3,r3,r1,ror#19
1402	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1403	ldr	r2,[sp,#7*4]
1404	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1405	ldr	r1,[sp,#0*4]
1406
1407	add	r3,r3,r0
1408	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1409	add	r2,r2,r3
1410	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1411	add	r2,r2,r1			@ X[i]
1412	ldr	r3,[r14],#4			@ *K256++
1413	add	r4,r4,r2			@ h+=X[i]
1414	str	r2,[sp,#7*4]
1415	eor	r2,r10,r11
1416	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1417	and	r2,r2,r9
1418	add	r4,r4,r3			@ h+=K256[i]
1419	eor	r2,r2,r11			@ Ch(e,f,g)
1420	eor	r0,r5,r5,ror#11
1421	add	r4,r4,r2			@ h+=Ch(e,f,g)
1422#if 23==31
1423	and	r3,r3,#0xff
1424	cmp	r3,#0xf2			@ done?
1425#endif
1426#if 23<15
1427# if __ARM_ARCH__>=7
1428	ldr	r2,[r1],#4			@ prefetch
1429# else
1430	ldrb	r2,[r1,#3]
1431# endif
1432	eor	r3,r5,r6			@ a^b, b^c in next round
1433#else
1434	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1435	eor	r3,r5,r6			@ a^b, b^c in next round
1436	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1437#endif
1438	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1439	and	r12,r12,r3			@ (b^c)&=(a^b)
1440	add	r8,r8,r4			@ d+=h
1441	eor	r12,r12,r6			@ Maj(a,b,c)
1442	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1443	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1444	@ ldr	r2,[sp,#9*4]		@ 24
1445	@ ldr	r1,[sp,#6*4]
1446	mov	r0,r2,ror#7
1447	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1448	mov	r12,r1,ror#17
1449	eor	r0,r0,r2,ror#18
1450	eor	r12,r12,r1,ror#19
1451	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1452	ldr	r2,[sp,#8*4]
1453	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1454	ldr	r1,[sp,#1*4]
1455
1456	add	r12,r12,r0
1457	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1458	add	r2,r2,r12
1459	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1460	add	r2,r2,r1			@ X[i]
1461	ldr	r12,[r14],#4			@ *K256++
1462	add	r11,r11,r2			@ h+=X[i]
1463	str	r2,[sp,#8*4]
1464	eor	r2,r9,r10
1465	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1466	and	r2,r2,r8
1467	add	r11,r11,r12			@ h+=K256[i]
1468	eor	r2,r2,r10			@ Ch(e,f,g)
1469	eor	r0,r4,r4,ror#11
1470	add	r11,r11,r2			@ h+=Ch(e,f,g)
1471#if 24==31
1472	and	r12,r12,#0xff
1473	cmp	r12,#0xf2			@ done?
1474#endif
1475#if 24<15
1476# if __ARM_ARCH__>=7
1477	ldr	r2,[r1],#4			@ prefetch
1478# else
1479	ldrb	r2,[r1,#3]
1480# endif
1481	eor	r12,r4,r5			@ a^b, b^c in next round
1482#else
1483	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1484	eor	r12,r4,r5			@ a^b, b^c in next round
1485	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1486#endif
1487	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1488	and	r3,r3,r12			@ (b^c)&=(a^b)
1489	add	r7,r7,r11			@ d+=h
1490	eor	r3,r3,r5			@ Maj(a,b,c)
1491	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1492	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1493	@ ldr	r2,[sp,#10*4]		@ 25
1494	@ ldr	r1,[sp,#7*4]
1495	mov	r0,r2,ror#7
1496	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1497	mov	r3,r1,ror#17
1498	eor	r0,r0,r2,ror#18
1499	eor	r3,r3,r1,ror#19
1500	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1501	ldr	r2,[sp,#9*4]
1502	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1503	ldr	r1,[sp,#2*4]
1504
1505	add	r3,r3,r0
1506	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1507	add	r2,r2,r3
1508	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1509	add	r2,r2,r1			@ X[i]
1510	ldr	r3,[r14],#4			@ *K256++
1511	add	r10,r10,r2			@ h+=X[i]
1512	str	r2,[sp,#9*4]
1513	eor	r2,r8,r9
1514	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1515	and	r2,r2,r7
1516	add	r10,r10,r3			@ h+=K256[i]
1517	eor	r2,r2,r9			@ Ch(e,f,g)
1518	eor	r0,r11,r11,ror#11
1519	add	r10,r10,r2			@ h+=Ch(e,f,g)
1520#if 25==31
1521	and	r3,r3,#0xff
1522	cmp	r3,#0xf2			@ done?
1523#endif
1524#if 25<15
1525# if __ARM_ARCH__>=7
1526	ldr	r2,[r1],#4			@ prefetch
1527# else
1528	ldrb	r2,[r1,#3]
1529# endif
1530	eor	r3,r11,r4			@ a^b, b^c in next round
1531#else
1532	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1533	eor	r3,r11,r4			@ a^b, b^c in next round
1534	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1535#endif
1536	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1537	and	r12,r12,r3			@ (b^c)&=(a^b)
1538	add	r6,r6,r10			@ d+=h
1539	eor	r12,r12,r4			@ Maj(a,b,c)
1540	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1541	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1542	@ ldr	r2,[sp,#11*4]		@ 26
1543	@ ldr	r1,[sp,#8*4]
1544	mov	r0,r2,ror#7
1545	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1546	mov	r12,r1,ror#17
1547	eor	r0,r0,r2,ror#18
1548	eor	r12,r12,r1,ror#19
1549	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1550	ldr	r2,[sp,#10*4]
1551	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1552	ldr	r1,[sp,#3*4]
1553
1554	add	r12,r12,r0
1555	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1556	add	r2,r2,r12
1557	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1558	add	r2,r2,r1			@ X[i]
1559	ldr	r12,[r14],#4			@ *K256++
1560	add	r9,r9,r2			@ h+=X[i]
1561	str	r2,[sp,#10*4]
1562	eor	r2,r7,r8
1563	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1564	and	r2,r2,r6
1565	add	r9,r9,r12			@ h+=K256[i]
1566	eor	r2,r2,r8			@ Ch(e,f,g)
1567	eor	r0,r10,r10,ror#11
1568	add	r9,r9,r2			@ h+=Ch(e,f,g)
1569#if 26==31
1570	and	r12,r12,#0xff
1571	cmp	r12,#0xf2			@ done?
1572#endif
1573#if 26<15
1574# if __ARM_ARCH__>=7
1575	ldr	r2,[r1],#4			@ prefetch
1576# else
1577	ldrb	r2,[r1,#3]
1578# endif
1579	eor	r12,r10,r11			@ a^b, b^c in next round
1580#else
1581	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1582	eor	r12,r10,r11			@ a^b, b^c in next round
1583	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1584#endif
1585	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1586	and	r3,r3,r12			@ (b^c)&=(a^b)
1587	add	r5,r5,r9			@ d+=h
1588	eor	r3,r3,r11			@ Maj(a,b,c)
1589	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1590	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1591	@ ldr	r2,[sp,#12*4]		@ 27
1592	@ ldr	r1,[sp,#9*4]
1593	mov	r0,r2,ror#7
1594	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1595	mov	r3,r1,ror#17
1596	eor	r0,r0,r2,ror#18
1597	eor	r3,r3,r1,ror#19
1598	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1599	ldr	r2,[sp,#11*4]
1600	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1601	ldr	r1,[sp,#4*4]
1602
1603	add	r3,r3,r0
1604	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1605	add	r2,r2,r3
1606	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1607	add	r2,r2,r1			@ X[i]
1608	ldr	r3,[r14],#4			@ *K256++
1609	add	r8,r8,r2			@ h+=X[i]
1610	str	r2,[sp,#11*4]
1611	eor	r2,r6,r7
1612	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1613	and	r2,r2,r5
1614	add	r8,r8,r3			@ h+=K256[i]
1615	eor	r2,r2,r7			@ Ch(e,f,g)
1616	eor	r0,r9,r9,ror#11
1617	add	r8,r8,r2			@ h+=Ch(e,f,g)
1618#if 27==31
1619	and	r3,r3,#0xff
1620	cmp	r3,#0xf2			@ done?
1621#endif
1622#if 27<15
1623# if __ARM_ARCH__>=7
1624	ldr	r2,[r1],#4			@ prefetch
1625# else
1626	ldrb	r2,[r1,#3]
1627# endif
1628	eor	r3,r9,r10			@ a^b, b^c in next round
1629#else
1630	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1631	eor	r3,r9,r10			@ a^b, b^c in next round
1632	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1633#endif
1634	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1635	and	r12,r12,r3			@ (b^c)&=(a^b)
1636	add	r4,r4,r8			@ d+=h
1637	eor	r12,r12,r10			@ Maj(a,b,c)
1638	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1639	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1640	@ ldr	r2,[sp,#13*4]		@ 28
1641	@ ldr	r1,[sp,#10*4]
1642	mov	r0,r2,ror#7
1643	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1644	mov	r12,r1,ror#17
1645	eor	r0,r0,r2,ror#18
1646	eor	r12,r12,r1,ror#19
1647	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1648	ldr	r2,[sp,#12*4]
1649	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1650	ldr	r1,[sp,#5*4]
1651
1652	add	r12,r12,r0
1653	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1654	add	r2,r2,r12
1655	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1656	add	r2,r2,r1			@ X[i]
1657	ldr	r12,[r14],#4			@ *K256++
1658	add	r7,r7,r2			@ h+=X[i]
1659	str	r2,[sp,#12*4]
1660	eor	r2,r5,r6
1661	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1662	and	r2,r2,r4
1663	add	r7,r7,r12			@ h+=K256[i]
1664	eor	r2,r2,r6			@ Ch(e,f,g)
1665	eor	r0,r8,r8,ror#11
1666	add	r7,r7,r2			@ h+=Ch(e,f,g)
1667#if 28==31
1668	and	r12,r12,#0xff
1669	cmp	r12,#0xf2			@ done?
1670#endif
1671#if 28<15
1672# if __ARM_ARCH__>=7
1673	ldr	r2,[r1],#4			@ prefetch
1674# else
1675	ldrb	r2,[r1,#3]
1676# endif
1677	eor	r12,r8,r9			@ a^b, b^c in next round
1678#else
1679	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1680	eor	r12,r8,r9			@ a^b, b^c in next round
1681	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1682#endif
1683	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1684	and	r3,r3,r12			@ (b^c)&=(a^b)
1685	add	r11,r11,r7			@ d+=h
1686	eor	r3,r3,r9			@ Maj(a,b,c)
1687	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1688	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1689	@ ldr	r2,[sp,#14*4]		@ 29
1690	@ ldr	r1,[sp,#11*4]
1691	mov	r0,r2,ror#7
1692	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1693	mov	r3,r1,ror#17
1694	eor	r0,r0,r2,ror#18
1695	eor	r3,r3,r1,ror#19
1696	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1697	ldr	r2,[sp,#13*4]
1698	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1699	ldr	r1,[sp,#6*4]
1700
1701	add	r3,r3,r0
1702	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1703	add	r2,r2,r3
1704	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1705	add	r2,r2,r1			@ X[i]
1706	ldr	r3,[r14],#4			@ *K256++
1707	add	r6,r6,r2			@ h+=X[i]
1708	str	r2,[sp,#13*4]
1709	eor	r2,r4,r5
1710	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1711	and	r2,r2,r11
1712	add	r6,r6,r3			@ h+=K256[i]
1713	eor	r2,r2,r5			@ Ch(e,f,g)
1714	eor	r0,r7,r7,ror#11
1715	add	r6,r6,r2			@ h+=Ch(e,f,g)
1716#if 29==31
1717	and	r3,r3,#0xff
1718	cmp	r3,#0xf2			@ done?
1719#endif
1720#if 29<15
1721# if __ARM_ARCH__>=7
1722	ldr	r2,[r1],#4			@ prefetch
1723# else
1724	ldrb	r2,[r1,#3]
1725# endif
1726	eor	r3,r7,r8			@ a^b, b^c in next round
1727#else
1728	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1729	eor	r3,r7,r8			@ a^b, b^c in next round
1730	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1731#endif
1732	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1733	and	r12,r12,r3			@ (b^c)&=(a^b)
1734	add	r10,r10,r6			@ d+=h
1735	eor	r12,r12,r8			@ Maj(a,b,c)
1736	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1737	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1738	@ ldr	r2,[sp,#15*4]		@ 30
1739	@ ldr	r1,[sp,#12*4]
1740	mov	r0,r2,ror#7
1741	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1742	mov	r12,r1,ror#17
1743	eor	r0,r0,r2,ror#18
1744	eor	r12,r12,r1,ror#19
1745	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1746	ldr	r2,[sp,#14*4]
1747	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1748	ldr	r1,[sp,#7*4]
1749
1750	add	r12,r12,r0
1751	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1752	add	r2,r2,r12
1753	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1754	add	r2,r2,r1			@ X[i]
1755	ldr	r12,[r14],#4			@ *K256++
1756	add	r5,r5,r2			@ h+=X[i]
1757	str	r2,[sp,#14*4]
1758	eor	r2,r11,r4
1759	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1760	and	r2,r2,r10
1761	add	r5,r5,r12			@ h+=K256[i]
1762	eor	r2,r2,r4			@ Ch(e,f,g)
1763	eor	r0,r6,r6,ror#11
1764	add	r5,r5,r2			@ h+=Ch(e,f,g)
1765#if 30==31
1766	and	r12,r12,#0xff
1767	cmp	r12,#0xf2			@ done?
1768#endif
1769#if 30<15
1770# if __ARM_ARCH__>=7
1771	ldr	r2,[r1],#4			@ prefetch
1772# else
1773	ldrb	r2,[r1,#3]
1774# endif
1775	eor	r12,r6,r7			@ a^b, b^c in next round
1776#else
1777	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1778	eor	r12,r6,r7			@ a^b, b^c in next round
1779	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1780#endif
1781	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1782	and	r3,r3,r12			@ (b^c)&=(a^b)
1783	add	r9,r9,r5			@ d+=h
1784	eor	r3,r3,r7			@ Maj(a,b,c)
1785	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1786	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1787	@ ldr	r2,[sp,#0*4]		@ 31
1788	@ ldr	r1,[sp,#13*4]
1789	mov	r0,r2,ror#7
1790	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1791	mov	r3,r1,ror#17
1792	eor	r0,r0,r2,ror#18
1793	eor	r3,r3,r1,ror#19
1794	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1795	ldr	r2,[sp,#15*4]
1796	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1797	ldr	r1,[sp,#8*4]
1798
1799	add	r3,r3,r0
1800	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1801	add	r2,r2,r3
1802	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1803	add	r2,r2,r1			@ X[i]
1804	ldr	r3,[r14],#4			@ *K256++
1805	add	r4,r4,r2			@ h+=X[i]
1806	str	r2,[sp,#15*4]
1807	eor	r2,r10,r11
1808	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1809	and	r2,r2,r9
1810	add	r4,r4,r3			@ h+=K256[i]
1811	eor	r2,r2,r11			@ Ch(e,f,g)
1812	eor	r0,r5,r5,ror#11
1813	add	r4,r4,r2			@ h+=Ch(e,f,g)
1814#if 31==31
1815	and	r3,r3,#0xff
1816	cmp	r3,#0xf2			@ done?
1817#endif
1818#if 31<15
1819# if __ARM_ARCH__>=7
1820	ldr	r2,[r1],#4			@ prefetch
1821# else
1822	ldrb	r2,[r1,#3]
1823# endif
1824	eor	r3,r5,r6			@ a^b, b^c in next round
1825#else
1826	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1827	eor	r3,r5,r6			@ a^b, b^c in next round
1828	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1829#endif
1830	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1831	and	r12,r12,r3			@ (b^c)&=(a^b)
1832	add	r8,r8,r4			@ d+=h
1833	eor	r12,r12,r6			@ Maj(a,b,c)
1834	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1835	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1836#if __ARM_ARCH__>=7
1837	ite	eq			@ Thumb2 thing, sanity check in ARM
1838#endif
1839	ldreq	r3,[sp,#16*4]		@ pull ctx
1840	bne	.Lrounds_16_xx
1841
1842	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1843	ldr	r0,[r3,#0]
1844	ldr	r2,[r3,#4]
1845	ldr	r12,[r3,#8]
1846	add	r4,r4,r0
1847	ldr	r0,[r3,#12]
1848	add	r5,r5,r2
1849	ldr	r2,[r3,#16]
1850	add	r6,r6,r12
1851	ldr	r12,[r3,#20]
1852	add	r7,r7,r0
1853	ldr	r0,[r3,#24]
1854	add	r8,r8,r2
1855	ldr	r2,[r3,#28]
1856	add	r9,r9,r12
1857	ldr	r1,[sp,#17*4]		@ pull inp
1858	ldr	r12,[sp,#18*4]		@ pull inp+len
1859	add	r10,r10,r0
1860	add	r11,r11,r2
1861	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1862	cmp	r1,r12
1863	sub	r14,r14,#256	@ rewind Ktbl
1864	bne	.Loop
1865
1866	add	sp,sp,#19*4	@ destroy frame
1867#if __ARM_ARCH__>=5
1868	ldmia	sp!,{r4-r11,pc}
1869#else
1870	ldmia	sp!,{r4-r11,lr}
1871	tst	lr,#1
1872	moveq	pc,lr			@ be binary compatible with V4, yet
1873	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1874#endif
1875.size	sha256_block_data_order,.-sha256_block_data_order
@ End of the integer-only SHA256 routine. Everything below is the NEON
@ code path, assembled only when the build permits ARMv7 or later
@ (__ARM_MAX_ARCH__>=7); the .arch/.fpu overrides let the assembler
@ accept NEON instructions regardless of the base target.
1876#if __ARM_MAX_ARCH__>=7
1877.arch	armv7-a
1878.fpu	neon
1879
@ void sha256_block_data_order_neon(u32 *ctx, const u8 *inp, size_t blocks)
@ NEON implementation entry point (same interface as the integer version:
@ r0 = state, r1 = input, r2 = number of 64-byte blocks).
1880.global	sha256_block_data_order_neon
1881.type	sha256_block_data_order_neon,%function
1882.align	4
1883sha256_block_data_order_neon:
1884.LNEON:
1885	stmdb	sp!,{r4-r12,lr}
1886
1887	sub	r11,sp,#16*4+16
1888	adrl	r14,K256
1889	bic	r11,r11,#15		@ align for 128-bit stores
1890	mov	r12,sp
1891	mov	sp,r11			@ alloca
1892	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1893
1894	vld1.8		{q0},[r1]!
1895	vld1.8		{q1},[r1]!
1896	vld1.8		{q2},[r1]!
1897	vld1.8		{q3},[r1]!
1898	vld1.32		{q8},[r14,:128]!
1899	vld1.32		{q9},[r14,:128]!
1900	vld1.32		{q10},[r14,:128]!
1901	vld1.32		{q11},[r14,:128]!
1902	vrev32.8	q0,q0		@ yes, even on
1903	str		r0,[sp,#64]
1904	vrev32.8	q1,q1		@ big-endian
1905	str		r1,[sp,#68]
1906	mov		r1,sp
1907	vrev32.8	q2,q2
1908	str		r2,[sp,#72]
1909	vrev32.8	q3,q3
1910	str		r12,[sp,#76]		@ save original sp
1911	vadd.i32	q8,q8,q0
1912	vadd.i32	q9,q9,q1
1913	vst1.32		{q8},[r1,:128]!
1914	vadd.i32	q10,q10,q2
1915	vst1.32		{q9},[r1,:128]!
1916	vadd.i32	q11,q11,q3
1917	vst1.32		{q10},[r1,:128]!
1918	vst1.32		{q11},[r1,:128]!
1919
1920	ldmia		r0,{r4-r11}
1921	sub		r1,r1,#64
1922	ldr		r2,[sp,#0]
1923	eor		r12,r12,r12
1924	eor		r3,r5,r6
1925	b		.L_00_48
1926
1927.align	4
1928.L_00_48:
1929	vext.8	q8,q0,q1,#4
1930	add	r11,r11,r2
1931	eor	r2,r9,r10
1932	eor	r0,r8,r8,ror#5
1933	vext.8	q9,q2,q3,#4
1934	add	r4,r4,r12
1935	and	r2,r2,r8
1936	eor	r12,r0,r8,ror#19
1937	vshr.u32	q10,q8,#7
1938	eor	r0,r4,r4,ror#11
1939	eor	r2,r2,r10
1940	vadd.i32	q0,q0,q9
1941	add	r11,r11,r12,ror#6
1942	eor	r12,r4,r5
1943	vshr.u32	q9,q8,#3
1944	eor	r0,r0,r4,ror#20
1945	add	r11,r11,r2
1946	vsli.32	q10,q8,#25
1947	ldr	r2,[sp,#4]
1948	and	r3,r3,r12
1949	vshr.u32	q11,q8,#18
1950	add	r7,r7,r11
1951	add	r11,r11,r0,ror#2
1952	eor	r3,r3,r5
1953	veor	q9,q9,q10
1954	add	r10,r10,r2
1955	vsli.32	q11,q8,#14
1956	eor	r2,r8,r9
1957	eor	r0,r7,r7,ror#5
1958	vshr.u32	d24,d7,#17
1959	add	r11,r11,r3
1960	and	r2,r2,r7
1961	veor	q9,q9,q11
1962	eor	r3,r0,r7,ror#19
1963	eor	r0,r11,r11,ror#11
1964	vsli.32	d24,d7,#15
1965	eor	r2,r2,r9
1966	add	r10,r10,r3,ror#6
1967	vshr.u32	d25,d7,#10
1968	eor	r3,r11,r4
1969	eor	r0,r0,r11,ror#20
1970	vadd.i32	q0,q0,q9
1971	add	r10,r10,r2
1972	ldr	r2,[sp,#8]
1973	veor	d25,d25,d24
1974	and	r12,r12,r3
1975	add	r6,r6,r10
1976	vshr.u32	d24,d7,#19
1977	add	r10,r10,r0,ror#2
1978	eor	r12,r12,r4
1979	vsli.32	d24,d7,#13
1980	add	r9,r9,r2
1981	eor	r2,r7,r8
1982	veor	d25,d25,d24
1983	eor	r0,r6,r6,ror#5
1984	add	r10,r10,r12
1985	vadd.i32	d0,d0,d25
1986	and	r2,r2,r6
1987	eor	r12,r0,r6,ror#19
1988	vshr.u32	d24,d0,#17
1989	eor	r0,r10,r10,ror#11
1990	eor	r2,r2,r8
1991	vsli.32	d24,d0,#15
1992	add	r9,r9,r12,ror#6
1993	eor	r12,r10,r11
1994	vshr.u32	d25,d0,#10
1995	eor	r0,r0,r10,ror#20
1996	add	r9,r9,r2
1997	veor	d25,d25,d24
1998	ldr	r2,[sp,#12]
1999	and	r3,r3,r12
2000	vshr.u32	d24,d0,#19
2001	add	r5,r5,r9
2002	add	r9,r9,r0,ror#2
2003	eor	r3,r3,r11
2004	vld1.32	{q8},[r14,:128]!
2005	add	r8,r8,r2
2006	vsli.32	d24,d0,#13
2007	eor	r2,r6,r7
2008	eor	r0,r5,r5,ror#5
2009	veor	d25,d25,d24
2010	add	r9,r9,r3
2011	and	r2,r2,r5
2012	vadd.i32	d1,d1,d25
2013	eor	r3,r0,r5,ror#19
2014	eor	r0,r9,r9,ror#11
2015	vadd.i32	q8,q8,q0
2016	eor	r2,r2,r7
2017	add	r8,r8,r3,ror#6
2018	eor	r3,r9,r10
2019	eor	r0,r0,r9,ror#20
2020	add	r8,r8,r2
2021	ldr	r2,[sp,#16]
2022	and	r12,r12,r3
2023	add	r4,r4,r8
2024	vst1.32	{q8},[r1,:128]!
2025	add	r8,r8,r0,ror#2
2026	eor	r12,r12,r10
2027	vext.8	q8,q1,q2,#4
2028	add	r7,r7,r2
2029	eor	r2,r5,r6
2030	eor	r0,r4,r4,ror#5
2031	vext.8	q9,q3,q0,#4
2032	add	r8,r8,r12
2033	and	r2,r2,r4
2034	eor	r12,r0,r4,ror#19
2035	vshr.u32	q10,q8,#7
2036	eor	r0,r8,r8,ror#11
2037	eor	r2,r2,r6
2038	vadd.i32	q1,q1,q9
2039	add	r7,r7,r12,ror#6
2040	eor	r12,r8,r9
2041	vshr.u32	q9,q8,#3
2042	eor	r0,r0,r8,ror#20
2043	add	r7,r7,r2
2044	vsli.32	q10,q8,#25
2045	ldr	r2,[sp,#20]
2046	and	r3,r3,r12
2047	vshr.u32	q11,q8,#18
2048	add	r11,r11,r7
2049	add	r7,r7,r0,ror#2
2050	eor	r3,r3,r9
2051	veor	q9,q9,q10
2052	add	r6,r6,r2
2053	vsli.32	q11,q8,#14
2054	eor	r2,r4,r5
2055	eor	r0,r11,r11,ror#5
2056	vshr.u32	d24,d1,#17
2057	add	r7,r7,r3
2058	and	r2,r2,r11
2059	veor	q9,q9,q11
2060	eor	r3,r0,r11,ror#19
2061	eor	r0,r7,r7,ror#11
2062	vsli.32	d24,d1,#15
2063	eor	r2,r2,r5
2064	add	r6,r6,r3,ror#6
2065	vshr.u32	d25,d1,#10
2066	eor	r3,r7,r8
2067	eor	r0,r0,r7,ror#20
2068	vadd.i32	q1,q1,q9
2069	add	r6,r6,r2
2070	ldr	r2,[sp,#24]
2071	veor	d25,d25,d24
2072	and	r12,r12,r3
2073	add	r10,r10,r6
2074	vshr.u32	d24,d1,#19
2075	add	r6,r6,r0,ror#2
2076	eor	r12,r12,r8
2077	vsli.32	d24,d1,#13
2078	add	r5,r5,r2
2079	eor	r2,r11,r4
2080	veor	d25,d25,d24
2081	eor	r0,r10,r10,ror#5
2082	add	r6,r6,r12
2083	vadd.i32	d2,d2,d25
2084	and	r2,r2,r10
2085	eor	r12,r0,r10,ror#19
2086	vshr.u32	d24,d2,#17
2087	eor	r0,r6,r6,ror#11
2088	eor	r2,r2,r4
2089	vsli.32	d24,d2,#15
2090	add	r5,r5,r12,ror#6
2091	eor	r12,r6,r7
2092	vshr.u32	d25,d2,#10
2093	eor	r0,r0,r6,ror#20
2094	add	r5,r5,r2
2095	veor	d25,d25,d24
2096	ldr	r2,[sp,#28]
2097	and	r3,r3,r12
2098	vshr.u32	d24,d2,#19
2099	add	r9,r9,r5
2100	add	r5,r5,r0,ror#2
2101	eor	r3,r3,r7
2102	vld1.32	{q8},[r14,:128]!
2103	add	r4,r4,r2
2104	vsli.32	d24,d2,#13
2105	eor	r2,r10,r11
2106	eor	r0,r9,r9,ror#5
2107	veor	d25,d25,d24
2108	add	r5,r5,r3
2109	and	r2,r2,r9
2110	vadd.i32	d3,d3,d25
2111	eor	r3,r0,r9,ror#19
2112	eor	r0,r5,r5,ror#11
2113	vadd.i32	q8,q8,q1
2114	eor	r2,r2,r11
2115	add	r4,r4,r3,ror#6
2116	eor	r3,r5,r6
2117	eor	r0,r0,r5,ror#20
2118	add	r4,r4,r2
2119	ldr	r2,[sp,#32]
2120	and	r12,r12,r3
2121	add	r8,r8,r4
2122	vst1.32	{q8},[r1,:128]!
2123	add	r4,r4,r0,ror#2
2124	eor	r12,r12,r6
2125	vext.8	q8,q2,q3,#4
2126	add	r11,r11,r2
2127	eor	r2,r9,r10
2128	eor	r0,r8,r8,ror#5
2129	vext.8	q9,q0,q1,#4
2130	add	r4,r4,r12
2131	and	r2,r2,r8
2132	eor	r12,r0,r8,ror#19
2133	vshr.u32	q10,q8,#7
2134	eor	r0,r4,r4,ror#11
2135	eor	r2,r2,r10
2136	vadd.i32	q2,q2,q9
2137	add	r11,r11,r12,ror#6
2138	eor	r12,r4,r5
2139	vshr.u32	q9,q8,#3
2140	eor	r0,r0,r4,ror#20
2141	add	r11,r11,r2
2142	vsli.32	q10,q8,#25
2143	ldr	r2,[sp,#36]
2144	and	r3,r3,r12
2145	vshr.u32	q11,q8,#18
2146	add	r7,r7,r11
2147	add	r11,r11,r0,ror#2
2148	eor	r3,r3,r5
2149	veor	q9,q9,q10
2150	add	r10,r10,r2
2151	vsli.32	q11,q8,#14
2152	eor	r2,r8,r9
2153	eor	r0,r7,r7,ror#5
2154	vshr.u32	d24,d3,#17
2155	add	r11,r11,r3
2156	and	r2,r2,r7
2157	veor	q9,q9,q11
2158	eor	r3,r0,r7,ror#19
2159	eor	r0,r11,r11,ror#11
2160	vsli.32	d24,d3,#15
2161	eor	r2,r2,r9
2162	add	r10,r10,r3,ror#6
2163	vshr.u32	d25,d3,#10
2164	eor	r3,r11,r4
2165	eor	r0,r0,r11,ror#20
2166	vadd.i32	q2,q2,q9
2167	add	r10,r10,r2
2168	ldr	r2,[sp,#40]
2169	veor	d25,d25,d24
2170	and	r12,r12,r3
2171	add	r6,r6,r10
2172	vshr.u32	d24,d3,#19
2173	add	r10,r10,r0,ror#2
2174	eor	r12,r12,r4
2175	vsli.32	d24,d3,#13
2176	add	r9,r9,r2
2177	eor	r2,r7,r8
2178	veor	d25,d25,d24
2179	eor	r0,r6,r6,ror#5
2180	add	r10,r10,r12
2181	vadd.i32	d4,d4,d25
2182	and	r2,r2,r6
2183	eor	r12,r0,r6,ror#19
2184	vshr.u32	d24,d4,#17
2185	eor	r0,r10,r10,ror#11
2186	eor	r2,r2,r8
2187	vsli.32	d24,d4,#15
2188	add	r9,r9,r12,ror#6
2189	eor	r12,r10,r11
2190	vshr.u32	d25,d4,#10
2191	eor	r0,r0,r10,ror#20
2192	add	r9,r9,r2
2193	veor	d25,d25,d24
2194	ldr	r2,[sp,#44]
2195	and	r3,r3,r12
2196	vshr.u32	d24,d4,#19
2197	add	r5,r5,r9
2198	add	r9,r9,r0,ror#2
2199	eor	r3,r3,r11
2200	vld1.32	{q8},[r14,:128]!
2201	add	r8,r8,r2
2202	vsli.32	d24,d4,#13
2203	eor	r2,r6,r7
2204	eor	r0,r5,r5,ror#5
2205	veor	d25,d25,d24
2206	add	r9,r9,r3
2207	and	r2,r2,r5
2208	vadd.i32	d5,d5,d25
2209	eor	r3,r0,r5,ror#19
2210	eor	r0,r9,r9,ror#11
2211	vadd.i32	q8,q8,q2
2212	eor	r2,r2,r7
2213	add	r8,r8,r3,ror#6
2214	eor	r3,r9,r10
2215	eor	r0,r0,r9,ror#20
2216	add	r8,r8,r2
2217	ldr	r2,[sp,#48]
2218	and	r12,r12,r3
2219	add	r4,r4,r8
2220	vst1.32	{q8},[r1,:128]!
2221	add	r8,r8,r0,ror#2
2222	eor	r12,r12,r10
2223	vext.8	q8,q3,q0,#4
2224	add	r7,r7,r2
2225	eor	r2,r5,r6
2226	eor	r0,r4,r4,ror#5
2227	vext.8	q9,q1,q2,#4
2228	add	r8,r8,r12
2229	and	r2,r2,r4
2230	eor	r12,r0,r4,ror#19
2231	vshr.u32	q10,q8,#7
2232	eor	r0,r8,r8,ror#11
2233	eor	r2,r2,r6
2234	vadd.i32	q3,q3,q9
2235	add	r7,r7,r12,ror#6
2236	eor	r12,r8,r9
2237	vshr.u32	q9,q8,#3
2238	eor	r0,r0,r8,ror#20
2239	add	r7,r7,r2
2240	vsli.32	q10,q8,#25
2241	ldr	r2,[sp,#52]
2242	and	r3,r3,r12
2243	vshr.u32	q11,q8,#18
2244	add	r11,r11,r7
2245	add	r7,r7,r0,ror#2
2246	eor	r3,r3,r9
2247	veor	q9,q9,q10
2248	add	r6,r6,r2
2249	vsli.32	q11,q8,#14
2250	eor	r2,r4,r5
2251	eor	r0,r11,r11,ror#5
2252	vshr.u32	d24,d5,#17
2253	add	r7,r7,r3
2254	and	r2,r2,r11
2255	veor	q9,q9,q11
2256	eor	r3,r0,r11,ror#19
2257	eor	r0,r7,r7,ror#11
2258	vsli.32	d24,d5,#15
2259	eor	r2,r2,r5
2260	add	r6,r6,r3,ror#6
2261	vshr.u32	d25,d5,#10
2262	eor	r3,r7,r8
2263	eor	r0,r0,r7,ror#20
2264	vadd.i32	q3,q3,q9
2265	add	r6,r6,r2
2266	ldr	r2,[sp,#56]
2267	veor	d25,d25,d24
2268	and	r12,r12,r3
2269	add	r10,r10,r6
2270	vshr.u32	d24,d5,#19
2271	add	r6,r6,r0,ror#2
2272	eor	r12,r12,r8
2273	vsli.32	d24,d5,#13
2274	add	r5,r5,r2
2275	eor	r2,r11,r4
2276	veor	d25,d25,d24
2277	eor	r0,r10,r10,ror#5
2278	add	r6,r6,r12
2279	vadd.i32	d6,d6,d25
2280	and	r2,r2,r10
2281	eor	r12,r0,r10,ror#19
2282	vshr.u32	d24,d6,#17
2283	eor	r0,r6,r6,ror#11
2284	eor	r2,r2,r4
2285	vsli.32	d24,d6,#15
2286	add	r5,r5,r12,ror#6
2287	eor	r12,r6,r7
2288	vshr.u32	d25,d6,#10
2289	eor	r0,r0,r6,ror#20
2290	add	r5,r5,r2
2291	veor	d25,d25,d24
2292	ldr	r2,[sp,#60]
2293	and	r3,r3,r12
2294	vshr.u32	d24,d6,#19
2295	add	r9,r9,r5
2296	add	r5,r5,r0,ror#2
2297	eor	r3,r3,r7
2298	vld1.32	{q8},[r14,:128]!
2299	add	r4,r4,r2
2300	vsli.32	d24,d6,#13
2301	eor	r2,r10,r11
2302	eor	r0,r9,r9,ror#5
2303	veor	d25,d25,d24
2304	add	r5,r5,r3
2305	and	r2,r2,r9
2306	vadd.i32	d7,d7,d25
2307	eor	r3,r0,r9,ror#19
2308	eor	r0,r5,r5,ror#11
2309	vadd.i32	q8,q8,q3
2310	eor	r2,r2,r11
2311	add	r4,r4,r3,ror#6
2312	eor	r3,r5,r6
2313	eor	r0,r0,r5,ror#20
2314	add	r4,r4,r2
2315	ldr	r2,[r14]
2316	and	r12,r12,r3
2317	add	r8,r8,r4
2318	vst1.32	{q8},[r1,:128]!
2319	add	r4,r4,r0,ror#2
2320	eor	r12,r12,r6
2321	teq	r2,#0				@ check for K256 terminator
2322	ldr	r2,[sp,#0]
2323	sub	r1,r1,#64
2324	bne	.L_00_48
2325
2326	ldr		r1,[sp,#68]
2327	ldr		r0,[sp,#72]
2328	sub		r14,r14,#256	@ rewind r14
2329	teq		r1,r0
2330	it		eq
2331	subeq		r1,r1,#64		@ avoid SEGV
2332	vld1.8		{q0},[r1]!		@ load next input block
2333	vld1.8		{q1},[r1]!
2334	vld1.8		{q2},[r1]!
2335	vld1.8		{q3},[r1]!
2336	it		ne
2337	strne		r1,[sp,#68]
2338	mov		r1,sp
2339	add	r11,r11,r2
2340	eor	r2,r9,r10
2341	eor	r0,r8,r8,ror#5
2342	add	r4,r4,r12
2343	vld1.32	{q8},[r14,:128]!
2344	and	r2,r2,r8
2345	eor	r12,r0,r8,ror#19
2346	eor	r0,r4,r4,ror#11
2347	eor	r2,r2,r10
2348	vrev32.8	q0,q0
2349	add	r11,r11,r12,ror#6
2350	eor	r12,r4,r5
2351	eor	r0,r0,r4,ror#20
2352	add	r11,r11,r2
2353	vadd.i32	q8,q8,q0
2354	ldr	r2,[sp,#4]
2355	and	r3,r3,r12
2356	add	r7,r7,r11
2357	add	r11,r11,r0,ror#2
2358	eor	r3,r3,r5
2359	add	r10,r10,r2
2360	eor	r2,r8,r9
2361	eor	r0,r7,r7,ror#5
2362	add	r11,r11,r3
2363	and	r2,r2,r7
2364	eor	r3,r0,r7,ror#19
2365	eor	r0,r11,r11,ror#11
2366	eor	r2,r2,r9
2367	add	r10,r10,r3,ror#6
2368	eor	r3,r11,r4
2369	eor	r0,r0,r11,ror#20
2370	add	r10,r10,r2
2371	ldr	r2,[sp,#8]
2372	and	r12,r12,r3
2373	add	r6,r6,r10
2374	add	r10,r10,r0,ror#2
2375	eor	r12,r12,r4
2376	add	r9,r9,r2
2377	eor	r2,r7,r8
2378	eor	r0,r6,r6,ror#5
2379	add	r10,r10,r12
2380	and	r2,r2,r6
2381	eor	r12,r0,r6,ror#19
2382	eor	r0,r10,r10,ror#11
2383	eor	r2,r2,r8
2384	add	r9,r9,r12,ror#6
2385	eor	r12,r10,r11
2386	eor	r0,r0,r10,ror#20
2387	add	r9,r9,r2
2388	ldr	r2,[sp,#12]
2389	and	r3,r3,r12
2390	add	r5,r5,r9
2391	add	r9,r9,r0,ror#2
2392	eor	r3,r3,r11
2393	add	r8,r8,r2
2394	eor	r2,r6,r7
2395	eor	r0,r5,r5,ror#5
2396	add	r9,r9,r3
2397	and	r2,r2,r5
2398	eor	r3,r0,r5,ror#19
2399	eor	r0,r9,r9,ror#11
2400	eor	r2,r2,r7
2401	add	r8,r8,r3,ror#6
2402	eor	r3,r9,r10
2403	eor	r0,r0,r9,ror#20
2404	add	r8,r8,r2
2405	ldr	r2,[sp,#16]
2406	and	r12,r12,r3
2407	add	r4,r4,r8
2408	add	r8,r8,r0,ror#2
2409	eor	r12,r12,r10
2410	vst1.32	{q8},[r1,:128]!
2411	add	r7,r7,r2
2412	eor	r2,r5,r6
2413	eor	r0,r4,r4,ror#5
2414	add	r8,r8,r12
2415	vld1.32	{q8},[r14,:128]!
2416	and	r2,r2,r4
2417	eor	r12,r0,r4,ror#19
2418	eor	r0,r8,r8,ror#11
2419	eor	r2,r2,r6
2420	vrev32.8	q1,q1
2421	add	r7,r7,r12,ror#6
2422	eor	r12,r8,r9
2423	eor	r0,r0,r8,ror#20
2424	add	r7,r7,r2
2425	vadd.i32	q8,q8,q1
2426	ldr	r2,[sp,#20]
2427	and	r3,r3,r12
2428	add	r11,r11,r7
2429	add	r7,r7,r0,ror#2
2430	eor	r3,r3,r9
2431	add	r6,r6,r2
2432	eor	r2,r4,r5
2433	eor	r0,r11,r11,ror#5
2434	add	r7,r7,r3
2435	and	r2,r2,r11
2436	eor	r3,r0,r11,ror#19
2437	eor	r0,r7,r7,ror#11
2438	eor	r2,r2,r5
2439	add	r6,r6,r3,ror#6
2440	eor	r3,r7,r8
2441	eor	r0,r0,r7,ror#20
2442	add	r6,r6,r2
2443	ldr	r2,[sp,#24]
2444	and	r12,r12,r3
2445	add	r10,r10,r6
2446	add	r6,r6,r0,ror#2
2447	eor	r12,r12,r8
2448	add	r5,r5,r2
2449	eor	r2,r11,r4
2450	eor	r0,r10,r10,ror#5
2451	add	r6,r6,r12
2452	and	r2,r2,r10
2453	eor	r12,r0,r10,ror#19
2454	eor	r0,r6,r6,ror#11
2455	eor	r2,r2,r4
2456	add	r5,r5,r12,ror#6
2457	eor	r12,r6,r7
2458	eor	r0,r0,r6,ror#20
2459	add	r5,r5,r2
2460	ldr	r2,[sp,#28]
2461	and	r3,r3,r12
2462	add	r9,r9,r5
2463	add	r5,r5,r0,ror#2
2464	eor	r3,r3,r7
2465	add	r4,r4,r2
2466	eor	r2,r10,r11
2467	eor	r0,r9,r9,ror#5
2468	add	r5,r5,r3
2469	and	r2,r2,r9
2470	eor	r3,r0,r9,ror#19
2471	eor	r0,r5,r5,ror#11
2472	eor	r2,r2,r11
2473	add	r4,r4,r3,ror#6
2474	eor	r3,r5,r6
2475	eor	r0,r0,r5,ror#20
2476	add	r4,r4,r2
2477	ldr	r2,[sp,#32]
2478	and	r12,r12,r3
2479	add	r8,r8,r4
2480	add	r4,r4,r0,ror#2
2481	eor	r12,r12,r6
2482	vst1.32	{q8},[r1,:128]!
2483	add	r11,r11,r2
2484	eor	r2,r9,r10
2485	eor	r0,r8,r8,ror#5
2486	add	r4,r4,r12
2487	vld1.32	{q8},[r14,:128]!
2488	and	r2,r2,r8
2489	eor	r12,r0,r8,ror#19
2490	eor	r0,r4,r4,ror#11
2491	eor	r2,r2,r10
2492	vrev32.8	q2,q2
2493	add	r11,r11,r12,ror#6
2494	eor	r12,r4,r5
2495	eor	r0,r0,r4,ror#20
2496	add	r11,r11,r2
2497	vadd.i32	q8,q8,q2
2498	ldr	r2,[sp,#36]
2499	and	r3,r3,r12
2500	add	r7,r7,r11
2501	add	r11,r11,r0,ror#2
2502	eor	r3,r3,r5
2503	add	r10,r10,r2
2504	eor	r2,r8,r9
2505	eor	r0,r7,r7,ror#5
2506	add	r11,r11,r3
2507	and	r2,r2,r7
2508	eor	r3,r0,r7,ror#19
2509	eor	r0,r11,r11,ror#11
2510	eor	r2,r2,r9
2511	add	r10,r10,r3,ror#6
2512	eor	r3,r11,r4
2513	eor	r0,r0,r11,ror#20
2514	add	r10,r10,r2
2515	ldr	r2,[sp,#40]
2516	and	r12,r12,r3
2517	add	r6,r6,r10
2518	add	r10,r10,r0,ror#2
2519	eor	r12,r12,r4
2520	add	r9,r9,r2
2521	eor	r2,r7,r8
2522	eor	r0,r6,r6,ror#5
2523	add	r10,r10,r12
2524	and	r2,r2,r6
2525	eor	r12,r0,r6,ror#19
2526	eor	r0,r10,r10,ror#11
2527	eor	r2,r2,r8
2528	add	r9,r9,r12,ror#6
2529	eor	r12,r10,r11
2530	eor	r0,r0,r10,ror#20
2531	add	r9,r9,r2
2532	ldr	r2,[sp,#44]
2533	and	r3,r3,r12
2534	add	r5,r5,r9
2535	add	r9,r9,r0,ror#2
2536	eor	r3,r3,r11
2537	add	r8,r8,r2
2538	eor	r2,r6,r7
2539	eor	r0,r5,r5,ror#5
2540	add	r9,r9,r3
2541	and	r2,r2,r5
2542	eor	r3,r0,r5,ror#19
2543	eor	r0,r9,r9,ror#11
2544	eor	r2,r2,r7
2545	add	r8,r8,r3,ror#6
2546	eor	r3,r9,r10
2547	eor	r0,r0,r9,ror#20
2548	add	r8,r8,r2
2549	ldr	r2,[sp,#48]
2550	and	r12,r12,r3
2551	add	r4,r4,r8
2552	add	r8,r8,r0,ror#2
2553	eor	r12,r12,r10
2554	vst1.32	{q8},[r1,:128]!
2555	add	r7,r7,r2
2556	eor	r2,r5,r6
2557	eor	r0,r4,r4,ror#5
2558	add	r8,r8,r12
2559	vld1.32	{q8},[r14,:128]!
2560	and	r2,r2,r4
2561	eor	r12,r0,r4,ror#19
2562	eor	r0,r8,r8,ror#11
2563	eor	r2,r2,r6
2564	vrev32.8	q3,q3
2565	add	r7,r7,r12,ror#6
2566	eor	r12,r8,r9
2567	eor	r0,r0,r8,ror#20
2568	add	r7,r7,r2
2569	vadd.i32	q8,q8,q3
2570	ldr	r2,[sp,#52]
2571	and	r3,r3,r12
2572	add	r11,r11,r7
2573	add	r7,r7,r0,ror#2
2574	eor	r3,r3,r9
2575	add	r6,r6,r2
2576	eor	r2,r4,r5
2577	eor	r0,r11,r11,ror#5
2578	add	r7,r7,r3
2579	and	r2,r2,r11
2580	eor	r3,r0,r11,ror#19
2581	eor	r0,r7,r7,ror#11
2582	eor	r2,r2,r5
2583	add	r6,r6,r3,ror#6
2584	eor	r3,r7,r8
2585	eor	r0,r0,r7,ror#20
2586	add	r6,r6,r2
2587	ldr	r2,[sp,#56]
2588	and	r12,r12,r3
2589	add	r10,r10,r6
2590	add	r6,r6,r0,ror#2
2591	eor	r12,r12,r8
2592	add	r5,r5,r2
2593	eor	r2,r11,r4
2594	eor	r0,r10,r10,ror#5
2595	add	r6,r6,r12
2596	and	r2,r2,r10
2597	eor	r12,r0,r10,ror#19
2598	eor	r0,r6,r6,ror#11
2599	eor	r2,r2,r4
2600	add	r5,r5,r12,ror#6
2601	eor	r12,r6,r7
2602	eor	r0,r0,r6,ror#20
2603	add	r5,r5,r2
2604	ldr	r2,[sp,#60]
2605	and	r3,r3,r12
2606	add	r9,r9,r5
2607	add	r5,r5,r0,ror#2
2608	eor	r3,r3,r7
2609	add	r4,r4,r2
2610	eor	r2,r10,r11
2611	eor	r0,r9,r9,ror#5
2612	add	r5,r5,r3
2613	and	r2,r2,r9
2614	eor	r3,r0,r9,ror#19
2615	eor	r0,r5,r5,ror#11
2616	eor	r2,r2,r11
2617	add	r4,r4,r3,ror#6
2618	eor	r3,r5,r6
2619	eor	r0,r0,r5,ror#20
2620	add	r4,r4,r2
2621	ldr	r2,[sp,#64]
2622	and	r12,r12,r3
2623	add	r8,r8,r4
2624	add	r4,r4,r0,ror#2
2625	eor	r12,r12,r6
2626	vst1.32	{q8},[r1,:128]!
2627	ldr	r0,[r2,#0]
2628	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2629	ldr	r12,[r2,#4]
2630	ldr	r3,[r2,#8]
2631	ldr	r1,[r2,#12]
2632	add	r4,r4,r0			@ accumulate
2633	ldr	r0,[r2,#16]
2634	add	r5,r5,r12
2635	ldr	r12,[r2,#20]
2636	add	r6,r6,r3
2637	ldr	r3,[r2,#24]
2638	add	r7,r7,r1
2639	ldr	r1,[r2,#28]
2640	add	r8,r8,r0
2641	str	r4,[r2],#4
2642	add	r9,r9,r12
2643	str	r5,[r2],#4
2644	add	r10,r10,r3
2645	str	r6,[r2],#4
2646	add	r11,r11,r1
2647	str	r7,[r2],#4
2648	stmia	r2,{r8-r11}
2649
2650	ittte	ne
2651	movne	r1,sp
2652	ldrne	r2,[sp,#0]
2653	eorne	r12,r12,r12
2654	ldreq	sp,[sp,#76]			@ restore original sp
2655	itt	ne
2656	eorne	r3,r5,r6
2657	bne	.L_00_48
2658
2659	ldmia	sp!,{r4-r12,pc}
2660.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2661#endif
2662#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2663
2664# ifdef __thumb2__
@ Thumb-2 stores a 32-bit NEON opcode as two halfwords, low half first,
@ and the ARM-mode prefix byte 0xf3 becomes 0xff in Thumb encoding --
@ hence the reordered bytes and the |0xc below.
2665#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2666# else
2667#  define INST(a,b,c,d)	.byte	a,b,c,d
2668# endif
2669
@ -----------------------------------------------------------------------
@ sha256_block_data_order_armv8(u32 state[8], const u8 *inp, size_t num)
@   r0 = eight 32-bit chaining values (loaded into q0,q1)
@   r1 = input data, r2 = number of 64-byte blocks to process
@ ARMv8 Crypto Extensions code path.  The SHA-256 instructions are
@ emitted as raw opcode bytes via INST() so this file still assembles
@ with toolchains that predate the v8 Crypto Extensions.
@ NOTE(review): the q0/q1 = ABCD/EFGH lane layout is implied by the
@ sha256h/sha256h2 operand order -- confirm before rearranging
@ register use.
@ -----------------------------------------------------------------------
2670.type	sha256_block_data_order_armv8,%function
2671.align	5
2672sha256_block_data_order_armv8:
2673.LARMv8:
2674	vld1.32	{q0,q1},[r0]	@ load current hash state
2675# ifdef __thumb2__
2676	adr	r3,.LARMv8	@ no adrl in Thumb: form the K256 address
2677	sub	r3,r3,#.LARMv8-K256	@ ... as pc-relative adr minus offset
2678# else
2679	adrl	r3,K256	@ r3 = round-constant table
2680# endif
2681	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
2682
2683.Loop_v8:
@ Load one 64-byte block into q8-q11 and byte-swap each 32-bit word
@ (SHA-256 message words are big-endian).
2684	vld1.8		{q8-q9},[r1]!
2685	vld1.8		{q10-q11},[r1]!
2686	vld1.32		{q12},[r3]!	@ K[0..3]
2687	vrev32.8	q8,q8
2688	vrev32.8	q9,q9
2689	vrev32.8	q10,q10
2690	vrev32.8	q11,q11
2691	vmov		q14,q0	@ offload: keep the incoming state for
2692	vmov		q15,q1	@ the final feed-forward addition
2693	teq		r1,r2	@ last block? (Z consumed by bne below)
@ Rounds 0..47: twelve 4-round groups.  Each group adds the next four
@ round constants to a message quad (vadd.i32), extends the message
@ schedule (sha256su0/sha256su1), and runs four compression rounds
@ (sha256h updates one half of the state, sha256h2 the other).
2694	vld1.32		{q13},[r3]!
2695	vadd.i32	q12,q12,q8
2696	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2697	vmov		q2,q0
2698	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2699	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2700	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2701	vld1.32		{q12},[r3]!
2702	vadd.i32	q13,q13,q9
2703	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2704	vmov		q2,q0
2705	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2706	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2707	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2708	vld1.32		{q13},[r3]!
2709	vadd.i32	q12,q12,q10
2710	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2711	vmov		q2,q0
2712	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2713	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2714	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2715	vld1.32		{q12},[r3]!
2716	vadd.i32	q13,q13,q11
2717	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2718	vmov		q2,q0
2719	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2720	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2721	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2722	vld1.32		{q13},[r3]!
2723	vadd.i32	q12,q12,q8
2724	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2725	vmov		q2,q0
2726	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2727	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2728	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2729	vld1.32		{q12},[r3]!
2730	vadd.i32	q13,q13,q9
2731	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2732	vmov		q2,q0
2733	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2734	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2735	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2736	vld1.32		{q13},[r3]!
2737	vadd.i32	q12,q12,q10
2738	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2739	vmov		q2,q0
2740	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2741	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2742	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2743	vld1.32		{q12},[r3]!
2744	vadd.i32	q13,q13,q11
2745	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2746	vmov		q2,q0
2747	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2748	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2749	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2750	vld1.32		{q13},[r3]!
2751	vadd.i32	q12,q12,q8
2752	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2753	vmov		q2,q0
2754	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2755	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2756	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2757	vld1.32		{q12},[r3]!
2758	vadd.i32	q13,q13,q9
2759	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2760	vmov		q2,q0
2761	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2762	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2763	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2764	vld1.32		{q13},[r3]!
2765	vadd.i32	q12,q12,q10
2766	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2767	vmov		q2,q0
2768	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2769	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2770	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2771	vld1.32		{q12},[r3]!
2772	vadd.i32	q13,q13,q11
2773	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2774	vmov		q2,q0
2775	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2776	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2777	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
@ Rounds 48..63: the message schedule is complete; compression only.
2778	vld1.32		{q13},[r3]!
2779	vadd.i32	q12,q12,q8
2780	vmov		q2,q0
2781	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2782	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2783
2784	vld1.32		{q12},[r3]!
2785	vadd.i32	q13,q13,q9
2786	vmov		q2,q0
2787	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2788	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2789
2790	vld1.32		{q13},[r3]	@ last K quad: no writeback
2791	vadd.i32	q12,q12,q10
2792	sub		r3,r3,#256-16	@ rewind r3 to K256 (15 post-incremented 16-byte loads = 240 bytes)
2793	vmov		q2,q0
2794	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2795	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2796
2797	vadd.i32	q13,q13,q11
2798	vmov		q2,q0
2799	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2800	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2801
2802	vadd.i32	q0,q0,q14	@ feed-forward: state += block result
2803	vadd.i32	q1,q1,q15
2804	it		ne	@ Thumb-2 IT block (emits no code in ARM mode)
2805	bne		.Loop_v8	@ more input? (Z from teq above)
2806
2807	vst1.32		{q0,q1},[r0]	@ store updated hash state
2808
2809	bx	lr		@ return
2810.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2811#endif
2812.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2813.align	2
2814#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2815.comm   OPENSSL_armcap_P,4,4
2816#endif
2817