@ (non-assembly navigation header from the code-viewer removed)
1
2@ ====================================================================
3@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
4@ project. The module is, however, dual licensed under OpenSSL and
5@ CRYPTOGAMS licenses depending on where you obtain it. For further
6@ details see http://www.openssl.org/~appro/cryptogams/.
7@
8@ Permission to use under GPL terms is granted.
9@ ====================================================================
10
11@ SHA256 block procedure for ARMv4. May 2007.
12
13@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
14@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
15@ byte [on single-issue Xscale PXA250 core].
16
17@ July 2010.
18@
19@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
20@ Cortex A8 core and ~20 cycles per processed byte.
21
22@ February 2011.
23@
24@ Profiler-assisted and platform-specific optimization resulted in 16%
25@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
26
27@ September 2013.
28@
29@ Add NEON implementation. On Cortex A8 it was measured to process one
30@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
31@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
32@ code (meaning that latter performs sub-optimally, nothing was done
33@ about it).
34
35@ May 2014.
36@
37@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
38
39#ifndef __KERNEL__
40# include "arm_arch.h"
41#else
42# define __ARM_ARCH__ __LINUX_ARM_ARCH__
43# define __ARM_MAX_ARCH__ 7
44#endif
45
46.text
47#if __ARM_ARCH__<7
48.code	32
49#else
50.syntax unified
51# ifdef __thumb2__
52#  define adrl adr
53.thumb
54# else
55.code   32
56# endif
57#endif
58
.type	K256,%object
.align	5
@ K256 -- the 64 SHA-256 round constants K[0..63] (FIPS 180-4 sect. 4.2.2:
@ first 32 bits of the fractional parts of the cube roots of the first 64
@ primes).  The round code walks this table one word per round via
@ "ldr rX,[r14],#4  @ *K256++" and locates it PC-relatively with
@ "sub r14,r3,#256+32" (256 bytes of constants + preceding code offset).
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
@ NOTE(review): the rounds detect the end of the table by testing the low
@ byte of the constant just fetched against 0xf2 (low byte of 0xc67178f2,
@ see the "#if i==31 ... cmp ...,#0xf2" guards); this extra zero word
@ presumably absorbs the one-past-the-end prefetch -- confirm against the
@ full round loop before relying on it.
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Link-time offset from sha256_block_data_order to the runtime CPU
@ capability word OPENSSL_armcap_P.  The function entry loads this word
@ and adds its own address (r3): "ldr r12,.LOPENSSL_armcap" followed by
@ "ldr r12,[r3,r12]", giving position-independent access to the ARMV8_SHA256
@ / ARMV7_NEON feature flags that select the .LARMv8 / .LNEON code paths.
@ Omitted entirely for kernel builds, which probe features differently.
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
#endif
.align	5
85
86.global	sha256_block_data_order
87.type	sha256_block_data_order,%function
88sha256_block_data_order:
89.Lsha256_block_data_order:
90#if __ARM_ARCH__<7
91	sub	r3,pc,#8		@ sha256_block_data_order
92#else
93	adr	r3,.Lsha256_block_data_order
94#endif
95#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
96	ldr	r12,.LOPENSSL_armcap
97	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
98	tst	r12,#ARMV8_SHA256
99	bne	.LARMv8
100	tst	r12,#ARMV7_NEON
101	bne	.LNEON
102#endif
103	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
104	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
105	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
106	sub	r14,r3,#256+32	@ K256
107	sub	sp,sp,#16*4		@ alloca(X[16])
108.Loop:
109# if __ARM_ARCH__>=7
110	ldr	r2,[r1],#4
111# else
112	ldrb	r2,[r1,#3]
113# endif
114	eor	r3,r5,r6		@ magic
115	eor	r12,r12,r12
116#if __ARM_ARCH__>=7
117	@ ldr	r2,[r1],#4			@ 0
118# if 0==15
119	str	r1,[sp,#17*4]			@ make room for r1
120# endif
121	eor	r0,r8,r8,ror#5
122	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
123	eor	r0,r0,r8,ror#19	@ Sigma1(e)
124# ifndef __ARMEB__
125	rev	r2,r2
126# endif
127#else
128	@ ldrb	r2,[r1,#3]			@ 0
129	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
130	ldrb	r12,[r1,#2]
131	ldrb	r0,[r1,#1]
132	orr	r2,r2,r12,lsl#8
133	ldrb	r12,[r1],#4
134	orr	r2,r2,r0,lsl#16
135# if 0==15
136	str	r1,[sp,#17*4]			@ make room for r1
137# endif
138	eor	r0,r8,r8,ror#5
139	orr	r2,r2,r12,lsl#24
140	eor	r0,r0,r8,ror#19	@ Sigma1(e)
141#endif
142	ldr	r12,[r14],#4			@ *K256++
143	add	r11,r11,r2			@ h+=X[i]
144	str	r2,[sp,#0*4]
145	eor	r2,r9,r10
146	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
147	and	r2,r2,r8
148	add	r11,r11,r12			@ h+=K256[i]
149	eor	r2,r2,r10			@ Ch(e,f,g)
150	eor	r0,r4,r4,ror#11
151	add	r11,r11,r2			@ h+=Ch(e,f,g)
152#if 0==31
153	and	r12,r12,#0xff
154	cmp	r12,#0xf2			@ done?
155#endif
156#if 0<15
157# if __ARM_ARCH__>=7
158	ldr	r2,[r1],#4			@ prefetch
159# else
160	ldrb	r2,[r1,#3]
161# endif
162	eor	r12,r4,r5			@ a^b, b^c in next round
163#else
164	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
165	eor	r12,r4,r5			@ a^b, b^c in next round
166	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
167#endif
168	eor	r0,r0,r4,ror#20	@ Sigma0(a)
169	and	r3,r3,r12			@ (b^c)&=(a^b)
170	add	r7,r7,r11			@ d+=h
171	eor	r3,r3,r5			@ Maj(a,b,c)
172	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
173	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
174#if __ARM_ARCH__>=7
175	@ ldr	r2,[r1],#4			@ 1
176# if 1==15
177	str	r1,[sp,#17*4]			@ make room for r1
178# endif
179	eor	r0,r7,r7,ror#5
180	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
181	eor	r0,r0,r7,ror#19	@ Sigma1(e)
182# ifndef __ARMEB__
183	rev	r2,r2
184# endif
185#else
186	@ ldrb	r2,[r1,#3]			@ 1
187	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
188	ldrb	r3,[r1,#2]
189	ldrb	r0,[r1,#1]
190	orr	r2,r2,r3,lsl#8
191	ldrb	r3,[r1],#4
192	orr	r2,r2,r0,lsl#16
193# if 1==15
194	str	r1,[sp,#17*4]			@ make room for r1
195# endif
196	eor	r0,r7,r7,ror#5
197	orr	r2,r2,r3,lsl#24
198	eor	r0,r0,r7,ror#19	@ Sigma1(e)
199#endif
200	ldr	r3,[r14],#4			@ *K256++
201	add	r10,r10,r2			@ h+=X[i]
202	str	r2,[sp,#1*4]
203	eor	r2,r8,r9
204	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
205	and	r2,r2,r7
206	add	r10,r10,r3			@ h+=K256[i]
207	eor	r2,r2,r9			@ Ch(e,f,g)
208	eor	r0,r11,r11,ror#11
209	add	r10,r10,r2			@ h+=Ch(e,f,g)
210#if 1==31
211	and	r3,r3,#0xff
212	cmp	r3,#0xf2			@ done?
213#endif
214#if 1<15
215# if __ARM_ARCH__>=7
216	ldr	r2,[r1],#4			@ prefetch
217# else
218	ldrb	r2,[r1,#3]
219# endif
220	eor	r3,r11,r4			@ a^b, b^c in next round
221#else
222	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
223	eor	r3,r11,r4			@ a^b, b^c in next round
224	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
225#endif
226	eor	r0,r0,r11,ror#20	@ Sigma0(a)
227	and	r12,r12,r3			@ (b^c)&=(a^b)
228	add	r6,r6,r10			@ d+=h
229	eor	r12,r12,r4			@ Maj(a,b,c)
230	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
231	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
232#if __ARM_ARCH__>=7
233	@ ldr	r2,[r1],#4			@ 2
234# if 2==15
235	str	r1,[sp,#17*4]			@ make room for r1
236# endif
237	eor	r0,r6,r6,ror#5
238	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
239	eor	r0,r0,r6,ror#19	@ Sigma1(e)
240# ifndef __ARMEB__
241	rev	r2,r2
242# endif
243#else
244	@ ldrb	r2,[r1,#3]			@ 2
245	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
246	ldrb	r12,[r1,#2]
247	ldrb	r0,[r1,#1]
248	orr	r2,r2,r12,lsl#8
249	ldrb	r12,[r1],#4
250	orr	r2,r2,r0,lsl#16
251# if 2==15
252	str	r1,[sp,#17*4]			@ make room for r1
253# endif
254	eor	r0,r6,r6,ror#5
255	orr	r2,r2,r12,lsl#24
256	eor	r0,r0,r6,ror#19	@ Sigma1(e)
257#endif
258	ldr	r12,[r14],#4			@ *K256++
259	add	r9,r9,r2			@ h+=X[i]
260	str	r2,[sp,#2*4]
261	eor	r2,r7,r8
262	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
263	and	r2,r2,r6
264	add	r9,r9,r12			@ h+=K256[i]
265	eor	r2,r2,r8			@ Ch(e,f,g)
266	eor	r0,r10,r10,ror#11
267	add	r9,r9,r2			@ h+=Ch(e,f,g)
268#if 2==31
269	and	r12,r12,#0xff
270	cmp	r12,#0xf2			@ done?
271#endif
272#if 2<15
273# if __ARM_ARCH__>=7
274	ldr	r2,[r1],#4			@ prefetch
275# else
276	ldrb	r2,[r1,#3]
277# endif
278	eor	r12,r10,r11			@ a^b, b^c in next round
279#else
280	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
281	eor	r12,r10,r11			@ a^b, b^c in next round
282	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
283#endif
284	eor	r0,r0,r10,ror#20	@ Sigma0(a)
285	and	r3,r3,r12			@ (b^c)&=(a^b)
286	add	r5,r5,r9			@ d+=h
287	eor	r3,r3,r11			@ Maj(a,b,c)
288	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
289	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
290#if __ARM_ARCH__>=7
291	@ ldr	r2,[r1],#4			@ 3
292# if 3==15
293	str	r1,[sp,#17*4]			@ make room for r1
294# endif
295	eor	r0,r5,r5,ror#5
296	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
297	eor	r0,r0,r5,ror#19	@ Sigma1(e)
298# ifndef __ARMEB__
299	rev	r2,r2
300# endif
301#else
302	@ ldrb	r2,[r1,#3]			@ 3
303	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
304	ldrb	r3,[r1,#2]
305	ldrb	r0,[r1,#1]
306	orr	r2,r2,r3,lsl#8
307	ldrb	r3,[r1],#4
308	orr	r2,r2,r0,lsl#16
309# if 3==15
310	str	r1,[sp,#17*4]			@ make room for r1
311# endif
312	eor	r0,r5,r5,ror#5
313	orr	r2,r2,r3,lsl#24
314	eor	r0,r0,r5,ror#19	@ Sigma1(e)
315#endif
316	ldr	r3,[r14],#4			@ *K256++
317	add	r8,r8,r2			@ h+=X[i]
318	str	r2,[sp,#3*4]
319	eor	r2,r6,r7
320	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
321	and	r2,r2,r5
322	add	r8,r8,r3			@ h+=K256[i]
323	eor	r2,r2,r7			@ Ch(e,f,g)
324	eor	r0,r9,r9,ror#11
325	add	r8,r8,r2			@ h+=Ch(e,f,g)
326#if 3==31
327	and	r3,r3,#0xff
328	cmp	r3,#0xf2			@ done?
329#endif
330#if 3<15
331# if __ARM_ARCH__>=7
332	ldr	r2,[r1],#4			@ prefetch
333# else
334	ldrb	r2,[r1,#3]
335# endif
336	eor	r3,r9,r10			@ a^b, b^c in next round
337#else
338	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
339	eor	r3,r9,r10			@ a^b, b^c in next round
340	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
341#endif
342	eor	r0,r0,r9,ror#20	@ Sigma0(a)
343	and	r12,r12,r3			@ (b^c)&=(a^b)
344	add	r4,r4,r8			@ d+=h
345	eor	r12,r12,r10			@ Maj(a,b,c)
346	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
347	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
348#if __ARM_ARCH__>=7
349	@ ldr	r2,[r1],#4			@ 4
350# if 4==15
351	str	r1,[sp,#17*4]			@ make room for r1
352# endif
353	eor	r0,r4,r4,ror#5
354	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
355	eor	r0,r0,r4,ror#19	@ Sigma1(e)
356# ifndef __ARMEB__
357	rev	r2,r2
358# endif
359#else
360	@ ldrb	r2,[r1,#3]			@ 4
361	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
362	ldrb	r12,[r1,#2]
363	ldrb	r0,[r1,#1]
364	orr	r2,r2,r12,lsl#8
365	ldrb	r12,[r1],#4
366	orr	r2,r2,r0,lsl#16
367# if 4==15
368	str	r1,[sp,#17*4]			@ make room for r1
369# endif
370	eor	r0,r4,r4,ror#5
371	orr	r2,r2,r12,lsl#24
372	eor	r0,r0,r4,ror#19	@ Sigma1(e)
373#endif
374	ldr	r12,[r14],#4			@ *K256++
375	add	r7,r7,r2			@ h+=X[i]
376	str	r2,[sp,#4*4]
377	eor	r2,r5,r6
378	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
379	and	r2,r2,r4
380	add	r7,r7,r12			@ h+=K256[i]
381	eor	r2,r2,r6			@ Ch(e,f,g)
382	eor	r0,r8,r8,ror#11
383	add	r7,r7,r2			@ h+=Ch(e,f,g)
384#if 4==31
385	and	r12,r12,#0xff
386	cmp	r12,#0xf2			@ done?
387#endif
388#if 4<15
389# if __ARM_ARCH__>=7
390	ldr	r2,[r1],#4			@ prefetch
391# else
392	ldrb	r2,[r1,#3]
393# endif
394	eor	r12,r8,r9			@ a^b, b^c in next round
395#else
396	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
397	eor	r12,r8,r9			@ a^b, b^c in next round
398	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
399#endif
400	eor	r0,r0,r8,ror#20	@ Sigma0(a)
401	and	r3,r3,r12			@ (b^c)&=(a^b)
402	add	r11,r11,r7			@ d+=h
403	eor	r3,r3,r9			@ Maj(a,b,c)
404	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
405	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
406#if __ARM_ARCH__>=7
407	@ ldr	r2,[r1],#4			@ 5
408# if 5==15
409	str	r1,[sp,#17*4]			@ make room for r1
410# endif
411	eor	r0,r11,r11,ror#5
412	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
413	eor	r0,r0,r11,ror#19	@ Sigma1(e)
414# ifndef __ARMEB__
415	rev	r2,r2
416# endif
417#else
418	@ ldrb	r2,[r1,#3]			@ 5
419	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
420	ldrb	r3,[r1,#2]
421	ldrb	r0,[r1,#1]
422	orr	r2,r2,r3,lsl#8
423	ldrb	r3,[r1],#4
424	orr	r2,r2,r0,lsl#16
425# if 5==15
426	str	r1,[sp,#17*4]			@ make room for r1
427# endif
428	eor	r0,r11,r11,ror#5
429	orr	r2,r2,r3,lsl#24
430	eor	r0,r0,r11,ror#19	@ Sigma1(e)
431#endif
432	ldr	r3,[r14],#4			@ *K256++
433	add	r6,r6,r2			@ h+=X[i]
434	str	r2,[sp,#5*4]
435	eor	r2,r4,r5
436	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
437	and	r2,r2,r11
438	add	r6,r6,r3			@ h+=K256[i]
439	eor	r2,r2,r5			@ Ch(e,f,g)
440	eor	r0,r7,r7,ror#11
441	add	r6,r6,r2			@ h+=Ch(e,f,g)
442#if 5==31
443	and	r3,r3,#0xff
444	cmp	r3,#0xf2			@ done?
445#endif
446#if 5<15
447# if __ARM_ARCH__>=7
448	ldr	r2,[r1],#4			@ prefetch
449# else
450	ldrb	r2,[r1,#3]
451# endif
452	eor	r3,r7,r8			@ a^b, b^c in next round
453#else
454	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
455	eor	r3,r7,r8			@ a^b, b^c in next round
456	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
457#endif
458	eor	r0,r0,r7,ror#20	@ Sigma0(a)
459	and	r12,r12,r3			@ (b^c)&=(a^b)
460	add	r10,r10,r6			@ d+=h
461	eor	r12,r12,r8			@ Maj(a,b,c)
462	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
463	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
464#if __ARM_ARCH__>=7
465	@ ldr	r2,[r1],#4			@ 6
466# if 6==15
467	str	r1,[sp,#17*4]			@ make room for r1
468# endif
469	eor	r0,r10,r10,ror#5
470	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
471	eor	r0,r0,r10,ror#19	@ Sigma1(e)
472# ifndef __ARMEB__
473	rev	r2,r2
474# endif
475#else
476	@ ldrb	r2,[r1,#3]			@ 6
477	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
478	ldrb	r12,[r1,#2]
479	ldrb	r0,[r1,#1]
480	orr	r2,r2,r12,lsl#8
481	ldrb	r12,[r1],#4
482	orr	r2,r2,r0,lsl#16
483# if 6==15
484	str	r1,[sp,#17*4]			@ make room for r1
485# endif
486	eor	r0,r10,r10,ror#5
487	orr	r2,r2,r12,lsl#24
488	eor	r0,r0,r10,ror#19	@ Sigma1(e)
489#endif
490	ldr	r12,[r14],#4			@ *K256++
491	add	r5,r5,r2			@ h+=X[i]
492	str	r2,[sp,#6*4]
493	eor	r2,r11,r4
494	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
495	and	r2,r2,r10
496	add	r5,r5,r12			@ h+=K256[i]
497	eor	r2,r2,r4			@ Ch(e,f,g)
498	eor	r0,r6,r6,ror#11
499	add	r5,r5,r2			@ h+=Ch(e,f,g)
500#if 6==31
501	and	r12,r12,#0xff
502	cmp	r12,#0xf2			@ done?
503#endif
504#if 6<15
505# if __ARM_ARCH__>=7
506	ldr	r2,[r1],#4			@ prefetch
507# else
508	ldrb	r2,[r1,#3]
509# endif
510	eor	r12,r6,r7			@ a^b, b^c in next round
511#else
512	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
513	eor	r12,r6,r7			@ a^b, b^c in next round
514	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
515#endif
516	eor	r0,r0,r6,ror#20	@ Sigma0(a)
517	and	r3,r3,r12			@ (b^c)&=(a^b)
518	add	r9,r9,r5			@ d+=h
519	eor	r3,r3,r7			@ Maj(a,b,c)
520	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
521	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
522#if __ARM_ARCH__>=7
523	@ ldr	r2,[r1],#4			@ 7
524# if 7==15
525	str	r1,[sp,#17*4]			@ make room for r1
526# endif
527	eor	r0,r9,r9,ror#5
528	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
529	eor	r0,r0,r9,ror#19	@ Sigma1(e)
530# ifndef __ARMEB__
531	rev	r2,r2
532# endif
533#else
534	@ ldrb	r2,[r1,#3]			@ 7
535	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
536	ldrb	r3,[r1,#2]
537	ldrb	r0,[r1,#1]
538	orr	r2,r2,r3,lsl#8
539	ldrb	r3,[r1],#4
540	orr	r2,r2,r0,lsl#16
541# if 7==15
542	str	r1,[sp,#17*4]			@ make room for r1
543# endif
544	eor	r0,r9,r9,ror#5
545	orr	r2,r2,r3,lsl#24
546	eor	r0,r0,r9,ror#19	@ Sigma1(e)
547#endif
548	ldr	r3,[r14],#4			@ *K256++
549	add	r4,r4,r2			@ h+=X[i]
550	str	r2,[sp,#7*4]
551	eor	r2,r10,r11
552	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
553	and	r2,r2,r9
554	add	r4,r4,r3			@ h+=K256[i]
555	eor	r2,r2,r11			@ Ch(e,f,g)
556	eor	r0,r5,r5,ror#11
557	add	r4,r4,r2			@ h+=Ch(e,f,g)
558#if 7==31
559	and	r3,r3,#0xff
560	cmp	r3,#0xf2			@ done?
561#endif
562#if 7<15
563# if __ARM_ARCH__>=7
564	ldr	r2,[r1],#4			@ prefetch
565# else
566	ldrb	r2,[r1,#3]
567# endif
568	eor	r3,r5,r6			@ a^b, b^c in next round
569#else
570	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
571	eor	r3,r5,r6			@ a^b, b^c in next round
572	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
573#endif
574	eor	r0,r0,r5,ror#20	@ Sigma0(a)
575	and	r12,r12,r3			@ (b^c)&=(a^b)
576	add	r8,r8,r4			@ d+=h
577	eor	r12,r12,r6			@ Maj(a,b,c)
578	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
579	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
580#if __ARM_ARCH__>=7
581	@ ldr	r2,[r1],#4			@ 8
582# if 8==15
583	str	r1,[sp,#17*4]			@ make room for r1
584# endif
585	eor	r0,r8,r8,ror#5
586	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
587	eor	r0,r0,r8,ror#19	@ Sigma1(e)
588# ifndef __ARMEB__
589	rev	r2,r2
590# endif
591#else
592	@ ldrb	r2,[r1,#3]			@ 8
593	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
594	ldrb	r12,[r1,#2]
595	ldrb	r0,[r1,#1]
596	orr	r2,r2,r12,lsl#8
597	ldrb	r12,[r1],#4
598	orr	r2,r2,r0,lsl#16
599# if 8==15
600	str	r1,[sp,#17*4]			@ make room for r1
601# endif
602	eor	r0,r8,r8,ror#5
603	orr	r2,r2,r12,lsl#24
604	eor	r0,r0,r8,ror#19	@ Sigma1(e)
605#endif
606	ldr	r12,[r14],#4			@ *K256++
607	add	r11,r11,r2			@ h+=X[i]
608	str	r2,[sp,#8*4]
609	eor	r2,r9,r10
610	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
611	and	r2,r2,r8
612	add	r11,r11,r12			@ h+=K256[i]
613	eor	r2,r2,r10			@ Ch(e,f,g)
614	eor	r0,r4,r4,ror#11
615	add	r11,r11,r2			@ h+=Ch(e,f,g)
616#if 8==31
617	and	r12,r12,#0xff
618	cmp	r12,#0xf2			@ done?
619#endif
620#if 8<15
621# if __ARM_ARCH__>=7
622	ldr	r2,[r1],#4			@ prefetch
623# else
624	ldrb	r2,[r1,#3]
625# endif
626	eor	r12,r4,r5			@ a^b, b^c in next round
627#else
628	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
629	eor	r12,r4,r5			@ a^b, b^c in next round
630	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
631#endif
632	eor	r0,r0,r4,ror#20	@ Sigma0(a)
633	and	r3,r3,r12			@ (b^c)&=(a^b)
634	add	r7,r7,r11			@ d+=h
635	eor	r3,r3,r5			@ Maj(a,b,c)
636	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
637	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
638#if __ARM_ARCH__>=7
639	@ ldr	r2,[r1],#4			@ 9
640# if 9==15
641	str	r1,[sp,#17*4]			@ make room for r1
642# endif
643	eor	r0,r7,r7,ror#5
644	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
645	eor	r0,r0,r7,ror#19	@ Sigma1(e)
646# ifndef __ARMEB__
647	rev	r2,r2
648# endif
649#else
650	@ ldrb	r2,[r1,#3]			@ 9
651	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
652	ldrb	r3,[r1,#2]
653	ldrb	r0,[r1,#1]
654	orr	r2,r2,r3,lsl#8
655	ldrb	r3,[r1],#4
656	orr	r2,r2,r0,lsl#16
657# if 9==15
658	str	r1,[sp,#17*4]			@ make room for r1
659# endif
660	eor	r0,r7,r7,ror#5
661	orr	r2,r2,r3,lsl#24
662	eor	r0,r0,r7,ror#19	@ Sigma1(e)
663#endif
664	ldr	r3,[r14],#4			@ *K256++
665	add	r10,r10,r2			@ h+=X[i]
666	str	r2,[sp,#9*4]
667	eor	r2,r8,r9
668	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
669	and	r2,r2,r7
670	add	r10,r10,r3			@ h+=K256[i]
671	eor	r2,r2,r9			@ Ch(e,f,g)
672	eor	r0,r11,r11,ror#11
673	add	r10,r10,r2			@ h+=Ch(e,f,g)
674#if 9==31
675	and	r3,r3,#0xff
676	cmp	r3,#0xf2			@ done?
677#endif
678#if 9<15
679# if __ARM_ARCH__>=7
680	ldr	r2,[r1],#4			@ prefetch
681# else
682	ldrb	r2,[r1,#3]
683# endif
684	eor	r3,r11,r4			@ a^b, b^c in next round
685#else
686	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
687	eor	r3,r11,r4			@ a^b, b^c in next round
688	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
689#endif
690	eor	r0,r0,r11,ror#20	@ Sigma0(a)
691	and	r12,r12,r3			@ (b^c)&=(a^b)
692	add	r6,r6,r10			@ d+=h
693	eor	r12,r12,r4			@ Maj(a,b,c)
694	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
695	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
696#if __ARM_ARCH__>=7
697	@ ldr	r2,[r1],#4			@ 10
698# if 10==15
699	str	r1,[sp,#17*4]			@ make room for r1
700# endif
701	eor	r0,r6,r6,ror#5
702	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
703	eor	r0,r0,r6,ror#19	@ Sigma1(e)
704# ifndef __ARMEB__
705	rev	r2,r2
706# endif
707#else
708	@ ldrb	r2,[r1,#3]			@ 10
709	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
710	ldrb	r12,[r1,#2]
711	ldrb	r0,[r1,#1]
712	orr	r2,r2,r12,lsl#8
713	ldrb	r12,[r1],#4
714	orr	r2,r2,r0,lsl#16
715# if 10==15
716	str	r1,[sp,#17*4]			@ make room for r1
717# endif
718	eor	r0,r6,r6,ror#5
719	orr	r2,r2,r12,lsl#24
720	eor	r0,r0,r6,ror#19	@ Sigma1(e)
721#endif
722	ldr	r12,[r14],#4			@ *K256++
723	add	r9,r9,r2			@ h+=X[i]
724	str	r2,[sp,#10*4]
725	eor	r2,r7,r8
726	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
727	and	r2,r2,r6
728	add	r9,r9,r12			@ h+=K256[i]
729	eor	r2,r2,r8			@ Ch(e,f,g)
730	eor	r0,r10,r10,ror#11
731	add	r9,r9,r2			@ h+=Ch(e,f,g)
732#if 10==31
733	and	r12,r12,#0xff
734	cmp	r12,#0xf2			@ done?
735#endif
736#if 10<15
737# if __ARM_ARCH__>=7
738	ldr	r2,[r1],#4			@ prefetch
739# else
740	ldrb	r2,[r1,#3]
741# endif
742	eor	r12,r10,r11			@ a^b, b^c in next round
743#else
744	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
745	eor	r12,r10,r11			@ a^b, b^c in next round
746	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
747#endif
748	eor	r0,r0,r10,ror#20	@ Sigma0(a)
749	and	r3,r3,r12			@ (b^c)&=(a^b)
750	add	r5,r5,r9			@ d+=h
751	eor	r3,r3,r11			@ Maj(a,b,c)
752	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
753	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
754#if __ARM_ARCH__>=7
755	@ ldr	r2,[r1],#4			@ 11
756# if 11==15
757	str	r1,[sp,#17*4]			@ make room for r1
758# endif
759	eor	r0,r5,r5,ror#5
760	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
761	eor	r0,r0,r5,ror#19	@ Sigma1(e)
762# ifndef __ARMEB__
763	rev	r2,r2
764# endif
765#else
766	@ ldrb	r2,[r1,#3]			@ 11
767	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
768	ldrb	r3,[r1,#2]
769	ldrb	r0,[r1,#1]
770	orr	r2,r2,r3,lsl#8
771	ldrb	r3,[r1],#4
772	orr	r2,r2,r0,lsl#16
773# if 11==15
774	str	r1,[sp,#17*4]			@ make room for r1
775# endif
776	eor	r0,r5,r5,ror#5
777	orr	r2,r2,r3,lsl#24
778	eor	r0,r0,r5,ror#19	@ Sigma1(e)
779#endif
780	ldr	r3,[r14],#4			@ *K256++
781	add	r8,r8,r2			@ h+=X[i]
782	str	r2,[sp,#11*4]
783	eor	r2,r6,r7
784	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
785	and	r2,r2,r5
786	add	r8,r8,r3			@ h+=K256[i]
787	eor	r2,r2,r7			@ Ch(e,f,g)
788	eor	r0,r9,r9,ror#11
789	add	r8,r8,r2			@ h+=Ch(e,f,g)
790#if 11==31
791	and	r3,r3,#0xff
792	cmp	r3,#0xf2			@ done?
793#endif
794#if 11<15
795# if __ARM_ARCH__>=7
796	ldr	r2,[r1],#4			@ prefetch
797# else
798	ldrb	r2,[r1,#3]
799# endif
800	eor	r3,r9,r10			@ a^b, b^c in next round
801#else
802	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
803	eor	r3,r9,r10			@ a^b, b^c in next round
804	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
805#endif
806	eor	r0,r0,r9,ror#20	@ Sigma0(a)
807	and	r12,r12,r3			@ (b^c)&=(a^b)
808	add	r4,r4,r8			@ d+=h
809	eor	r12,r12,r10			@ Maj(a,b,c)
810	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
811	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
812#if __ARM_ARCH__>=7
813	@ ldr	r2,[r1],#4			@ 12
814# if 12==15
815	str	r1,[sp,#17*4]			@ make room for r1
816# endif
817	eor	r0,r4,r4,ror#5
818	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
819	eor	r0,r0,r4,ror#19	@ Sigma1(e)
820# ifndef __ARMEB__
821	rev	r2,r2
822# endif
823#else
824	@ ldrb	r2,[r1,#3]			@ 12
825	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
826	ldrb	r12,[r1,#2]
827	ldrb	r0,[r1,#1]
828	orr	r2,r2,r12,lsl#8
829	ldrb	r12,[r1],#4
830	orr	r2,r2,r0,lsl#16
831# if 12==15
832	str	r1,[sp,#17*4]			@ make room for r1
833# endif
834	eor	r0,r4,r4,ror#5
835	orr	r2,r2,r12,lsl#24
836	eor	r0,r0,r4,ror#19	@ Sigma1(e)
837#endif
838	ldr	r12,[r14],#4			@ *K256++
839	add	r7,r7,r2			@ h+=X[i]
840	str	r2,[sp,#12*4]
841	eor	r2,r5,r6
842	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
843	and	r2,r2,r4
844	add	r7,r7,r12			@ h+=K256[i]
845	eor	r2,r2,r6			@ Ch(e,f,g)
846	eor	r0,r8,r8,ror#11
847	add	r7,r7,r2			@ h+=Ch(e,f,g)
848#if 12==31
849	and	r12,r12,#0xff
850	cmp	r12,#0xf2			@ done?
851#endif
852#if 12<15
853# if __ARM_ARCH__>=7
854	ldr	r2,[r1],#4			@ prefetch
855# else
856	ldrb	r2,[r1,#3]
857# endif
858	eor	r12,r8,r9			@ a^b, b^c in next round
859#else
860	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
861	eor	r12,r8,r9			@ a^b, b^c in next round
862	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
863#endif
864	eor	r0,r0,r8,ror#20	@ Sigma0(a)
865	and	r3,r3,r12			@ (b^c)&=(a^b)
866	add	r11,r11,r7			@ d+=h
867	eor	r3,r3,r9			@ Maj(a,b,c)
868	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
869	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
870#if __ARM_ARCH__>=7
871	@ ldr	r2,[r1],#4			@ 13
872# if 13==15
873	str	r1,[sp,#17*4]			@ make room for r1
874# endif
875	eor	r0,r11,r11,ror#5
876	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
877	eor	r0,r0,r11,ror#19	@ Sigma1(e)
878# ifndef __ARMEB__
879	rev	r2,r2
880# endif
881#else
882	@ ldrb	r2,[r1,#3]			@ 13
883	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
884	ldrb	r3,[r1,#2]
885	ldrb	r0,[r1,#1]
886	orr	r2,r2,r3,lsl#8
887	ldrb	r3,[r1],#4
888	orr	r2,r2,r0,lsl#16
889# if 13==15
890	str	r1,[sp,#17*4]			@ make room for r1
891# endif
892	eor	r0,r11,r11,ror#5
893	orr	r2,r2,r3,lsl#24
894	eor	r0,r0,r11,ror#19	@ Sigma1(e)
895#endif
896	ldr	r3,[r14],#4			@ *K256++
897	add	r6,r6,r2			@ h+=X[i]
898	str	r2,[sp,#13*4]
899	eor	r2,r4,r5
900	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
901	and	r2,r2,r11
902	add	r6,r6,r3			@ h+=K256[i]
903	eor	r2,r2,r5			@ Ch(e,f,g)
904	eor	r0,r7,r7,ror#11
905	add	r6,r6,r2			@ h+=Ch(e,f,g)
906#if 13==31
907	and	r3,r3,#0xff
908	cmp	r3,#0xf2			@ done?
909#endif
910#if 13<15
911# if __ARM_ARCH__>=7
912	ldr	r2,[r1],#4			@ prefetch
913# else
914	ldrb	r2,[r1,#3]
915# endif
916	eor	r3,r7,r8			@ a^b, b^c in next round
917#else
918	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
919	eor	r3,r7,r8			@ a^b, b^c in next round
920	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
921#endif
922	eor	r0,r0,r7,ror#20	@ Sigma0(a)
923	and	r12,r12,r3			@ (b^c)&=(a^b)
924	add	r10,r10,r6			@ d+=h
925	eor	r12,r12,r8			@ Maj(a,b,c)
926	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
927	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
928#if __ARM_ARCH__>=7
929	@ ldr	r2,[r1],#4			@ 14
930# if 14==15
931	str	r1,[sp,#17*4]			@ make room for r1
932# endif
933	eor	r0,r10,r10,ror#5
934	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
935	eor	r0,r0,r10,ror#19	@ Sigma1(e)
936# ifndef __ARMEB__
937	rev	r2,r2
938# endif
939#else
940	@ ldrb	r2,[r1,#3]			@ 14
941	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
942	ldrb	r12,[r1,#2]
943	ldrb	r0,[r1,#1]
944	orr	r2,r2,r12,lsl#8
945	ldrb	r12,[r1],#4
946	orr	r2,r2,r0,lsl#16
947# if 14==15
948	str	r1,[sp,#17*4]			@ make room for r1
949# endif
950	eor	r0,r10,r10,ror#5
951	orr	r2,r2,r12,lsl#24
952	eor	r0,r0,r10,ror#19	@ Sigma1(e)
953#endif
954	ldr	r12,[r14],#4			@ *K256++
955	add	r5,r5,r2			@ h+=X[i]
956	str	r2,[sp,#14*4]
957	eor	r2,r11,r4
958	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
959	and	r2,r2,r10
960	add	r5,r5,r12			@ h+=K256[i]
961	eor	r2,r2,r4			@ Ch(e,f,g)
962	eor	r0,r6,r6,ror#11
963	add	r5,r5,r2			@ h+=Ch(e,f,g)
964#if 14==31
965	and	r12,r12,#0xff
966	cmp	r12,#0xf2			@ done?
967#endif
968#if 14<15
969# if __ARM_ARCH__>=7
970	ldr	r2,[r1],#4			@ prefetch
971# else
972	ldrb	r2,[r1,#3]
973# endif
974	eor	r12,r6,r7			@ a^b, b^c in next round
975#else
976	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
977	eor	r12,r6,r7			@ a^b, b^c in next round
978	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
979#endif
980	eor	r0,r0,r6,ror#20	@ Sigma0(a)
981	and	r3,r3,r12			@ (b^c)&=(a^b)
982	add	r9,r9,r5			@ d+=h
983	eor	r3,r3,r7			@ Maj(a,b,c)
984	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
985	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
986#if __ARM_ARCH__>=7
987	@ ldr	r2,[r1],#4			@ 15
988# if 15==15
989	str	r1,[sp,#17*4]			@ make room for r1
990# endif
991	eor	r0,r9,r9,ror#5
992	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
993	eor	r0,r0,r9,ror#19	@ Sigma1(e)
994# ifndef __ARMEB__
995	rev	r2,r2
996# endif
997#else
998	@ ldrb	r2,[r1,#3]			@ 15
999	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1000	ldrb	r3,[r1,#2]
1001	ldrb	r0,[r1,#1]
1002	orr	r2,r2,r3,lsl#8
1003	ldrb	r3,[r1],#4
1004	orr	r2,r2,r0,lsl#16
1005# if 15==15
1006	str	r1,[sp,#17*4]			@ make room for r1
1007# endif
1008	eor	r0,r9,r9,ror#5
1009	orr	r2,r2,r3,lsl#24
1010	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1011#endif
1012	ldr	r3,[r14],#4			@ *K256++
1013	add	r4,r4,r2			@ h+=X[i]
1014	str	r2,[sp,#15*4]
1015	eor	r2,r10,r11
1016	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1017	and	r2,r2,r9
1018	add	r4,r4,r3			@ h+=K256[i]
1019	eor	r2,r2,r11			@ Ch(e,f,g)
1020	eor	r0,r5,r5,ror#11
1021	add	r4,r4,r2			@ h+=Ch(e,f,g)
1022#if 15==31
1023	and	r3,r3,#0xff
1024	cmp	r3,#0xf2			@ done?
1025#endif
1026#if 15<15
1027# if __ARM_ARCH__>=7
1028	ldr	r2,[r1],#4			@ prefetch
1029# else
1030	ldrb	r2,[r1,#3]
1031# endif
1032	eor	r3,r5,r6			@ a^b, b^c in next round
1033#else
1034	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1035	eor	r3,r5,r6			@ a^b, b^c in next round
1036	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1037#endif
1038	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1039	and	r12,r12,r3			@ (b^c)&=(a^b)
1040	add	r8,r8,r4			@ d+=h
1041	eor	r12,r12,r6			@ Maj(a,b,c)
1042	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1043	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1044.Lrounds_16_xx:
1045	@ ldr	r2,[sp,#1*4]		@ 16
1046	@ ldr	r1,[sp,#14*4]
1047	mov	r0,r2,ror#7
1048	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1049	mov	r12,r1,ror#17
1050	eor	r0,r0,r2,ror#18
1051	eor	r12,r12,r1,ror#19
1052	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1053	ldr	r2,[sp,#0*4]
1054	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1055	ldr	r1,[sp,#9*4]
1056
1057	add	r12,r12,r0
1058	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1059	add	r2,r2,r12
1060	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1061	add	r2,r2,r1			@ X[i]
1062	ldr	r12,[r14],#4			@ *K256++
1063	add	r11,r11,r2			@ h+=X[i]
1064	str	r2,[sp,#0*4]
1065	eor	r2,r9,r10
1066	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1067	and	r2,r2,r8
1068	add	r11,r11,r12			@ h+=K256[i]
1069	eor	r2,r2,r10			@ Ch(e,f,g)
1070	eor	r0,r4,r4,ror#11
1071	add	r11,r11,r2			@ h+=Ch(e,f,g)
1072#if 16==31
1073	and	r12,r12,#0xff
1074	cmp	r12,#0xf2			@ done?
1075#endif
1076#if 16<15
1077# if __ARM_ARCH__>=7
1078	ldr	r2,[r1],#4			@ prefetch
1079# else
1080	ldrb	r2,[r1,#3]
1081# endif
1082	eor	r12,r4,r5			@ a^b, b^c in next round
1083#else
1084	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1085	eor	r12,r4,r5			@ a^b, b^c in next round
1086	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1087#endif
1088	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1089	and	r3,r3,r12			@ (b^c)&=(a^b)
1090	add	r7,r7,r11			@ d+=h
1091	eor	r3,r3,r5			@ Maj(a,b,c)
1092	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1093	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1094	@ ldr	r2,[sp,#2*4]		@ 17
1095	@ ldr	r1,[sp,#15*4]
1096	mov	r0,r2,ror#7
1097	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1098	mov	r3,r1,ror#17
1099	eor	r0,r0,r2,ror#18
1100	eor	r3,r3,r1,ror#19
1101	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1102	ldr	r2,[sp,#1*4]
1103	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1104	ldr	r1,[sp,#10*4]
1105
1106	add	r3,r3,r0
1107	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1108	add	r2,r2,r3
1109	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1110	add	r2,r2,r1			@ X[i]
1111	ldr	r3,[r14],#4			@ *K256++
1112	add	r10,r10,r2			@ h+=X[i]
1113	str	r2,[sp,#1*4]
1114	eor	r2,r8,r9
1115	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1116	and	r2,r2,r7
1117	add	r10,r10,r3			@ h+=K256[i]
1118	eor	r2,r2,r9			@ Ch(e,f,g)
1119	eor	r0,r11,r11,ror#11
1120	add	r10,r10,r2			@ h+=Ch(e,f,g)
1121#if 17==31
1122	and	r3,r3,#0xff
1123	cmp	r3,#0xf2			@ done?
1124#endif
1125#if 17<15
1126# if __ARM_ARCH__>=7
1127	ldr	r2,[r1],#4			@ prefetch
1128# else
1129	ldrb	r2,[r1,#3]
1130# endif
1131	eor	r3,r11,r4			@ a^b, b^c in next round
1132#else
1133	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1134	eor	r3,r11,r4			@ a^b, b^c in next round
1135	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1136#endif
1137	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1138	and	r12,r12,r3			@ (b^c)&=(a^b)
1139	add	r6,r6,r10			@ d+=h
1140	eor	r12,r12,r4			@ Maj(a,b,c)
1141	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1142	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1143	@ ldr	r2,[sp,#3*4]		@ 18
1144	@ ldr	r1,[sp,#0*4]
1145	mov	r0,r2,ror#7
1146	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1147	mov	r12,r1,ror#17
1148	eor	r0,r0,r2,ror#18
1149	eor	r12,r12,r1,ror#19
1150	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1151	ldr	r2,[sp,#2*4]
1152	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1153	ldr	r1,[sp,#11*4]
1154
1155	add	r12,r12,r0
1156	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1157	add	r2,r2,r12
1158	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1159	add	r2,r2,r1			@ X[i]
1160	ldr	r12,[r14],#4			@ *K256++
1161	add	r9,r9,r2			@ h+=X[i]
1162	str	r2,[sp,#2*4]
1163	eor	r2,r7,r8
1164	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1165	and	r2,r2,r6
1166	add	r9,r9,r12			@ h+=K256[i]
1167	eor	r2,r2,r8			@ Ch(e,f,g)
1168	eor	r0,r10,r10,ror#11
1169	add	r9,r9,r2			@ h+=Ch(e,f,g)
1170#if 18==31
1171	and	r12,r12,#0xff
1172	cmp	r12,#0xf2			@ done?
1173#endif
1174#if 18<15
1175# if __ARM_ARCH__>=7
1176	ldr	r2,[r1],#4			@ prefetch
1177# else
1178	ldrb	r2,[r1,#3]
1179# endif
1180	eor	r12,r10,r11			@ a^b, b^c in next round
1181#else
1182	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1183	eor	r12,r10,r11			@ a^b, b^c in next round
1184	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1185#endif
1186	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1187	and	r3,r3,r12			@ (b^c)&=(a^b)
1188	add	r5,r5,r9			@ d+=h
1189	eor	r3,r3,r11			@ Maj(a,b,c)
1190	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1191	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1192	@ ldr	r2,[sp,#4*4]		@ 19
1193	@ ldr	r1,[sp,#1*4]
1194	mov	r0,r2,ror#7
1195	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1196	mov	r3,r1,ror#17
1197	eor	r0,r0,r2,ror#18
1198	eor	r3,r3,r1,ror#19
1199	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1200	ldr	r2,[sp,#3*4]
1201	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1202	ldr	r1,[sp,#12*4]
1203
1204	add	r3,r3,r0
1205	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1206	add	r2,r2,r3
1207	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1208	add	r2,r2,r1			@ X[i]
1209	ldr	r3,[r14],#4			@ *K256++
1210	add	r8,r8,r2			@ h+=X[i]
1211	str	r2,[sp,#3*4]
1212	eor	r2,r6,r7
1213	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1214	and	r2,r2,r5
1215	add	r8,r8,r3			@ h+=K256[i]
1216	eor	r2,r2,r7			@ Ch(e,f,g)
1217	eor	r0,r9,r9,ror#11
1218	add	r8,r8,r2			@ h+=Ch(e,f,g)
1219#if 19==31
1220	and	r3,r3,#0xff
1221	cmp	r3,#0xf2			@ done?
1222#endif
1223#if 19<15
1224# if __ARM_ARCH__>=7
1225	ldr	r2,[r1],#4			@ prefetch
1226# else
1227	ldrb	r2,[r1,#3]
1228# endif
1229	eor	r3,r9,r10			@ a^b, b^c in next round
1230#else
1231	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1232	eor	r3,r9,r10			@ a^b, b^c in next round
1233	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1234#endif
1235	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1236	and	r12,r12,r3			@ (b^c)&=(a^b)
1237	add	r4,r4,r8			@ d+=h
1238	eor	r12,r12,r10			@ Maj(a,b,c)
1239	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1240	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1241	@ ldr	r2,[sp,#5*4]		@ 20
1242	@ ldr	r1,[sp,#2*4]
1243	mov	r0,r2,ror#7
1244	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1245	mov	r12,r1,ror#17
1246	eor	r0,r0,r2,ror#18
1247	eor	r12,r12,r1,ror#19
1248	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1249	ldr	r2,[sp,#4*4]
1250	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1251	ldr	r1,[sp,#13*4]
1252
1253	add	r12,r12,r0
1254	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1255	add	r2,r2,r12
1256	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1257	add	r2,r2,r1			@ X[i]
1258	ldr	r12,[r14],#4			@ *K256++
1259	add	r7,r7,r2			@ h+=X[i]
1260	str	r2,[sp,#4*4]
1261	eor	r2,r5,r6
1262	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1263	and	r2,r2,r4
1264	add	r7,r7,r12			@ h+=K256[i]
1265	eor	r2,r2,r6			@ Ch(e,f,g)
1266	eor	r0,r8,r8,ror#11
1267	add	r7,r7,r2			@ h+=Ch(e,f,g)
1268#if 20==31
1269	and	r12,r12,#0xff
1270	cmp	r12,#0xf2			@ done?
1271#endif
1272#if 20<15
1273# if __ARM_ARCH__>=7
1274	ldr	r2,[r1],#4			@ prefetch
1275# else
1276	ldrb	r2,[r1,#3]
1277# endif
1278	eor	r12,r8,r9			@ a^b, b^c in next round
1279#else
1280	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1281	eor	r12,r8,r9			@ a^b, b^c in next round
1282	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1283#endif
1284	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1285	and	r3,r3,r12			@ (b^c)&=(a^b)
1286	add	r11,r11,r7			@ d+=h
1287	eor	r3,r3,r9			@ Maj(a,b,c)
1288	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1289	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1290	@ ldr	r2,[sp,#6*4]		@ 21
1291	@ ldr	r1,[sp,#3*4]
1292	mov	r0,r2,ror#7
1293	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1294	mov	r3,r1,ror#17
1295	eor	r0,r0,r2,ror#18
1296	eor	r3,r3,r1,ror#19
1297	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1298	ldr	r2,[sp,#5*4]
1299	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1300	ldr	r1,[sp,#14*4]
1301
1302	add	r3,r3,r0
1303	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1304	add	r2,r2,r3
1305	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1306	add	r2,r2,r1			@ X[i]
1307	ldr	r3,[r14],#4			@ *K256++
1308	add	r6,r6,r2			@ h+=X[i]
1309	str	r2,[sp,#5*4]
1310	eor	r2,r4,r5
1311	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1312	and	r2,r2,r11
1313	add	r6,r6,r3			@ h+=K256[i]
1314	eor	r2,r2,r5			@ Ch(e,f,g)
1315	eor	r0,r7,r7,ror#11
1316	add	r6,r6,r2			@ h+=Ch(e,f,g)
1317#if 21==31
1318	and	r3,r3,#0xff
1319	cmp	r3,#0xf2			@ done?
1320#endif
1321#if 21<15
1322# if __ARM_ARCH__>=7
1323	ldr	r2,[r1],#4			@ prefetch
1324# else
1325	ldrb	r2,[r1,#3]
1326# endif
1327	eor	r3,r7,r8			@ a^b, b^c in next round
1328#else
1329	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1330	eor	r3,r7,r8			@ a^b, b^c in next round
1331	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1332#endif
1333	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1334	and	r12,r12,r3			@ (b^c)&=(a^b)
1335	add	r10,r10,r6			@ d+=h
1336	eor	r12,r12,r8			@ Maj(a,b,c)
1337	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1338	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1339	@ ldr	r2,[sp,#7*4]		@ 22
1340	@ ldr	r1,[sp,#4*4]
1341	mov	r0,r2,ror#7
1342	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1343	mov	r12,r1,ror#17
1344	eor	r0,r0,r2,ror#18
1345	eor	r12,r12,r1,ror#19
1346	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1347	ldr	r2,[sp,#6*4]
1348	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1349	ldr	r1,[sp,#15*4]
1350
1351	add	r12,r12,r0
1352	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1353	add	r2,r2,r12
1354	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1355	add	r2,r2,r1			@ X[i]
1356	ldr	r12,[r14],#4			@ *K256++
1357	add	r5,r5,r2			@ h+=X[i]
1358	str	r2,[sp,#6*4]
1359	eor	r2,r11,r4
1360	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1361	and	r2,r2,r10
1362	add	r5,r5,r12			@ h+=K256[i]
1363	eor	r2,r2,r4			@ Ch(e,f,g)
1364	eor	r0,r6,r6,ror#11
1365	add	r5,r5,r2			@ h+=Ch(e,f,g)
1366#if 22==31
1367	and	r12,r12,#0xff
1368	cmp	r12,#0xf2			@ done?
1369#endif
1370#if 22<15
1371# if __ARM_ARCH__>=7
1372	ldr	r2,[r1],#4			@ prefetch
1373# else
1374	ldrb	r2,[r1,#3]
1375# endif
1376	eor	r12,r6,r7			@ a^b, b^c in next round
1377#else
1378	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1379	eor	r12,r6,r7			@ a^b, b^c in next round
1380	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1381#endif
1382	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1383	and	r3,r3,r12			@ (b^c)&=(a^b)
1384	add	r9,r9,r5			@ d+=h
1385	eor	r3,r3,r7			@ Maj(a,b,c)
1386	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1387	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1388	@ ldr	r2,[sp,#8*4]		@ 23
1389	@ ldr	r1,[sp,#5*4]
1390	mov	r0,r2,ror#7
1391	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1392	mov	r3,r1,ror#17
1393	eor	r0,r0,r2,ror#18
1394	eor	r3,r3,r1,ror#19
1395	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1396	ldr	r2,[sp,#7*4]
1397	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1398	ldr	r1,[sp,#0*4]
1399
1400	add	r3,r3,r0
1401	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1402	add	r2,r2,r3
1403	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1404	add	r2,r2,r1			@ X[i]
1405	ldr	r3,[r14],#4			@ *K256++
1406	add	r4,r4,r2			@ h+=X[i]
1407	str	r2,[sp,#7*4]
1408	eor	r2,r10,r11
1409	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1410	and	r2,r2,r9
1411	add	r4,r4,r3			@ h+=K256[i]
1412	eor	r2,r2,r11			@ Ch(e,f,g)
1413	eor	r0,r5,r5,ror#11
1414	add	r4,r4,r2			@ h+=Ch(e,f,g)
1415#if 23==31
1416	and	r3,r3,#0xff
1417	cmp	r3,#0xf2			@ done?
1418#endif
1419#if 23<15
1420# if __ARM_ARCH__>=7
1421	ldr	r2,[r1],#4			@ prefetch
1422# else
1423	ldrb	r2,[r1,#3]
1424# endif
1425	eor	r3,r5,r6			@ a^b, b^c in next round
1426#else
1427	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1428	eor	r3,r5,r6			@ a^b, b^c in next round
1429	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1430#endif
1431	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1432	and	r12,r12,r3			@ (b^c)&=(a^b)
1433	add	r8,r8,r4			@ d+=h
1434	eor	r12,r12,r6			@ Maj(a,b,c)
1435	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1436	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1437	@ ldr	r2,[sp,#9*4]		@ 24
1438	@ ldr	r1,[sp,#6*4]
1439	mov	r0,r2,ror#7
1440	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1441	mov	r12,r1,ror#17
1442	eor	r0,r0,r2,ror#18
1443	eor	r12,r12,r1,ror#19
1444	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1445	ldr	r2,[sp,#8*4]
1446	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1447	ldr	r1,[sp,#1*4]
1448
1449	add	r12,r12,r0
1450	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1451	add	r2,r2,r12
1452	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1453	add	r2,r2,r1			@ X[i]
1454	ldr	r12,[r14],#4			@ *K256++
1455	add	r11,r11,r2			@ h+=X[i]
1456	str	r2,[sp,#8*4]
1457	eor	r2,r9,r10
1458	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1459	and	r2,r2,r8
1460	add	r11,r11,r12			@ h+=K256[i]
1461	eor	r2,r2,r10			@ Ch(e,f,g)
1462	eor	r0,r4,r4,ror#11
1463	add	r11,r11,r2			@ h+=Ch(e,f,g)
1464#if 24==31
1465	and	r12,r12,#0xff
1466	cmp	r12,#0xf2			@ done?
1467#endif
1468#if 24<15
1469# if __ARM_ARCH__>=7
1470	ldr	r2,[r1],#4			@ prefetch
1471# else
1472	ldrb	r2,[r1,#3]
1473# endif
1474	eor	r12,r4,r5			@ a^b, b^c in next round
1475#else
1476	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1477	eor	r12,r4,r5			@ a^b, b^c in next round
1478	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1479#endif
1480	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1481	and	r3,r3,r12			@ (b^c)&=(a^b)
1482	add	r7,r7,r11			@ d+=h
1483	eor	r3,r3,r5			@ Maj(a,b,c)
1484	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1485	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1486	@ ldr	r2,[sp,#10*4]		@ 25
1487	@ ldr	r1,[sp,#7*4]
1488	mov	r0,r2,ror#7
1489	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1490	mov	r3,r1,ror#17
1491	eor	r0,r0,r2,ror#18
1492	eor	r3,r3,r1,ror#19
1493	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1494	ldr	r2,[sp,#9*4]
1495	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1496	ldr	r1,[sp,#2*4]
1497
1498	add	r3,r3,r0
1499	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1500	add	r2,r2,r3
1501	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1502	add	r2,r2,r1			@ X[i]
1503	ldr	r3,[r14],#4			@ *K256++
1504	add	r10,r10,r2			@ h+=X[i]
1505	str	r2,[sp,#9*4]
1506	eor	r2,r8,r9
1507	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1508	and	r2,r2,r7
1509	add	r10,r10,r3			@ h+=K256[i]
1510	eor	r2,r2,r9			@ Ch(e,f,g)
1511	eor	r0,r11,r11,ror#11
1512	add	r10,r10,r2			@ h+=Ch(e,f,g)
1513#if 25==31
1514	and	r3,r3,#0xff
1515	cmp	r3,#0xf2			@ done?
1516#endif
1517#if 25<15
1518# if __ARM_ARCH__>=7
1519	ldr	r2,[r1],#4			@ prefetch
1520# else
1521	ldrb	r2,[r1,#3]
1522# endif
1523	eor	r3,r11,r4			@ a^b, b^c in next round
1524#else
1525	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1526	eor	r3,r11,r4			@ a^b, b^c in next round
1527	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1528#endif
1529	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1530	and	r12,r12,r3			@ (b^c)&=(a^b)
1531	add	r6,r6,r10			@ d+=h
1532	eor	r12,r12,r4			@ Maj(a,b,c)
1533	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1534	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1535	@ ldr	r2,[sp,#11*4]		@ 26
1536	@ ldr	r1,[sp,#8*4]
1537	mov	r0,r2,ror#7
1538	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1539	mov	r12,r1,ror#17
1540	eor	r0,r0,r2,ror#18
1541	eor	r12,r12,r1,ror#19
1542	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1543	ldr	r2,[sp,#10*4]
1544	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1545	ldr	r1,[sp,#3*4]
1546
1547	add	r12,r12,r0
1548	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1549	add	r2,r2,r12
1550	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1551	add	r2,r2,r1			@ X[i]
1552	ldr	r12,[r14],#4			@ *K256++
1553	add	r9,r9,r2			@ h+=X[i]
1554	str	r2,[sp,#10*4]
1555	eor	r2,r7,r8
1556	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1557	and	r2,r2,r6
1558	add	r9,r9,r12			@ h+=K256[i]
1559	eor	r2,r2,r8			@ Ch(e,f,g)
1560	eor	r0,r10,r10,ror#11
1561	add	r9,r9,r2			@ h+=Ch(e,f,g)
1562#if 26==31
1563	and	r12,r12,#0xff
1564	cmp	r12,#0xf2			@ done?
1565#endif
1566#if 26<15
1567# if __ARM_ARCH__>=7
1568	ldr	r2,[r1],#4			@ prefetch
1569# else
1570	ldrb	r2,[r1,#3]
1571# endif
1572	eor	r12,r10,r11			@ a^b, b^c in next round
1573#else
1574	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1575	eor	r12,r10,r11			@ a^b, b^c in next round
1576	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1577#endif
1578	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1579	and	r3,r3,r12			@ (b^c)&=(a^b)
1580	add	r5,r5,r9			@ d+=h
1581	eor	r3,r3,r11			@ Maj(a,b,c)
1582	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1583	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1584	@ ldr	r2,[sp,#12*4]		@ 27
1585	@ ldr	r1,[sp,#9*4]
1586	mov	r0,r2,ror#7
1587	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1588	mov	r3,r1,ror#17
1589	eor	r0,r0,r2,ror#18
1590	eor	r3,r3,r1,ror#19
1591	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1592	ldr	r2,[sp,#11*4]
1593	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1594	ldr	r1,[sp,#4*4]
1595
1596	add	r3,r3,r0
1597	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1598	add	r2,r2,r3
1599	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1600	add	r2,r2,r1			@ X[i]
1601	ldr	r3,[r14],#4			@ *K256++
1602	add	r8,r8,r2			@ h+=X[i]
1603	str	r2,[sp,#11*4]
1604	eor	r2,r6,r7
1605	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1606	and	r2,r2,r5
1607	add	r8,r8,r3			@ h+=K256[i]
1608	eor	r2,r2,r7			@ Ch(e,f,g)
1609	eor	r0,r9,r9,ror#11
1610	add	r8,r8,r2			@ h+=Ch(e,f,g)
1611#if 27==31
1612	and	r3,r3,#0xff
1613	cmp	r3,#0xf2			@ done?
1614#endif
1615#if 27<15
1616# if __ARM_ARCH__>=7
1617	ldr	r2,[r1],#4			@ prefetch
1618# else
1619	ldrb	r2,[r1,#3]
1620# endif
1621	eor	r3,r9,r10			@ a^b, b^c in next round
1622#else
1623	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1624	eor	r3,r9,r10			@ a^b, b^c in next round
1625	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1626#endif
1627	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1628	and	r12,r12,r3			@ (b^c)&=(a^b)
1629	add	r4,r4,r8			@ d+=h
1630	eor	r12,r12,r10			@ Maj(a,b,c)
1631	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1632	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1633	@ ldr	r2,[sp,#13*4]		@ 28
1634	@ ldr	r1,[sp,#10*4]
1635	mov	r0,r2,ror#7
1636	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1637	mov	r12,r1,ror#17
1638	eor	r0,r0,r2,ror#18
1639	eor	r12,r12,r1,ror#19
1640	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1641	ldr	r2,[sp,#12*4]
1642	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1643	ldr	r1,[sp,#5*4]
1644
1645	add	r12,r12,r0
1646	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1647	add	r2,r2,r12
1648	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1649	add	r2,r2,r1			@ X[i]
1650	ldr	r12,[r14],#4			@ *K256++
1651	add	r7,r7,r2			@ h+=X[i]
1652	str	r2,[sp,#12*4]
1653	eor	r2,r5,r6
1654	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1655	and	r2,r2,r4
1656	add	r7,r7,r12			@ h+=K256[i]
1657	eor	r2,r2,r6			@ Ch(e,f,g)
1658	eor	r0,r8,r8,ror#11
1659	add	r7,r7,r2			@ h+=Ch(e,f,g)
1660#if 28==31
1661	and	r12,r12,#0xff
1662	cmp	r12,#0xf2			@ done?
1663#endif
1664#if 28<15
1665# if __ARM_ARCH__>=7
1666	ldr	r2,[r1],#4			@ prefetch
1667# else
1668	ldrb	r2,[r1,#3]
1669# endif
1670	eor	r12,r8,r9			@ a^b, b^c in next round
1671#else
1672	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1673	eor	r12,r8,r9			@ a^b, b^c in next round
1674	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1675#endif
1676	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1677	and	r3,r3,r12			@ (b^c)&=(a^b)
1678	add	r11,r11,r7			@ d+=h
1679	eor	r3,r3,r9			@ Maj(a,b,c)
1680	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1681	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1682	@ ldr	r2,[sp,#14*4]		@ 29
1683	@ ldr	r1,[sp,#11*4]
1684	mov	r0,r2,ror#7
1685	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1686	mov	r3,r1,ror#17
1687	eor	r0,r0,r2,ror#18
1688	eor	r3,r3,r1,ror#19
1689	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1690	ldr	r2,[sp,#13*4]
1691	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1692	ldr	r1,[sp,#6*4]
1693
1694	add	r3,r3,r0
1695	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1696	add	r2,r2,r3
1697	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1698	add	r2,r2,r1			@ X[i]
1699	ldr	r3,[r14],#4			@ *K256++
1700	add	r6,r6,r2			@ h+=X[i]
1701	str	r2,[sp,#13*4]
1702	eor	r2,r4,r5
1703	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1704	and	r2,r2,r11
1705	add	r6,r6,r3			@ h+=K256[i]
1706	eor	r2,r2,r5			@ Ch(e,f,g)
1707	eor	r0,r7,r7,ror#11
1708	add	r6,r6,r2			@ h+=Ch(e,f,g)
1709#if 29==31
1710	and	r3,r3,#0xff
1711	cmp	r3,#0xf2			@ done?
1712#endif
1713#if 29<15
1714# if __ARM_ARCH__>=7
1715	ldr	r2,[r1],#4			@ prefetch
1716# else
1717	ldrb	r2,[r1,#3]
1718# endif
1719	eor	r3,r7,r8			@ a^b, b^c in next round
1720#else
1721	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1722	eor	r3,r7,r8			@ a^b, b^c in next round
1723	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1724#endif
1725	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1726	and	r12,r12,r3			@ (b^c)&=(a^b)
1727	add	r10,r10,r6			@ d+=h
1728	eor	r12,r12,r8			@ Maj(a,b,c)
1729	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1730	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1731	@ ldr	r2,[sp,#15*4]		@ 30
1732	@ ldr	r1,[sp,#12*4]
1733	mov	r0,r2,ror#7
1734	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1735	mov	r12,r1,ror#17
1736	eor	r0,r0,r2,ror#18
1737	eor	r12,r12,r1,ror#19
1738	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1739	ldr	r2,[sp,#14*4]
1740	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1741	ldr	r1,[sp,#7*4]
1742
1743	add	r12,r12,r0
1744	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1745	add	r2,r2,r12
1746	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1747	add	r2,r2,r1			@ X[i]
1748	ldr	r12,[r14],#4			@ *K256++
1749	add	r5,r5,r2			@ h+=X[i]
1750	str	r2,[sp,#14*4]
1751	eor	r2,r11,r4
1752	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1753	and	r2,r2,r10
1754	add	r5,r5,r12			@ h+=K256[i]
1755	eor	r2,r2,r4			@ Ch(e,f,g)
1756	eor	r0,r6,r6,ror#11
1757	add	r5,r5,r2			@ h+=Ch(e,f,g)
1758#if 30==31
1759	and	r12,r12,#0xff
1760	cmp	r12,#0xf2			@ done?
1761#endif
1762#if 30<15
1763# if __ARM_ARCH__>=7
1764	ldr	r2,[r1],#4			@ prefetch
1765# else
1766	ldrb	r2,[r1,#3]
1767# endif
1768	eor	r12,r6,r7			@ a^b, b^c in next round
1769#else
1770	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1771	eor	r12,r6,r7			@ a^b, b^c in next round
1772	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1773#endif
1774	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1775	and	r3,r3,r12			@ (b^c)&=(a^b)
1776	add	r9,r9,r5			@ d+=h
1777	eor	r3,r3,r7			@ Maj(a,b,c)
1778	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1779	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1780	@ ldr	r2,[sp,#0*4]		@ 31
1781	@ ldr	r1,[sp,#13*4]
1782	mov	r0,r2,ror#7
1783	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1784	mov	r3,r1,ror#17
1785	eor	r0,r0,r2,ror#18
1786	eor	r3,r3,r1,ror#19
1787	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1788	ldr	r2,[sp,#15*4]
1789	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1790	ldr	r1,[sp,#8*4]
1791
1792	add	r3,r3,r0
1793	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1794	add	r2,r2,r3
1795	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1796	add	r2,r2,r1			@ X[i]
1797	ldr	r3,[r14],#4			@ *K256++
1798	add	r4,r4,r2			@ h+=X[i]
1799	str	r2,[sp,#15*4]
1800	eor	r2,r10,r11
1801	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1802	and	r2,r2,r9
1803	add	r4,r4,r3			@ h+=K256[i]
1804	eor	r2,r2,r11			@ Ch(e,f,g)
1805	eor	r0,r5,r5,ror#11
1806	add	r4,r4,r2			@ h+=Ch(e,f,g)
1807#if 31==31
1808	and	r3,r3,#0xff
1809	cmp	r3,#0xf2			@ done?
1810#endif
1811#if 31<15
1812# if __ARM_ARCH__>=7
1813	ldr	r2,[r1],#4			@ prefetch
1814# else
1815	ldrb	r2,[r1,#3]
1816# endif
1817	eor	r3,r5,r6			@ a^b, b^c in next round
1818#else
1819	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1820	eor	r3,r5,r6			@ a^b, b^c in next round
1821	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1822#endif
1823	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1824	and	r12,r12,r3			@ (b^c)&=(a^b)
1825	add	r8,r8,r4			@ d+=h
1826	eor	r12,r12,r6			@ Maj(a,b,c)
1827	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1828	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1829#if __ARM_ARCH__>=7
1830	ite	eq			@ Thumb2 thing, sanity check in ARM
1831#endif
1832	ldreq	r3,[sp,#16*4]		@ pull ctx
1833	bne	.Lrounds_16_xx
1834
1835	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1836	ldr	r0,[r3,#0]
1837	ldr	r2,[r3,#4]
1838	ldr	r12,[r3,#8]
1839	add	r4,r4,r0
1840	ldr	r0,[r3,#12]
1841	add	r5,r5,r2
1842	ldr	r2,[r3,#16]
1843	add	r6,r6,r12
1844	ldr	r12,[r3,#20]
1845	add	r7,r7,r0
1846	ldr	r0,[r3,#24]
1847	add	r8,r8,r2
1848	ldr	r2,[r3,#28]
1849	add	r9,r9,r12
1850	ldr	r1,[sp,#17*4]		@ pull inp
1851	ldr	r12,[sp,#18*4]		@ pull inp+len
1852	add	r10,r10,r0
1853	add	r11,r11,r2
1854	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1855	cmp	r1,r12
1856	sub	r14,r14,#256	@ rewind Ktbl
1857	bne	.Loop
1858
1859	add	sp,sp,#19*4	@ destroy frame
1860#if __ARM_ARCH__>=5
1861	ldmia	sp!,{r4-r11,pc}
1862#else
1863	ldmia	sp!,{r4-r11,lr}
1864	tst	lr,#1
1865	moveq	pc,lr			@ be binary compatible with V4, yet
1866	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1867#endif
1868.size	sha256_block_data_order,.-sha256_block_data_order
1869#if __ARM_MAX_ARCH__>=7
1870.arch	armv7-a
1871.fpu	neon
1872
1873.global	sha256_block_data_order_neon
1874.type	sha256_block_data_order_neon,%function
1875.align	4
1876sha256_block_data_order_neon:
1877.LNEON:
1878	stmdb	sp!,{r4-r12,lr}
1879
1880	sub	r11,sp,#16*4+16
1881	adrl	r14,K256
1882	bic	r11,r11,#15		@ align for 128-bit stores
1883	mov	r12,sp
1884	mov	sp,r11			@ alloca
1885	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1886
1887	vld1.8		{q0},[r1]!
1888	vld1.8		{q1},[r1]!
1889	vld1.8		{q2},[r1]!
1890	vld1.8		{q3},[r1]!
1891	vld1.32		{q8},[r14,:128]!
1892	vld1.32		{q9},[r14,:128]!
1893	vld1.32		{q10},[r14,:128]!
1894	vld1.32		{q11},[r14,:128]!
1895	vrev32.8	q0,q0		@ yes, even on
1896	str		r0,[sp,#64]
1897	vrev32.8	q1,q1		@ big-endian
1898	str		r1,[sp,#68]
1899	mov		r1,sp
1900	vrev32.8	q2,q2
1901	str		r2,[sp,#72]
1902	vrev32.8	q3,q3
1903	str		r12,[sp,#76]		@ save original sp
1904	vadd.i32	q8,q8,q0
1905	vadd.i32	q9,q9,q1
1906	vst1.32		{q8},[r1,:128]!
1907	vadd.i32	q10,q10,q2
1908	vst1.32		{q9},[r1,:128]!
1909	vadd.i32	q11,q11,q3
1910	vst1.32		{q10},[r1,:128]!
1911	vst1.32		{q11},[r1,:128]!
1912
1913	ldmia		r0,{r4-r11}
1914	sub		r1,r1,#64
1915	ldr		r2,[sp,#0]
1916	eor		r12,r12,r12
1917	eor		r3,r5,r6
1918	b		.L_00_48
1919
1920.align	4
1921.L_00_48:
1922	vext.8	q8,q0,q1,#4
1923	add	r11,r11,r2
1924	eor	r2,r9,r10
1925	eor	r0,r8,r8,ror#5
1926	vext.8	q9,q2,q3,#4
1927	add	r4,r4,r12
1928	and	r2,r2,r8
1929	eor	r12,r0,r8,ror#19
1930	vshr.u32	q10,q8,#7
1931	eor	r0,r4,r4,ror#11
1932	eor	r2,r2,r10
1933	vadd.i32	q0,q0,q9
1934	add	r11,r11,r12,ror#6
1935	eor	r12,r4,r5
1936	vshr.u32	q9,q8,#3
1937	eor	r0,r0,r4,ror#20
1938	add	r11,r11,r2
1939	vsli.32	q10,q8,#25
1940	ldr	r2,[sp,#4]
1941	and	r3,r3,r12
1942	vshr.u32	q11,q8,#18
1943	add	r7,r7,r11
1944	add	r11,r11,r0,ror#2
1945	eor	r3,r3,r5
1946	veor	q9,q9,q10
1947	add	r10,r10,r2
1948	vsli.32	q11,q8,#14
1949	eor	r2,r8,r9
1950	eor	r0,r7,r7,ror#5
1951	vshr.u32	d24,d7,#17
1952	add	r11,r11,r3
1953	and	r2,r2,r7
1954	veor	q9,q9,q11
1955	eor	r3,r0,r7,ror#19
1956	eor	r0,r11,r11,ror#11
1957	vsli.32	d24,d7,#15
1958	eor	r2,r2,r9
1959	add	r10,r10,r3,ror#6
1960	vshr.u32	d25,d7,#10
1961	eor	r3,r11,r4
1962	eor	r0,r0,r11,ror#20
1963	vadd.i32	q0,q0,q9
1964	add	r10,r10,r2
1965	ldr	r2,[sp,#8]
1966	veor	d25,d25,d24
1967	and	r12,r12,r3
1968	add	r6,r6,r10
1969	vshr.u32	d24,d7,#19
1970	add	r10,r10,r0,ror#2
1971	eor	r12,r12,r4
1972	vsli.32	d24,d7,#13
1973	add	r9,r9,r2
1974	eor	r2,r7,r8
1975	veor	d25,d25,d24
1976	eor	r0,r6,r6,ror#5
1977	add	r10,r10,r12
1978	vadd.i32	d0,d0,d25
1979	and	r2,r2,r6
1980	eor	r12,r0,r6,ror#19
1981	vshr.u32	d24,d0,#17
1982	eor	r0,r10,r10,ror#11
1983	eor	r2,r2,r8
1984	vsli.32	d24,d0,#15
1985	add	r9,r9,r12,ror#6
1986	eor	r12,r10,r11
1987	vshr.u32	d25,d0,#10
1988	eor	r0,r0,r10,ror#20
1989	add	r9,r9,r2
1990	veor	d25,d25,d24
1991	ldr	r2,[sp,#12]
1992	and	r3,r3,r12
1993	vshr.u32	d24,d0,#19
1994	add	r5,r5,r9
1995	add	r9,r9,r0,ror#2
1996	eor	r3,r3,r11
1997	vld1.32	{q8},[r14,:128]!
1998	add	r8,r8,r2
1999	vsli.32	d24,d0,#13
2000	eor	r2,r6,r7
2001	eor	r0,r5,r5,ror#5
2002	veor	d25,d25,d24
2003	add	r9,r9,r3
2004	and	r2,r2,r5
2005	vadd.i32	d1,d1,d25
2006	eor	r3,r0,r5,ror#19
2007	eor	r0,r9,r9,ror#11
2008	vadd.i32	q8,q8,q0
2009	eor	r2,r2,r7
2010	add	r8,r8,r3,ror#6
2011	eor	r3,r9,r10
2012	eor	r0,r0,r9,ror#20
2013	add	r8,r8,r2
2014	ldr	r2,[sp,#16]
2015	and	r12,r12,r3
2016	add	r4,r4,r8
2017	vst1.32	{q8},[r1,:128]!
2018	add	r8,r8,r0,ror#2
2019	eor	r12,r12,r10
2020	vext.8	q8,q1,q2,#4
2021	add	r7,r7,r2
2022	eor	r2,r5,r6
2023	eor	r0,r4,r4,ror#5
2024	vext.8	q9,q3,q0,#4
2025	add	r8,r8,r12
2026	and	r2,r2,r4
2027	eor	r12,r0,r4,ror#19
2028	vshr.u32	q10,q8,#7
2029	eor	r0,r8,r8,ror#11
2030	eor	r2,r2,r6
2031	vadd.i32	q1,q1,q9
2032	add	r7,r7,r12,ror#6
2033	eor	r12,r8,r9
2034	vshr.u32	q9,q8,#3
2035	eor	r0,r0,r8,ror#20
2036	add	r7,r7,r2
2037	vsli.32	q10,q8,#25
2038	ldr	r2,[sp,#20]
2039	and	r3,r3,r12
2040	vshr.u32	q11,q8,#18
2041	add	r11,r11,r7
2042	add	r7,r7,r0,ror#2
2043	eor	r3,r3,r9
2044	veor	q9,q9,q10
2045	add	r6,r6,r2
2046	vsli.32	q11,q8,#14
2047	eor	r2,r4,r5
2048	eor	r0,r11,r11,ror#5
2049	vshr.u32	d24,d1,#17
2050	add	r7,r7,r3
2051	and	r2,r2,r11
2052	veor	q9,q9,q11
2053	eor	r3,r0,r11,ror#19
2054	eor	r0,r7,r7,ror#11
2055	vsli.32	d24,d1,#15
2056	eor	r2,r2,r5
2057	add	r6,r6,r3,ror#6
2058	vshr.u32	d25,d1,#10
2059	eor	r3,r7,r8
2060	eor	r0,r0,r7,ror#20
2061	vadd.i32	q1,q1,q9
2062	add	r6,r6,r2
2063	ldr	r2,[sp,#24]
2064	veor	d25,d25,d24
2065	and	r12,r12,r3
2066	add	r10,r10,r6
2067	vshr.u32	d24,d1,#19
2068	add	r6,r6,r0,ror#2
2069	eor	r12,r12,r8
2070	vsli.32	d24,d1,#13
2071	add	r5,r5,r2
2072	eor	r2,r11,r4
2073	veor	d25,d25,d24
2074	eor	r0,r10,r10,ror#5
2075	add	r6,r6,r12
2076	vadd.i32	d2,d2,d25
2077	and	r2,r2,r10
2078	eor	r12,r0,r10,ror#19
2079	vshr.u32	d24,d2,#17
2080	eor	r0,r6,r6,ror#11
2081	eor	r2,r2,r4
2082	vsli.32	d24,d2,#15
2083	add	r5,r5,r12,ror#6
2084	eor	r12,r6,r7
2085	vshr.u32	d25,d2,#10
2086	eor	r0,r0,r6,ror#20
2087	add	r5,r5,r2
2088	veor	d25,d25,d24
2089	ldr	r2,[sp,#28]
2090	and	r3,r3,r12
2091	vshr.u32	d24,d2,#19
2092	add	r9,r9,r5
2093	add	r5,r5,r0,ror#2
2094	eor	r3,r3,r7
2095	vld1.32	{q8},[r14,:128]!
2096	add	r4,r4,r2
2097	vsli.32	d24,d2,#13
2098	eor	r2,r10,r11
2099	eor	r0,r9,r9,ror#5
2100	veor	d25,d25,d24
2101	add	r5,r5,r3
2102	and	r2,r2,r9
2103	vadd.i32	d3,d3,d25
2104	eor	r3,r0,r9,ror#19
2105	eor	r0,r5,r5,ror#11
2106	vadd.i32	q8,q8,q1
2107	eor	r2,r2,r11
2108	add	r4,r4,r3,ror#6
2109	eor	r3,r5,r6
2110	eor	r0,r0,r5,ror#20
2111	add	r4,r4,r2
2112	ldr	r2,[sp,#32]
2113	and	r12,r12,r3
2114	add	r8,r8,r4
2115	vst1.32	{q8},[r1,:128]!
2116	add	r4,r4,r0,ror#2
2117	eor	r12,r12,r6
2118	vext.8	q8,q2,q3,#4
2119	add	r11,r11,r2
2120	eor	r2,r9,r10
2121	eor	r0,r8,r8,ror#5
2122	vext.8	q9,q0,q1,#4
2123	add	r4,r4,r12
2124	and	r2,r2,r8
2125	eor	r12,r0,r8,ror#19
2126	vshr.u32	q10,q8,#7
2127	eor	r0,r4,r4,ror#11
2128	eor	r2,r2,r10
2129	vadd.i32	q2,q2,q9
2130	add	r11,r11,r12,ror#6
2131	eor	r12,r4,r5
2132	vshr.u32	q9,q8,#3
2133	eor	r0,r0,r4,ror#20
2134	add	r11,r11,r2
2135	vsli.32	q10,q8,#25
2136	ldr	r2,[sp,#36]
2137	and	r3,r3,r12
2138	vshr.u32	q11,q8,#18
2139	add	r7,r7,r11
2140	add	r11,r11,r0,ror#2
2141	eor	r3,r3,r5
2142	veor	q9,q9,q10
2143	add	r10,r10,r2
2144	vsli.32	q11,q8,#14
2145	eor	r2,r8,r9
2146	eor	r0,r7,r7,ror#5
2147	vshr.u32	d24,d3,#17
2148	add	r11,r11,r3
2149	and	r2,r2,r7
2150	veor	q9,q9,q11
2151	eor	r3,r0,r7,ror#19
2152	eor	r0,r11,r11,ror#11
2153	vsli.32	d24,d3,#15
2154	eor	r2,r2,r9
2155	add	r10,r10,r3,ror#6
2156	vshr.u32	d25,d3,#10
2157	eor	r3,r11,r4
2158	eor	r0,r0,r11,ror#20
2159	vadd.i32	q2,q2,q9
2160	add	r10,r10,r2
2161	ldr	r2,[sp,#40]
2162	veor	d25,d25,d24
2163	and	r12,r12,r3
2164	add	r6,r6,r10
2165	vshr.u32	d24,d3,#19
2166	add	r10,r10,r0,ror#2
2167	eor	r12,r12,r4
2168	vsli.32	d24,d3,#13
2169	add	r9,r9,r2
2170	eor	r2,r7,r8
2171	veor	d25,d25,d24
2172	eor	r0,r6,r6,ror#5
2173	add	r10,r10,r12
2174	vadd.i32	d4,d4,d25
2175	and	r2,r2,r6
2176	eor	r12,r0,r6,ror#19
2177	vshr.u32	d24,d4,#17
2178	eor	r0,r10,r10,ror#11
2179	eor	r2,r2,r8
2180	vsli.32	d24,d4,#15
2181	add	r9,r9,r12,ror#6
2182	eor	r12,r10,r11
2183	vshr.u32	d25,d4,#10
2184	eor	r0,r0,r10,ror#20
2185	add	r9,r9,r2
2186	veor	d25,d25,d24
2187	ldr	r2,[sp,#44]
2188	and	r3,r3,r12
2189	vshr.u32	d24,d4,#19
2190	add	r5,r5,r9
2191	add	r9,r9,r0,ror#2
2192	eor	r3,r3,r11
2193	vld1.32	{q8},[r14,:128]!
2194	add	r8,r8,r2
2195	vsli.32	d24,d4,#13
2196	eor	r2,r6,r7
2197	eor	r0,r5,r5,ror#5
2198	veor	d25,d25,d24
2199	add	r9,r9,r3
2200	and	r2,r2,r5
2201	vadd.i32	d5,d5,d25
2202	eor	r3,r0,r5,ror#19
2203	eor	r0,r9,r9,ror#11
2204	vadd.i32	q8,q8,q2
2205	eor	r2,r2,r7
2206	add	r8,r8,r3,ror#6
2207	eor	r3,r9,r10
2208	eor	r0,r0,r9,ror#20
2209	add	r8,r8,r2
2210	ldr	r2,[sp,#48]
2211	and	r12,r12,r3
2212	add	r4,r4,r8
2213	vst1.32	{q8},[r1,:128]!
2214	add	r8,r8,r0,ror#2
2215	eor	r12,r12,r10
2216	vext.8	q8,q3,q0,#4
2217	add	r7,r7,r2
2218	eor	r2,r5,r6
2219	eor	r0,r4,r4,ror#5
2220	vext.8	q9,q1,q2,#4
2221	add	r8,r8,r12
2222	and	r2,r2,r4
2223	eor	r12,r0,r4,ror#19
2224	vshr.u32	q10,q8,#7
2225	eor	r0,r8,r8,ror#11
2226	eor	r2,r2,r6
2227	vadd.i32	q3,q3,q9
2228	add	r7,r7,r12,ror#6
2229	eor	r12,r8,r9
2230	vshr.u32	q9,q8,#3
2231	eor	r0,r0,r8,ror#20
2232	add	r7,r7,r2
2233	vsli.32	q10,q8,#25
2234	ldr	r2,[sp,#52]
2235	and	r3,r3,r12
2236	vshr.u32	q11,q8,#18
2237	add	r11,r11,r7
2238	add	r7,r7,r0,ror#2
2239	eor	r3,r3,r9
2240	veor	q9,q9,q10
2241	add	r6,r6,r2
2242	vsli.32	q11,q8,#14
2243	eor	r2,r4,r5
2244	eor	r0,r11,r11,ror#5
2245	vshr.u32	d24,d5,#17
2246	add	r7,r7,r3
2247	and	r2,r2,r11
2248	veor	q9,q9,q11
2249	eor	r3,r0,r11,ror#19
2250	eor	r0,r7,r7,ror#11
2251	vsli.32	d24,d5,#15
2252	eor	r2,r2,r5
2253	add	r6,r6,r3,ror#6
2254	vshr.u32	d25,d5,#10
2255	eor	r3,r7,r8
2256	eor	r0,r0,r7,ror#20
2257	vadd.i32	q3,q3,q9
2258	add	r6,r6,r2
2259	ldr	r2,[sp,#56]
2260	veor	d25,d25,d24
2261	and	r12,r12,r3
2262	add	r10,r10,r6
2263	vshr.u32	d24,d5,#19
2264	add	r6,r6,r0,ror#2
2265	eor	r12,r12,r8
2266	vsli.32	d24,d5,#13
2267	add	r5,r5,r2
2268	eor	r2,r11,r4
2269	veor	d25,d25,d24
2270	eor	r0,r10,r10,ror#5
2271	add	r6,r6,r12
2272	vadd.i32	d6,d6,d25
2273	and	r2,r2,r10
2274	eor	r12,r0,r10,ror#19
2275	vshr.u32	d24,d6,#17
2276	eor	r0,r6,r6,ror#11
2277	eor	r2,r2,r4
2278	vsli.32	d24,d6,#15
2279	add	r5,r5,r12,ror#6
2280	eor	r12,r6,r7
2281	vshr.u32	d25,d6,#10
2282	eor	r0,r0,r6,ror#20
2283	add	r5,r5,r2
2284	veor	d25,d25,d24
2285	ldr	r2,[sp,#60]
2286	and	r3,r3,r12
2287	vshr.u32	d24,d6,#19
2288	add	r9,r9,r5
2289	add	r5,r5,r0,ror#2
2290	eor	r3,r3,r7
2291	vld1.32	{q8},[r14,:128]!
2292	add	r4,r4,r2
2293	vsli.32	d24,d6,#13
2294	eor	r2,r10,r11
2295	eor	r0,r9,r9,ror#5
2296	veor	d25,d25,d24
2297	add	r5,r5,r3
2298	and	r2,r2,r9
2299	vadd.i32	d7,d7,d25
2300	eor	r3,r0,r9,ror#19
2301	eor	r0,r5,r5,ror#11
2302	vadd.i32	q8,q8,q3
2303	eor	r2,r2,r11
2304	add	r4,r4,r3,ror#6
2305	eor	r3,r5,r6
2306	eor	r0,r0,r5,ror#20
2307	add	r4,r4,r2
2308	ldr	r2,[r14]
2309	and	r12,r12,r3
2310	add	r8,r8,r4
2311	vst1.32	{q8},[r1,:128]!
2312	add	r4,r4,r0,ror#2
2313	eor	r12,r12,r6
2314	teq	r2,#0				@ check for K256 terminator
2315	ldr	r2,[sp,#0]
2316	sub	r1,r1,#64
2317	bne	.L_00_48
2318
2319	ldr		r1,[sp,#68]
2320	ldr		r0,[sp,#72]
2321	sub		r14,r14,#256	@ rewind r14
2322	teq		r1,r0
2323	it		eq
2324	subeq		r1,r1,#64		@ avoid SEGV
2325	vld1.8		{q0},[r1]!		@ load next input block
2326	vld1.8		{q1},[r1]!
2327	vld1.8		{q2},[r1]!
2328	vld1.8		{q3},[r1]!
2329	it		ne
2330	strne		r1,[sp,#68]
2331	mov		r1,sp
2332	add	r11,r11,r2
2333	eor	r2,r9,r10
2334	eor	r0,r8,r8,ror#5
2335	add	r4,r4,r12
2336	vld1.32	{q8},[r14,:128]!
2337	and	r2,r2,r8
2338	eor	r12,r0,r8,ror#19
2339	eor	r0,r4,r4,ror#11
2340	eor	r2,r2,r10
2341	vrev32.8	q0,q0
2342	add	r11,r11,r12,ror#6
2343	eor	r12,r4,r5
2344	eor	r0,r0,r4,ror#20
2345	add	r11,r11,r2
2346	vadd.i32	q8,q8,q0
2347	ldr	r2,[sp,#4]
2348	and	r3,r3,r12
2349	add	r7,r7,r11
2350	add	r11,r11,r0,ror#2
2351	eor	r3,r3,r5
2352	add	r10,r10,r2
2353	eor	r2,r8,r9
2354	eor	r0,r7,r7,ror#5
2355	add	r11,r11,r3
2356	and	r2,r2,r7
2357	eor	r3,r0,r7,ror#19
2358	eor	r0,r11,r11,ror#11
2359	eor	r2,r2,r9
2360	add	r10,r10,r3,ror#6
2361	eor	r3,r11,r4
2362	eor	r0,r0,r11,ror#20
2363	add	r10,r10,r2
@ NOTE(review): tail of sha256_block_data_order_neon. The function entry,
@ stack setup and the first portion of the 16-round loop are above this
@ excerpt. Integer SHA-256 rounds on the working variables r4-r11
@ (a..h) are interleaved with NEON message-schedule work for the NEXT
@ block: q1-q3 hold input words (vrev32.8 = endian swap), r14 walks the
@ K256 table, and q8 accumulates W[i]+K[i], stored to the stack via r1.
@ Each integer round computes Sigma1(e) (ror #6/#11/#25 folded into the
@ eor/ror chains below), Ch(e,f,g), Sigma0(a) and Maj(a,b,c); note the
@ "Maj from the past" trick — Maj of round i is folded into h one round
@ late.  (Fixed here: stripped web-scrape line numbers that were fused
@ onto every instruction and broke assembly.)
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!		@ store W+K for q0, advance scratch ptr
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!	@ next four K256 constants
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1		@ byte-swap input words 4..7
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1	@ q8 = W[4..7]+K[4..7]
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!		@ store W+K for q1
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!	@ next four K256 constants
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2		@ byte-swap input words 8..11
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2	@ q8 = W[8..11]+K[8..11]
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!		@ store W+K for q2
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!	@ next four K256 constants
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3		@ byte-swap input words 12..15
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3	@ q8 = W[12..15]+K[12..15]
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]		@ r2 = context pointer saved at sp+64
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!		@ store last W+K quartet
@ End of 64 rounds: fold working variables r4-r11 back into the eight
@ 32-bit state words at [r2] (load old H, add, store back).
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8-r11}

@ Loop control: condition flags were set by the end-of-input comparison
@ earlier in the loop (not visible in this excerpt).  NE = more blocks:
@ reset the scratch pointer, reload saved input pointer, clear r12 and
@ pre-seed r3=b^c for the Maj recurrence, then iterate; EQ = finished.
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4-r12,pc}		@ restore callee-saved regs and return
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

@ INST(a,b,c,d) emits a raw 4-byte instruction encoding so the SHA-256
@ crypto-extension instructions assemble even with toolchains that lack
@ the mnemonics.  In Thumb-2 the two halfwords are swapped and the top
@ nibble adjusted (d|0xc) to form the equivalent T32 encoding.
# ifdef __thumb2__
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

@ ------------------------------------------------------------------
@ sha256_block_data_order_armv8(u32 *ctx, const u8 *inp, size_t num)
@ ARMv8 Cryptography Extension path (fixed here: stripped web-scrape
@ line numbers fused onto every line, which broke assembly).
@ In:  r0 = ctx (8 x u32 state, loaded into q0/q1)
@      r1 = inp (num * 64 bytes of message data)
@      r2 = num (64-byte block count; converted to end pointer below)
@ Uses q8-q13 for the message schedule, q14/q15 to preserve the state
@ across each block, r3 to walk the K256 table.
@ ------------------------------------------------------------------
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]		@ load H[0..7]
# ifdef __thumb2__
	adr	r3,.LARMv8
	sub	r3,r3,#.LARMv8-K256	@ PC-relative address of K256
# else
	adrl	r3,K256
# endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

.Loop_v8:
	@ Load one 64-byte block and byte-swap to big-endian words.
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2		@ flags: NE while input remains
	@ Rounds 0..47: four-round quads; sha256su0/su1 extend the
	@ message schedule while sha256h/h2 update the state halves.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Rounds 48..63: no further schedule expansion needed.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	@ Add this block's result to the saved state; loop while input left.
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it		ne
	bne		.Loop_v8

	vst1.32		{q0,q1},[r0]	@ write updated H[0..7] back to ctx

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm   OPENSSL_armcap_P,4,4
#endif