
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and, in
@ "absolute" terms, is ~2250 cycles per 64-byte block or ~35 cycles
@ per byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but is 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was
@ done about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cycles per byte on Apple A7.

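@ Calling convention (an editorial sketch, not part of the original
@ module): sha256_block_data_order behaves like the C prototype
@
@	void sha256_block_data_order(uint32_t state[8],
@	                             const void *inp, size_t num_blocks);
@
@ i.e. r0 points at the eight 32-bit chaining values (loaded below with
@ "ldmia r0,{r4-r11}"), r1 at the input stream, and r2 holds the number
@ of 64-byte blocks; the code converts r2 into an end-of-input pointer
@ with "add r2,r1,r2,lsl#6". A hypothetical call site would look like:
@
@	ldr	r0,=ctx_h		@ &state[0] (hypothetical symbol)
@	ldr	r1,=input		@ message blocks (hypothetical)
@	mov	r2,#1			@ one 64-byte block
@	bl	sha256_block_data_order
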
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if __ARM_ARCH__<7
.code	32
#else
.syntax	unified
# if defined(__thumb2__) && !defined(__APPLE__)
#  define adrl adr
.thumb
# else
.code	32
# endif
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

.globl	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,sha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.globl	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	4
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adrl	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48
.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
2291	add	r9,r9,r5
2292	add	r5,r5,r0,ror#2
2293	eor	r3,r3,r7
2294	vld1.32	{q8},[r14,:128]!
2295	add	r4,r4,r2
2296	vsli.32	d24,d6,#13
2297	eor	r2,r10,r11
2298	eor	r0,r9,r9,ror#5
2299	veor	d25,d25,d24
2300	add	r5,r5,r3
2301	and	r2,r2,r9
2302	vadd.i32	d7,d7,d25
2303	eor	r3,r0,r9,ror#19
2304	eor	r0,r5,r5,ror#11
2305	vadd.i32	q8,q8,q3
2306	eor	r2,r2,r11
2307	add	r4,r4,r3,ror#6
2308	eor	r3,r5,r6
2309	eor	r0,r0,r5,ror#20
2310	add	r4,r4,r2
2311	ldr	r2,[r14]
2312	and	r12,r12,r3
2313	add	r8,r8,r4
2314	vst1.32	{q8},[r1,:128]!
2315	add	r4,r4,r0,ror#2
2316	eor	r12,r12,r6
2317	teq	r2,#0				@ check for K256 terminator
2318	ldr	r2,[sp,#0]
2319	sub	r1,r1,#64
2320	bne	.L_00_48
2321
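@ The .L_00_48 loop above interleaves the scalar rounds with the NEON
@ message-schedule update, four W[t] words per q register.  The
@ vshr.u32/vsli.32 pairs compose 32-bit rotations, implementing the
@ FIPS 180-4 schedule functions
@	sigma0(x) = (x ror 7) ^ (x ror 18) ^ (x >> 3)
@	sigma1(x) = (x ror 17) ^ (x ror 19) ^ (x >> 10)
@ while the scalar rounds fold Sigma0/Sigma1 into two eor's and one
@ deferred rotation, e.g. ((e ^ (e ror 5) ^ (e ror 19)) ror 6) =
@ (e ror 6) ^ (e ror 11) ^ (e ror 25) = Sigma1(e).  Each K[t]+W[t]
@ quad is stored back to the stack (vst1.32) for the scalar rounds to
@ consume; the zero word terminating K256 ends the loop.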
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
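@ The next 64-byte block is loaded speculatively: if the pointer
@ saved at [sp,#68] has reached the end-of-input pointer at [sp,#72],
@ r1 is first wound back by 64 so the loads above stay inside the
@ buffer (the loaded data is then simply discarded), and the saved
@ pointer is only advanced (strne) when more input remains.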
	mov	r1,sp
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

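@ If more input remains (the teq above left NE, and no intervening
@ instruction alters the flags), .L_00_48 is rerun with the next
@ block's schedule already on the stack: r1 is reset to sp, the Maj
@ accumulator r12 is cleared and r3 is reseeded with b^c.  Otherwise
@ the original sp saved at [sp,#76] is restored before popping the
@ callee-saved registers.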
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__) && !defined(__APPLE__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

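@ INST emits the ARMv8 SHA-256 instructions as raw opcode bytes so
@ that the file assembles even with toolchains that lack crypto
@ extension support.  For Thumb-2 the two halfwords are swapped and
@ the leading nibble widened (0xf3 becomes 0xff) to form the Thumb
@ encoding of the same instruction.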
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
# ifdef	__APPLE__
	sub	r3,r3,#256+32
# elif	defined(__thumb2__)
	adr	r3,.LARMv8
	sub	r3,r3,#.LARMv8-K256
# else
	adrl	r3,K256
# endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

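@ ARMv8 crypto extension path: each iteration loads one 64-byte block
@ into q8-q11, byte-swaps it, and performs 16 quad-rounds of
@ sha256h/sha256h2 while sha256su0/sha256su1 expand the message
@ schedule in place.  r3 walks through K256 and is rewound per block;
@ q14/q15 hold the incoming state for the final feed-forward.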
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
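@ the message schedule is now complete; the remaining four
@ quad-rounds only consume the last K[t]+W[t] values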
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

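@ feed-forward: add the block's starting state (Davies-Meyer)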
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
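@ the .byte sequence below encodes the trailing identification string
@ "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"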
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P
#endif
