@ (removed: HTML code-viewer navigation chrome — "Home / Line# / Scopes# / Navigate / Raw / Download" — not part of the source file)
#if defined(__arm__)

@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that latter performs sub-optimally, nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if __ARM_ARCH__<7
.code	32			@ pre-ARMv7: plain ARM encoding only
#else
.syntax	unified
# if defined(__thumb2__) && !defined(__APPLE__)
#  define adrl adr		@ unified Thumb-2 assemblers accept plain adr here
.thumb
# else
.code	32
# endif
#endif
@ K256: the 64 SHA-256 round constants of FIPS 180-4 (first 32 bits of
@ the fractional parts of the cube roots of the first 64 primes),
@ consumed sequentially by the round code via a post-incremented
@ pointer. The trailing zero word is a terminator sentinel following
@ the table (outside K256's .size).
.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ PC-relative displacement to OPENSSL_armcap_P; the entry code adds
@ the runtime address of .Lsha256_block_data_order to recover the
@ absolute address of the capability word (position-independent).
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

87.globl	sha256_block_data_order
88.type	sha256_block_data_order,%function
89sha256_block_data_order:
90.Lsha256_block_data_order:
91#if __ARM_ARCH__<7
92	sub	r3,pc,#8		@ sha256_block_data_order
93#else
94	adr	r3,sha256_block_data_order
95#endif
96#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
97	ldr	r12,.LOPENSSL_armcap
98	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
99#ifdef	__APPLE__
100	ldr	r12,[r12]
101#endif
102	tst	r12,#ARMV8_SHA256
103	bne	.LARMv8
104	tst	r12,#ARMV7_NEON
105	bne	.LNEON
106#endif
107	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
108	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
109	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
110	sub	r14,r3,#256+32	@ K256
111	sub	sp,sp,#16*4		@ alloca(X[16])
112.Loop:
113# if __ARM_ARCH__>=7
114	ldr	r2,[r1],#4
115# else
116	ldrb	r2,[r1,#3]
117# endif
118	eor	r3,r5,r6		@ magic
119	eor	r12,r12,r12
120#if __ARM_ARCH__>=7
121	@ ldr	r2,[r1],#4			@ 0
122# if 0==15
123	str	r1,[sp,#17*4]			@ make room for r1
124# endif
125	eor	r0,r8,r8,ror#5
126	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
127	eor	r0,r0,r8,ror#19	@ Sigma1(e)
128# ifndef __ARMEB__
129	rev	r2,r2
130# endif
131#else
132	@ ldrb	r2,[r1,#3]			@ 0
133	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
134	ldrb	r12,[r1,#2]
135	ldrb	r0,[r1,#1]
136	orr	r2,r2,r12,lsl#8
137	ldrb	r12,[r1],#4
138	orr	r2,r2,r0,lsl#16
139# if 0==15
140	str	r1,[sp,#17*4]			@ make room for r1
141# endif
142	eor	r0,r8,r8,ror#5
143	orr	r2,r2,r12,lsl#24
144	eor	r0,r0,r8,ror#19	@ Sigma1(e)
145#endif
146	ldr	r12,[r14],#4			@ *K256++
147	add	r11,r11,r2			@ h+=X[i]
148	str	r2,[sp,#0*4]
149	eor	r2,r9,r10
150	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
151	and	r2,r2,r8
152	add	r11,r11,r12			@ h+=K256[i]
153	eor	r2,r2,r10			@ Ch(e,f,g)
154	eor	r0,r4,r4,ror#11
155	add	r11,r11,r2			@ h+=Ch(e,f,g)
156#if 0==31
157	and	r12,r12,#0xff
158	cmp	r12,#0xf2			@ done?
159#endif
160#if 0<15
161# if __ARM_ARCH__>=7
162	ldr	r2,[r1],#4			@ prefetch
163# else
164	ldrb	r2,[r1,#3]
165# endif
166	eor	r12,r4,r5			@ a^b, b^c in next round
167#else
168	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
169	eor	r12,r4,r5			@ a^b, b^c in next round
170	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
171#endif
172	eor	r0,r0,r4,ror#20	@ Sigma0(a)
173	and	r3,r3,r12			@ (b^c)&=(a^b)
174	add	r7,r7,r11			@ d+=h
175	eor	r3,r3,r5			@ Maj(a,b,c)
176	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
177	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
178#if __ARM_ARCH__>=7
179	@ ldr	r2,[r1],#4			@ 1
180# if 1==15
181	str	r1,[sp,#17*4]			@ make room for r1
182# endif
183	eor	r0,r7,r7,ror#5
184	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
185	eor	r0,r0,r7,ror#19	@ Sigma1(e)
186# ifndef __ARMEB__
187	rev	r2,r2
188# endif
189#else
190	@ ldrb	r2,[r1,#3]			@ 1
191	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
192	ldrb	r3,[r1,#2]
193	ldrb	r0,[r1,#1]
194	orr	r2,r2,r3,lsl#8
195	ldrb	r3,[r1],#4
196	orr	r2,r2,r0,lsl#16
197# if 1==15
198	str	r1,[sp,#17*4]			@ make room for r1
199# endif
200	eor	r0,r7,r7,ror#5
201	orr	r2,r2,r3,lsl#24
202	eor	r0,r0,r7,ror#19	@ Sigma1(e)
203#endif
204	ldr	r3,[r14],#4			@ *K256++
205	add	r10,r10,r2			@ h+=X[i]
206	str	r2,[sp,#1*4]
207	eor	r2,r8,r9
208	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
209	and	r2,r2,r7
210	add	r10,r10,r3			@ h+=K256[i]
211	eor	r2,r2,r9			@ Ch(e,f,g)
212	eor	r0,r11,r11,ror#11
213	add	r10,r10,r2			@ h+=Ch(e,f,g)
214#if 1==31
215	and	r3,r3,#0xff
216	cmp	r3,#0xf2			@ done?
217#endif
218#if 1<15
219# if __ARM_ARCH__>=7
220	ldr	r2,[r1],#4			@ prefetch
221# else
222	ldrb	r2,[r1,#3]
223# endif
224	eor	r3,r11,r4			@ a^b, b^c in next round
225#else
226	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
227	eor	r3,r11,r4			@ a^b, b^c in next round
228	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
229#endif
230	eor	r0,r0,r11,ror#20	@ Sigma0(a)
231	and	r12,r12,r3			@ (b^c)&=(a^b)
232	add	r6,r6,r10			@ d+=h
233	eor	r12,r12,r4			@ Maj(a,b,c)
234	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
235	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
236#if __ARM_ARCH__>=7
237	@ ldr	r2,[r1],#4			@ 2
238# if 2==15
239	str	r1,[sp,#17*4]			@ make room for r1
240# endif
241	eor	r0,r6,r6,ror#5
242	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
243	eor	r0,r0,r6,ror#19	@ Sigma1(e)
244# ifndef __ARMEB__
245	rev	r2,r2
246# endif
247#else
248	@ ldrb	r2,[r1,#3]			@ 2
249	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
250	ldrb	r12,[r1,#2]
251	ldrb	r0,[r1,#1]
252	orr	r2,r2,r12,lsl#8
253	ldrb	r12,[r1],#4
254	orr	r2,r2,r0,lsl#16
255# if 2==15
256	str	r1,[sp,#17*4]			@ make room for r1
257# endif
258	eor	r0,r6,r6,ror#5
259	orr	r2,r2,r12,lsl#24
260	eor	r0,r0,r6,ror#19	@ Sigma1(e)
261#endif
262	ldr	r12,[r14],#4			@ *K256++
263	add	r9,r9,r2			@ h+=X[i]
264	str	r2,[sp,#2*4]
265	eor	r2,r7,r8
266	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
267	and	r2,r2,r6
268	add	r9,r9,r12			@ h+=K256[i]
269	eor	r2,r2,r8			@ Ch(e,f,g)
270	eor	r0,r10,r10,ror#11
271	add	r9,r9,r2			@ h+=Ch(e,f,g)
272#if 2==31
273	and	r12,r12,#0xff
274	cmp	r12,#0xf2			@ done?
275#endif
276#if 2<15
277# if __ARM_ARCH__>=7
278	ldr	r2,[r1],#4			@ prefetch
279# else
280	ldrb	r2,[r1,#3]
281# endif
282	eor	r12,r10,r11			@ a^b, b^c in next round
283#else
284	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
285	eor	r12,r10,r11			@ a^b, b^c in next round
286	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
287#endif
288	eor	r0,r0,r10,ror#20	@ Sigma0(a)
289	and	r3,r3,r12			@ (b^c)&=(a^b)
290	add	r5,r5,r9			@ d+=h
291	eor	r3,r3,r11			@ Maj(a,b,c)
292	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
293	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
294#if __ARM_ARCH__>=7
295	@ ldr	r2,[r1],#4			@ 3
296# if 3==15
297	str	r1,[sp,#17*4]			@ make room for r1
298# endif
299	eor	r0,r5,r5,ror#5
300	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
301	eor	r0,r0,r5,ror#19	@ Sigma1(e)
302# ifndef __ARMEB__
303	rev	r2,r2
304# endif
305#else
306	@ ldrb	r2,[r1,#3]			@ 3
307	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
308	ldrb	r3,[r1,#2]
309	ldrb	r0,[r1,#1]
310	orr	r2,r2,r3,lsl#8
311	ldrb	r3,[r1],#4
312	orr	r2,r2,r0,lsl#16
313# if 3==15
314	str	r1,[sp,#17*4]			@ make room for r1
315# endif
316	eor	r0,r5,r5,ror#5
317	orr	r2,r2,r3,lsl#24
318	eor	r0,r0,r5,ror#19	@ Sigma1(e)
319#endif
320	ldr	r3,[r14],#4			@ *K256++
321	add	r8,r8,r2			@ h+=X[i]
322	str	r2,[sp,#3*4]
323	eor	r2,r6,r7
324	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
325	and	r2,r2,r5
326	add	r8,r8,r3			@ h+=K256[i]
327	eor	r2,r2,r7			@ Ch(e,f,g)
328	eor	r0,r9,r9,ror#11
329	add	r8,r8,r2			@ h+=Ch(e,f,g)
330#if 3==31
331	and	r3,r3,#0xff
332	cmp	r3,#0xf2			@ done?
333#endif
334#if 3<15
335# if __ARM_ARCH__>=7
336	ldr	r2,[r1],#4			@ prefetch
337# else
338	ldrb	r2,[r1,#3]
339# endif
340	eor	r3,r9,r10			@ a^b, b^c in next round
341#else
342	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
343	eor	r3,r9,r10			@ a^b, b^c in next round
344	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
345#endif
346	eor	r0,r0,r9,ror#20	@ Sigma0(a)
347	and	r12,r12,r3			@ (b^c)&=(a^b)
348	add	r4,r4,r8			@ d+=h
349	eor	r12,r12,r10			@ Maj(a,b,c)
350	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
351	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
352#if __ARM_ARCH__>=7
353	@ ldr	r2,[r1],#4			@ 4
354# if 4==15
355	str	r1,[sp,#17*4]			@ make room for r1
356# endif
357	eor	r0,r4,r4,ror#5
358	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
359	eor	r0,r0,r4,ror#19	@ Sigma1(e)
360# ifndef __ARMEB__
361	rev	r2,r2
362# endif
363#else
364	@ ldrb	r2,[r1,#3]			@ 4
365	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
366	ldrb	r12,[r1,#2]
367	ldrb	r0,[r1,#1]
368	orr	r2,r2,r12,lsl#8
369	ldrb	r12,[r1],#4
370	orr	r2,r2,r0,lsl#16
371# if 4==15
372	str	r1,[sp,#17*4]			@ make room for r1
373# endif
374	eor	r0,r4,r4,ror#5
375	orr	r2,r2,r12,lsl#24
376	eor	r0,r0,r4,ror#19	@ Sigma1(e)
377#endif
378	ldr	r12,[r14],#4			@ *K256++
379	add	r7,r7,r2			@ h+=X[i]
380	str	r2,[sp,#4*4]
381	eor	r2,r5,r6
382	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
383	and	r2,r2,r4
384	add	r7,r7,r12			@ h+=K256[i]
385	eor	r2,r2,r6			@ Ch(e,f,g)
386	eor	r0,r8,r8,ror#11
387	add	r7,r7,r2			@ h+=Ch(e,f,g)
388#if 4==31
389	and	r12,r12,#0xff
390	cmp	r12,#0xf2			@ done?
391#endif
392#if 4<15
393# if __ARM_ARCH__>=7
394	ldr	r2,[r1],#4			@ prefetch
395# else
396	ldrb	r2,[r1,#3]
397# endif
398	eor	r12,r8,r9			@ a^b, b^c in next round
399#else
400	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
401	eor	r12,r8,r9			@ a^b, b^c in next round
402	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
403#endif
404	eor	r0,r0,r8,ror#20	@ Sigma0(a)
405	and	r3,r3,r12			@ (b^c)&=(a^b)
406	add	r11,r11,r7			@ d+=h
407	eor	r3,r3,r9			@ Maj(a,b,c)
408	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
409	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
410#if __ARM_ARCH__>=7
411	@ ldr	r2,[r1],#4			@ 5
412# if 5==15
413	str	r1,[sp,#17*4]			@ make room for r1
414# endif
415	eor	r0,r11,r11,ror#5
416	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
417	eor	r0,r0,r11,ror#19	@ Sigma1(e)
418# ifndef __ARMEB__
419	rev	r2,r2
420# endif
421#else
422	@ ldrb	r2,[r1,#3]			@ 5
423	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
424	ldrb	r3,[r1,#2]
425	ldrb	r0,[r1,#1]
426	orr	r2,r2,r3,lsl#8
427	ldrb	r3,[r1],#4
428	orr	r2,r2,r0,lsl#16
429# if 5==15
430	str	r1,[sp,#17*4]			@ make room for r1
431# endif
432	eor	r0,r11,r11,ror#5
433	orr	r2,r2,r3,lsl#24
434	eor	r0,r0,r11,ror#19	@ Sigma1(e)
435#endif
436	ldr	r3,[r14],#4			@ *K256++
437	add	r6,r6,r2			@ h+=X[i]
438	str	r2,[sp,#5*4]
439	eor	r2,r4,r5
440	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
441	and	r2,r2,r11
442	add	r6,r6,r3			@ h+=K256[i]
443	eor	r2,r2,r5			@ Ch(e,f,g)
444	eor	r0,r7,r7,ror#11
445	add	r6,r6,r2			@ h+=Ch(e,f,g)
446#if 5==31
447	and	r3,r3,#0xff
448	cmp	r3,#0xf2			@ done?
449#endif
450#if 5<15
451# if __ARM_ARCH__>=7
452	ldr	r2,[r1],#4			@ prefetch
453# else
454	ldrb	r2,[r1,#3]
455# endif
456	eor	r3,r7,r8			@ a^b, b^c in next round
457#else
458	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
459	eor	r3,r7,r8			@ a^b, b^c in next round
460	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
461#endif
462	eor	r0,r0,r7,ror#20	@ Sigma0(a)
463	and	r12,r12,r3			@ (b^c)&=(a^b)
464	add	r10,r10,r6			@ d+=h
465	eor	r12,r12,r8			@ Maj(a,b,c)
466	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
467	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
468#if __ARM_ARCH__>=7
469	@ ldr	r2,[r1],#4			@ 6
470# if 6==15
471	str	r1,[sp,#17*4]			@ make room for r1
472# endif
473	eor	r0,r10,r10,ror#5
474	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
475	eor	r0,r0,r10,ror#19	@ Sigma1(e)
476# ifndef __ARMEB__
477	rev	r2,r2
478# endif
479#else
480	@ ldrb	r2,[r1,#3]			@ 6
481	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
482	ldrb	r12,[r1,#2]
483	ldrb	r0,[r1,#1]
484	orr	r2,r2,r12,lsl#8
485	ldrb	r12,[r1],#4
486	orr	r2,r2,r0,lsl#16
487# if 6==15
488	str	r1,[sp,#17*4]			@ make room for r1
489# endif
490	eor	r0,r10,r10,ror#5
491	orr	r2,r2,r12,lsl#24
492	eor	r0,r0,r10,ror#19	@ Sigma1(e)
493#endif
494	ldr	r12,[r14],#4			@ *K256++
495	add	r5,r5,r2			@ h+=X[i]
496	str	r2,[sp,#6*4]
497	eor	r2,r11,r4
498	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
499	and	r2,r2,r10
500	add	r5,r5,r12			@ h+=K256[i]
501	eor	r2,r2,r4			@ Ch(e,f,g)
502	eor	r0,r6,r6,ror#11
503	add	r5,r5,r2			@ h+=Ch(e,f,g)
504#if 6==31
505	and	r12,r12,#0xff
506	cmp	r12,#0xf2			@ done?
507#endif
508#if 6<15
509# if __ARM_ARCH__>=7
510	ldr	r2,[r1],#4			@ prefetch
511# else
512	ldrb	r2,[r1,#3]
513# endif
514	eor	r12,r6,r7			@ a^b, b^c in next round
515#else
516	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
517	eor	r12,r6,r7			@ a^b, b^c in next round
518	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
519#endif
520	eor	r0,r0,r6,ror#20	@ Sigma0(a)
521	and	r3,r3,r12			@ (b^c)&=(a^b)
522	add	r9,r9,r5			@ d+=h
523	eor	r3,r3,r7			@ Maj(a,b,c)
524	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
525	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
526#if __ARM_ARCH__>=7
527	@ ldr	r2,[r1],#4			@ 7
528# if 7==15
529	str	r1,[sp,#17*4]			@ make room for r1
530# endif
531	eor	r0,r9,r9,ror#5
532	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
533	eor	r0,r0,r9,ror#19	@ Sigma1(e)
534# ifndef __ARMEB__
535	rev	r2,r2
536# endif
537#else
538	@ ldrb	r2,[r1,#3]			@ 7
539	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
540	ldrb	r3,[r1,#2]
541	ldrb	r0,[r1,#1]
542	orr	r2,r2,r3,lsl#8
543	ldrb	r3,[r1],#4
544	orr	r2,r2,r0,lsl#16
545# if 7==15
546	str	r1,[sp,#17*4]			@ make room for r1
547# endif
548	eor	r0,r9,r9,ror#5
549	orr	r2,r2,r3,lsl#24
550	eor	r0,r0,r9,ror#19	@ Sigma1(e)
551#endif
552	ldr	r3,[r14],#4			@ *K256++
553	add	r4,r4,r2			@ h+=X[i]
554	str	r2,[sp,#7*4]
555	eor	r2,r10,r11
556	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
557	and	r2,r2,r9
558	add	r4,r4,r3			@ h+=K256[i]
559	eor	r2,r2,r11			@ Ch(e,f,g)
560	eor	r0,r5,r5,ror#11
561	add	r4,r4,r2			@ h+=Ch(e,f,g)
562#if 7==31
563	and	r3,r3,#0xff
564	cmp	r3,#0xf2			@ done?
565#endif
566#if 7<15
567# if __ARM_ARCH__>=7
568	ldr	r2,[r1],#4			@ prefetch
569# else
570	ldrb	r2,[r1,#3]
571# endif
572	eor	r3,r5,r6			@ a^b, b^c in next round
573#else
574	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
575	eor	r3,r5,r6			@ a^b, b^c in next round
576	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
577#endif
578	eor	r0,r0,r5,ror#20	@ Sigma0(a)
579	and	r12,r12,r3			@ (b^c)&=(a^b)
580	add	r8,r8,r4			@ d+=h
581	eor	r12,r12,r6			@ Maj(a,b,c)
582	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
583	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
584#if __ARM_ARCH__>=7
585	@ ldr	r2,[r1],#4			@ 8
586# if 8==15
587	str	r1,[sp,#17*4]			@ make room for r1
588# endif
589	eor	r0,r8,r8,ror#5
590	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
591	eor	r0,r0,r8,ror#19	@ Sigma1(e)
592# ifndef __ARMEB__
593	rev	r2,r2
594# endif
595#else
596	@ ldrb	r2,[r1,#3]			@ 8
597	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
598	ldrb	r12,[r1,#2]
599	ldrb	r0,[r1,#1]
600	orr	r2,r2,r12,lsl#8
601	ldrb	r12,[r1],#4
602	orr	r2,r2,r0,lsl#16
603# if 8==15
604	str	r1,[sp,#17*4]			@ make room for r1
605# endif
606	eor	r0,r8,r8,ror#5
607	orr	r2,r2,r12,lsl#24
608	eor	r0,r0,r8,ror#19	@ Sigma1(e)
609#endif
610	ldr	r12,[r14],#4			@ *K256++
611	add	r11,r11,r2			@ h+=X[i]
612	str	r2,[sp,#8*4]
613	eor	r2,r9,r10
614	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
615	and	r2,r2,r8
616	add	r11,r11,r12			@ h+=K256[i]
617	eor	r2,r2,r10			@ Ch(e,f,g)
618	eor	r0,r4,r4,ror#11
619	add	r11,r11,r2			@ h+=Ch(e,f,g)
620#if 8==31
621	and	r12,r12,#0xff
622	cmp	r12,#0xf2			@ done?
623#endif
624#if 8<15
625# if __ARM_ARCH__>=7
626	ldr	r2,[r1],#4			@ prefetch
627# else
628	ldrb	r2,[r1,#3]
629# endif
630	eor	r12,r4,r5			@ a^b, b^c in next round
631#else
632	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
633	eor	r12,r4,r5			@ a^b, b^c in next round
634	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
635#endif
636	eor	r0,r0,r4,ror#20	@ Sigma0(a)
637	and	r3,r3,r12			@ (b^c)&=(a^b)
638	add	r7,r7,r11			@ d+=h
639	eor	r3,r3,r5			@ Maj(a,b,c)
640	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
641	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
642#if __ARM_ARCH__>=7
643	@ ldr	r2,[r1],#4			@ 9
644# if 9==15
645	str	r1,[sp,#17*4]			@ make room for r1
646# endif
647	eor	r0,r7,r7,ror#5
648	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
649	eor	r0,r0,r7,ror#19	@ Sigma1(e)
650# ifndef __ARMEB__
651	rev	r2,r2
652# endif
653#else
654	@ ldrb	r2,[r1,#3]			@ 9
655	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
656	ldrb	r3,[r1,#2]
657	ldrb	r0,[r1,#1]
658	orr	r2,r2,r3,lsl#8
659	ldrb	r3,[r1],#4
660	orr	r2,r2,r0,lsl#16
661# if 9==15
662	str	r1,[sp,#17*4]			@ make room for r1
663# endif
664	eor	r0,r7,r7,ror#5
665	orr	r2,r2,r3,lsl#24
666	eor	r0,r0,r7,ror#19	@ Sigma1(e)
667#endif
668	ldr	r3,[r14],#4			@ *K256++
669	add	r10,r10,r2			@ h+=X[i]
670	str	r2,[sp,#9*4]
671	eor	r2,r8,r9
672	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
673	and	r2,r2,r7
674	add	r10,r10,r3			@ h+=K256[i]
675	eor	r2,r2,r9			@ Ch(e,f,g)
676	eor	r0,r11,r11,ror#11
677	add	r10,r10,r2			@ h+=Ch(e,f,g)
678#if 9==31
679	and	r3,r3,#0xff
680	cmp	r3,#0xf2			@ done?
681#endif
682#if 9<15
683# if __ARM_ARCH__>=7
684	ldr	r2,[r1],#4			@ prefetch
685# else
686	ldrb	r2,[r1,#3]
687# endif
688	eor	r3,r11,r4			@ a^b, b^c in next round
689#else
690	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
691	eor	r3,r11,r4			@ a^b, b^c in next round
692	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
693#endif
694	eor	r0,r0,r11,ror#20	@ Sigma0(a)
695	and	r12,r12,r3			@ (b^c)&=(a^b)
696	add	r6,r6,r10			@ d+=h
697	eor	r12,r12,r4			@ Maj(a,b,c)
698	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
699	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
700#if __ARM_ARCH__>=7
701	@ ldr	r2,[r1],#4			@ 10
702# if 10==15
703	str	r1,[sp,#17*4]			@ make room for r1
704# endif
705	eor	r0,r6,r6,ror#5
706	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
707	eor	r0,r0,r6,ror#19	@ Sigma1(e)
708# ifndef __ARMEB__
709	rev	r2,r2
710# endif
711#else
712	@ ldrb	r2,[r1,#3]			@ 10
713	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
714	ldrb	r12,[r1,#2]
715	ldrb	r0,[r1,#1]
716	orr	r2,r2,r12,lsl#8
717	ldrb	r12,[r1],#4
718	orr	r2,r2,r0,lsl#16
719# if 10==15
720	str	r1,[sp,#17*4]			@ make room for r1
721# endif
722	eor	r0,r6,r6,ror#5
723	orr	r2,r2,r12,lsl#24
724	eor	r0,r0,r6,ror#19	@ Sigma1(e)
725#endif
726	ldr	r12,[r14],#4			@ *K256++
727	add	r9,r9,r2			@ h+=X[i]
728	str	r2,[sp,#10*4]
729	eor	r2,r7,r8
730	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
731	and	r2,r2,r6
732	add	r9,r9,r12			@ h+=K256[i]
733	eor	r2,r2,r8			@ Ch(e,f,g)
734	eor	r0,r10,r10,ror#11
735	add	r9,r9,r2			@ h+=Ch(e,f,g)
736#if 10==31
737	and	r12,r12,#0xff
738	cmp	r12,#0xf2			@ done?
739#endif
740#if 10<15
741# if __ARM_ARCH__>=7
742	ldr	r2,[r1],#4			@ prefetch
743# else
744	ldrb	r2,[r1,#3]
745# endif
746	eor	r12,r10,r11			@ a^b, b^c in next round
747#else
748	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
749	eor	r12,r10,r11			@ a^b, b^c in next round
750	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
751#endif
752	eor	r0,r0,r10,ror#20	@ Sigma0(a)
753	and	r3,r3,r12			@ (b^c)&=(a^b)
754	add	r5,r5,r9			@ d+=h
755	eor	r3,r3,r11			@ Maj(a,b,c)
756	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
757	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
758#if __ARM_ARCH__>=7
759	@ ldr	r2,[r1],#4			@ 11
760# if 11==15
761	str	r1,[sp,#17*4]			@ make room for r1
762# endif
763	eor	r0,r5,r5,ror#5
764	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
765	eor	r0,r0,r5,ror#19	@ Sigma1(e)
766# ifndef __ARMEB__
767	rev	r2,r2
768# endif
769#else
770	@ ldrb	r2,[r1,#3]			@ 11
771	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
772	ldrb	r3,[r1,#2]
773	ldrb	r0,[r1,#1]
774	orr	r2,r2,r3,lsl#8
775	ldrb	r3,[r1],#4
776	orr	r2,r2,r0,lsl#16
777# if 11==15
778	str	r1,[sp,#17*4]			@ make room for r1
779# endif
780	eor	r0,r5,r5,ror#5
781	orr	r2,r2,r3,lsl#24
782	eor	r0,r0,r5,ror#19	@ Sigma1(e)
783#endif
784	ldr	r3,[r14],#4			@ *K256++
785	add	r8,r8,r2			@ h+=X[i]
786	str	r2,[sp,#11*4]
787	eor	r2,r6,r7
788	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
789	and	r2,r2,r5
790	add	r8,r8,r3			@ h+=K256[i]
791	eor	r2,r2,r7			@ Ch(e,f,g)
792	eor	r0,r9,r9,ror#11
793	add	r8,r8,r2			@ h+=Ch(e,f,g)
794#if 11==31
795	and	r3,r3,#0xff
796	cmp	r3,#0xf2			@ done?
797#endif
798#if 11<15
799# if __ARM_ARCH__>=7
800	ldr	r2,[r1],#4			@ prefetch
801# else
802	ldrb	r2,[r1,#3]
803# endif
804	eor	r3,r9,r10			@ a^b, b^c in next round
805#else
806	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
807	eor	r3,r9,r10			@ a^b, b^c in next round
808	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
809#endif
810	eor	r0,r0,r9,ror#20	@ Sigma0(a)
811	and	r12,r12,r3			@ (b^c)&=(a^b)
812	add	r4,r4,r8			@ d+=h
813	eor	r12,r12,r10			@ Maj(a,b,c)
814	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
815	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
816#if __ARM_ARCH__>=7
817	@ ldr	r2,[r1],#4			@ 12
818# if 12==15
819	str	r1,[sp,#17*4]			@ make room for r1
820# endif
821	eor	r0,r4,r4,ror#5
822	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
823	eor	r0,r0,r4,ror#19	@ Sigma1(e)
824# ifndef __ARMEB__
825	rev	r2,r2
826# endif
827#else
828	@ ldrb	r2,[r1,#3]			@ 12
829	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
830	ldrb	r12,[r1,#2]
831	ldrb	r0,[r1,#1]
832	orr	r2,r2,r12,lsl#8
833	ldrb	r12,[r1],#4
834	orr	r2,r2,r0,lsl#16
835# if 12==15
836	str	r1,[sp,#17*4]			@ make room for r1
837# endif
838	eor	r0,r4,r4,ror#5
839	orr	r2,r2,r12,lsl#24
840	eor	r0,r0,r4,ror#19	@ Sigma1(e)
841#endif
842	ldr	r12,[r14],#4			@ *K256++
843	add	r7,r7,r2			@ h+=X[i]
844	str	r2,[sp,#12*4]
845	eor	r2,r5,r6
846	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
847	and	r2,r2,r4
848	add	r7,r7,r12			@ h+=K256[i]
849	eor	r2,r2,r6			@ Ch(e,f,g)
850	eor	r0,r8,r8,ror#11
851	add	r7,r7,r2			@ h+=Ch(e,f,g)
852#if 12==31
853	and	r12,r12,#0xff
854	cmp	r12,#0xf2			@ done?
855#endif
856#if 12<15
857# if __ARM_ARCH__>=7
858	ldr	r2,[r1],#4			@ prefetch
859# else
860	ldrb	r2,[r1,#3]
861# endif
862	eor	r12,r8,r9			@ a^b, b^c in next round
863#else
864	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
865	eor	r12,r8,r9			@ a^b, b^c in next round
866	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
867#endif
868	eor	r0,r0,r8,ror#20	@ Sigma0(a)
869	and	r3,r3,r12			@ (b^c)&=(a^b)
870	add	r11,r11,r7			@ d+=h
871	eor	r3,r3,r9			@ Maj(a,b,c)
872	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
873	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
874#if __ARM_ARCH__>=7
875	@ ldr	r2,[r1],#4			@ 13
876# if 13==15
877	str	r1,[sp,#17*4]			@ make room for r1
878# endif
879	eor	r0,r11,r11,ror#5
880	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
881	eor	r0,r0,r11,ror#19	@ Sigma1(e)
882# ifndef __ARMEB__
883	rev	r2,r2
884# endif
885#else
886	@ ldrb	r2,[r1,#3]			@ 13
887	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
888	ldrb	r3,[r1,#2]
889	ldrb	r0,[r1,#1]
890	orr	r2,r2,r3,lsl#8
891	ldrb	r3,[r1],#4
892	orr	r2,r2,r0,lsl#16
893# if 13==15
894	str	r1,[sp,#17*4]			@ make room for r1
895# endif
896	eor	r0,r11,r11,ror#5
897	orr	r2,r2,r3,lsl#24
898	eor	r0,r0,r11,ror#19	@ Sigma1(e)
899#endif
900	ldr	r3,[r14],#4			@ *K256++
901	add	r6,r6,r2			@ h+=X[i]
902	str	r2,[sp,#13*4]
903	eor	r2,r4,r5
904	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
905	and	r2,r2,r11
906	add	r6,r6,r3			@ h+=K256[i]
907	eor	r2,r2,r5			@ Ch(e,f,g)
908	eor	r0,r7,r7,ror#11
909	add	r6,r6,r2			@ h+=Ch(e,f,g)
910#if 13==31
911	and	r3,r3,#0xff
912	cmp	r3,#0xf2			@ done?
913#endif
914#if 13<15
915# if __ARM_ARCH__>=7
916	ldr	r2,[r1],#4			@ prefetch
917# else
918	ldrb	r2,[r1,#3]
919# endif
920	eor	r3,r7,r8			@ a^b, b^c in next round
921#else
922	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
923	eor	r3,r7,r8			@ a^b, b^c in next round
924	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
925#endif
926	eor	r0,r0,r7,ror#20	@ Sigma0(a)
927	and	r12,r12,r3			@ (b^c)&=(a^b)
928	add	r10,r10,r6			@ d+=h
929	eor	r12,r12,r8			@ Maj(a,b,c)
930	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
931	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
932#if __ARM_ARCH__>=7
933	@ ldr	r2,[r1],#4			@ 14
934# if 14==15
935	str	r1,[sp,#17*4]			@ make room for r1
936# endif
937	eor	r0,r10,r10,ror#5
938	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
939	eor	r0,r0,r10,ror#19	@ Sigma1(e)
940# ifndef __ARMEB__
941	rev	r2,r2
942# endif
943#else
944	@ ldrb	r2,[r1,#3]			@ 14
945	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
946	ldrb	r12,[r1,#2]
947	ldrb	r0,[r1,#1]
948	orr	r2,r2,r12,lsl#8
949	ldrb	r12,[r1],#4
950	orr	r2,r2,r0,lsl#16
951# if 14==15
952	str	r1,[sp,#17*4]			@ make room for r1
953# endif
954	eor	r0,r10,r10,ror#5
955	orr	r2,r2,r12,lsl#24
956	eor	r0,r0,r10,ror#19	@ Sigma1(e)
957#endif
958	ldr	r12,[r14],#4			@ *K256++
959	add	r5,r5,r2			@ h+=X[i]
960	str	r2,[sp,#14*4]
961	eor	r2,r11,r4
962	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
963	and	r2,r2,r10
964	add	r5,r5,r12			@ h+=K256[i]
965	eor	r2,r2,r4			@ Ch(e,f,g)
966	eor	r0,r6,r6,ror#11
967	add	r5,r5,r2			@ h+=Ch(e,f,g)
968#if 14==31
969	and	r12,r12,#0xff
970	cmp	r12,#0xf2			@ done?
971#endif
972#if 14<15
973# if __ARM_ARCH__>=7
974	ldr	r2,[r1],#4			@ prefetch
975# else
976	ldrb	r2,[r1,#3]
977# endif
978	eor	r12,r6,r7			@ a^b, b^c in next round
979#else
980	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
981	eor	r12,r6,r7			@ a^b, b^c in next round
982	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
983#endif
984	eor	r0,r0,r6,ror#20	@ Sigma0(a)
985	and	r3,r3,r12			@ (b^c)&=(a^b)
986	add	r9,r9,r5			@ d+=h
987	eor	r3,r3,r7			@ Maj(a,b,c)
988	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
989	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
990#if __ARM_ARCH__>=7
991	@ ldr	r2,[r1],#4			@ 15
992# if 15==15
993	str	r1,[sp,#17*4]			@ make room for r1
994# endif
995	eor	r0,r9,r9,ror#5
996	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
997	eor	r0,r0,r9,ror#19	@ Sigma1(e)
998# ifndef __ARMEB__
999	rev	r2,r2
1000# endif
1001#else
1002	@ ldrb	r2,[r1,#3]			@ 15
1003	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1004	ldrb	r3,[r1,#2]
1005	ldrb	r0,[r1,#1]
1006	orr	r2,r2,r3,lsl#8
1007	ldrb	r3,[r1],#4
1008	orr	r2,r2,r0,lsl#16
1009# if 15==15
1010	str	r1,[sp,#17*4]			@ make room for r1
1011# endif
1012	eor	r0,r9,r9,ror#5
1013	orr	r2,r2,r3,lsl#24
1014	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1015#endif
1016	ldr	r3,[r14],#4			@ *K256++
1017	add	r4,r4,r2			@ h+=X[i]
1018	str	r2,[sp,#15*4]
1019	eor	r2,r10,r11
1020	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1021	and	r2,r2,r9
1022	add	r4,r4,r3			@ h+=K256[i]
1023	eor	r2,r2,r11			@ Ch(e,f,g)
1024	eor	r0,r5,r5,ror#11
1025	add	r4,r4,r2			@ h+=Ch(e,f,g)
1026#if 15==31
1027	and	r3,r3,#0xff
1028	cmp	r3,#0xf2			@ done?
1029#endif
1030#if 15<15
1031# if __ARM_ARCH__>=7
1032	ldr	r2,[r1],#4			@ prefetch
1033# else
1034	ldrb	r2,[r1,#3]
1035# endif
1036	eor	r3,r5,r6			@ a^b, b^c in next round
1037#else
1038	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1039	eor	r3,r5,r6			@ a^b, b^c in next round
1040	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1041#endif
1042	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1043	and	r12,r12,r3			@ (b^c)&=(a^b)
1044	add	r8,r8,r4			@ d+=h
1045	eor	r12,r12,r6			@ Maj(a,b,c)
1046	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1047	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1048.Lrounds_16_xx:
1049	@ ldr	r2,[sp,#1*4]		@ 16
1050	@ ldr	r1,[sp,#14*4]
1051	mov	r0,r2,ror#7
1052	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1053	mov	r12,r1,ror#17
1054	eor	r0,r0,r2,ror#18
1055	eor	r12,r12,r1,ror#19
1056	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1057	ldr	r2,[sp,#0*4]
1058	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1059	ldr	r1,[sp,#9*4]
1060
1061	add	r12,r12,r0
1062	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1063	add	r2,r2,r12
1064	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1065	add	r2,r2,r1			@ X[i]
1066	ldr	r12,[r14],#4			@ *K256++
1067	add	r11,r11,r2			@ h+=X[i]
1068	str	r2,[sp,#0*4]
1069	eor	r2,r9,r10
1070	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1071	and	r2,r2,r8
1072	add	r11,r11,r12			@ h+=K256[i]
1073	eor	r2,r2,r10			@ Ch(e,f,g)
1074	eor	r0,r4,r4,ror#11
1075	add	r11,r11,r2			@ h+=Ch(e,f,g)
1076#if 16==31
1077	and	r12,r12,#0xff
1078	cmp	r12,#0xf2			@ done?
1079#endif
1080#if 16<15
1081# if __ARM_ARCH__>=7
1082	ldr	r2,[r1],#4			@ prefetch
1083# else
1084	ldrb	r2,[r1,#3]
1085# endif
1086	eor	r12,r4,r5			@ a^b, b^c in next round
1087#else
1088	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1089	eor	r12,r4,r5			@ a^b, b^c in next round
1090	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1091#endif
1092	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1093	and	r3,r3,r12			@ (b^c)&=(a^b)
1094	add	r7,r7,r11			@ d+=h
1095	eor	r3,r3,r5			@ Maj(a,b,c)
1096	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1097	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1098	@ ldr	r2,[sp,#2*4]		@ 17
1099	@ ldr	r1,[sp,#15*4]
1100	mov	r0,r2,ror#7
1101	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1102	mov	r3,r1,ror#17
1103	eor	r0,r0,r2,ror#18
1104	eor	r3,r3,r1,ror#19
1105	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1106	ldr	r2,[sp,#1*4]
1107	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1108	ldr	r1,[sp,#10*4]
1109
1110	add	r3,r3,r0
1111	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1112	add	r2,r2,r3
1113	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1114	add	r2,r2,r1			@ X[i]
1115	ldr	r3,[r14],#4			@ *K256++
1116	add	r10,r10,r2			@ h+=X[i]
1117	str	r2,[sp,#1*4]
1118	eor	r2,r8,r9
1119	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1120	and	r2,r2,r7
1121	add	r10,r10,r3			@ h+=K256[i]
1122	eor	r2,r2,r9			@ Ch(e,f,g)
1123	eor	r0,r11,r11,ror#11
1124	add	r10,r10,r2			@ h+=Ch(e,f,g)
1125#if 17==31
1126	and	r3,r3,#0xff
1127	cmp	r3,#0xf2			@ done?
1128#endif
1129#if 17<15
1130# if __ARM_ARCH__>=7
1131	ldr	r2,[r1],#4			@ prefetch
1132# else
1133	ldrb	r2,[r1,#3]
1134# endif
1135	eor	r3,r11,r4			@ a^b, b^c in next round
1136#else
1137	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1138	eor	r3,r11,r4			@ a^b, b^c in next round
1139	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1140#endif
1141	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1142	and	r12,r12,r3			@ (b^c)&=(a^b)
1143	add	r6,r6,r10			@ d+=h
1144	eor	r12,r12,r4			@ Maj(a,b,c)
1145	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1146	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1147	@ ldr	r2,[sp,#3*4]		@ 18
1148	@ ldr	r1,[sp,#0*4]
1149	mov	r0,r2,ror#7
1150	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1151	mov	r12,r1,ror#17
1152	eor	r0,r0,r2,ror#18
1153	eor	r12,r12,r1,ror#19
1154	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1155	ldr	r2,[sp,#2*4]
1156	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1157	ldr	r1,[sp,#11*4]
1158
1159	add	r12,r12,r0
1160	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1161	add	r2,r2,r12
1162	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1163	add	r2,r2,r1			@ X[i]
1164	ldr	r12,[r14],#4			@ *K256++
1165	add	r9,r9,r2			@ h+=X[i]
1166	str	r2,[sp,#2*4]
1167	eor	r2,r7,r8
1168	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1169	and	r2,r2,r6
1170	add	r9,r9,r12			@ h+=K256[i]
1171	eor	r2,r2,r8			@ Ch(e,f,g)
1172	eor	r0,r10,r10,ror#11
1173	add	r9,r9,r2			@ h+=Ch(e,f,g)
1174#if 18==31
1175	and	r12,r12,#0xff
1176	cmp	r12,#0xf2			@ done?
1177#endif
1178#if 18<15
1179# if __ARM_ARCH__>=7
1180	ldr	r2,[r1],#4			@ prefetch
1181# else
1182	ldrb	r2,[r1,#3]
1183# endif
1184	eor	r12,r10,r11			@ a^b, b^c in next round
1185#else
1186	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1187	eor	r12,r10,r11			@ a^b, b^c in next round
1188	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1189#endif
1190	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1191	and	r3,r3,r12			@ (b^c)&=(a^b)
1192	add	r5,r5,r9			@ d+=h
1193	eor	r3,r3,r11			@ Maj(a,b,c)
1194	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1195	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1196	@ ldr	r2,[sp,#4*4]		@ 19
1197	@ ldr	r1,[sp,#1*4]
1198	mov	r0,r2,ror#7
1199	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1200	mov	r3,r1,ror#17
1201	eor	r0,r0,r2,ror#18
1202	eor	r3,r3,r1,ror#19
1203	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1204	ldr	r2,[sp,#3*4]
1205	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1206	ldr	r1,[sp,#12*4]
1207
1208	add	r3,r3,r0
1209	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1210	add	r2,r2,r3
1211	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1212	add	r2,r2,r1			@ X[i]
1213	ldr	r3,[r14],#4			@ *K256++
1214	add	r8,r8,r2			@ h+=X[i]
1215	str	r2,[sp,#3*4]
1216	eor	r2,r6,r7
1217	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1218	and	r2,r2,r5
1219	add	r8,r8,r3			@ h+=K256[i]
1220	eor	r2,r2,r7			@ Ch(e,f,g)
1221	eor	r0,r9,r9,ror#11
1222	add	r8,r8,r2			@ h+=Ch(e,f,g)
1223#if 19==31
1224	and	r3,r3,#0xff
1225	cmp	r3,#0xf2			@ done?
1226#endif
1227#if 19<15
1228# if __ARM_ARCH__>=7
1229	ldr	r2,[r1],#4			@ prefetch
1230# else
1231	ldrb	r2,[r1,#3]
1232# endif
1233	eor	r3,r9,r10			@ a^b, b^c in next round
1234#else
1235	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1236	eor	r3,r9,r10			@ a^b, b^c in next round
1237	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1238#endif
1239	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1240	and	r12,r12,r3			@ (b^c)&=(a^b)
1241	add	r4,r4,r8			@ d+=h
1242	eor	r12,r12,r10			@ Maj(a,b,c)
1243	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1244	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1245	@ ldr	r2,[sp,#5*4]		@ 20
1246	@ ldr	r1,[sp,#2*4]
1247	mov	r0,r2,ror#7
1248	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1249	mov	r12,r1,ror#17
1250	eor	r0,r0,r2,ror#18
1251	eor	r12,r12,r1,ror#19
1252	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1253	ldr	r2,[sp,#4*4]
1254	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1255	ldr	r1,[sp,#13*4]
1256
1257	add	r12,r12,r0
1258	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1259	add	r2,r2,r12
1260	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1261	add	r2,r2,r1			@ X[i]
1262	ldr	r12,[r14],#4			@ *K256++
1263	add	r7,r7,r2			@ h+=X[i]
1264	str	r2,[sp,#4*4]
1265	eor	r2,r5,r6
1266	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1267	and	r2,r2,r4
1268	add	r7,r7,r12			@ h+=K256[i]
1269	eor	r2,r2,r6			@ Ch(e,f,g)
1270	eor	r0,r8,r8,ror#11
1271	add	r7,r7,r2			@ h+=Ch(e,f,g)
1272#if 20==31
1273	and	r12,r12,#0xff
1274	cmp	r12,#0xf2			@ done?
1275#endif
1276#if 20<15
1277# if __ARM_ARCH__>=7
1278	ldr	r2,[r1],#4			@ prefetch
1279# else
1280	ldrb	r2,[r1,#3]
1281# endif
1282	eor	r12,r8,r9			@ a^b, b^c in next round
1283#else
1284	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1285	eor	r12,r8,r9			@ a^b, b^c in next round
1286	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1287#endif
1288	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1289	and	r3,r3,r12			@ (b^c)&=(a^b)
1290	add	r11,r11,r7			@ d+=h
1291	eor	r3,r3,r9			@ Maj(a,b,c)
1292	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1293	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1294	@ ldr	r2,[sp,#6*4]		@ 21
1295	@ ldr	r1,[sp,#3*4]
1296	mov	r0,r2,ror#7
1297	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1298	mov	r3,r1,ror#17
1299	eor	r0,r0,r2,ror#18
1300	eor	r3,r3,r1,ror#19
1301	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1302	ldr	r2,[sp,#5*4]
1303	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1304	ldr	r1,[sp,#14*4]
1305
1306	add	r3,r3,r0
1307	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1308	add	r2,r2,r3
1309	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1310	add	r2,r2,r1			@ X[i]
1311	ldr	r3,[r14],#4			@ *K256++
1312	add	r6,r6,r2			@ h+=X[i]
1313	str	r2,[sp,#5*4]
1314	eor	r2,r4,r5
1315	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1316	and	r2,r2,r11
1317	add	r6,r6,r3			@ h+=K256[i]
1318	eor	r2,r2,r5			@ Ch(e,f,g)
1319	eor	r0,r7,r7,ror#11
1320	add	r6,r6,r2			@ h+=Ch(e,f,g)
1321#if 21==31
1322	and	r3,r3,#0xff
1323	cmp	r3,#0xf2			@ done?
1324#endif
1325#if 21<15
1326# if __ARM_ARCH__>=7
1327	ldr	r2,[r1],#4			@ prefetch
1328# else
1329	ldrb	r2,[r1,#3]
1330# endif
1331	eor	r3,r7,r8			@ a^b, b^c in next round
1332#else
1333	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1334	eor	r3,r7,r8			@ a^b, b^c in next round
1335	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1336#endif
1337	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1338	and	r12,r12,r3			@ (b^c)&=(a^b)
1339	add	r10,r10,r6			@ d+=h
1340	eor	r12,r12,r8			@ Maj(a,b,c)
1341	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1342	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1343	@ ldr	r2,[sp,#7*4]		@ 22
1344	@ ldr	r1,[sp,#4*4]
1345	mov	r0,r2,ror#7
1346	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1347	mov	r12,r1,ror#17
1348	eor	r0,r0,r2,ror#18
1349	eor	r12,r12,r1,ror#19
1350	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1351	ldr	r2,[sp,#6*4]
1352	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1353	ldr	r1,[sp,#15*4]
1354
1355	add	r12,r12,r0
1356	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1357	add	r2,r2,r12
1358	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1359	add	r2,r2,r1			@ X[i]
1360	ldr	r12,[r14],#4			@ *K256++
1361	add	r5,r5,r2			@ h+=X[i]
1362	str	r2,[sp,#6*4]
1363	eor	r2,r11,r4
1364	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1365	and	r2,r2,r10
1366	add	r5,r5,r12			@ h+=K256[i]
1367	eor	r2,r2,r4			@ Ch(e,f,g)
1368	eor	r0,r6,r6,ror#11
1369	add	r5,r5,r2			@ h+=Ch(e,f,g)
1370#if 22==31
1371	and	r12,r12,#0xff
1372	cmp	r12,#0xf2			@ done?
1373#endif
1374#if 22<15
1375# if __ARM_ARCH__>=7
1376	ldr	r2,[r1],#4			@ prefetch
1377# else
1378	ldrb	r2,[r1,#3]
1379# endif
1380	eor	r12,r6,r7			@ a^b, b^c in next round
1381#else
1382	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1383	eor	r12,r6,r7			@ a^b, b^c in next round
1384	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1385#endif
1386	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1387	and	r3,r3,r12			@ (b^c)&=(a^b)
1388	add	r9,r9,r5			@ d+=h
1389	eor	r3,r3,r7			@ Maj(a,b,c)
1390	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1391	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1392	@ ldr	r2,[sp,#8*4]		@ 23
1393	@ ldr	r1,[sp,#5*4]
1394	mov	r0,r2,ror#7
1395	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1396	mov	r3,r1,ror#17
1397	eor	r0,r0,r2,ror#18
1398	eor	r3,r3,r1,ror#19
1399	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1400	ldr	r2,[sp,#7*4]
1401	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1402	ldr	r1,[sp,#0*4]
1403
1404	add	r3,r3,r0
1405	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1406	add	r2,r2,r3
1407	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1408	add	r2,r2,r1			@ X[i]
1409	ldr	r3,[r14],#4			@ *K256++
1410	add	r4,r4,r2			@ h+=X[i]
1411	str	r2,[sp,#7*4]
1412	eor	r2,r10,r11
1413	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1414	and	r2,r2,r9
1415	add	r4,r4,r3			@ h+=K256[i]
1416	eor	r2,r2,r11			@ Ch(e,f,g)
1417	eor	r0,r5,r5,ror#11
1418	add	r4,r4,r2			@ h+=Ch(e,f,g)
1419#if 23==31
1420	and	r3,r3,#0xff
1421	cmp	r3,#0xf2			@ done?
1422#endif
1423#if 23<15
1424# if __ARM_ARCH__>=7
1425	ldr	r2,[r1],#4			@ prefetch
1426# else
1427	ldrb	r2,[r1,#3]
1428# endif
1429	eor	r3,r5,r6			@ a^b, b^c in next round
1430#else
1431	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1432	eor	r3,r5,r6			@ a^b, b^c in next round
1433	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1434#endif
1435	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1436	and	r12,r12,r3			@ (b^c)&=(a^b)
1437	add	r8,r8,r4			@ d+=h
1438	eor	r12,r12,r6			@ Maj(a,b,c)
1439	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1440	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1441	@ ldr	r2,[sp,#9*4]		@ 24
1442	@ ldr	r1,[sp,#6*4]
1443	mov	r0,r2,ror#7
1444	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1445	mov	r12,r1,ror#17
1446	eor	r0,r0,r2,ror#18
1447	eor	r12,r12,r1,ror#19
1448	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1449	ldr	r2,[sp,#8*4]
1450	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1451	ldr	r1,[sp,#1*4]
1452
1453	add	r12,r12,r0
1454	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1455	add	r2,r2,r12
1456	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1457	add	r2,r2,r1			@ X[i]
1458	ldr	r12,[r14],#4			@ *K256++
1459	add	r11,r11,r2			@ h+=X[i]
1460	str	r2,[sp,#8*4]
1461	eor	r2,r9,r10
1462	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1463	and	r2,r2,r8
1464	add	r11,r11,r12			@ h+=K256[i]
1465	eor	r2,r2,r10			@ Ch(e,f,g)
1466	eor	r0,r4,r4,ror#11
1467	add	r11,r11,r2			@ h+=Ch(e,f,g)
1468#if 24==31
1469	and	r12,r12,#0xff
1470	cmp	r12,#0xf2			@ done?
1471#endif
1472#if 24<15
1473# if __ARM_ARCH__>=7
1474	ldr	r2,[r1],#4			@ prefetch
1475# else
1476	ldrb	r2,[r1,#3]
1477# endif
1478	eor	r12,r4,r5			@ a^b, b^c in next round
1479#else
1480	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1481	eor	r12,r4,r5			@ a^b, b^c in next round
1482	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1483#endif
1484	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1485	and	r3,r3,r12			@ (b^c)&=(a^b)
1486	add	r7,r7,r11			@ d+=h
1487	eor	r3,r3,r5			@ Maj(a,b,c)
1488	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1489	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1490	@ ldr	r2,[sp,#10*4]		@ 25
1491	@ ldr	r1,[sp,#7*4]
1492	mov	r0,r2,ror#7
1493	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1494	mov	r3,r1,ror#17
1495	eor	r0,r0,r2,ror#18
1496	eor	r3,r3,r1,ror#19
1497	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1498	ldr	r2,[sp,#9*4]
1499	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1500	ldr	r1,[sp,#2*4]
1501
1502	add	r3,r3,r0
1503	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1504	add	r2,r2,r3
1505	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1506	add	r2,r2,r1			@ X[i]
1507	ldr	r3,[r14],#4			@ *K256++
1508	add	r10,r10,r2			@ h+=X[i]
1509	str	r2,[sp,#9*4]
1510	eor	r2,r8,r9
1511	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1512	and	r2,r2,r7
1513	add	r10,r10,r3			@ h+=K256[i]
1514	eor	r2,r2,r9			@ Ch(e,f,g)
1515	eor	r0,r11,r11,ror#11
1516	add	r10,r10,r2			@ h+=Ch(e,f,g)
1517#if 25==31
1518	and	r3,r3,#0xff
1519	cmp	r3,#0xf2			@ done?
1520#endif
1521#if 25<15
1522# if __ARM_ARCH__>=7
1523	ldr	r2,[r1],#4			@ prefetch
1524# else
1525	ldrb	r2,[r1,#3]
1526# endif
1527	eor	r3,r11,r4			@ a^b, b^c in next round
1528#else
1529	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1530	eor	r3,r11,r4			@ a^b, b^c in next round
1531	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1532#endif
1533	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1534	and	r12,r12,r3			@ (b^c)&=(a^b)
1535	add	r6,r6,r10			@ d+=h
1536	eor	r12,r12,r4			@ Maj(a,b,c)
1537	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1538	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1539	@ ldr	r2,[sp,#11*4]		@ 26
1540	@ ldr	r1,[sp,#8*4]
1541	mov	r0,r2,ror#7
1542	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1543	mov	r12,r1,ror#17
1544	eor	r0,r0,r2,ror#18
1545	eor	r12,r12,r1,ror#19
1546	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1547	ldr	r2,[sp,#10*4]
1548	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1549	ldr	r1,[sp,#3*4]
1550
1551	add	r12,r12,r0
1552	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1553	add	r2,r2,r12
1554	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1555	add	r2,r2,r1			@ X[i]
1556	ldr	r12,[r14],#4			@ *K256++
1557	add	r9,r9,r2			@ h+=X[i]
1558	str	r2,[sp,#10*4]
1559	eor	r2,r7,r8
1560	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1561	and	r2,r2,r6
1562	add	r9,r9,r12			@ h+=K256[i]
1563	eor	r2,r2,r8			@ Ch(e,f,g)
1564	eor	r0,r10,r10,ror#11
1565	add	r9,r9,r2			@ h+=Ch(e,f,g)
1566#if 26==31
1567	and	r12,r12,#0xff
1568	cmp	r12,#0xf2			@ done?
1569#endif
1570#if 26<15
1571# if __ARM_ARCH__>=7
1572	ldr	r2,[r1],#4			@ prefetch
1573# else
1574	ldrb	r2,[r1,#3]
1575# endif
1576	eor	r12,r10,r11			@ a^b, b^c in next round
1577#else
1578	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1579	eor	r12,r10,r11			@ a^b, b^c in next round
1580	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1581#endif
1582	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1583	and	r3,r3,r12			@ (b^c)&=(a^b)
1584	add	r5,r5,r9			@ d+=h
1585	eor	r3,r3,r11			@ Maj(a,b,c)
1586	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1587	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1588	@ ldr	r2,[sp,#12*4]		@ 27
1589	@ ldr	r1,[sp,#9*4]
1590	mov	r0,r2,ror#7
1591	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1592	mov	r3,r1,ror#17
1593	eor	r0,r0,r2,ror#18
1594	eor	r3,r3,r1,ror#19
1595	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1596	ldr	r2,[sp,#11*4]
1597	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1598	ldr	r1,[sp,#4*4]
1599
1600	add	r3,r3,r0
1601	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1602	add	r2,r2,r3
1603	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1604	add	r2,r2,r1			@ X[i]
1605	ldr	r3,[r14],#4			@ *K256++
1606	add	r8,r8,r2			@ h+=X[i]
1607	str	r2,[sp,#11*4]
1608	eor	r2,r6,r7
1609	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1610	and	r2,r2,r5
1611	add	r8,r8,r3			@ h+=K256[i]
1612	eor	r2,r2,r7			@ Ch(e,f,g)
1613	eor	r0,r9,r9,ror#11
1614	add	r8,r8,r2			@ h+=Ch(e,f,g)
1615#if 27==31
1616	and	r3,r3,#0xff
1617	cmp	r3,#0xf2			@ done?
1618#endif
1619#if 27<15
1620# if __ARM_ARCH__>=7
1621	ldr	r2,[r1],#4			@ prefetch
1622# else
1623	ldrb	r2,[r1,#3]
1624# endif
1625	eor	r3,r9,r10			@ a^b, b^c in next round
1626#else
1627	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1628	eor	r3,r9,r10			@ a^b, b^c in next round
1629	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1630#endif
1631	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1632	and	r12,r12,r3			@ (b^c)&=(a^b)
1633	add	r4,r4,r8			@ d+=h
1634	eor	r12,r12,r10			@ Maj(a,b,c)
1635	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1636	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1637	@ ldr	r2,[sp,#13*4]		@ 28
1638	@ ldr	r1,[sp,#10*4]
1639	mov	r0,r2,ror#7
1640	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1641	mov	r12,r1,ror#17
1642	eor	r0,r0,r2,ror#18
1643	eor	r12,r12,r1,ror#19
1644	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1645	ldr	r2,[sp,#12*4]
1646	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1647	ldr	r1,[sp,#5*4]
1648
1649	add	r12,r12,r0
1650	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1651	add	r2,r2,r12
1652	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1653	add	r2,r2,r1			@ X[i]
1654	ldr	r12,[r14],#4			@ *K256++
1655	add	r7,r7,r2			@ h+=X[i]
1656	str	r2,[sp,#12*4]
1657	eor	r2,r5,r6
1658	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1659	and	r2,r2,r4
1660	add	r7,r7,r12			@ h+=K256[i]
1661	eor	r2,r2,r6			@ Ch(e,f,g)
1662	eor	r0,r8,r8,ror#11
1663	add	r7,r7,r2			@ h+=Ch(e,f,g)
1664#if 28==31
1665	and	r12,r12,#0xff
1666	cmp	r12,#0xf2			@ done?
1667#endif
1668#if 28<15
1669# if __ARM_ARCH__>=7
1670	ldr	r2,[r1],#4			@ prefetch
1671# else
1672	ldrb	r2,[r1,#3]
1673# endif
1674	eor	r12,r8,r9			@ a^b, b^c in next round
1675#else
1676	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1677	eor	r12,r8,r9			@ a^b, b^c in next round
1678	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1679#endif
1680	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1681	and	r3,r3,r12			@ (b^c)&=(a^b)
1682	add	r11,r11,r7			@ d+=h
1683	eor	r3,r3,r9			@ Maj(a,b,c)
1684	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1685	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1686	@ ldr	r2,[sp,#14*4]		@ 29
1687	@ ldr	r1,[sp,#11*4]
1688	mov	r0,r2,ror#7
1689	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1690	mov	r3,r1,ror#17
1691	eor	r0,r0,r2,ror#18
1692	eor	r3,r3,r1,ror#19
1693	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1694	ldr	r2,[sp,#13*4]
1695	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1696	ldr	r1,[sp,#6*4]
1697
1698	add	r3,r3,r0
1699	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1700	add	r2,r2,r3
1701	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1702	add	r2,r2,r1			@ X[i]
1703	ldr	r3,[r14],#4			@ *K256++
1704	add	r6,r6,r2			@ h+=X[i]
1705	str	r2,[sp,#13*4]
1706	eor	r2,r4,r5
1707	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1708	and	r2,r2,r11
1709	add	r6,r6,r3			@ h+=K256[i]
1710	eor	r2,r2,r5			@ Ch(e,f,g)
1711	eor	r0,r7,r7,ror#11
1712	add	r6,r6,r2			@ h+=Ch(e,f,g)
1713#if 29==31
1714	and	r3,r3,#0xff
1715	cmp	r3,#0xf2			@ done?
1716#endif
1717#if 29<15
1718# if __ARM_ARCH__>=7
1719	ldr	r2,[r1],#4			@ prefetch
1720# else
1721	ldrb	r2,[r1,#3]
1722# endif
1723	eor	r3,r7,r8			@ a^b, b^c in next round
1724#else
1725	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1726	eor	r3,r7,r8			@ a^b, b^c in next round
1727	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1728#endif
1729	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1730	and	r12,r12,r3			@ (b^c)&=(a^b)
1731	add	r10,r10,r6			@ d+=h
1732	eor	r12,r12,r8			@ Maj(a,b,c)
1733	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1734	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1735	@ ldr	r2,[sp,#15*4]		@ 30
1736	@ ldr	r1,[sp,#12*4]
1737	mov	r0,r2,ror#7
1738	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1739	mov	r12,r1,ror#17
1740	eor	r0,r0,r2,ror#18
1741	eor	r12,r12,r1,ror#19
1742	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1743	ldr	r2,[sp,#14*4]
1744	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1745	ldr	r1,[sp,#7*4]
1746
1747	add	r12,r12,r0
1748	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1749	add	r2,r2,r12
1750	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1751	add	r2,r2,r1			@ X[i]
1752	ldr	r12,[r14],#4			@ *K256++
1753	add	r5,r5,r2			@ h+=X[i]
1754	str	r2,[sp,#14*4]
1755	eor	r2,r11,r4
1756	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1757	and	r2,r2,r10
1758	add	r5,r5,r12			@ h+=K256[i]
1759	eor	r2,r2,r4			@ Ch(e,f,g)
1760	eor	r0,r6,r6,ror#11
1761	add	r5,r5,r2			@ h+=Ch(e,f,g)
1762#if 30==31
1763	and	r12,r12,#0xff
1764	cmp	r12,#0xf2			@ done?
1765#endif
1766#if 30<15
1767# if __ARM_ARCH__>=7
1768	ldr	r2,[r1],#4			@ prefetch
1769# else
1770	ldrb	r2,[r1,#3]
1771# endif
1772	eor	r12,r6,r7			@ a^b, b^c in next round
1773#else
1774	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1775	eor	r12,r6,r7			@ a^b, b^c in next round
1776	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1777#endif
1778	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1779	and	r3,r3,r12			@ (b^c)&=(a^b)
1780	add	r9,r9,r5			@ d+=h
1781	eor	r3,r3,r7			@ Maj(a,b,c)
1782	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1783	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1784	@ ldr	r2,[sp,#0*4]		@ 31
1785	@ ldr	r1,[sp,#13*4]
1786	mov	r0,r2,ror#7
1787	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1788	mov	r3,r1,ror#17
1789	eor	r0,r0,r2,ror#18
1790	eor	r3,r3,r1,ror#19
1791	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1792	ldr	r2,[sp,#15*4]
1793	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1794	ldr	r1,[sp,#8*4]
1795
1796	add	r3,r3,r0
1797	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1798	add	r2,r2,r3
1799	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1800	add	r2,r2,r1			@ X[i]
1801	ldr	r3,[r14],#4			@ *K256++
1802	add	r4,r4,r2			@ h+=X[i]
1803	str	r2,[sp,#15*4]
1804	eor	r2,r10,r11
1805	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1806	and	r2,r2,r9
1807	add	r4,r4,r3			@ h+=K256[i]
1808	eor	r2,r2,r11			@ Ch(e,f,g)
1809	eor	r0,r5,r5,ror#11
1810	add	r4,r4,r2			@ h+=Ch(e,f,g)
1811#if 31==31
1812	and	r3,r3,#0xff
1813	cmp	r3,#0xf2			@ done?
1814#endif
1815#if 31<15
1816# if __ARM_ARCH__>=7
1817	ldr	r2,[r1],#4			@ prefetch
1818# else
1819	ldrb	r2,[r1,#3]
1820# endif
1821	eor	r3,r5,r6			@ a^b, b^c in next round
1822#else
1823	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1824	eor	r3,r5,r6			@ a^b, b^c in next round
1825	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1826#endif
1827	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1828	and	r12,r12,r3			@ (b^c)&=(a^b)
1829	add	r8,r8,r4			@ d+=h
1830	eor	r12,r12,r6			@ Maj(a,b,c)
1831	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1832	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1833#if __ARM_ARCH__>=7
1834	ite	eq			@ Thumb2 thing, sanity check in ARM
1835#endif
1836	ldreq	r3,[sp,#16*4]		@ pull ctx
1837	bne	.Lrounds_16_xx
1838
1839	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1840	ldr	r0,[r3,#0]
1841	ldr	r2,[r3,#4]
1842	ldr	r12,[r3,#8]
1843	add	r4,r4,r0
1844	ldr	r0,[r3,#12]
1845	add	r5,r5,r2
1846	ldr	r2,[r3,#16]
1847	add	r6,r6,r12
1848	ldr	r12,[r3,#20]
1849	add	r7,r7,r0
1850	ldr	r0,[r3,#24]
1851	add	r8,r8,r2
1852	ldr	r2,[r3,#28]
1853	add	r9,r9,r12
1854	ldr	r1,[sp,#17*4]		@ pull inp
1855	ldr	r12,[sp,#18*4]		@ pull inp+len
1856	add	r10,r10,r0
1857	add	r11,r11,r2
1858	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1859	cmp	r1,r12
1860	sub	r14,r14,#256	@ rewind Ktbl
1861	bne	.Loop
1862
1863	add	sp,sp,#19*4	@ destroy frame
1864#if __ARM_ARCH__>=5
1865	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1866#else
1867	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1868	tst	lr,#1
1869	moveq	pc,lr			@ be binary compatible with V4, yet
1870.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1871#endif
1872.size	sha256_block_data_order,.-sha256_block_data_order
1873#if __ARM_MAX_ARCH__>=7
1874.arch	armv7-a
1875.fpu	neon
1876
1877.globl	sha256_block_data_order_neon
1878.type	sha256_block_data_order_neon,%function
1879.align	4
1880sha256_block_data_order_neon:
1881.LNEON:
1882	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1883
1884	sub	r11,sp,#16*4+16
1885	adrl	r14,K256
1886	bic	r11,r11,#15		@ align for 128-bit stores
1887	mov	r12,sp
1888	mov	sp,r11			@ alloca
1889	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1890
1891	vld1.8	{q0},[r1]!
1892	vld1.8	{q1},[r1]!
1893	vld1.8	{q2},[r1]!
1894	vld1.8	{q3},[r1]!
1895	vld1.32	{q8},[r14,:128]!
1896	vld1.32	{q9},[r14,:128]!
1897	vld1.32	{q10},[r14,:128]!
1898	vld1.32	{q11},[r14,:128]!
1899	vrev32.8	q0,q0		@ yes, even on
1900	str	r0,[sp,#64]
1901	vrev32.8	q1,q1		@ big-endian
1902	str	r1,[sp,#68]
1903	mov	r1,sp
1904	vrev32.8	q2,q2
1905	str	r2,[sp,#72]
1906	vrev32.8	q3,q3
1907	str	r12,[sp,#76]		@ save original sp
1908	vadd.i32	q8,q8,q0
1909	vadd.i32	q9,q9,q1
1910	vst1.32	{q8},[r1,:128]!
1911	vadd.i32	q10,q10,q2
1912	vst1.32	{q9},[r1,:128]!
1913	vadd.i32	q11,q11,q3
1914	vst1.32	{q10},[r1,:128]!
1915	vst1.32	{q11},[r1,:128]!
1916
1917	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1918	sub	r1,r1,#64
1919	ldr	r2,[sp,#0]
1920	eor	r12,r12,r12
1921	eor	r3,r5,r6
1922	b	.L_00_48
1923
1924.align	4
1925.L_00_48:
1926	vext.8	q8,q0,q1,#4
1927	add	r11,r11,r2
1928	eor	r2,r9,r10
1929	eor	r0,r8,r8,ror#5
1930	vext.8	q9,q2,q3,#4
1931	add	r4,r4,r12
1932	and	r2,r2,r8
1933	eor	r12,r0,r8,ror#19
1934	vshr.u32	q10,q8,#7
1935	eor	r0,r4,r4,ror#11
1936	eor	r2,r2,r10
1937	vadd.i32	q0,q0,q9
1938	add	r11,r11,r12,ror#6
1939	eor	r12,r4,r5
1940	vshr.u32	q9,q8,#3
1941	eor	r0,r0,r4,ror#20
1942	add	r11,r11,r2
1943	vsli.32	q10,q8,#25
1944	ldr	r2,[sp,#4]
1945	and	r3,r3,r12
1946	vshr.u32	q11,q8,#18
1947	add	r7,r7,r11
1948	add	r11,r11,r0,ror#2
1949	eor	r3,r3,r5
1950	veor	q9,q9,q10
1951	add	r10,r10,r2
1952	vsli.32	q11,q8,#14
1953	eor	r2,r8,r9
1954	eor	r0,r7,r7,ror#5
1955	vshr.u32	d24,d7,#17
1956	add	r11,r11,r3
1957	and	r2,r2,r7
1958	veor	q9,q9,q11
1959	eor	r3,r0,r7,ror#19
1960	eor	r0,r11,r11,ror#11
1961	vsli.32	d24,d7,#15
1962	eor	r2,r2,r9
1963	add	r10,r10,r3,ror#6
1964	vshr.u32	d25,d7,#10
1965	eor	r3,r11,r4
1966	eor	r0,r0,r11,ror#20
1967	vadd.i32	q0,q0,q9
1968	add	r10,r10,r2
1969	ldr	r2,[sp,#8]
1970	veor	d25,d25,d24
1971	and	r12,r12,r3
1972	add	r6,r6,r10
1973	vshr.u32	d24,d7,#19
1974	add	r10,r10,r0,ror#2
1975	eor	r12,r12,r4
1976	vsli.32	d24,d7,#13
1977	add	r9,r9,r2
1978	eor	r2,r7,r8
1979	veor	d25,d25,d24
1980	eor	r0,r6,r6,ror#5
1981	add	r10,r10,r12
1982	vadd.i32	d0,d0,d25
1983	and	r2,r2,r6
1984	eor	r12,r0,r6,ror#19
1985	vshr.u32	d24,d0,#17
1986	eor	r0,r10,r10,ror#11
1987	eor	r2,r2,r8
1988	vsli.32	d24,d0,#15
1989	add	r9,r9,r12,ror#6
1990	eor	r12,r10,r11
1991	vshr.u32	d25,d0,#10
1992	eor	r0,r0,r10,ror#20
1993	add	r9,r9,r2
1994	veor	d25,d25,d24
1995	ldr	r2,[sp,#12]
1996	and	r3,r3,r12
1997	vshr.u32	d24,d0,#19
1998	add	r5,r5,r9
1999	add	r9,r9,r0,ror#2
2000	eor	r3,r3,r11
2001	vld1.32	{q8},[r14,:128]!
2002	add	r8,r8,r2
2003	vsli.32	d24,d0,#13
2004	eor	r2,r6,r7
2005	eor	r0,r5,r5,ror#5
2006	veor	d25,d25,d24
2007	add	r9,r9,r3
2008	and	r2,r2,r5
2009	vadd.i32	d1,d1,d25
2010	eor	r3,r0,r5,ror#19
2011	eor	r0,r9,r9,ror#11
2012	vadd.i32	q8,q8,q0
2013	eor	r2,r2,r7
2014	add	r8,r8,r3,ror#6
2015	eor	r3,r9,r10
2016	eor	r0,r0,r9,ror#20
2017	add	r8,r8,r2
2018	ldr	r2,[sp,#16]
2019	and	r12,r12,r3
2020	add	r4,r4,r8
2021	vst1.32	{q8},[r1,:128]!
2022	add	r8,r8,r0,ror#2
2023	eor	r12,r12,r10
2024	vext.8	q8,q1,q2,#4
2025	add	r7,r7,r2
2026	eor	r2,r5,r6
2027	eor	r0,r4,r4,ror#5
2028	vext.8	q9,q3,q0,#4
2029	add	r8,r8,r12
2030	and	r2,r2,r4
2031	eor	r12,r0,r4,ror#19
2032	vshr.u32	q10,q8,#7
2033	eor	r0,r8,r8,ror#11
2034	eor	r2,r2,r6
2035	vadd.i32	q1,q1,q9
2036	add	r7,r7,r12,ror#6
2037	eor	r12,r8,r9
2038	vshr.u32	q9,q8,#3
2039	eor	r0,r0,r8,ror#20
2040	add	r7,r7,r2
2041	vsli.32	q10,q8,#25
2042	ldr	r2,[sp,#20]
2043	and	r3,r3,r12
2044	vshr.u32	q11,q8,#18
2045	add	r11,r11,r7
2046	add	r7,r7,r0,ror#2
2047	eor	r3,r3,r9
2048	veor	q9,q9,q10
2049	add	r6,r6,r2
2050	vsli.32	q11,q8,#14
2051	eor	r2,r4,r5
2052	eor	r0,r11,r11,ror#5
2053	vshr.u32	d24,d1,#17
2054	add	r7,r7,r3
2055	and	r2,r2,r11
2056	veor	q9,q9,q11
2057	eor	r3,r0,r11,ror#19
2058	eor	r0,r7,r7,ror#11
2059	vsli.32	d24,d1,#15
2060	eor	r2,r2,r5
2061	add	r6,r6,r3,ror#6
2062	vshr.u32	d25,d1,#10
2063	eor	r3,r7,r8
2064	eor	r0,r0,r7,ror#20
2065	vadd.i32	q1,q1,q9
2066	add	r6,r6,r2
2067	ldr	r2,[sp,#24]
2068	veor	d25,d25,d24
2069	and	r12,r12,r3
2070	add	r10,r10,r6
2071	vshr.u32	d24,d1,#19
2072	add	r6,r6,r0,ror#2
2073	eor	r12,r12,r8
2074	vsli.32	d24,d1,#13
2075	add	r5,r5,r2
2076	eor	r2,r11,r4
2077	veor	d25,d25,d24
2078	eor	r0,r10,r10,ror#5
2079	add	r6,r6,r12
2080	vadd.i32	d2,d2,d25
2081	and	r2,r2,r10
2082	eor	r12,r0,r10,ror#19
2083	vshr.u32	d24,d2,#17
2084	eor	r0,r6,r6,ror#11
2085	eor	r2,r2,r4
2086	vsli.32	d24,d2,#15
2087	add	r5,r5,r12,ror#6
2088	eor	r12,r6,r7
2089	vshr.u32	d25,d2,#10
2090	eor	r0,r0,r6,ror#20
2091	add	r5,r5,r2
2092	veor	d25,d25,d24
2093	ldr	r2,[sp,#28]
2094	and	r3,r3,r12
2095	vshr.u32	d24,d2,#19
2096	add	r9,r9,r5
2097	add	r5,r5,r0,ror#2
2098	eor	r3,r3,r7
2099	vld1.32	{q8},[r14,:128]!
2100	add	r4,r4,r2
2101	vsli.32	d24,d2,#13
2102	eor	r2,r10,r11
2103	eor	r0,r9,r9,ror#5
2104	veor	d25,d25,d24
2105	add	r5,r5,r3
2106	and	r2,r2,r9
2107	vadd.i32	d3,d3,d25
2108	eor	r3,r0,r9,ror#19
2109	eor	r0,r5,r5,ror#11
2110	vadd.i32	q8,q8,q1
2111	eor	r2,r2,r11
2112	add	r4,r4,r3,ror#6
2113	eor	r3,r5,r6
2114	eor	r0,r0,r5,ror#20
2115	add	r4,r4,r2
2116	ldr	r2,[sp,#32]
2117	and	r12,r12,r3
2118	add	r8,r8,r4
2119	vst1.32	{q8},[r1,:128]!
2120	add	r4,r4,r0,ror#2
2121	eor	r12,r12,r6
2122	vext.8	q8,q2,q3,#4
2123	add	r11,r11,r2
2124	eor	r2,r9,r10
2125	eor	r0,r8,r8,ror#5
2126	vext.8	q9,q0,q1,#4
2127	add	r4,r4,r12
2128	and	r2,r2,r8
2129	eor	r12,r0,r8,ror#19
2130	vshr.u32	q10,q8,#7
2131	eor	r0,r4,r4,ror#11
2132	eor	r2,r2,r10
2133	vadd.i32	q2,q2,q9
2134	add	r11,r11,r12,ror#6
2135	eor	r12,r4,r5
2136	vshr.u32	q9,q8,#3
2137	eor	r0,r0,r4,ror#20
2138	add	r11,r11,r2
2139	vsli.32	q10,q8,#25
2140	ldr	r2,[sp,#36]
2141	and	r3,r3,r12
2142	vshr.u32	q11,q8,#18
2143	add	r7,r7,r11
2144	add	r11,r11,r0,ror#2
2145	eor	r3,r3,r5
2146	veor	q9,q9,q10
2147	add	r10,r10,r2
2148	vsli.32	q11,q8,#14
2149	eor	r2,r8,r9
2150	eor	r0,r7,r7,ror#5
2151	vshr.u32	d24,d3,#17
2152	add	r11,r11,r3
2153	and	r2,r2,r7
2154	veor	q9,q9,q11
2155	eor	r3,r0,r7,ror#19
2156	eor	r0,r11,r11,ror#11
2157	vsli.32	d24,d3,#15
2158	eor	r2,r2,r9
2159	add	r10,r10,r3,ror#6
2160	vshr.u32	d25,d3,#10
2161	eor	r3,r11,r4
2162	eor	r0,r0,r11,ror#20
2163	vadd.i32	q2,q2,q9
2164	add	r10,r10,r2
2165	ldr	r2,[sp,#40]
2166	veor	d25,d25,d24
2167	and	r12,r12,r3
2168	add	r6,r6,r10
2169	vshr.u32	d24,d3,#19
2170	add	r10,r10,r0,ror#2
2171	eor	r12,r12,r4
2172	vsli.32	d24,d3,#13
2173	add	r9,r9,r2
2174	eor	r2,r7,r8
2175	veor	d25,d25,d24
2176	eor	r0,r6,r6,ror#5
2177	add	r10,r10,r12
2178	vadd.i32	d4,d4,d25
2179	and	r2,r2,r6
2180	eor	r12,r0,r6,ror#19
2181	vshr.u32	d24,d4,#17
2182	eor	r0,r10,r10,ror#11
2183	eor	r2,r2,r8
2184	vsli.32	d24,d4,#15
2185	add	r9,r9,r12,ror#6
2186	eor	r12,r10,r11
2187	vshr.u32	d25,d4,#10
2188	eor	r0,r0,r10,ror#20
2189	add	r9,r9,r2
2190	veor	d25,d25,d24
2191	ldr	r2,[sp,#44]
2192	and	r3,r3,r12
2193	vshr.u32	d24,d4,#19
2194	add	r5,r5,r9
2195	add	r9,r9,r0,ror#2
2196	eor	r3,r3,r11
2197	vld1.32	{q8},[r14,:128]!
2198	add	r8,r8,r2
2199	vsli.32	d24,d4,#13
2200	eor	r2,r6,r7
2201	eor	r0,r5,r5,ror#5
2202	veor	d25,d25,d24
2203	add	r9,r9,r3
2204	and	r2,r2,r5
2205	vadd.i32	d5,d5,d25
2206	eor	r3,r0,r5,ror#19
2207	eor	r0,r9,r9,ror#11
2208	vadd.i32	q8,q8,q2
2209	eor	r2,r2,r7
2210	add	r8,r8,r3,ror#6
2211	eor	r3,r9,r10
2212	eor	r0,r0,r9,ror#20
2213	add	r8,r8,r2
2214	ldr	r2,[sp,#48]
2215	and	r12,r12,r3
2216	add	r4,r4,r8
2217	vst1.32	{q8},[r1,:128]!
2218	add	r8,r8,r0,ror#2
2219	eor	r12,r12,r10
2220	vext.8	q8,q3,q0,#4
2221	add	r7,r7,r2
2222	eor	r2,r5,r6
2223	eor	r0,r4,r4,ror#5
2224	vext.8	q9,q1,q2,#4
2225	add	r8,r8,r12
2226	and	r2,r2,r4
2227	eor	r12,r0,r4,ror#19
2228	vshr.u32	q10,q8,#7
2229	eor	r0,r8,r8,ror#11
2230	eor	r2,r2,r6
2231	vadd.i32	q3,q3,q9
2232	add	r7,r7,r12,ror#6
2233	eor	r12,r8,r9
2234	vshr.u32	q9,q8,#3
2235	eor	r0,r0,r8,ror#20
2236	add	r7,r7,r2
2237	vsli.32	q10,q8,#25
2238	ldr	r2,[sp,#52]
2239	and	r3,r3,r12
2240	vshr.u32	q11,q8,#18
2241	add	r11,r11,r7
2242	add	r7,r7,r0,ror#2
2243	eor	r3,r3,r9
2244	veor	q9,q9,q10
2245	add	r6,r6,r2
2246	vsli.32	q11,q8,#14
2247	eor	r2,r4,r5
2248	eor	r0,r11,r11,ror#5
2249	vshr.u32	d24,d5,#17
2250	add	r7,r7,r3
2251	and	r2,r2,r11
2252	veor	q9,q9,q11
2253	eor	r3,r0,r11,ror#19
2254	eor	r0,r7,r7,ror#11
2255	vsli.32	d24,d5,#15
2256	eor	r2,r2,r5
2257	add	r6,r6,r3,ror#6
2258	vshr.u32	d25,d5,#10
2259	eor	r3,r7,r8
2260	eor	r0,r0,r7,ror#20
2261	vadd.i32	q3,q3,q9
2262	add	r6,r6,r2
2263	ldr	r2,[sp,#56]
2264	veor	d25,d25,d24
2265	and	r12,r12,r3
2266	add	r10,r10,r6
2267	vshr.u32	d24,d5,#19
2268	add	r6,r6,r0,ror#2
2269	eor	r12,r12,r8
2270	vsli.32	d24,d5,#13
2271	add	r5,r5,r2
2272	eor	r2,r11,r4
2273	veor	d25,d25,d24
2274	eor	r0,r10,r10,ror#5
2275	add	r6,r6,r12
2276	vadd.i32	d6,d6,d25
2277	and	r2,r2,r10
2278	eor	r12,r0,r10,ror#19
2279	vshr.u32	d24,d6,#17
2280	eor	r0,r6,r6,ror#11
2281	eor	r2,r2,r4
2282	vsli.32	d24,d6,#15
2283	add	r5,r5,r12,ror#6
2284	eor	r12,r6,r7
2285	vshr.u32	d25,d6,#10
2286	eor	r0,r0,r6,ror#20
2287	add	r5,r5,r2
2288	veor	d25,d25,d24
2289	ldr	r2,[sp,#60]
2290	and	r3,r3,r12
2291	vshr.u32	d24,d6,#19
2292	add	r9,r9,r5
2293	add	r5,r5,r0,ror#2
2294	eor	r3,r3,r7
2295	vld1.32	{q8},[r14,:128]!
2296	add	r4,r4,r2
2297	vsli.32	d24,d6,#13
2298	eor	r2,r10,r11
2299	eor	r0,r9,r9,ror#5
2300	veor	d25,d25,d24
2301	add	r5,r5,r3
2302	and	r2,r2,r9
2303	vadd.i32	d7,d7,d25
2304	eor	r3,r0,r9,ror#19
2305	eor	r0,r5,r5,ror#11
2306	vadd.i32	q8,q8,q3
2307	eor	r2,r2,r11
2308	add	r4,r4,r3,ror#6
2309	eor	r3,r5,r6
2310	eor	r0,r0,r5,ror#20
2311	add	r4,r4,r2
2312	ldr	r2,[r14]
2313	and	r12,r12,r3
2314	add	r8,r8,r4
2315	vst1.32	{q8},[r1,:128]!
2316	add	r4,r4,r0,ror#2
2317	eor	r12,r12,r6
2318	teq	r2,#0				@ check for K256 terminator
2319	ldr	r2,[sp,#0]
2320	sub	r1,r1,#64
2321	bne	.L_00_48
2322
2323	ldr	r1,[sp,#68]
2324	ldr	r0,[sp,#72]
2325	sub	r14,r14,#256	@ rewind r14
2326	teq	r1,r0
2327	it	eq
2328	subeq	r1,r1,#64		@ avoid SEGV
2329	vld1.8	{q0},[r1]!		@ load next input block
2330	vld1.8	{q1},[r1]!
2331	vld1.8	{q2},[r1]!
2332	vld1.8	{q3},[r1]!
2333	it	ne
2334	strne	r1,[sp,#68]
2335	mov	r1,sp
2336	add	r11,r11,r2
2337	eor	r2,r9,r10
2338	eor	r0,r8,r8,ror#5
2339	add	r4,r4,r12
2340	vld1.32	{q8},[r14,:128]!
2341	and	r2,r2,r8
2342	eor	r12,r0,r8,ror#19
2343	eor	r0,r4,r4,ror#11
2344	eor	r2,r2,r10
2345	vrev32.8	q0,q0
2346	add	r11,r11,r12,ror#6
2347	eor	r12,r4,r5
2348	eor	r0,r0,r4,ror#20
2349	add	r11,r11,r2
2350	vadd.i32	q8,q8,q0
2351	ldr	r2,[sp,#4]
2352	and	r3,r3,r12
2353	add	r7,r7,r11
2354	add	r11,r11,r0,ror#2
2355	eor	r3,r3,r5
2356	add	r10,r10,r2
2357	eor	r2,r8,r9
2358	eor	r0,r7,r7,ror#5
2359	add	r11,r11,r3
2360	and	r2,r2,r7
2361	eor	r3,r0,r7,ror#19
2362	eor	r0,r11,r11,ror#11
2363	eor	r2,r2,r9
2364	add	r10,r10,r3,ror#6
2365	eor	r3,r11,r4
2366	eor	r0,r0,r11,ror#20
2367	add	r10,r10,r2
2368	ldr	r2,[sp,#8]
2369	and	r12,r12,r3
2370	add	r6,r6,r10
2371	add	r10,r10,r0,ror#2
2372	eor	r12,r12,r4
2373	add	r9,r9,r2
2374	eor	r2,r7,r8
2375	eor	r0,r6,r6,ror#5
2376	add	r10,r10,r12
2377	and	r2,r2,r6
2378	eor	r12,r0,r6,ror#19
2379	eor	r0,r10,r10,ror#11
2380	eor	r2,r2,r8
2381	add	r9,r9,r12,ror#6
2382	eor	r12,r10,r11
2383	eor	r0,r0,r10,ror#20
2384	add	r9,r9,r2
2385	ldr	r2,[sp,#12]
2386	and	r3,r3,r12
2387	add	r5,r5,r9
2388	add	r9,r9,r0,ror#2
2389	eor	r3,r3,r11
2390	add	r8,r8,r2
2391	eor	r2,r6,r7
2392	eor	r0,r5,r5,ror#5
2393	add	r9,r9,r3
2394	and	r2,r2,r5
2395	eor	r3,r0,r5,ror#19
2396	eor	r0,r9,r9,ror#11
2397	eor	r2,r2,r7
2398	add	r8,r8,r3,ror#6
2399	eor	r3,r9,r10
2400	eor	r0,r0,r9,ror#20
2401	add	r8,r8,r2
2402	ldr	r2,[sp,#16]
2403	and	r12,r12,r3
2404	add	r4,r4,r8
2405	add	r8,r8,r0,ror#2
2406	eor	r12,r12,r10
2407	vst1.32	{q8},[r1,:128]!
2408	add	r7,r7,r2
2409	eor	r2,r5,r6
2410	eor	r0,r4,r4,ror#5
2411	add	r8,r8,r12
2412	vld1.32	{q8},[r14,:128]!
2413	and	r2,r2,r4
2414	eor	r12,r0,r4,ror#19
2415	eor	r0,r8,r8,ror#11
2416	eor	r2,r2,r6
2417	vrev32.8	q1,q1
2418	add	r7,r7,r12,ror#6
2419	eor	r12,r8,r9
2420	eor	r0,r0,r8,ror#20
2421	add	r7,r7,r2
2422	vadd.i32	q8,q8,q1
2423	ldr	r2,[sp,#20]
2424	and	r3,r3,r12
2425	add	r11,r11,r7
2426	add	r7,r7,r0,ror#2
2427	eor	r3,r3,r9
2428	add	r6,r6,r2
2429	eor	r2,r4,r5
2430	eor	r0,r11,r11,ror#5
2431	add	r7,r7,r3
2432	and	r2,r2,r11
2433	eor	r3,r0,r11,ror#19
2434	eor	r0,r7,r7,ror#11
2435	eor	r2,r2,r5
2436	add	r6,r6,r3,ror#6
2437	eor	r3,r7,r8
2438	eor	r0,r0,r7,ror#20
2439	add	r6,r6,r2
2440	ldr	r2,[sp,#24]
2441	and	r12,r12,r3
2442	add	r10,r10,r6
2443	add	r6,r6,r0,ror#2
2444	eor	r12,r12,r8
2445	add	r5,r5,r2
2446	eor	r2,r11,r4
2447	eor	r0,r10,r10,ror#5
2448	add	r6,r6,r12
2449	and	r2,r2,r10
2450	eor	r12,r0,r10,ror#19
2451	eor	r0,r6,r6,ror#11
2452	eor	r2,r2,r4
2453	add	r5,r5,r12,ror#6
2454	eor	r12,r6,r7
2455	eor	r0,r0,r6,ror#20
2456	add	r5,r5,r2
2457	ldr	r2,[sp,#28]
2458	and	r3,r3,r12
2459	add	r9,r9,r5
2460	add	r5,r5,r0,ror#2
2461	eor	r3,r3,r7
2462	add	r4,r4,r2
2463	eor	r2,r10,r11
2464	eor	r0,r9,r9,ror#5
2465	add	r5,r5,r3
2466	and	r2,r2,r9
2467	eor	r3,r0,r9,ror#19
2468	eor	r0,r5,r5,ror#11
2469	eor	r2,r2,r11
2470	add	r4,r4,r3,ror#6
2471	eor	r3,r5,r6
2472	eor	r0,r0,r5,ror#20
2473	add	r4,r4,r2
2474	ldr	r2,[sp,#32]
2475	and	r12,r12,r3
2476	add	r8,r8,r4
2477	add	r4,r4,r0,ror#2
2478	eor	r12,r12,r6
2479	vst1.32	{q8},[r1,:128]!
2480	add	r11,r11,r2
2481	eor	r2,r9,r10
2482	eor	r0,r8,r8,ror#5
2483	add	r4,r4,r12
2484	vld1.32	{q8},[r14,:128]!
2485	and	r2,r2,r8
2486	eor	r12,r0,r8,ror#19
2487	eor	r0,r4,r4,ror#11
2488	eor	r2,r2,r10
2489	vrev32.8	q2,q2
2490	add	r11,r11,r12,ror#6
2491	eor	r12,r4,r5
2492	eor	r0,r0,r4,ror#20
2493	add	r11,r11,r2
2494	vadd.i32	q8,q8,q2
2495	ldr	r2,[sp,#36]
2496	and	r3,r3,r12
2497	add	r7,r7,r11
2498	add	r11,r11,r0,ror#2
2499	eor	r3,r3,r5
2500	add	r10,r10,r2
2501	eor	r2,r8,r9
2502	eor	r0,r7,r7,ror#5
2503	add	r11,r11,r3
2504	and	r2,r2,r7
2505	eor	r3,r0,r7,ror#19
2506	eor	r0,r11,r11,ror#11
2507	eor	r2,r2,r9
2508	add	r10,r10,r3,ror#6
2509	eor	r3,r11,r4
2510	eor	r0,r0,r11,ror#20
2511	add	r10,r10,r2
2512	ldr	r2,[sp,#40]
2513	and	r12,r12,r3
2514	add	r6,r6,r10
2515	add	r10,r10,r0,ror#2
2516	eor	r12,r12,r4
2517	add	r9,r9,r2
2518	eor	r2,r7,r8
2519	eor	r0,r6,r6,ror#5
2520	add	r10,r10,r12
2521	and	r2,r2,r6
2522	eor	r12,r0,r6,ror#19
2523	eor	r0,r10,r10,ror#11
2524	eor	r2,r2,r8
2525	add	r9,r9,r12,ror#6
2526	eor	r12,r10,r11
2527	eor	r0,r0,r10,ror#20
2528	add	r9,r9,r2
2529	ldr	r2,[sp,#44]
2530	and	r3,r3,r12
2531	add	r5,r5,r9
2532	add	r9,r9,r0,ror#2
2533	eor	r3,r3,r11
2534	add	r8,r8,r2
2535	eor	r2,r6,r7
2536	eor	r0,r5,r5,ror#5
2537	add	r9,r9,r3
2538	and	r2,r2,r5
2539	eor	r3,r0,r5,ror#19
2540	eor	r0,r9,r9,ror#11
2541	eor	r2,r2,r7
2542	add	r8,r8,r3,ror#6
2543	eor	r3,r9,r10
2544	eor	r0,r0,r9,ror#20
2545	add	r8,r8,r2
2546	ldr	r2,[sp,#48]
2547	and	r12,r12,r3
2548	add	r4,r4,r8
2549	add	r8,r8,r0,ror#2
2550	eor	r12,r12,r10
2551	vst1.32	{q8},[r1,:128]!
2552	add	r7,r7,r2
2553	eor	r2,r5,r6
2554	eor	r0,r4,r4,ror#5
2555	add	r8,r8,r12
2556	vld1.32	{q8},[r14,:128]!
2557	and	r2,r2,r4
2558	eor	r12,r0,r4,ror#19
2559	eor	r0,r8,r8,ror#11
2560	eor	r2,r2,r6
2561	vrev32.8	q3,q3
2562	add	r7,r7,r12,ror#6
2563	eor	r12,r8,r9
2564	eor	r0,r0,r8,ror#20
2565	add	r7,r7,r2
2566	vadd.i32	q8,q8,q3
2567	ldr	r2,[sp,#52]
2568	and	r3,r3,r12
2569	add	r11,r11,r7
2570	add	r7,r7,r0,ror#2
2571	eor	r3,r3,r9
2572	add	r6,r6,r2
2573	eor	r2,r4,r5
2574	eor	r0,r11,r11,ror#5
2575	add	r7,r7,r3
2576	and	r2,r2,r11
2577	eor	r3,r0,r11,ror#19
2578	eor	r0,r7,r7,ror#11
2579	eor	r2,r2,r5
2580	add	r6,r6,r3,ror#6
2581	eor	r3,r7,r8
2582	eor	r0,r0,r7,ror#20
2583	add	r6,r6,r2
2584	ldr	r2,[sp,#56]
2585	and	r12,r12,r3
2586	add	r10,r10,r6
2587	add	r6,r6,r0,ror#2
2588	eor	r12,r12,r8
2589	add	r5,r5,r2
2590	eor	r2,r11,r4
2591	eor	r0,r10,r10,ror#5
2592	add	r6,r6,r12
2593	and	r2,r2,r10
2594	eor	r12,r0,r10,ror#19
2595	eor	r0,r6,r6,ror#11
2596	eor	r2,r2,r4
2597	add	r5,r5,r12,ror#6
2598	eor	r12,r6,r7
2599	eor	r0,r0,r6,ror#20
2600	add	r5,r5,r2
2601	ldr	r2,[sp,#60]
2602	and	r3,r3,r12
2603	add	r9,r9,r5
2604	add	r5,r5,r0,ror#2
2605	eor	r3,r3,r7
2606	add	r4,r4,r2
2607	eor	r2,r10,r11
2608	eor	r0,r9,r9,ror#5
2609	add	r5,r5,r3
2610	and	r2,r2,r9
2611	eor	r3,r0,r9,ror#19
2612	eor	r0,r5,r5,ror#11
2613	eor	r2,r2,r11
2614	add	r4,r4,r3,ror#6
2615	eor	r3,r5,r6
2616	eor	r0,r0,r5,ror#20
2617	add	r4,r4,r2
2618	ldr	r2,[sp,#64]
2619	and	r12,r12,r3
2620	add	r8,r8,r4
2621	add	r4,r4,r0,ror#2
2622	eor	r12,r12,r6
2623	vst1.32	{q8},[r1,:128]!
2624	ldr	r0,[r2,#0]
2625	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2626	ldr	r12,[r2,#4]
2627	ldr	r3,[r2,#8]
2628	ldr	r1,[r2,#12]
2629	add	r4,r4,r0			@ accumulate
2630	ldr	r0,[r2,#16]
2631	add	r5,r5,r12
2632	ldr	r12,[r2,#20]
2633	add	r6,r6,r3
2634	ldr	r3,[r2,#24]
2635	add	r7,r7,r1
2636	ldr	r1,[r2,#28]
2637	add	r8,r8,r0
2638	str	r4,[r2],#4
2639	add	r9,r9,r12
2640	str	r5,[r2],#4
2641	add	r10,r10,r3
2642	str	r6,[r2],#4
2643	add	r11,r11,r1
2644	str	r7,[r2],#4
2645	stmia	r2,{r8,r9,r10,r11}
2646
2647	ittte	ne
2648	movne	r1,sp
2649	ldrne	r2,[sp,#0]
2650	eorne	r12,r12,r12
2651	ldreq	sp,[sp,#76]			@ restore original sp
2652	itt	ne
2653	eorne	r3,r5,r6
2654	bne	.L_00_48
2655
2656	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2657.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2658#endif
2659#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2660
# if defined(__thumb2__) && !defined(__APPLE__)
@ INST(a,b,c,d) emits one hand-encoded 32-bit ARMv8 crypto instruction as raw
@ bytes, so the module still assembles with toolchains that do not recognize
@ the SHA extensions. In Thumb-2 the 32-bit encoding is stored as two
@ halfwords, hence the byte permutation; NOTE(review): the d|0xc adjustment
@ presumably maps the ARM-mode leading byte to its Thumb-2 equivalent —
@ confirm against the Architecture Reference Manual encodings.
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
@ ARM mode: emit the instruction bytes in natural (little-endian word) order.
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
2666
.type	sha256_block_data_order_armv8,%function
.align	5
@ ---------------------------------------------------------------------------
@ sha256_block_data_order_armv8(uint32_t ctx[8], const void *inp, size_t num)
@
@ SHA-256 block transform using the ARMv8 Crypto Extensions. The SHA256H /
@ SHA256H2 / SHA256SU0 / SHA256SU1 instructions are emitted as raw bytes via
@ the INST macro (defined above) with the decoded mnemonic in a comment.
@ In:   r0 = ctx, eight 32-bit state words
@       r1 = inp, message blocks (vld1.8 loads, so no alignment required)
@       r2 = num, number of 64-byte blocks to process
@ Uses: r3 = pointer into the K256 round-constant table;
@       q0/q1 = working state, q8-q11 = message schedule,
@       q12/q13 = W+K staging, q14/q15 = state saved per block
@ ---------------------------------------------------------------------------
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]		@ load state: q0 = H0-H3, q1 = H4-H7
# ifdef	__APPLE__
	sub	r3,r3,#256+32		@ NOTE(review): assumes r3 enters pointing
					@ #256+32 past K256, set by dispatch code
					@ outside this view — confirm
# elif	defined(__thumb2__)
	adr	r3,.LARMv8
	sub	r3,r3,#.LARMv8-K256	@ r3 = &K256, PC-relative for Thumb-2
# else
	adrl	r3,K256			@ r3 = &K256
# endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

.Loop_v8:
	vld1.8	{q8,q9},[r1]!		@ load one 64-byte message block
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!		@ K[0..3]
	vrev32.8	q8,q8		@ byte-swap message words to big-endian
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2			@ Z := (input exhausted); nothing below
					@ writes the CPSR flags, so the bne at
					@ the bottom of the loop consumes this
	vld1.32	{q13},[r3]!
	@ One 4-round group: q12 (or q13) holds W[i..i+3]+K[i..i+3];
	@ sha256h/sha256h2 advance the state (q2 keeps the pre-round copy
	@ sha256h2 needs), sha256su0/sha256su1 expand the schedule in place.
	@ The same pattern repeats for rounds 4..47 below.
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Final 16 rounds: the message schedule is complete, so only the
	@ round function (sha256h/sha256h2) remains.
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14	@ accumulate: add state saved at loop top
	vadd.i32	q1,q1,q15
	it	ne		@ Thumb-2 IT block for the conditional branch
	bne	.Loop_v8	@ more blocks remain (flags from teq above)

	vst1.32	{q0,q1},[r0]	@ store updated hash state

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2810#endif
2811.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2812.align	2
2813.align	2
2814#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2815.comm	OPENSSL_armcap_P,4,4
2816.hidden	OPENSSL_armcap_P
2817#endif
2818#endif