@ (removed: code-viewer navigation chrome — "Home / Line# / Scopes# /
@  Navigate / Raw / Download" — which is not part of the source file)
1@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
2@
3@ Licensed under the OpenSSL license (the "License").  You may not use
4@ this file except in compliance with the License.  You can obtain a copy
5@ in the file LICENSE in the source distribution or at
6@ https://www.openssl.org/source/license.html
7
8
9@ ====================================================================
10@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11@ project. The module is, however, dual licensed under OpenSSL and
12@ CRYPTOGAMS licenses depending on where you obtain it. For further
13@ details see http://www.openssl.org/~appro/cryptogams/.
14@
15@ Permission to use under GPL terms is granted.
16@ ====================================================================
17
18@ SHA256 block procedure for ARMv4. May 2007.
19
20@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
21@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
22@ byte [on single-issue Xscale PXA250 core].
23
24@ July 2010.
25@
26@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27@ Cortex A8 core and ~20 cycles per processed byte.
28
29@ February 2011.
30@
31@ Profiler-assisted and platform-specific optimization resulted in 16%
32@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
33
34@ September 2013.
35@
36@ Add NEON implementation. On Cortex A8 it was measured to process one
37@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39@ code (meaning that latter performs sub-optimally, nothing was done
40@ about it).
41
42@ May 2014.
43@
44@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
45
#ifndef __KERNEL__
# include "arm_arch.h"
#else
@ Linux-kernel build: arm_arch.h is unavailable, so take the target
@ architecture level from the kernel's own macro and cap the optional
@ feature paths (NEON/ARMv8) at an ARMv7 baseline.
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if defined(__thumb2__)
@ Thumb-2 target: unified syntax is required for the T32 encodings.
.syntax unified
.thumb
#else
@ Otherwise emit classic 32-bit ARM (A32) encodings.
.code   32
#endif
60
@ K256: the 64 SHA-256 round constants of FIPS 180-4 — the first 32 bits
@ of the fractional parts of the cube roots of the first 64 primes.
@ The round code walks this table with a post-incremented pointer
@ ("ldr rX,[r14],#4  @ *K256++") and the integer path detects the final
@ round by comparing the low byte of the last constant against 0xf2
@ (low byte of 0xc67178f2).
.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Link-time offset from .Lsha256_block_data_order to the runtime CPU
@ capability vector; the function entry adds it to its own address
@ (r3) to locate OPENSSL_armcap_P position-independently.
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
@ 32-byte-align the entry point that follows.
.align	5
87
88.global	sha256_block_data_order
89.type	sha256_block_data_order,%function
90sha256_block_data_order:
91.Lsha256_block_data_order:
92#if __ARM_ARCH__<7 && !defined(__thumb2__)
93	sub	r3,pc,#8		@ sha256_block_data_order
94#else
95	adr	r3,.Lsha256_block_data_order
96#endif
97#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
98	ldr	r12,.LOPENSSL_armcap
99	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
100#ifdef	__APPLE__
101	ldr	r12,[r12]
102#endif
103	tst	r12,#ARMV8_SHA256
104	bne	.LARMv8
105	tst	r12,#ARMV7_NEON
106	bne	.LNEON
107#endif
108	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
109	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
110	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
111	sub	r14,r3,#256+32	@ K256
112	sub	sp,sp,#16*4		@ alloca(X[16])
113.Loop:
114# if __ARM_ARCH__>=7
115	ldr	r2,[r1],#4
116# else
117	ldrb	r2,[r1,#3]
118# endif
119	eor	r3,r5,r6		@ magic
120	eor	r12,r12,r12
121#if __ARM_ARCH__>=7
122	@ ldr	r2,[r1],#4			@ 0
123# if 0==15
124	str	r1,[sp,#17*4]			@ make room for r1
125# endif
126	eor	r0,r8,r8,ror#5
127	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
128	eor	r0,r0,r8,ror#19	@ Sigma1(e)
129# ifndef __ARMEB__
130	rev	r2,r2
131# endif
132#else
133	@ ldrb	r2,[r1,#3]			@ 0
134	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
135	ldrb	r12,[r1,#2]
136	ldrb	r0,[r1,#1]
137	orr	r2,r2,r12,lsl#8
138	ldrb	r12,[r1],#4
139	orr	r2,r2,r0,lsl#16
140# if 0==15
141	str	r1,[sp,#17*4]			@ make room for r1
142# endif
143	eor	r0,r8,r8,ror#5
144	orr	r2,r2,r12,lsl#24
145	eor	r0,r0,r8,ror#19	@ Sigma1(e)
146#endif
147	ldr	r12,[r14],#4			@ *K256++
148	add	r11,r11,r2			@ h+=X[i]
149	str	r2,[sp,#0*4]
150	eor	r2,r9,r10
151	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
152	and	r2,r2,r8
153	add	r11,r11,r12			@ h+=K256[i]
154	eor	r2,r2,r10			@ Ch(e,f,g)
155	eor	r0,r4,r4,ror#11
156	add	r11,r11,r2			@ h+=Ch(e,f,g)
157#if 0==31
158	and	r12,r12,#0xff
159	cmp	r12,#0xf2			@ done?
160#endif
161#if 0<15
162# if __ARM_ARCH__>=7
163	ldr	r2,[r1],#4			@ prefetch
164# else
165	ldrb	r2,[r1,#3]
166# endif
167	eor	r12,r4,r5			@ a^b, b^c in next round
168#else
169	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
170	eor	r12,r4,r5			@ a^b, b^c in next round
171	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
172#endif
173	eor	r0,r0,r4,ror#20	@ Sigma0(a)
174	and	r3,r3,r12			@ (b^c)&=(a^b)
175	add	r7,r7,r11			@ d+=h
176	eor	r3,r3,r5			@ Maj(a,b,c)
177	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
178	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
179#if __ARM_ARCH__>=7
180	@ ldr	r2,[r1],#4			@ 1
181# if 1==15
182	str	r1,[sp,#17*4]			@ make room for r1
183# endif
184	eor	r0,r7,r7,ror#5
185	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
186	eor	r0,r0,r7,ror#19	@ Sigma1(e)
187# ifndef __ARMEB__
188	rev	r2,r2
189# endif
190#else
191	@ ldrb	r2,[r1,#3]			@ 1
192	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
193	ldrb	r3,[r1,#2]
194	ldrb	r0,[r1,#1]
195	orr	r2,r2,r3,lsl#8
196	ldrb	r3,[r1],#4
197	orr	r2,r2,r0,lsl#16
198# if 1==15
199	str	r1,[sp,#17*4]			@ make room for r1
200# endif
201	eor	r0,r7,r7,ror#5
202	orr	r2,r2,r3,lsl#24
203	eor	r0,r0,r7,ror#19	@ Sigma1(e)
204#endif
205	ldr	r3,[r14],#4			@ *K256++
206	add	r10,r10,r2			@ h+=X[i]
207	str	r2,[sp,#1*4]
208	eor	r2,r8,r9
209	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
210	and	r2,r2,r7
211	add	r10,r10,r3			@ h+=K256[i]
212	eor	r2,r2,r9			@ Ch(e,f,g)
213	eor	r0,r11,r11,ror#11
214	add	r10,r10,r2			@ h+=Ch(e,f,g)
215#if 1==31
216	and	r3,r3,#0xff
217	cmp	r3,#0xf2			@ done?
218#endif
219#if 1<15
220# if __ARM_ARCH__>=7
221	ldr	r2,[r1],#4			@ prefetch
222# else
223	ldrb	r2,[r1,#3]
224# endif
225	eor	r3,r11,r4			@ a^b, b^c in next round
226#else
227	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
228	eor	r3,r11,r4			@ a^b, b^c in next round
229	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
230#endif
231	eor	r0,r0,r11,ror#20	@ Sigma0(a)
232	and	r12,r12,r3			@ (b^c)&=(a^b)
233	add	r6,r6,r10			@ d+=h
234	eor	r12,r12,r4			@ Maj(a,b,c)
235	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
236	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
237#if __ARM_ARCH__>=7
238	@ ldr	r2,[r1],#4			@ 2
239# if 2==15
240	str	r1,[sp,#17*4]			@ make room for r1
241# endif
242	eor	r0,r6,r6,ror#5
243	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
244	eor	r0,r0,r6,ror#19	@ Sigma1(e)
245# ifndef __ARMEB__
246	rev	r2,r2
247# endif
248#else
249	@ ldrb	r2,[r1,#3]			@ 2
250	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
251	ldrb	r12,[r1,#2]
252	ldrb	r0,[r1,#1]
253	orr	r2,r2,r12,lsl#8
254	ldrb	r12,[r1],#4
255	orr	r2,r2,r0,lsl#16
256# if 2==15
257	str	r1,[sp,#17*4]			@ make room for r1
258# endif
259	eor	r0,r6,r6,ror#5
260	orr	r2,r2,r12,lsl#24
261	eor	r0,r0,r6,ror#19	@ Sigma1(e)
262#endif
263	ldr	r12,[r14],#4			@ *K256++
264	add	r9,r9,r2			@ h+=X[i]
265	str	r2,[sp,#2*4]
266	eor	r2,r7,r8
267	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
268	and	r2,r2,r6
269	add	r9,r9,r12			@ h+=K256[i]
270	eor	r2,r2,r8			@ Ch(e,f,g)
271	eor	r0,r10,r10,ror#11
272	add	r9,r9,r2			@ h+=Ch(e,f,g)
273#if 2==31
274	and	r12,r12,#0xff
275	cmp	r12,#0xf2			@ done?
276#endif
277#if 2<15
278# if __ARM_ARCH__>=7
279	ldr	r2,[r1],#4			@ prefetch
280# else
281	ldrb	r2,[r1,#3]
282# endif
283	eor	r12,r10,r11			@ a^b, b^c in next round
284#else
285	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
286	eor	r12,r10,r11			@ a^b, b^c in next round
287	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
288#endif
289	eor	r0,r0,r10,ror#20	@ Sigma0(a)
290	and	r3,r3,r12			@ (b^c)&=(a^b)
291	add	r5,r5,r9			@ d+=h
292	eor	r3,r3,r11			@ Maj(a,b,c)
293	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
294	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
295#if __ARM_ARCH__>=7
296	@ ldr	r2,[r1],#4			@ 3
297# if 3==15
298	str	r1,[sp,#17*4]			@ make room for r1
299# endif
300	eor	r0,r5,r5,ror#5
301	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
302	eor	r0,r0,r5,ror#19	@ Sigma1(e)
303# ifndef __ARMEB__
304	rev	r2,r2
305# endif
306#else
307	@ ldrb	r2,[r1,#3]			@ 3
308	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
309	ldrb	r3,[r1,#2]
310	ldrb	r0,[r1,#1]
311	orr	r2,r2,r3,lsl#8
312	ldrb	r3,[r1],#4
313	orr	r2,r2,r0,lsl#16
314# if 3==15
315	str	r1,[sp,#17*4]			@ make room for r1
316# endif
317	eor	r0,r5,r5,ror#5
318	orr	r2,r2,r3,lsl#24
319	eor	r0,r0,r5,ror#19	@ Sigma1(e)
320#endif
321	ldr	r3,[r14],#4			@ *K256++
322	add	r8,r8,r2			@ h+=X[i]
323	str	r2,[sp,#3*4]
324	eor	r2,r6,r7
325	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
326	and	r2,r2,r5
327	add	r8,r8,r3			@ h+=K256[i]
328	eor	r2,r2,r7			@ Ch(e,f,g)
329	eor	r0,r9,r9,ror#11
330	add	r8,r8,r2			@ h+=Ch(e,f,g)
331#if 3==31
332	and	r3,r3,#0xff
333	cmp	r3,#0xf2			@ done?
334#endif
335#if 3<15
336# if __ARM_ARCH__>=7
337	ldr	r2,[r1],#4			@ prefetch
338# else
339	ldrb	r2,[r1,#3]
340# endif
341	eor	r3,r9,r10			@ a^b, b^c in next round
342#else
343	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
344	eor	r3,r9,r10			@ a^b, b^c in next round
345	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
346#endif
347	eor	r0,r0,r9,ror#20	@ Sigma0(a)
348	and	r12,r12,r3			@ (b^c)&=(a^b)
349	add	r4,r4,r8			@ d+=h
350	eor	r12,r12,r10			@ Maj(a,b,c)
351	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
352	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
353#if __ARM_ARCH__>=7
354	@ ldr	r2,[r1],#4			@ 4
355# if 4==15
356	str	r1,[sp,#17*4]			@ make room for r1
357# endif
358	eor	r0,r4,r4,ror#5
359	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
360	eor	r0,r0,r4,ror#19	@ Sigma1(e)
361# ifndef __ARMEB__
362	rev	r2,r2
363# endif
364#else
365	@ ldrb	r2,[r1,#3]			@ 4
366	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
367	ldrb	r12,[r1,#2]
368	ldrb	r0,[r1,#1]
369	orr	r2,r2,r12,lsl#8
370	ldrb	r12,[r1],#4
371	orr	r2,r2,r0,lsl#16
372# if 4==15
373	str	r1,[sp,#17*4]			@ make room for r1
374# endif
375	eor	r0,r4,r4,ror#5
376	orr	r2,r2,r12,lsl#24
377	eor	r0,r0,r4,ror#19	@ Sigma1(e)
378#endif
379	ldr	r12,[r14],#4			@ *K256++
380	add	r7,r7,r2			@ h+=X[i]
381	str	r2,[sp,#4*4]
382	eor	r2,r5,r6
383	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
384	and	r2,r2,r4
385	add	r7,r7,r12			@ h+=K256[i]
386	eor	r2,r2,r6			@ Ch(e,f,g)
387	eor	r0,r8,r8,ror#11
388	add	r7,r7,r2			@ h+=Ch(e,f,g)
389#if 4==31
390	and	r12,r12,#0xff
391	cmp	r12,#0xf2			@ done?
392#endif
393#if 4<15
394# if __ARM_ARCH__>=7
395	ldr	r2,[r1],#4			@ prefetch
396# else
397	ldrb	r2,[r1,#3]
398# endif
399	eor	r12,r8,r9			@ a^b, b^c in next round
400#else
401	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
402	eor	r12,r8,r9			@ a^b, b^c in next round
403	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
404#endif
405	eor	r0,r0,r8,ror#20	@ Sigma0(a)
406	and	r3,r3,r12			@ (b^c)&=(a^b)
407	add	r11,r11,r7			@ d+=h
408	eor	r3,r3,r9			@ Maj(a,b,c)
409	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
410	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
411#if __ARM_ARCH__>=7
412	@ ldr	r2,[r1],#4			@ 5
413# if 5==15
414	str	r1,[sp,#17*4]			@ make room for r1
415# endif
416	eor	r0,r11,r11,ror#5
417	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
418	eor	r0,r0,r11,ror#19	@ Sigma1(e)
419# ifndef __ARMEB__
420	rev	r2,r2
421# endif
422#else
423	@ ldrb	r2,[r1,#3]			@ 5
424	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
425	ldrb	r3,[r1,#2]
426	ldrb	r0,[r1,#1]
427	orr	r2,r2,r3,lsl#8
428	ldrb	r3,[r1],#4
429	orr	r2,r2,r0,lsl#16
430# if 5==15
431	str	r1,[sp,#17*4]			@ make room for r1
432# endif
433	eor	r0,r11,r11,ror#5
434	orr	r2,r2,r3,lsl#24
435	eor	r0,r0,r11,ror#19	@ Sigma1(e)
436#endif
437	ldr	r3,[r14],#4			@ *K256++
438	add	r6,r6,r2			@ h+=X[i]
439	str	r2,[sp,#5*4]
440	eor	r2,r4,r5
441	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
442	and	r2,r2,r11
443	add	r6,r6,r3			@ h+=K256[i]
444	eor	r2,r2,r5			@ Ch(e,f,g)
445	eor	r0,r7,r7,ror#11
446	add	r6,r6,r2			@ h+=Ch(e,f,g)
447#if 5==31
448	and	r3,r3,#0xff
449	cmp	r3,#0xf2			@ done?
450#endif
451#if 5<15
452# if __ARM_ARCH__>=7
453	ldr	r2,[r1],#4			@ prefetch
454# else
455	ldrb	r2,[r1,#3]
456# endif
457	eor	r3,r7,r8			@ a^b, b^c in next round
458#else
459	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
460	eor	r3,r7,r8			@ a^b, b^c in next round
461	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
462#endif
463	eor	r0,r0,r7,ror#20	@ Sigma0(a)
464	and	r12,r12,r3			@ (b^c)&=(a^b)
465	add	r10,r10,r6			@ d+=h
466	eor	r12,r12,r8			@ Maj(a,b,c)
467	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
468	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
469#if __ARM_ARCH__>=7
470	@ ldr	r2,[r1],#4			@ 6
471# if 6==15
472	str	r1,[sp,#17*4]			@ make room for r1
473# endif
474	eor	r0,r10,r10,ror#5
475	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
476	eor	r0,r0,r10,ror#19	@ Sigma1(e)
477# ifndef __ARMEB__
478	rev	r2,r2
479# endif
480#else
481	@ ldrb	r2,[r1,#3]			@ 6
482	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
483	ldrb	r12,[r1,#2]
484	ldrb	r0,[r1,#1]
485	orr	r2,r2,r12,lsl#8
486	ldrb	r12,[r1],#4
487	orr	r2,r2,r0,lsl#16
488# if 6==15
489	str	r1,[sp,#17*4]			@ make room for r1
490# endif
491	eor	r0,r10,r10,ror#5
492	orr	r2,r2,r12,lsl#24
493	eor	r0,r0,r10,ror#19	@ Sigma1(e)
494#endif
495	ldr	r12,[r14],#4			@ *K256++
496	add	r5,r5,r2			@ h+=X[i]
497	str	r2,[sp,#6*4]
498	eor	r2,r11,r4
499	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
500	and	r2,r2,r10
501	add	r5,r5,r12			@ h+=K256[i]
502	eor	r2,r2,r4			@ Ch(e,f,g)
503	eor	r0,r6,r6,ror#11
504	add	r5,r5,r2			@ h+=Ch(e,f,g)
505#if 6==31
506	and	r12,r12,#0xff
507	cmp	r12,#0xf2			@ done?
508#endif
509#if 6<15
510# if __ARM_ARCH__>=7
511	ldr	r2,[r1],#4			@ prefetch
512# else
513	ldrb	r2,[r1,#3]
514# endif
515	eor	r12,r6,r7			@ a^b, b^c in next round
516#else
517	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
518	eor	r12,r6,r7			@ a^b, b^c in next round
519	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
520#endif
521	eor	r0,r0,r6,ror#20	@ Sigma0(a)
522	and	r3,r3,r12			@ (b^c)&=(a^b)
523	add	r9,r9,r5			@ d+=h
524	eor	r3,r3,r7			@ Maj(a,b,c)
525	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
526	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
527#if __ARM_ARCH__>=7
528	@ ldr	r2,[r1],#4			@ 7
529# if 7==15
530	str	r1,[sp,#17*4]			@ make room for r1
531# endif
532	eor	r0,r9,r9,ror#5
533	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
534	eor	r0,r0,r9,ror#19	@ Sigma1(e)
535# ifndef __ARMEB__
536	rev	r2,r2
537# endif
538#else
539	@ ldrb	r2,[r1,#3]			@ 7
540	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
541	ldrb	r3,[r1,#2]
542	ldrb	r0,[r1,#1]
543	orr	r2,r2,r3,lsl#8
544	ldrb	r3,[r1],#4
545	orr	r2,r2,r0,lsl#16
546# if 7==15
547	str	r1,[sp,#17*4]			@ make room for r1
548# endif
549	eor	r0,r9,r9,ror#5
550	orr	r2,r2,r3,lsl#24
551	eor	r0,r0,r9,ror#19	@ Sigma1(e)
552#endif
553	ldr	r3,[r14],#4			@ *K256++
554	add	r4,r4,r2			@ h+=X[i]
555	str	r2,[sp,#7*4]
556	eor	r2,r10,r11
557	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
558	and	r2,r2,r9
559	add	r4,r4,r3			@ h+=K256[i]
560	eor	r2,r2,r11			@ Ch(e,f,g)
561	eor	r0,r5,r5,ror#11
562	add	r4,r4,r2			@ h+=Ch(e,f,g)
563#if 7==31
564	and	r3,r3,#0xff
565	cmp	r3,#0xf2			@ done?
566#endif
567#if 7<15
568# if __ARM_ARCH__>=7
569	ldr	r2,[r1],#4			@ prefetch
570# else
571	ldrb	r2,[r1,#3]
572# endif
573	eor	r3,r5,r6			@ a^b, b^c in next round
574#else
575	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
576	eor	r3,r5,r6			@ a^b, b^c in next round
577	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
578#endif
579	eor	r0,r0,r5,ror#20	@ Sigma0(a)
580	and	r12,r12,r3			@ (b^c)&=(a^b)
581	add	r8,r8,r4			@ d+=h
582	eor	r12,r12,r6			@ Maj(a,b,c)
583	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
584	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
585#if __ARM_ARCH__>=7
586	@ ldr	r2,[r1],#4			@ 8
587# if 8==15
588	str	r1,[sp,#17*4]			@ make room for r1
589# endif
590	eor	r0,r8,r8,ror#5
591	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
592	eor	r0,r0,r8,ror#19	@ Sigma1(e)
593# ifndef __ARMEB__
594	rev	r2,r2
595# endif
596#else
597	@ ldrb	r2,[r1,#3]			@ 8
598	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
599	ldrb	r12,[r1,#2]
600	ldrb	r0,[r1,#1]
601	orr	r2,r2,r12,lsl#8
602	ldrb	r12,[r1],#4
603	orr	r2,r2,r0,lsl#16
604# if 8==15
605	str	r1,[sp,#17*4]			@ make room for r1
606# endif
607	eor	r0,r8,r8,ror#5
608	orr	r2,r2,r12,lsl#24
609	eor	r0,r0,r8,ror#19	@ Sigma1(e)
610#endif
611	ldr	r12,[r14],#4			@ *K256++
612	add	r11,r11,r2			@ h+=X[i]
613	str	r2,[sp,#8*4]
614	eor	r2,r9,r10
615	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
616	and	r2,r2,r8
617	add	r11,r11,r12			@ h+=K256[i]
618	eor	r2,r2,r10			@ Ch(e,f,g)
619	eor	r0,r4,r4,ror#11
620	add	r11,r11,r2			@ h+=Ch(e,f,g)
621#if 8==31
622	and	r12,r12,#0xff
623	cmp	r12,#0xf2			@ done?
624#endif
625#if 8<15
626# if __ARM_ARCH__>=7
627	ldr	r2,[r1],#4			@ prefetch
628# else
629	ldrb	r2,[r1,#3]
630# endif
631	eor	r12,r4,r5			@ a^b, b^c in next round
632#else
633	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
634	eor	r12,r4,r5			@ a^b, b^c in next round
635	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
636#endif
637	eor	r0,r0,r4,ror#20	@ Sigma0(a)
638	and	r3,r3,r12			@ (b^c)&=(a^b)
639	add	r7,r7,r11			@ d+=h
640	eor	r3,r3,r5			@ Maj(a,b,c)
641	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
642	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
643#if __ARM_ARCH__>=7
644	@ ldr	r2,[r1],#4			@ 9
645# if 9==15
646	str	r1,[sp,#17*4]			@ make room for r1
647# endif
648	eor	r0,r7,r7,ror#5
649	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
650	eor	r0,r0,r7,ror#19	@ Sigma1(e)
651# ifndef __ARMEB__
652	rev	r2,r2
653# endif
654#else
655	@ ldrb	r2,[r1,#3]			@ 9
656	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
657	ldrb	r3,[r1,#2]
658	ldrb	r0,[r1,#1]
659	orr	r2,r2,r3,lsl#8
660	ldrb	r3,[r1],#4
661	orr	r2,r2,r0,lsl#16
662# if 9==15
663	str	r1,[sp,#17*4]			@ make room for r1
664# endif
665	eor	r0,r7,r7,ror#5
666	orr	r2,r2,r3,lsl#24
667	eor	r0,r0,r7,ror#19	@ Sigma1(e)
668#endif
669	ldr	r3,[r14],#4			@ *K256++
670	add	r10,r10,r2			@ h+=X[i]
671	str	r2,[sp,#9*4]
672	eor	r2,r8,r9
673	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
674	and	r2,r2,r7
675	add	r10,r10,r3			@ h+=K256[i]
676	eor	r2,r2,r9			@ Ch(e,f,g)
677	eor	r0,r11,r11,ror#11
678	add	r10,r10,r2			@ h+=Ch(e,f,g)
679#if 9==31
680	and	r3,r3,#0xff
681	cmp	r3,#0xf2			@ done?
682#endif
683#if 9<15
684# if __ARM_ARCH__>=7
685	ldr	r2,[r1],#4			@ prefetch
686# else
687	ldrb	r2,[r1,#3]
688# endif
689	eor	r3,r11,r4			@ a^b, b^c in next round
690#else
691	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
692	eor	r3,r11,r4			@ a^b, b^c in next round
693	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
694#endif
695	eor	r0,r0,r11,ror#20	@ Sigma0(a)
696	and	r12,r12,r3			@ (b^c)&=(a^b)
697	add	r6,r6,r10			@ d+=h
698	eor	r12,r12,r4			@ Maj(a,b,c)
699	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
700	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
701#if __ARM_ARCH__>=7
702	@ ldr	r2,[r1],#4			@ 10
703# if 10==15
704	str	r1,[sp,#17*4]			@ make room for r1
705# endif
706	eor	r0,r6,r6,ror#5
707	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
708	eor	r0,r0,r6,ror#19	@ Sigma1(e)
709# ifndef __ARMEB__
710	rev	r2,r2
711# endif
712#else
713	@ ldrb	r2,[r1,#3]			@ 10
714	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
715	ldrb	r12,[r1,#2]
716	ldrb	r0,[r1,#1]
717	orr	r2,r2,r12,lsl#8
718	ldrb	r12,[r1],#4
719	orr	r2,r2,r0,lsl#16
720# if 10==15
721	str	r1,[sp,#17*4]			@ make room for r1
722# endif
723	eor	r0,r6,r6,ror#5
724	orr	r2,r2,r12,lsl#24
725	eor	r0,r0,r6,ror#19	@ Sigma1(e)
726#endif
727	ldr	r12,[r14],#4			@ *K256++
728	add	r9,r9,r2			@ h+=X[i]
729	str	r2,[sp,#10*4]
730	eor	r2,r7,r8
731	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
732	and	r2,r2,r6
733	add	r9,r9,r12			@ h+=K256[i]
734	eor	r2,r2,r8			@ Ch(e,f,g)
735	eor	r0,r10,r10,ror#11
736	add	r9,r9,r2			@ h+=Ch(e,f,g)
737#if 10==31
738	and	r12,r12,#0xff
739	cmp	r12,#0xf2			@ done?
740#endif
741#if 10<15
742# if __ARM_ARCH__>=7
743	ldr	r2,[r1],#4			@ prefetch
744# else
745	ldrb	r2,[r1,#3]
746# endif
747	eor	r12,r10,r11			@ a^b, b^c in next round
748#else
749	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
750	eor	r12,r10,r11			@ a^b, b^c in next round
751	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
752#endif
753	eor	r0,r0,r10,ror#20	@ Sigma0(a)
754	and	r3,r3,r12			@ (b^c)&=(a^b)
755	add	r5,r5,r9			@ d+=h
756	eor	r3,r3,r11			@ Maj(a,b,c)
757	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
758	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
759#if __ARM_ARCH__>=7
760	@ ldr	r2,[r1],#4			@ 11
761# if 11==15
762	str	r1,[sp,#17*4]			@ make room for r1
763# endif
764	eor	r0,r5,r5,ror#5
765	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
766	eor	r0,r0,r5,ror#19	@ Sigma1(e)
767# ifndef __ARMEB__
768	rev	r2,r2
769# endif
770#else
771	@ ldrb	r2,[r1,#3]			@ 11
772	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
773	ldrb	r3,[r1,#2]
774	ldrb	r0,[r1,#1]
775	orr	r2,r2,r3,lsl#8
776	ldrb	r3,[r1],#4
777	orr	r2,r2,r0,lsl#16
778# if 11==15
779	str	r1,[sp,#17*4]			@ make room for r1
780# endif
781	eor	r0,r5,r5,ror#5
782	orr	r2,r2,r3,lsl#24
783	eor	r0,r0,r5,ror#19	@ Sigma1(e)
784#endif
785	ldr	r3,[r14],#4			@ *K256++
786	add	r8,r8,r2			@ h+=X[i]
787	str	r2,[sp,#11*4]
788	eor	r2,r6,r7
789	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
790	and	r2,r2,r5
791	add	r8,r8,r3			@ h+=K256[i]
792	eor	r2,r2,r7			@ Ch(e,f,g)
793	eor	r0,r9,r9,ror#11
794	add	r8,r8,r2			@ h+=Ch(e,f,g)
795#if 11==31
796	and	r3,r3,#0xff
797	cmp	r3,#0xf2			@ done?
798#endif
799#if 11<15
800# if __ARM_ARCH__>=7
801	ldr	r2,[r1],#4			@ prefetch
802# else
803	ldrb	r2,[r1,#3]
804# endif
805	eor	r3,r9,r10			@ a^b, b^c in next round
806#else
807	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
808	eor	r3,r9,r10			@ a^b, b^c in next round
809	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
810#endif
811	eor	r0,r0,r9,ror#20	@ Sigma0(a)
812	and	r12,r12,r3			@ (b^c)&=(a^b)
813	add	r4,r4,r8			@ d+=h
814	eor	r12,r12,r10			@ Maj(a,b,c)
815	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
816	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
817#if __ARM_ARCH__>=7
818	@ ldr	r2,[r1],#4			@ 12
819# if 12==15
820	str	r1,[sp,#17*4]			@ make room for r1
821# endif
822	eor	r0,r4,r4,ror#5
823	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
824	eor	r0,r0,r4,ror#19	@ Sigma1(e)
825# ifndef __ARMEB__
826	rev	r2,r2
827# endif
828#else
829	@ ldrb	r2,[r1,#3]			@ 12
830	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
831	ldrb	r12,[r1,#2]
832	ldrb	r0,[r1,#1]
833	orr	r2,r2,r12,lsl#8
834	ldrb	r12,[r1],#4
835	orr	r2,r2,r0,lsl#16
836# if 12==15
837	str	r1,[sp,#17*4]			@ make room for r1
838# endif
839	eor	r0,r4,r4,ror#5
840	orr	r2,r2,r12,lsl#24
841	eor	r0,r0,r4,ror#19	@ Sigma1(e)
842#endif
843	ldr	r12,[r14],#4			@ *K256++
844	add	r7,r7,r2			@ h+=X[i]
845	str	r2,[sp,#12*4]
846	eor	r2,r5,r6
847	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
848	and	r2,r2,r4
849	add	r7,r7,r12			@ h+=K256[i]
850	eor	r2,r2,r6			@ Ch(e,f,g)
851	eor	r0,r8,r8,ror#11
852	add	r7,r7,r2			@ h+=Ch(e,f,g)
853#if 12==31
854	and	r12,r12,#0xff
855	cmp	r12,#0xf2			@ done?
856#endif
857#if 12<15
858# if __ARM_ARCH__>=7
859	ldr	r2,[r1],#4			@ prefetch
860# else
861	ldrb	r2,[r1,#3]
862# endif
863	eor	r12,r8,r9			@ a^b, b^c in next round
864#else
865	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
866	eor	r12,r8,r9			@ a^b, b^c in next round
867	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
868#endif
869	eor	r0,r0,r8,ror#20	@ Sigma0(a)
870	and	r3,r3,r12			@ (b^c)&=(a^b)
871	add	r11,r11,r7			@ d+=h
872	eor	r3,r3,r9			@ Maj(a,b,c)
873	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
874	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
875#if __ARM_ARCH__>=7
876	@ ldr	r2,[r1],#4			@ 13
877# if 13==15
878	str	r1,[sp,#17*4]			@ make room for r1
879# endif
880	eor	r0,r11,r11,ror#5
881	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
882	eor	r0,r0,r11,ror#19	@ Sigma1(e)
883# ifndef __ARMEB__
884	rev	r2,r2
885# endif
886#else
887	@ ldrb	r2,[r1,#3]			@ 13
888	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
889	ldrb	r3,[r1,#2]
890	ldrb	r0,[r1,#1]
891	orr	r2,r2,r3,lsl#8
892	ldrb	r3,[r1],#4
893	orr	r2,r2,r0,lsl#16
894# if 13==15
895	str	r1,[sp,#17*4]			@ make room for r1
896# endif
897	eor	r0,r11,r11,ror#5
898	orr	r2,r2,r3,lsl#24
899	eor	r0,r0,r11,ror#19	@ Sigma1(e)
900#endif
901	ldr	r3,[r14],#4			@ *K256++
902	add	r6,r6,r2			@ h+=X[i]
903	str	r2,[sp,#13*4]
904	eor	r2,r4,r5
905	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
906	and	r2,r2,r11
907	add	r6,r6,r3			@ h+=K256[i]
908	eor	r2,r2,r5			@ Ch(e,f,g)
909	eor	r0,r7,r7,ror#11
910	add	r6,r6,r2			@ h+=Ch(e,f,g)
911#if 13==31
912	and	r3,r3,#0xff
913	cmp	r3,#0xf2			@ done?
914#endif
915#if 13<15
916# if __ARM_ARCH__>=7
917	ldr	r2,[r1],#4			@ prefetch
918# else
919	ldrb	r2,[r1,#3]
920# endif
921	eor	r3,r7,r8			@ a^b, b^c in next round
922#else
923	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
924	eor	r3,r7,r8			@ a^b, b^c in next round
925	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
926#endif
927	eor	r0,r0,r7,ror#20	@ Sigma0(a)
928	and	r12,r12,r3			@ (b^c)&=(a^b)
929	add	r10,r10,r6			@ d+=h
930	eor	r12,r12,r8			@ Maj(a,b,c)
931	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
932	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
933#if __ARM_ARCH__>=7
934	@ ldr	r2,[r1],#4			@ 14
935# if 14==15
936	str	r1,[sp,#17*4]			@ make room for r1
937# endif
938	eor	r0,r10,r10,ror#5
939	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
940	eor	r0,r0,r10,ror#19	@ Sigma1(e)
941# ifndef __ARMEB__
942	rev	r2,r2
943# endif
944#else
945	@ ldrb	r2,[r1,#3]			@ 14
946	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
947	ldrb	r12,[r1,#2]
948	ldrb	r0,[r1,#1]
949	orr	r2,r2,r12,lsl#8
950	ldrb	r12,[r1],#4
951	orr	r2,r2,r0,lsl#16
952# if 14==15
953	str	r1,[sp,#17*4]			@ make room for r1
954# endif
955	eor	r0,r10,r10,ror#5
956	orr	r2,r2,r12,lsl#24
957	eor	r0,r0,r10,ror#19	@ Sigma1(e)
958#endif
959	ldr	r12,[r14],#4			@ *K256++
960	add	r5,r5,r2			@ h+=X[i]
961	str	r2,[sp,#14*4]
962	eor	r2,r11,r4
963	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
964	and	r2,r2,r10
965	add	r5,r5,r12			@ h+=K256[i]
966	eor	r2,r2,r4			@ Ch(e,f,g)
967	eor	r0,r6,r6,ror#11
968	add	r5,r5,r2			@ h+=Ch(e,f,g)
969#if 14==31
970	and	r12,r12,#0xff
971	cmp	r12,#0xf2			@ done?
972#endif
973#if 14<15
974# if __ARM_ARCH__>=7
975	ldr	r2,[r1],#4			@ prefetch
976# else
977	ldrb	r2,[r1,#3]
978# endif
979	eor	r12,r6,r7			@ a^b, b^c in next round
980#else
981	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
982	eor	r12,r6,r7			@ a^b, b^c in next round
983	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
984#endif
985	eor	r0,r0,r6,ror#20	@ Sigma0(a)
986	and	r3,r3,r12			@ (b^c)&=(a^b)
987	add	r9,r9,r5			@ d+=h
988	eor	r3,r3,r7			@ Maj(a,b,c)
989	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
990	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
991#if __ARM_ARCH__>=7
992	@ ldr	r2,[r1],#4			@ 15
993# if 15==15
994	str	r1,[sp,#17*4]			@ make room for r1
995# endif
996	eor	r0,r9,r9,ror#5
997	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
998	eor	r0,r0,r9,ror#19	@ Sigma1(e)
999# ifndef __ARMEB__
1000	rev	r2,r2
1001# endif
1002#else
1003	@ ldrb	r2,[r1,#3]			@ 15
1004	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1005	ldrb	r3,[r1,#2]
1006	ldrb	r0,[r1,#1]
1007	orr	r2,r2,r3,lsl#8
1008	ldrb	r3,[r1],#4
1009	orr	r2,r2,r0,lsl#16
1010# if 15==15
1011	str	r1,[sp,#17*4]			@ make room for r1
1012# endif
1013	eor	r0,r9,r9,ror#5
1014	orr	r2,r2,r3,lsl#24
1015	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1016#endif
1017	ldr	r3,[r14],#4			@ *K256++
1018	add	r4,r4,r2			@ h+=X[i]
1019	str	r2,[sp,#15*4]
1020	eor	r2,r10,r11
1021	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1022	and	r2,r2,r9
1023	add	r4,r4,r3			@ h+=K256[i]
1024	eor	r2,r2,r11			@ Ch(e,f,g)
1025	eor	r0,r5,r5,ror#11
1026	add	r4,r4,r2			@ h+=Ch(e,f,g)
1027#if 15==31
1028	and	r3,r3,#0xff
1029	cmp	r3,#0xf2			@ done?
1030#endif
1031#if 15<15
1032# if __ARM_ARCH__>=7
1033	ldr	r2,[r1],#4			@ prefetch
1034# else
1035	ldrb	r2,[r1,#3]
1036# endif
1037	eor	r3,r5,r6			@ a^b, b^c in next round
1038#else
1039	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1040	eor	r3,r5,r6			@ a^b, b^c in next round
1041	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1042#endif
1043	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1044	and	r12,r12,r3			@ (b^c)&=(a^b)
1045	add	r8,r8,r4			@ d+=h
1046	eor	r12,r12,r6			@ Maj(a,b,c)
1047	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1048	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1049.Lrounds_16_xx:
1050	@ ldr	r2,[sp,#1*4]		@ 16
1051	@ ldr	r1,[sp,#14*4]
1052	mov	r0,r2,ror#7
1053	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1054	mov	r12,r1,ror#17
1055	eor	r0,r0,r2,ror#18
1056	eor	r12,r12,r1,ror#19
1057	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1058	ldr	r2,[sp,#0*4]
1059	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1060	ldr	r1,[sp,#9*4]
1061
1062	add	r12,r12,r0
1063	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1064	add	r2,r2,r12
1065	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1066	add	r2,r2,r1			@ X[i]
1067	ldr	r12,[r14],#4			@ *K256++
1068	add	r11,r11,r2			@ h+=X[i]
1069	str	r2,[sp,#0*4]
1070	eor	r2,r9,r10
1071	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1072	and	r2,r2,r8
1073	add	r11,r11,r12			@ h+=K256[i]
1074	eor	r2,r2,r10			@ Ch(e,f,g)
1075	eor	r0,r4,r4,ror#11
1076	add	r11,r11,r2			@ h+=Ch(e,f,g)
1077#if 16==31
1078	and	r12,r12,#0xff
1079	cmp	r12,#0xf2			@ done?
1080#endif
1081#if 16<15
1082# if __ARM_ARCH__>=7
1083	ldr	r2,[r1],#4			@ prefetch
1084# else
1085	ldrb	r2,[r1,#3]
1086# endif
1087	eor	r12,r4,r5			@ a^b, b^c in next round
1088#else
1089	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1090	eor	r12,r4,r5			@ a^b, b^c in next round
1091	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1092#endif
1093	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1094	and	r3,r3,r12			@ (b^c)&=(a^b)
1095	add	r7,r7,r11			@ d+=h
1096	eor	r3,r3,r5			@ Maj(a,b,c)
1097	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1098	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1099	@ ldr	r2,[sp,#2*4]		@ 17
1100	@ ldr	r1,[sp,#15*4]
1101	mov	r0,r2,ror#7
1102	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1103	mov	r3,r1,ror#17
1104	eor	r0,r0,r2,ror#18
1105	eor	r3,r3,r1,ror#19
1106	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1107	ldr	r2,[sp,#1*4]
1108	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1109	ldr	r1,[sp,#10*4]
1110
1111	add	r3,r3,r0
1112	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1113	add	r2,r2,r3
1114	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1115	add	r2,r2,r1			@ X[i]
1116	ldr	r3,[r14],#4			@ *K256++
1117	add	r10,r10,r2			@ h+=X[i]
1118	str	r2,[sp,#1*4]
1119	eor	r2,r8,r9
1120	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1121	and	r2,r2,r7
1122	add	r10,r10,r3			@ h+=K256[i]
1123	eor	r2,r2,r9			@ Ch(e,f,g)
1124	eor	r0,r11,r11,ror#11
1125	add	r10,r10,r2			@ h+=Ch(e,f,g)
1126#if 17==31
1127	and	r3,r3,#0xff
1128	cmp	r3,#0xf2			@ done?
1129#endif
1130#if 17<15
1131# if __ARM_ARCH__>=7
1132	ldr	r2,[r1],#4			@ prefetch
1133# else
1134	ldrb	r2,[r1,#3]
1135# endif
1136	eor	r3,r11,r4			@ a^b, b^c in next round
1137#else
1138	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1139	eor	r3,r11,r4			@ a^b, b^c in next round
1140	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1141#endif
1142	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1143	and	r12,r12,r3			@ (b^c)&=(a^b)
1144	add	r6,r6,r10			@ d+=h
1145	eor	r12,r12,r4			@ Maj(a,b,c)
1146	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1147	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1148	@ ldr	r2,[sp,#3*4]		@ 18
1149	@ ldr	r1,[sp,#0*4]
1150	mov	r0,r2,ror#7
1151	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1152	mov	r12,r1,ror#17
1153	eor	r0,r0,r2,ror#18
1154	eor	r12,r12,r1,ror#19
1155	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1156	ldr	r2,[sp,#2*4]
1157	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1158	ldr	r1,[sp,#11*4]
1159
1160	add	r12,r12,r0
1161	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1162	add	r2,r2,r12
1163	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1164	add	r2,r2,r1			@ X[i]
1165	ldr	r12,[r14],#4			@ *K256++
1166	add	r9,r9,r2			@ h+=X[i]
1167	str	r2,[sp,#2*4]
1168	eor	r2,r7,r8
1169	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1170	and	r2,r2,r6
1171	add	r9,r9,r12			@ h+=K256[i]
1172	eor	r2,r2,r8			@ Ch(e,f,g)
1173	eor	r0,r10,r10,ror#11
1174	add	r9,r9,r2			@ h+=Ch(e,f,g)
1175#if 18==31
1176	and	r12,r12,#0xff
1177	cmp	r12,#0xf2			@ done?
1178#endif
1179#if 18<15
1180# if __ARM_ARCH__>=7
1181	ldr	r2,[r1],#4			@ prefetch
1182# else
1183	ldrb	r2,[r1,#3]
1184# endif
1185	eor	r12,r10,r11			@ a^b, b^c in next round
1186#else
1187	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1188	eor	r12,r10,r11			@ a^b, b^c in next round
1189	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1190#endif
1191	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1192	and	r3,r3,r12			@ (b^c)&=(a^b)
1193	add	r5,r5,r9			@ d+=h
1194	eor	r3,r3,r11			@ Maj(a,b,c)
1195	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1196	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1197	@ ldr	r2,[sp,#4*4]		@ 19
1198	@ ldr	r1,[sp,#1*4]
1199	mov	r0,r2,ror#7
1200	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1201	mov	r3,r1,ror#17
1202	eor	r0,r0,r2,ror#18
1203	eor	r3,r3,r1,ror#19
1204	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1205	ldr	r2,[sp,#3*4]
1206	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1207	ldr	r1,[sp,#12*4]
1208
1209	add	r3,r3,r0
1210	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1211	add	r2,r2,r3
1212	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1213	add	r2,r2,r1			@ X[i]
1214	ldr	r3,[r14],#4			@ *K256++
1215	add	r8,r8,r2			@ h+=X[i]
1216	str	r2,[sp,#3*4]
1217	eor	r2,r6,r7
1218	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1219	and	r2,r2,r5
1220	add	r8,r8,r3			@ h+=K256[i]
1221	eor	r2,r2,r7			@ Ch(e,f,g)
1222	eor	r0,r9,r9,ror#11
1223	add	r8,r8,r2			@ h+=Ch(e,f,g)
1224#if 19==31
1225	and	r3,r3,#0xff
1226	cmp	r3,#0xf2			@ done?
1227#endif
1228#if 19<15
1229# if __ARM_ARCH__>=7
1230	ldr	r2,[r1],#4			@ prefetch
1231# else
1232	ldrb	r2,[r1,#3]
1233# endif
1234	eor	r3,r9,r10			@ a^b, b^c in next round
1235#else
1236	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1237	eor	r3,r9,r10			@ a^b, b^c in next round
1238	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1239#endif
1240	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1241	and	r12,r12,r3			@ (b^c)&=(a^b)
1242	add	r4,r4,r8			@ d+=h
1243	eor	r12,r12,r10			@ Maj(a,b,c)
1244	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1245	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1246	@ ldr	r2,[sp,#5*4]		@ 20
1247	@ ldr	r1,[sp,#2*4]
1248	mov	r0,r2,ror#7
1249	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1250	mov	r12,r1,ror#17
1251	eor	r0,r0,r2,ror#18
1252	eor	r12,r12,r1,ror#19
1253	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1254	ldr	r2,[sp,#4*4]
1255	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1256	ldr	r1,[sp,#13*4]
1257
1258	add	r12,r12,r0
1259	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1260	add	r2,r2,r12
1261	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1262	add	r2,r2,r1			@ X[i]
1263	ldr	r12,[r14],#4			@ *K256++
1264	add	r7,r7,r2			@ h+=X[i]
1265	str	r2,[sp,#4*4]
1266	eor	r2,r5,r6
1267	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1268	and	r2,r2,r4
1269	add	r7,r7,r12			@ h+=K256[i]
1270	eor	r2,r2,r6			@ Ch(e,f,g)
1271	eor	r0,r8,r8,ror#11
1272	add	r7,r7,r2			@ h+=Ch(e,f,g)
1273#if 20==31
1274	and	r12,r12,#0xff
1275	cmp	r12,#0xf2			@ done?
1276#endif
1277#if 20<15
1278# if __ARM_ARCH__>=7
1279	ldr	r2,[r1],#4			@ prefetch
1280# else
1281	ldrb	r2,[r1,#3]
1282# endif
1283	eor	r12,r8,r9			@ a^b, b^c in next round
1284#else
1285	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1286	eor	r12,r8,r9			@ a^b, b^c in next round
1287	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1288#endif
1289	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1290	and	r3,r3,r12			@ (b^c)&=(a^b)
1291	add	r11,r11,r7			@ d+=h
1292	eor	r3,r3,r9			@ Maj(a,b,c)
1293	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1294	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1295	@ ldr	r2,[sp,#6*4]		@ 21
1296	@ ldr	r1,[sp,#3*4]
1297	mov	r0,r2,ror#7
1298	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1299	mov	r3,r1,ror#17
1300	eor	r0,r0,r2,ror#18
1301	eor	r3,r3,r1,ror#19
1302	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1303	ldr	r2,[sp,#5*4]
1304	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1305	ldr	r1,[sp,#14*4]
1306
1307	add	r3,r3,r0
1308	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1309	add	r2,r2,r3
1310	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1311	add	r2,r2,r1			@ X[i]
1312	ldr	r3,[r14],#4			@ *K256++
1313	add	r6,r6,r2			@ h+=X[i]
1314	str	r2,[sp,#5*4]
1315	eor	r2,r4,r5
1316	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1317	and	r2,r2,r11
1318	add	r6,r6,r3			@ h+=K256[i]
1319	eor	r2,r2,r5			@ Ch(e,f,g)
1320	eor	r0,r7,r7,ror#11
1321	add	r6,r6,r2			@ h+=Ch(e,f,g)
1322#if 21==31
1323	and	r3,r3,#0xff
1324	cmp	r3,#0xf2			@ done?
1325#endif
1326#if 21<15
1327# if __ARM_ARCH__>=7
1328	ldr	r2,[r1],#4			@ prefetch
1329# else
1330	ldrb	r2,[r1,#3]
1331# endif
1332	eor	r3,r7,r8			@ a^b, b^c in next round
1333#else
1334	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1335	eor	r3,r7,r8			@ a^b, b^c in next round
1336	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1337#endif
1338	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1339	and	r12,r12,r3			@ (b^c)&=(a^b)
1340	add	r10,r10,r6			@ d+=h
1341	eor	r12,r12,r8			@ Maj(a,b,c)
1342	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1343	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1344	@ ldr	r2,[sp,#7*4]		@ 22
1345	@ ldr	r1,[sp,#4*4]
1346	mov	r0,r2,ror#7
1347	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1348	mov	r12,r1,ror#17
1349	eor	r0,r0,r2,ror#18
1350	eor	r12,r12,r1,ror#19
1351	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1352	ldr	r2,[sp,#6*4]
1353	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1354	ldr	r1,[sp,#15*4]
1355
1356	add	r12,r12,r0
1357	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1358	add	r2,r2,r12
1359	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1360	add	r2,r2,r1			@ X[i]
1361	ldr	r12,[r14],#4			@ *K256++
1362	add	r5,r5,r2			@ h+=X[i]
1363	str	r2,[sp,#6*4]
1364	eor	r2,r11,r4
1365	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1366	and	r2,r2,r10
1367	add	r5,r5,r12			@ h+=K256[i]
1368	eor	r2,r2,r4			@ Ch(e,f,g)
1369	eor	r0,r6,r6,ror#11
1370	add	r5,r5,r2			@ h+=Ch(e,f,g)
1371#if 22==31
1372	and	r12,r12,#0xff
1373	cmp	r12,#0xf2			@ done?
1374#endif
1375#if 22<15
1376# if __ARM_ARCH__>=7
1377	ldr	r2,[r1],#4			@ prefetch
1378# else
1379	ldrb	r2,[r1,#3]
1380# endif
1381	eor	r12,r6,r7			@ a^b, b^c in next round
1382#else
1383	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1384	eor	r12,r6,r7			@ a^b, b^c in next round
1385	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1386#endif
1387	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1388	and	r3,r3,r12			@ (b^c)&=(a^b)
1389	add	r9,r9,r5			@ d+=h
1390	eor	r3,r3,r7			@ Maj(a,b,c)
1391	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1392	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1393	@ ldr	r2,[sp,#8*4]		@ 23
1394	@ ldr	r1,[sp,#5*4]
1395	mov	r0,r2,ror#7
1396	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1397	mov	r3,r1,ror#17
1398	eor	r0,r0,r2,ror#18
1399	eor	r3,r3,r1,ror#19
1400	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1401	ldr	r2,[sp,#7*4]
1402	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1403	ldr	r1,[sp,#0*4]
1404
1405	add	r3,r3,r0
1406	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1407	add	r2,r2,r3
1408	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1409	add	r2,r2,r1			@ X[i]
1410	ldr	r3,[r14],#4			@ *K256++
1411	add	r4,r4,r2			@ h+=X[i]
1412	str	r2,[sp,#7*4]
1413	eor	r2,r10,r11
1414	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1415	and	r2,r2,r9
1416	add	r4,r4,r3			@ h+=K256[i]
1417	eor	r2,r2,r11			@ Ch(e,f,g)
1418	eor	r0,r5,r5,ror#11
1419	add	r4,r4,r2			@ h+=Ch(e,f,g)
1420#if 23==31
1421	and	r3,r3,#0xff
1422	cmp	r3,#0xf2			@ done?
1423#endif
1424#if 23<15
1425# if __ARM_ARCH__>=7
1426	ldr	r2,[r1],#4			@ prefetch
1427# else
1428	ldrb	r2,[r1,#3]
1429# endif
1430	eor	r3,r5,r6			@ a^b, b^c in next round
1431#else
1432	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1433	eor	r3,r5,r6			@ a^b, b^c in next round
1434	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1435#endif
1436	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1437	and	r12,r12,r3			@ (b^c)&=(a^b)
1438	add	r8,r8,r4			@ d+=h
1439	eor	r12,r12,r6			@ Maj(a,b,c)
1440	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1441	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1442	@ ldr	r2,[sp,#9*4]		@ 24
1443	@ ldr	r1,[sp,#6*4]
1444	mov	r0,r2,ror#7
1445	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1446	mov	r12,r1,ror#17
1447	eor	r0,r0,r2,ror#18
1448	eor	r12,r12,r1,ror#19
1449	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1450	ldr	r2,[sp,#8*4]
1451	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1452	ldr	r1,[sp,#1*4]
1453
1454	add	r12,r12,r0
1455	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1456	add	r2,r2,r12
1457	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1458	add	r2,r2,r1			@ X[i]
1459	ldr	r12,[r14],#4			@ *K256++
1460	add	r11,r11,r2			@ h+=X[i]
1461	str	r2,[sp,#8*4]
1462	eor	r2,r9,r10
1463	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1464	and	r2,r2,r8
1465	add	r11,r11,r12			@ h+=K256[i]
1466	eor	r2,r2,r10			@ Ch(e,f,g)
1467	eor	r0,r4,r4,ror#11
1468	add	r11,r11,r2			@ h+=Ch(e,f,g)
1469#if 24==31
1470	and	r12,r12,#0xff
1471	cmp	r12,#0xf2			@ done?
1472#endif
1473#if 24<15
1474# if __ARM_ARCH__>=7
1475	ldr	r2,[r1],#4			@ prefetch
1476# else
1477	ldrb	r2,[r1,#3]
1478# endif
1479	eor	r12,r4,r5			@ a^b, b^c in next round
1480#else
1481	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1482	eor	r12,r4,r5			@ a^b, b^c in next round
1483	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1484#endif
1485	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1486	and	r3,r3,r12			@ (b^c)&=(a^b)
1487	add	r7,r7,r11			@ d+=h
1488	eor	r3,r3,r5			@ Maj(a,b,c)
1489	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1490	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1491	@ ldr	r2,[sp,#10*4]		@ 25
1492	@ ldr	r1,[sp,#7*4]
1493	mov	r0,r2,ror#7
1494	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1495	mov	r3,r1,ror#17
1496	eor	r0,r0,r2,ror#18
1497	eor	r3,r3,r1,ror#19
1498	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1499	ldr	r2,[sp,#9*4]
1500	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1501	ldr	r1,[sp,#2*4]
1502
1503	add	r3,r3,r0
1504	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1505	add	r2,r2,r3
1506	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1507	add	r2,r2,r1			@ X[i]
1508	ldr	r3,[r14],#4			@ *K256++
1509	add	r10,r10,r2			@ h+=X[i]
1510	str	r2,[sp,#9*4]
1511	eor	r2,r8,r9
1512	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1513	and	r2,r2,r7
1514	add	r10,r10,r3			@ h+=K256[i]
1515	eor	r2,r2,r9			@ Ch(e,f,g)
1516	eor	r0,r11,r11,ror#11
1517	add	r10,r10,r2			@ h+=Ch(e,f,g)
1518#if 25==31
1519	and	r3,r3,#0xff
1520	cmp	r3,#0xf2			@ done?
1521#endif
1522#if 25<15
1523# if __ARM_ARCH__>=7
1524	ldr	r2,[r1],#4			@ prefetch
1525# else
1526	ldrb	r2,[r1,#3]
1527# endif
1528	eor	r3,r11,r4			@ a^b, b^c in next round
1529#else
1530	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1531	eor	r3,r11,r4			@ a^b, b^c in next round
1532	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1533#endif
1534	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1535	and	r12,r12,r3			@ (b^c)&=(a^b)
1536	add	r6,r6,r10			@ d+=h
1537	eor	r12,r12,r4			@ Maj(a,b,c)
1538	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1539	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1540	@ ldr	r2,[sp,#11*4]		@ 26
1541	@ ldr	r1,[sp,#8*4]
1542	mov	r0,r2,ror#7
1543	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1544	mov	r12,r1,ror#17
1545	eor	r0,r0,r2,ror#18
1546	eor	r12,r12,r1,ror#19
1547	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1548	ldr	r2,[sp,#10*4]
1549	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1550	ldr	r1,[sp,#3*4]
1551
1552	add	r12,r12,r0
1553	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1554	add	r2,r2,r12
1555	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1556	add	r2,r2,r1			@ X[i]
1557	ldr	r12,[r14],#4			@ *K256++
1558	add	r9,r9,r2			@ h+=X[i]
1559	str	r2,[sp,#10*4]
1560	eor	r2,r7,r8
1561	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1562	and	r2,r2,r6
1563	add	r9,r9,r12			@ h+=K256[i]
1564	eor	r2,r2,r8			@ Ch(e,f,g)
1565	eor	r0,r10,r10,ror#11
1566	add	r9,r9,r2			@ h+=Ch(e,f,g)
1567#if 26==31
1568	and	r12,r12,#0xff
1569	cmp	r12,#0xf2			@ done?
1570#endif
1571#if 26<15
1572# if __ARM_ARCH__>=7
1573	ldr	r2,[r1],#4			@ prefetch
1574# else
1575	ldrb	r2,[r1,#3]
1576# endif
1577	eor	r12,r10,r11			@ a^b, b^c in next round
1578#else
1579	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1580	eor	r12,r10,r11			@ a^b, b^c in next round
1581	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1582#endif
1583	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1584	and	r3,r3,r12			@ (b^c)&=(a^b)
1585	add	r5,r5,r9			@ d+=h
1586	eor	r3,r3,r11			@ Maj(a,b,c)
1587	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1588	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1589	@ ldr	r2,[sp,#12*4]		@ 27
1590	@ ldr	r1,[sp,#9*4]
1591	mov	r0,r2,ror#7
1592	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1593	mov	r3,r1,ror#17
1594	eor	r0,r0,r2,ror#18
1595	eor	r3,r3,r1,ror#19
1596	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1597	ldr	r2,[sp,#11*4]
1598	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1599	ldr	r1,[sp,#4*4]
1600
1601	add	r3,r3,r0
1602	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1603	add	r2,r2,r3
1604	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1605	add	r2,r2,r1			@ X[i]
1606	ldr	r3,[r14],#4			@ *K256++
1607	add	r8,r8,r2			@ h+=X[i]
1608	str	r2,[sp,#11*4]
1609	eor	r2,r6,r7
1610	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1611	and	r2,r2,r5
1612	add	r8,r8,r3			@ h+=K256[i]
1613	eor	r2,r2,r7			@ Ch(e,f,g)
1614	eor	r0,r9,r9,ror#11
1615	add	r8,r8,r2			@ h+=Ch(e,f,g)
1616#if 27==31
1617	and	r3,r3,#0xff
1618	cmp	r3,#0xf2			@ done?
1619#endif
1620#if 27<15
1621# if __ARM_ARCH__>=7
1622	ldr	r2,[r1],#4			@ prefetch
1623# else
1624	ldrb	r2,[r1,#3]
1625# endif
1626	eor	r3,r9,r10			@ a^b, b^c in next round
1627#else
1628	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1629	eor	r3,r9,r10			@ a^b, b^c in next round
1630	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1631#endif
1632	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1633	and	r12,r12,r3			@ (b^c)&=(a^b)
1634	add	r4,r4,r8			@ d+=h
1635	eor	r12,r12,r10			@ Maj(a,b,c)
1636	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1637	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1638	@ ldr	r2,[sp,#13*4]		@ 28
1639	@ ldr	r1,[sp,#10*4]
1640	mov	r0,r2,ror#7
1641	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1642	mov	r12,r1,ror#17
1643	eor	r0,r0,r2,ror#18
1644	eor	r12,r12,r1,ror#19
1645	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1646	ldr	r2,[sp,#12*4]
1647	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1648	ldr	r1,[sp,#5*4]
1649
1650	add	r12,r12,r0
1651	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1652	add	r2,r2,r12
1653	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1654	add	r2,r2,r1			@ X[i]
1655	ldr	r12,[r14],#4			@ *K256++
1656	add	r7,r7,r2			@ h+=X[i]
1657	str	r2,[sp,#12*4]
1658	eor	r2,r5,r6
1659	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1660	and	r2,r2,r4
1661	add	r7,r7,r12			@ h+=K256[i]
1662	eor	r2,r2,r6			@ Ch(e,f,g)
1663	eor	r0,r8,r8,ror#11
1664	add	r7,r7,r2			@ h+=Ch(e,f,g)
1665#if 28==31
1666	and	r12,r12,#0xff
1667	cmp	r12,#0xf2			@ done?
1668#endif
1669#if 28<15
1670# if __ARM_ARCH__>=7
1671	ldr	r2,[r1],#4			@ prefetch
1672# else
1673	ldrb	r2,[r1,#3]
1674# endif
1675	eor	r12,r8,r9			@ a^b, b^c in next round
1676#else
1677	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1678	eor	r12,r8,r9			@ a^b, b^c in next round
1679	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1680#endif
1681	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1682	and	r3,r3,r12			@ (b^c)&=(a^b)
1683	add	r11,r11,r7			@ d+=h
1684	eor	r3,r3,r9			@ Maj(a,b,c)
1685	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1686	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1687	@ ldr	r2,[sp,#14*4]		@ 29
1688	@ ldr	r1,[sp,#11*4]
1689	mov	r0,r2,ror#7
1690	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1691	mov	r3,r1,ror#17
1692	eor	r0,r0,r2,ror#18
1693	eor	r3,r3,r1,ror#19
1694	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1695	ldr	r2,[sp,#13*4]
1696	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1697	ldr	r1,[sp,#6*4]
1698
1699	add	r3,r3,r0
1700	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1701	add	r2,r2,r3
1702	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1703	add	r2,r2,r1			@ X[i]
1704	ldr	r3,[r14],#4			@ *K256++
1705	add	r6,r6,r2			@ h+=X[i]
1706	str	r2,[sp,#13*4]
1707	eor	r2,r4,r5
1708	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1709	and	r2,r2,r11
1710	add	r6,r6,r3			@ h+=K256[i]
1711	eor	r2,r2,r5			@ Ch(e,f,g)
1712	eor	r0,r7,r7,ror#11
1713	add	r6,r6,r2			@ h+=Ch(e,f,g)
1714#if 29==31
1715	and	r3,r3,#0xff
1716	cmp	r3,#0xf2			@ done?
1717#endif
1718#if 29<15
1719# if __ARM_ARCH__>=7
1720	ldr	r2,[r1],#4			@ prefetch
1721# else
1722	ldrb	r2,[r1,#3]
1723# endif
1724	eor	r3,r7,r8			@ a^b, b^c in next round
1725#else
1726	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1727	eor	r3,r7,r8			@ a^b, b^c in next round
1728	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1729#endif
1730	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1731	and	r12,r12,r3			@ (b^c)&=(a^b)
1732	add	r10,r10,r6			@ d+=h
1733	eor	r12,r12,r8			@ Maj(a,b,c)
1734	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1735	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1736	@ ldr	r2,[sp,#15*4]		@ 30
1737	@ ldr	r1,[sp,#12*4]
1738	mov	r0,r2,ror#7
1739	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1740	mov	r12,r1,ror#17
1741	eor	r0,r0,r2,ror#18
1742	eor	r12,r12,r1,ror#19
1743	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1744	ldr	r2,[sp,#14*4]
1745	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1746	ldr	r1,[sp,#7*4]
1747
1748	add	r12,r12,r0
1749	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1750	add	r2,r2,r12
1751	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1752	add	r2,r2,r1			@ X[i]
1753	ldr	r12,[r14],#4			@ *K256++
1754	add	r5,r5,r2			@ h+=X[i]
1755	str	r2,[sp,#14*4]
1756	eor	r2,r11,r4
1757	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1758	and	r2,r2,r10
1759	add	r5,r5,r12			@ h+=K256[i]
1760	eor	r2,r2,r4			@ Ch(e,f,g)
1761	eor	r0,r6,r6,ror#11
1762	add	r5,r5,r2			@ h+=Ch(e,f,g)
1763#if 30==31
1764	and	r12,r12,#0xff
1765	cmp	r12,#0xf2			@ done?
1766#endif
1767#if 30<15
1768# if __ARM_ARCH__>=7
1769	ldr	r2,[r1],#4			@ prefetch
1770# else
1771	ldrb	r2,[r1,#3]
1772# endif
1773	eor	r12,r6,r7			@ a^b, b^c in next round
1774#else
1775	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1776	eor	r12,r6,r7			@ a^b, b^c in next round
1777	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1778#endif
1779	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1780	and	r3,r3,r12			@ (b^c)&=(a^b)
1781	add	r9,r9,r5			@ d+=h
1782	eor	r3,r3,r7			@ Maj(a,b,c)
1783	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1784	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1785	@ ldr	r2,[sp,#0*4]		@ 31
1786	@ ldr	r1,[sp,#13*4]
1787	mov	r0,r2,ror#7
1788	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1789	mov	r3,r1,ror#17
1790	eor	r0,r0,r2,ror#18
1791	eor	r3,r3,r1,ror#19
1792	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1793	ldr	r2,[sp,#15*4]
1794	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1795	ldr	r1,[sp,#8*4]
1796
1797	add	r3,r3,r0
1798	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1799	add	r2,r2,r3
1800	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1801	add	r2,r2,r1			@ X[i]
1802	ldr	r3,[r14],#4			@ *K256++
1803	add	r4,r4,r2			@ h+=X[i]
1804	str	r2,[sp,#15*4]
1805	eor	r2,r10,r11
1806	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1807	and	r2,r2,r9
1808	add	r4,r4,r3			@ h+=K256[i]
1809	eor	r2,r2,r11			@ Ch(e,f,g)
1810	eor	r0,r5,r5,ror#11
1811	add	r4,r4,r2			@ h+=Ch(e,f,g)
1812#if 31==31
1813	and	r3,r3,#0xff
1814	cmp	r3,#0xf2			@ done?
1815#endif
1816#if 31<15
1817# if __ARM_ARCH__>=7
1818	ldr	r2,[r1],#4			@ prefetch
1819# else
1820	ldrb	r2,[r1,#3]
1821# endif
1822	eor	r3,r5,r6			@ a^b, b^c in next round
1823#else
1824	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1825	eor	r3,r5,r6			@ a^b, b^c in next round
1826	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1827#endif
1828	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1829	and	r12,r12,r3			@ (b^c)&=(a^b)
1830	add	r8,r8,r4			@ d+=h
1831	eor	r12,r12,r6			@ Maj(a,b,c)
1832	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1833	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1834#ifdef	__thumb2__
1835	ite	eq			@ Thumb2 thing, sanity check in ARM
1836#endif
1837	ldreq	r3,[sp,#16*4]		@ pull ctx
1838	bne	.Lrounds_16_xx
1839
1840	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1841	ldr	r0,[r3,#0]
1842	ldr	r2,[r3,#4]
1843	ldr	r12,[r3,#8]
1844	add	r4,r4,r0
1845	ldr	r0,[r3,#12]
1846	add	r5,r5,r2
1847	ldr	r2,[r3,#16]
1848	add	r6,r6,r12
1849	ldr	r12,[r3,#20]
1850	add	r7,r7,r0
1851	ldr	r0,[r3,#24]
1852	add	r8,r8,r2
1853	ldr	r2,[r3,#28]
1854	add	r9,r9,r12
1855	ldr	r1,[sp,#17*4]		@ pull inp
1856	ldr	r12,[sp,#18*4]		@ pull inp+len
1857	add	r10,r10,r0
1858	add	r11,r11,r2
1859	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1860	cmp	r1,r12
1861	sub	r14,r14,#256	@ rewind Ktbl
1862	bne	.Loop
1863
1864	add	sp,sp,#19*4	@ destroy frame
1865#if __ARM_ARCH__>=5
1866	ldmia	sp!,{r4-r11,pc}
1867#else
1868	ldmia	sp!,{r4-r11,lr}
1869	tst	lr,#1
1870	moveq	pc,lr			@ be binary compatible with V4, yet
1871	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1872#endif
1873.size	sha256_block_data_order,.-sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
@ NEON code path. Only assembled when the build targets ARMv7-A or newer;
@ the caller is expected to select this entry point at run time on CPUs
@ that advertise NEON support.
.arch	armv7-a		@ NEON mnemonics below require ARMv7-A
.fpu	neon		@ enable NEON (Advanced SIMD) instruction encoding

.global	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	5		@ 32-byte align the entry point
.skip	16		@ NOTE(review): 16-byte pad before the label; presumably
			@ keeps the adr/K256 addressing in range — confirm against
			@ the generating cryptogams perl script before changing
1882sha256_block_data_order_neon:
1883.LNEON:
1884	stmdb	sp!,{r4-r12,lr}
1885
1886	sub	r11,sp,#16*4+16
1887	adr	r14,K256
1888	bic	r11,r11,#15		@ align for 128-bit stores
1889	mov	r12,sp
1890	mov	sp,r11			@ alloca
1891	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1892
1893	vld1.8		{q0},[r1]!
1894	vld1.8		{q1},[r1]!
1895	vld1.8		{q2},[r1]!
1896	vld1.8		{q3},[r1]!
1897	vld1.32		{q8},[r14,:128]!
1898	vld1.32		{q9},[r14,:128]!
1899	vld1.32		{q10},[r14,:128]!
1900	vld1.32		{q11},[r14,:128]!
1901	vrev32.8	q0,q0		@ yes, even on
1902	str		r0,[sp,#64]
1903	vrev32.8	q1,q1		@ big-endian
1904	str		r1,[sp,#68]
1905	mov		r1,sp
1906	vrev32.8	q2,q2
1907	str		r2,[sp,#72]
1908	vrev32.8	q3,q3
1909	str		r12,[sp,#76]		@ save original sp
1910	vadd.i32	q8,q8,q0
1911	vadd.i32	q9,q9,q1
1912	vst1.32		{q8},[r1,:128]!
1913	vadd.i32	q10,q10,q2
1914	vst1.32		{q9},[r1,:128]!
1915	vadd.i32	q11,q11,q3
1916	vst1.32		{q10},[r1,:128]!
1917	vst1.32		{q11},[r1,:128]!
1918
1919	ldmia		r0,{r4-r11}
1920	sub		r1,r1,#64
1921	ldr		r2,[sp,#0]
1922	eor		r12,r12,r12
1923	eor		r3,r5,r6
1924	b		.L_00_48
1925
1926.align	4
1927.L_00_48:
1928	vext.8	q8,q0,q1,#4
1929	add	r11,r11,r2
1930	eor	r2,r9,r10
1931	eor	r0,r8,r8,ror#5
1932	vext.8	q9,q2,q3,#4
1933	add	r4,r4,r12
1934	and	r2,r2,r8
1935	eor	r12,r0,r8,ror#19
1936	vshr.u32	q10,q8,#7
1937	eor	r0,r4,r4,ror#11
1938	eor	r2,r2,r10
1939	vadd.i32	q0,q0,q9
1940	add	r11,r11,r12,ror#6
1941	eor	r12,r4,r5
1942	vshr.u32	q9,q8,#3
1943	eor	r0,r0,r4,ror#20
1944	add	r11,r11,r2
1945	vsli.32	q10,q8,#25
1946	ldr	r2,[sp,#4]
1947	and	r3,r3,r12
1948	vshr.u32	q11,q8,#18
1949	add	r7,r7,r11
1950	add	r11,r11,r0,ror#2
1951	eor	r3,r3,r5
1952	veor	q9,q9,q10
1953	add	r10,r10,r2
1954	vsli.32	q11,q8,#14
1955	eor	r2,r8,r9
1956	eor	r0,r7,r7,ror#5
1957	vshr.u32	d24,d7,#17
1958	add	r11,r11,r3
1959	and	r2,r2,r7
1960	veor	q9,q9,q11
1961	eor	r3,r0,r7,ror#19
1962	eor	r0,r11,r11,ror#11
1963	vsli.32	d24,d7,#15
1964	eor	r2,r2,r9
1965	add	r10,r10,r3,ror#6
1966	vshr.u32	d25,d7,#10
1967	eor	r3,r11,r4
1968	eor	r0,r0,r11,ror#20
1969	vadd.i32	q0,q0,q9
1970	add	r10,r10,r2
1971	ldr	r2,[sp,#8]
1972	veor	d25,d25,d24
1973	and	r12,r12,r3
1974	add	r6,r6,r10
1975	vshr.u32	d24,d7,#19
1976	add	r10,r10,r0,ror#2
1977	eor	r12,r12,r4
1978	vsli.32	d24,d7,#13
1979	add	r9,r9,r2
1980	eor	r2,r7,r8
1981	veor	d25,d25,d24
1982	eor	r0,r6,r6,ror#5
1983	add	r10,r10,r12
1984	vadd.i32	d0,d0,d25
1985	and	r2,r2,r6
1986	eor	r12,r0,r6,ror#19
1987	vshr.u32	d24,d0,#17
1988	eor	r0,r10,r10,ror#11
1989	eor	r2,r2,r8
1990	vsli.32	d24,d0,#15
1991	add	r9,r9,r12,ror#6
1992	eor	r12,r10,r11
1993	vshr.u32	d25,d0,#10
1994	eor	r0,r0,r10,ror#20
1995	add	r9,r9,r2
1996	veor	d25,d25,d24
1997	ldr	r2,[sp,#12]
1998	and	r3,r3,r12
1999	vshr.u32	d24,d0,#19
2000	add	r5,r5,r9
2001	add	r9,r9,r0,ror#2
2002	eor	r3,r3,r11
2003	vld1.32	{q8},[r14,:128]!
2004	add	r8,r8,r2
2005	vsli.32	d24,d0,#13
2006	eor	r2,r6,r7
2007	eor	r0,r5,r5,ror#5
2008	veor	d25,d25,d24
2009	add	r9,r9,r3
2010	and	r2,r2,r5
2011	vadd.i32	d1,d1,d25
2012	eor	r3,r0,r5,ror#19
2013	eor	r0,r9,r9,ror#11
2014	vadd.i32	q8,q8,q0
2015	eor	r2,r2,r7
2016	add	r8,r8,r3,ror#6
2017	eor	r3,r9,r10
2018	eor	r0,r0,r9,ror#20
2019	add	r8,r8,r2
2020	ldr	r2,[sp,#16]
2021	and	r12,r12,r3
2022	add	r4,r4,r8
2023	vst1.32	{q8},[r1,:128]!
2024	add	r8,r8,r0,ror#2
2025	eor	r12,r12,r10
2026	vext.8	q8,q1,q2,#4
2027	add	r7,r7,r2
2028	eor	r2,r5,r6
2029	eor	r0,r4,r4,ror#5
2030	vext.8	q9,q3,q0,#4
2031	add	r8,r8,r12
2032	and	r2,r2,r4
2033	eor	r12,r0,r4,ror#19
2034	vshr.u32	q10,q8,#7
2035	eor	r0,r8,r8,ror#11
2036	eor	r2,r2,r6
2037	vadd.i32	q1,q1,q9
2038	add	r7,r7,r12,ror#6
2039	eor	r12,r8,r9
2040	vshr.u32	q9,q8,#3
2041	eor	r0,r0,r8,ror#20
2042	add	r7,r7,r2
2043	vsli.32	q10,q8,#25
2044	ldr	r2,[sp,#20]
2045	and	r3,r3,r12
2046	vshr.u32	q11,q8,#18
2047	add	r11,r11,r7
2048	add	r7,r7,r0,ror#2
2049	eor	r3,r3,r9
2050	veor	q9,q9,q10
2051	add	r6,r6,r2
2052	vsli.32	q11,q8,#14
2053	eor	r2,r4,r5
2054	eor	r0,r11,r11,ror#5
2055	vshr.u32	d24,d1,#17
2056	add	r7,r7,r3
2057	and	r2,r2,r11
2058	veor	q9,q9,q11
2059	eor	r3,r0,r11,ror#19
2060	eor	r0,r7,r7,ror#11
2061	vsli.32	d24,d1,#15
2062	eor	r2,r2,r5
2063	add	r6,r6,r3,ror#6
2064	vshr.u32	d25,d1,#10
2065	eor	r3,r7,r8
2066	eor	r0,r0,r7,ror#20
2067	vadd.i32	q1,q1,q9
2068	add	r6,r6,r2
2069	ldr	r2,[sp,#24]
2070	veor	d25,d25,d24
2071	and	r12,r12,r3
2072	add	r10,r10,r6
2073	vshr.u32	d24,d1,#19
2074	add	r6,r6,r0,ror#2
2075	eor	r12,r12,r8
2076	vsli.32	d24,d1,#13
2077	add	r5,r5,r2
2078	eor	r2,r11,r4
2079	veor	d25,d25,d24
2080	eor	r0,r10,r10,ror#5
2081	add	r6,r6,r12
2082	vadd.i32	d2,d2,d25
2083	and	r2,r2,r10
2084	eor	r12,r0,r10,ror#19
2085	vshr.u32	d24,d2,#17
2086	eor	r0,r6,r6,ror#11
2087	eor	r2,r2,r4
2088	vsli.32	d24,d2,#15
2089	add	r5,r5,r12,ror#6
2090	eor	r12,r6,r7
2091	vshr.u32	d25,d2,#10
2092	eor	r0,r0,r6,ror#20
2093	add	r5,r5,r2
2094	veor	d25,d25,d24
2095	ldr	r2,[sp,#28]
2096	and	r3,r3,r12
2097	vshr.u32	d24,d2,#19
2098	add	r9,r9,r5
2099	add	r5,r5,r0,ror#2
2100	eor	r3,r3,r7
2101	vld1.32	{q8},[r14,:128]!
2102	add	r4,r4,r2
2103	vsli.32	d24,d2,#13
2104	eor	r2,r10,r11
2105	eor	r0,r9,r9,ror#5
2106	veor	d25,d25,d24
2107	add	r5,r5,r3
2108	and	r2,r2,r9
2109	vadd.i32	d3,d3,d25
2110	eor	r3,r0,r9,ror#19
2111	eor	r0,r5,r5,ror#11
2112	vadd.i32	q8,q8,q1
2113	eor	r2,r2,r11
2114	add	r4,r4,r3,ror#6
2115	eor	r3,r5,r6
2116	eor	r0,r0,r5,ror#20
2117	add	r4,r4,r2
2118	ldr	r2,[sp,#32]
2119	and	r12,r12,r3
2120	add	r8,r8,r4
2121	vst1.32	{q8},[r1,:128]!
2122	add	r4,r4,r0,ror#2
2123	eor	r12,r12,r6
2124	vext.8	q8,q2,q3,#4
2125	add	r11,r11,r2
2126	eor	r2,r9,r10
2127	eor	r0,r8,r8,ror#5
2128	vext.8	q9,q0,q1,#4
2129	add	r4,r4,r12
2130	and	r2,r2,r8
2131	eor	r12,r0,r8,ror#19
2132	vshr.u32	q10,q8,#7
2133	eor	r0,r4,r4,ror#11
2134	eor	r2,r2,r10
2135	vadd.i32	q2,q2,q9
2136	add	r11,r11,r12,ror#6
2137	eor	r12,r4,r5
2138	vshr.u32	q9,q8,#3
2139	eor	r0,r0,r4,ror#20
2140	add	r11,r11,r2
2141	vsli.32	q10,q8,#25
2142	ldr	r2,[sp,#36]
2143	and	r3,r3,r12
2144	vshr.u32	q11,q8,#18
2145	add	r7,r7,r11
2146	add	r11,r11,r0,ror#2
2147	eor	r3,r3,r5
2148	veor	q9,q9,q10
2149	add	r10,r10,r2
2150	vsli.32	q11,q8,#14
2151	eor	r2,r8,r9
2152	eor	r0,r7,r7,ror#5
2153	vshr.u32	d24,d3,#17
2154	add	r11,r11,r3
2155	and	r2,r2,r7
2156	veor	q9,q9,q11
2157	eor	r3,r0,r7,ror#19
2158	eor	r0,r11,r11,ror#11
2159	vsli.32	d24,d3,#15
2160	eor	r2,r2,r9
2161	add	r10,r10,r3,ror#6
2162	vshr.u32	d25,d3,#10
2163	eor	r3,r11,r4
2164	eor	r0,r0,r11,ror#20
2165	vadd.i32	q2,q2,q9
2166	add	r10,r10,r2
2167	ldr	r2,[sp,#40]
2168	veor	d25,d25,d24
2169	and	r12,r12,r3
2170	add	r6,r6,r10
2171	vshr.u32	d24,d3,#19
2172	add	r10,r10,r0,ror#2
2173	eor	r12,r12,r4
2174	vsli.32	d24,d3,#13
2175	add	r9,r9,r2
2176	eor	r2,r7,r8
2177	veor	d25,d25,d24
2178	eor	r0,r6,r6,ror#5
2179	add	r10,r10,r12
2180	vadd.i32	d4,d4,d25
2181	and	r2,r2,r6
2182	eor	r12,r0,r6,ror#19
2183	vshr.u32	d24,d4,#17
2184	eor	r0,r10,r10,ror#11
2185	eor	r2,r2,r8
2186	vsli.32	d24,d4,#15
2187	add	r9,r9,r12,ror#6
2188	eor	r12,r10,r11
2189	vshr.u32	d25,d4,#10
2190	eor	r0,r0,r10,ror#20
2191	add	r9,r9,r2
2192	veor	d25,d25,d24
2193	ldr	r2,[sp,#44]
2194	and	r3,r3,r12
2195	vshr.u32	d24,d4,#19
2196	add	r5,r5,r9
2197	add	r9,r9,r0,ror#2
2198	eor	r3,r3,r11
2199	vld1.32	{q8},[r14,:128]!
2200	add	r8,r8,r2
2201	vsli.32	d24,d4,#13
2202	eor	r2,r6,r7
2203	eor	r0,r5,r5,ror#5
2204	veor	d25,d25,d24
2205	add	r9,r9,r3
2206	and	r2,r2,r5
2207	vadd.i32	d5,d5,d25
2208	eor	r3,r0,r5,ror#19
2209	eor	r0,r9,r9,ror#11
2210	vadd.i32	q8,q8,q2
2211	eor	r2,r2,r7
2212	add	r8,r8,r3,ror#6
2213	eor	r3,r9,r10
2214	eor	r0,r0,r9,ror#20
2215	add	r8,r8,r2
2216	ldr	r2,[sp,#48]
2217	and	r12,r12,r3
2218	add	r4,r4,r8
2219	vst1.32	{q8},[r1,:128]!
2220	add	r8,r8,r0,ror#2
2221	eor	r12,r12,r10
2222	vext.8	q8,q3,q0,#4
2223	add	r7,r7,r2
2224	eor	r2,r5,r6
2225	eor	r0,r4,r4,ror#5
2226	vext.8	q9,q1,q2,#4
2227	add	r8,r8,r12
2228	and	r2,r2,r4
2229	eor	r12,r0,r4,ror#19
2230	vshr.u32	q10,q8,#7
2231	eor	r0,r8,r8,ror#11
2232	eor	r2,r2,r6
2233	vadd.i32	q3,q3,q9
2234	add	r7,r7,r12,ror#6
2235	eor	r12,r8,r9
2236	vshr.u32	q9,q8,#3
2237	eor	r0,r0,r8,ror#20
2238	add	r7,r7,r2
2239	vsli.32	q10,q8,#25
2240	ldr	r2,[sp,#52]
2241	and	r3,r3,r12
2242	vshr.u32	q11,q8,#18
2243	add	r11,r11,r7
2244	add	r7,r7,r0,ror#2
2245	eor	r3,r3,r9
2246	veor	q9,q9,q10
2247	add	r6,r6,r2
2248	vsli.32	q11,q8,#14
2249	eor	r2,r4,r5
2250	eor	r0,r11,r11,ror#5
2251	vshr.u32	d24,d5,#17
2252	add	r7,r7,r3
2253	and	r2,r2,r11
2254	veor	q9,q9,q11
2255	eor	r3,r0,r11,ror#19
2256	eor	r0,r7,r7,ror#11
2257	vsli.32	d24,d5,#15
2258	eor	r2,r2,r5
2259	add	r6,r6,r3,ror#6
2260	vshr.u32	d25,d5,#10
2261	eor	r3,r7,r8
2262	eor	r0,r0,r7,ror#20
2263	vadd.i32	q3,q3,q9
2264	add	r6,r6,r2
2265	ldr	r2,[sp,#56]
2266	veor	d25,d25,d24
2267	and	r12,r12,r3
2268	add	r10,r10,r6
2269	vshr.u32	d24,d5,#19
2270	add	r6,r6,r0,ror#2
2271	eor	r12,r12,r8
2272	vsli.32	d24,d5,#13
2273	add	r5,r5,r2
2274	eor	r2,r11,r4
2275	veor	d25,d25,d24
2276	eor	r0,r10,r10,ror#5
2277	add	r6,r6,r12
2278	vadd.i32	d6,d6,d25
2279	and	r2,r2,r10
2280	eor	r12,r0,r10,ror#19
2281	vshr.u32	d24,d6,#17
2282	eor	r0,r6,r6,ror#11
2283	eor	r2,r2,r4
2284	vsli.32	d24,d6,#15
2285	add	r5,r5,r12,ror#6
2286	eor	r12,r6,r7
2287	vshr.u32	d25,d6,#10
2288	eor	r0,r0,r6,ror#20
2289	add	r5,r5,r2
2290	veor	d25,d25,d24
2291	ldr	r2,[sp,#60]
2292	and	r3,r3,r12
2293	vshr.u32	d24,d6,#19
2294	add	r9,r9,r5
2295	add	r5,r5,r0,ror#2
2296	eor	r3,r3,r7
2297	vld1.32	{q8},[r14,:128]!
2298	add	r4,r4,r2
2299	vsli.32	d24,d6,#13
2300	eor	r2,r10,r11
2301	eor	r0,r9,r9,ror#5
2302	veor	d25,d25,d24
2303	add	r5,r5,r3
2304	and	r2,r2,r9
2305	vadd.i32	d7,d7,d25
2306	eor	r3,r0,r9,ror#19
2307	eor	r0,r5,r5,ror#11
2308	vadd.i32	q8,q8,q3
2309	eor	r2,r2,r11
2310	add	r4,r4,r3,ror#6
2311	eor	r3,r5,r6
2312	eor	r0,r0,r5,ror#20
2313	add	r4,r4,r2
2314	ldr	r2,[r14]
2315	and	r12,r12,r3
2316	add	r8,r8,r4
2317	vst1.32	{q8},[r1,:128]!
2318	add	r4,r4,r0,ror#2
2319	eor	r12,r12,r6
2320	teq	r2,#0				@ check for K256 terminator
2321	ldr	r2,[sp,#0]
2322	sub	r1,r1,#64
2323	bne	.L_00_48
2324
2325	ldr		r1,[sp,#68]
2326	ldr		r0,[sp,#72]
2327	sub		r14,r14,#256	@ rewind r14
2328	teq		r1,r0
2329	it		eq
2330	subeq		r1,r1,#64		@ avoid SEGV
2331	vld1.8		{q0},[r1]!		@ load next input block
2332	vld1.8		{q1},[r1]!
2333	vld1.8		{q2},[r1]!
2334	vld1.8		{q3},[r1]!
2335	it		ne
2336	strne		r1,[sp,#68]
2337	mov		r1,sp
2338	add	r11,r11,r2
2339	eor	r2,r9,r10
2340	eor	r0,r8,r8,ror#5
2341	add	r4,r4,r12
2342	vld1.32	{q8},[r14,:128]!
2343	and	r2,r2,r8
2344	eor	r12,r0,r8,ror#19
2345	eor	r0,r4,r4,ror#11
2346	eor	r2,r2,r10
2347	vrev32.8	q0,q0
2348	add	r11,r11,r12,ror#6
2349	eor	r12,r4,r5
2350	eor	r0,r0,r4,ror#20
2351	add	r11,r11,r2
2352	vadd.i32	q8,q8,q0
2353	ldr	r2,[sp,#4]
2354	and	r3,r3,r12
2355	add	r7,r7,r11
2356	add	r11,r11,r0,ror#2
2357	eor	r3,r3,r5
2358	add	r10,r10,r2
2359	eor	r2,r8,r9
2360	eor	r0,r7,r7,ror#5
2361	add	r11,r11,r3
2362	and	r2,r2,r7
2363	eor	r3,r0,r7,ror#19
2364	eor	r0,r11,r11,ror#11
2365	eor	r2,r2,r9
2366	add	r10,r10,r3,ror#6
2367	eor	r3,r11,r4
2368	eor	r0,r0,r11,ror#20
2369	add	r10,r10,r2
2370	ldr	r2,[sp,#8]
2371	and	r12,r12,r3
2372	add	r6,r6,r10
2373	add	r10,r10,r0,ror#2
2374	eor	r12,r12,r4
2375	add	r9,r9,r2
2376	eor	r2,r7,r8
2377	eor	r0,r6,r6,ror#5
2378	add	r10,r10,r12
2379	and	r2,r2,r6
2380	eor	r12,r0,r6,ror#19
2381	eor	r0,r10,r10,ror#11
2382	eor	r2,r2,r8
2383	add	r9,r9,r12,ror#6
2384	eor	r12,r10,r11
2385	eor	r0,r0,r10,ror#20
2386	add	r9,r9,r2
2387	ldr	r2,[sp,#12]
2388	and	r3,r3,r12
2389	add	r5,r5,r9
2390	add	r9,r9,r0,ror#2
2391	eor	r3,r3,r11
2392	add	r8,r8,r2
2393	eor	r2,r6,r7
2394	eor	r0,r5,r5,ror#5
2395	add	r9,r9,r3
2396	and	r2,r2,r5
2397	eor	r3,r0,r5,ror#19
2398	eor	r0,r9,r9,ror#11
2399	eor	r2,r2,r7
2400	add	r8,r8,r3,ror#6
2401	eor	r3,r9,r10
2402	eor	r0,r0,r9,ror#20
2403	add	r8,r8,r2
2404	ldr	r2,[sp,#16]
2405	and	r12,r12,r3
2406	add	r4,r4,r8
2407	add	r8,r8,r0,ror#2
2408	eor	r12,r12,r10
2409	vst1.32	{q8},[r1,:128]!
2410	add	r7,r7,r2
2411	eor	r2,r5,r6
2412	eor	r0,r4,r4,ror#5
2413	add	r8,r8,r12
2414	vld1.32	{q8},[r14,:128]!
2415	and	r2,r2,r4
2416	eor	r12,r0,r4,ror#19
2417	eor	r0,r8,r8,ror#11
2418	eor	r2,r2,r6
2419	vrev32.8	q1,q1
2420	add	r7,r7,r12,ror#6
2421	eor	r12,r8,r9
2422	eor	r0,r0,r8,ror#20
2423	add	r7,r7,r2
2424	vadd.i32	q8,q8,q1
2425	ldr	r2,[sp,#20]
2426	and	r3,r3,r12
2427	add	r11,r11,r7
2428	add	r7,r7,r0,ror#2
2429	eor	r3,r3,r9
2430	add	r6,r6,r2
2431	eor	r2,r4,r5
2432	eor	r0,r11,r11,ror#5
2433	add	r7,r7,r3
2434	and	r2,r2,r11
2435	eor	r3,r0,r11,ror#19
2436	eor	r0,r7,r7,ror#11
2437	eor	r2,r2,r5
2438	add	r6,r6,r3,ror#6
2439	eor	r3,r7,r8
2440	eor	r0,r0,r7,ror#20
2441	add	r6,r6,r2
2442	ldr	r2,[sp,#24]
2443	and	r12,r12,r3
2444	add	r10,r10,r6
2445	add	r6,r6,r0,ror#2
2446	eor	r12,r12,r8
2447	add	r5,r5,r2
2448	eor	r2,r11,r4
2449	eor	r0,r10,r10,ror#5
2450	add	r6,r6,r12
2451	and	r2,r2,r10
2452	eor	r12,r0,r10,ror#19
2453	eor	r0,r6,r6,ror#11
2454	eor	r2,r2,r4
2455	add	r5,r5,r12,ror#6
2456	eor	r12,r6,r7
2457	eor	r0,r0,r6,ror#20
2458	add	r5,r5,r2
2459	ldr	r2,[sp,#28]
2460	and	r3,r3,r12
2461	add	r9,r9,r5
2462	add	r5,r5,r0,ror#2
2463	eor	r3,r3,r7
2464	add	r4,r4,r2
2465	eor	r2,r10,r11
2466	eor	r0,r9,r9,ror#5
2467	add	r5,r5,r3
2468	and	r2,r2,r9
2469	eor	r3,r0,r9,ror#19
2470	eor	r0,r5,r5,ror#11
2471	eor	r2,r2,r11
2472	add	r4,r4,r3,ror#6
2473	eor	r3,r5,r6
2474	eor	r0,r0,r5,ror#20
2475	add	r4,r4,r2
2476	ldr	r2,[sp,#32]
2477	and	r12,r12,r3
2478	add	r8,r8,r4
2479	add	r4,r4,r0,ror#2
2480	eor	r12,r12,r6
2481	vst1.32	{q8},[r1,:128]!
2482	add	r11,r11,r2
2483	eor	r2,r9,r10
2484	eor	r0,r8,r8,ror#5
2485	add	r4,r4,r12
2486	vld1.32	{q8},[r14,:128]!
2487	and	r2,r2,r8
2488	eor	r12,r0,r8,ror#19
2489	eor	r0,r4,r4,ror#11
2490	eor	r2,r2,r10
2491	vrev32.8	q2,q2
2492	add	r11,r11,r12,ror#6
2493	eor	r12,r4,r5
2494	eor	r0,r0,r4,ror#20
2495	add	r11,r11,r2
2496	vadd.i32	q8,q8,q2
2497	ldr	r2,[sp,#36]
2498	and	r3,r3,r12
2499	add	r7,r7,r11
2500	add	r11,r11,r0,ror#2
2501	eor	r3,r3,r5
2502	add	r10,r10,r2
2503	eor	r2,r8,r9
2504	eor	r0,r7,r7,ror#5
2505	add	r11,r11,r3
2506	and	r2,r2,r7
2507	eor	r3,r0,r7,ror#19
2508	eor	r0,r11,r11,ror#11
2509	eor	r2,r2,r9
2510	add	r10,r10,r3,ror#6
2511	eor	r3,r11,r4
2512	eor	r0,r0,r11,ror#20
2513	add	r10,r10,r2
2514	ldr	r2,[sp,#40]
2515	and	r12,r12,r3
2516	add	r6,r6,r10
2517	add	r10,r10,r0,ror#2
2518	eor	r12,r12,r4
2519	add	r9,r9,r2
2520	eor	r2,r7,r8
2521	eor	r0,r6,r6,ror#5
2522	add	r10,r10,r12
2523	and	r2,r2,r6
2524	eor	r12,r0,r6,ror#19
2525	eor	r0,r10,r10,ror#11
2526	eor	r2,r2,r8
2527	add	r9,r9,r12,ror#6
2528	eor	r12,r10,r11
2529	eor	r0,r0,r10,ror#20
2530	add	r9,r9,r2
2531	ldr	r2,[sp,#44]
2532	and	r3,r3,r12
2533	add	r5,r5,r9
2534	add	r9,r9,r0,ror#2
2535	eor	r3,r3,r11
2536	add	r8,r8,r2
2537	eor	r2,r6,r7
2538	eor	r0,r5,r5,ror#5
2539	add	r9,r9,r3
2540	and	r2,r2,r5
2541	eor	r3,r0,r5,ror#19
2542	eor	r0,r9,r9,ror#11
2543	eor	r2,r2,r7
2544	add	r8,r8,r3,ror#6
2545	eor	r3,r9,r10
2546	eor	r0,r0,r9,ror#20
2547	add	r8,r8,r2
2548	ldr	r2,[sp,#48]
2549	and	r12,r12,r3
2550	add	r4,r4,r8
2551	add	r8,r8,r0,ror#2
2552	eor	r12,r12,r10
2553	vst1.32	{q8},[r1,:128]!
2554	add	r7,r7,r2
2555	eor	r2,r5,r6
2556	eor	r0,r4,r4,ror#5
2557	add	r8,r8,r12
2558	vld1.32	{q8},[r14,:128]!
2559	and	r2,r2,r4
2560	eor	r12,r0,r4,ror#19
2561	eor	r0,r8,r8,ror#11
2562	eor	r2,r2,r6
2563	vrev32.8	q3,q3
2564	add	r7,r7,r12,ror#6
2565	eor	r12,r8,r9
2566	eor	r0,r0,r8,ror#20
2567	add	r7,r7,r2
2568	vadd.i32	q8,q8,q3
2569	ldr	r2,[sp,#52]
2570	and	r3,r3,r12
2571	add	r11,r11,r7
2572	add	r7,r7,r0,ror#2
2573	eor	r3,r3,r9
2574	add	r6,r6,r2
2575	eor	r2,r4,r5
2576	eor	r0,r11,r11,ror#5
2577	add	r7,r7,r3
2578	and	r2,r2,r11
2579	eor	r3,r0,r11,ror#19
2580	eor	r0,r7,r7,ror#11
2581	eor	r2,r2,r5
2582	add	r6,r6,r3,ror#6
2583	eor	r3,r7,r8
2584	eor	r0,r0,r7,ror#20
2585	add	r6,r6,r2
2586	ldr	r2,[sp,#56]
2587	and	r12,r12,r3
2588	add	r10,r10,r6
2589	add	r6,r6,r0,ror#2
2590	eor	r12,r12,r8
2591	add	r5,r5,r2
2592	eor	r2,r11,r4
2593	eor	r0,r10,r10,ror#5
2594	add	r6,r6,r12
2595	and	r2,r2,r10
2596	eor	r12,r0,r10,ror#19
2597	eor	r0,r6,r6,ror#11
2598	eor	r2,r2,r4
2599	add	r5,r5,r12,ror#6
2600	eor	r12,r6,r7
2601	eor	r0,r0,r6,ror#20
2602	add	r5,r5,r2
2603	ldr	r2,[sp,#60]
2604	and	r3,r3,r12
2605	add	r9,r9,r5
2606	add	r5,r5,r0,ror#2
2607	eor	r3,r3,r7
2608	add	r4,r4,r2
2609	eor	r2,r10,r11
2610	eor	r0,r9,r9,ror#5
2611	add	r5,r5,r3
2612	and	r2,r2,r9
2613	eor	r3,r0,r9,ror#19
2614	eor	r0,r5,r5,ror#11
2615	eor	r2,r2,r11
2616	add	r4,r4,r3,ror#6
2617	eor	r3,r5,r6
2618	eor	r0,r0,r5,ror#20
2619	add	r4,r4,r2
2620	ldr	r2,[sp,#64]
2621	and	r12,r12,r3
2622	add	r8,r8,r4
2623	add	r4,r4,r0,ror#2
2624	eor	r12,r12,r6
2625	vst1.32	{q8},[r1,:128]!
2626	ldr	r0,[r2,#0]
2627	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2628	ldr	r12,[r2,#4]
2629	ldr	r3,[r2,#8]
2630	ldr	r1,[r2,#12]
2631	add	r4,r4,r0			@ accumulate
2632	ldr	r0,[r2,#16]
2633	add	r5,r5,r12
2634	ldr	r12,[r2,#20]
2635	add	r6,r6,r3
2636	ldr	r3,[r2,#24]
2637	add	r7,r7,r1
2638	ldr	r1,[r2,#28]
2639	add	r8,r8,r0
2640	str	r4,[r2],#4
2641	add	r9,r9,r12
2642	str	r5,[r2],#4
2643	add	r10,r10,r3
2644	str	r6,[r2],#4
2645	add	r11,r11,r1
2646	str	r7,[r2],#4
2647	stmia	r2,{r8-r11}
2648
2649	ittte	ne
2650	movne	r1,sp
2651	ldrne	r2,[sp,#0]
2652	eorne	r12,r12,r12
2653	ldreq	sp,[sp,#76]			@ restore original sp
2654	itt	ne
2655	eorne	r3,r5,r6
2656	bne	.L_00_48
2657
2658	ldmia	sp!,{r4-r12,pc}
2659.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2660#endif
2661#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2662
@ INST(a,b,c,d): emit a hand-encoded 32-bit instruction as raw bytes.  Used
@ below for the SHA-256 crypto-extension opcodes (sha256h/sha256h2/
@ sha256su0/sha256su1), which older assemblers do not recognise.
@ In Thumb-2 a 32-bit instruction is stored as two little-endian halfwords,
@ so the byte pairs are swapped relative to the ARM encoding; d|0xc
@ presumably adjusts the leading halfword to the equivalent Thumb-2 NEON
@ encoding space - TODO confirm against the ARM ARM encoding tables.
# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
2668
@ -----------------------------------------------------------------------
@ sha256_block_data_order_armv8(ctx, inp, num)
@ SHA-256 block procedure using the ARMv8 Cryptography Extensions,
@ reached through the .LARMv8 dispatch label.
@ In:   r0 = pointer to the eight 32-bit state words (loaded as q0,q1:
@            q0 = first four words, q1 = last four)
@       r1 = input data pointer
@       r2 = number of 64-byte blocks to process
@       r3 = presumably points #256+32 bytes past the start of the K256
@            constant table, as set up by the common entry code outside
@            this view - TODO confirm; the sub below rewinds it to the
@            table start
@ Out:  updated state stored back through r0
@ The sha256h/sha256h2/sha256su0/sha256su1 instructions are emitted as
@ raw bytes via the INST macro so that assemblers without crypto-
@ extension support can still build this file.
@ -----------------------------------------------------------------------
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32		@ rewind r3 to the start of the K256 table
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	4
.Loop_v8:
	@ One 64-byte block per iteration: load the 16 message words into
	@ q8-q11, byte-swap them from big-endian, keep a copy of the
	@ incoming state in q14/q15, then run 16 quad-round groups.
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!	@ first K256 constants
	vrev32.8	q8,q8		@ big-endian message words -> host order
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2		@ Z set when this is the last block
					@ (consumed by the bne at loop end)
	@ Rounds 0-47: each group adds K to one schedule vector (vadd),
	@ runs four rounds (sha256h/sha256h2 on q0/q1 with q2 holding the
	@ pre-round q0), and extends the message schedule (sha256su0/su1).
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Rounds 48-63: no further schedule extension needed, just the
	@ remaining four quad-round groups.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	@ Feed-forward: add the saved input state, then loop while the
	@ teq above left Z clear (more input remains).
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it		ne
	bne		.Loop_v8

	vst1.32		{q0,q1},[r0]	@ store the updated hash state

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2807#endif
2808.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2809.align	2
2810#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2811.comm   OPENSSL_armcap_P,4,4
2812#endif
2813