@ (Removed web-viewer navigation chrome — "Home / Line# / Scopes / Navigate /
@  Raw / Download" — a scrape artifact, not part of the assembly source.)
@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the Apache License 2.0 (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that latter performs sub-optimally, nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

@ $output is the last argument if it looks like a file (it has an extension)
@ $flavour is the first argument if it doesn't look like a file
@ Build configuration.  Outside the Linux kernel, arm_arch.h supplies
@ __ARM_ARCH__ / __ARM_MAX_ARCH__ and the ARMV7_NEON/ARMV8_SHA256 capability
@ bits used below; inside the kernel, derive the architecture level from the
@ kernel's own macro and cap the maximum at ARMv7.
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Thumb-2 builds use unified syntax; otherwise assemble as plain ARM (A32).
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.text
63
@ K256 — the 64 SHA-256 round constants of FIPS 180-4 (first 32 bits of the
@ fractional parts of the cube roots of the first 64 primes).  The round code
@ walks this table sequentially through r14 (see "ldr r12,[r14],#4" below).
@ A zero terminator word follows the table, outside its declared size.
.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Reference to OPENSSL_armcap_P, the runtime CPU-capability word consulted
@ at function entry to dispatch to the NEON or ARMv8 code paths.  On Win32
@ it is stored as an absolute address (dereferenced twice at runtime); on
@ other platforms as a PC-relative offset from .Lsha256_block_data_order so
@ the code stays position-independent.
.LOPENSSL_armcap:
# ifdef	_WIN32
.word	OPENSSL_armcap_P
# else
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
# endif
#endif
.align	5

95.globl	sha256_block_data_order
96.type	sha256_block_data_order,%function
97sha256_block_data_order:
98.Lsha256_block_data_order:
99#if __ARM_ARCH__<7 && !defined(__thumb2__)
100	sub	r3,pc,#8		@ sha256_block_data_order
101#else
102	adr	r3,.Lsha256_block_data_order
103#endif
104#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
105	ldr	r12,.LOPENSSL_armcap
106# if !defined(_WIN32)
107	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
108# endif
109# if defined(__APPLE__) || defined(_WIN32)
110	ldr	r12,[r12]
111# endif
112	tst	r12,#ARMV8_SHA256
113	bne	.LARMv8
114	tst	r12,#ARMV7_NEON
115	bne	.LNEON
116#endif
117	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
118	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
119	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
120	sub	r14,r3,#256+32	@ K256
121	sub	sp,sp,#16*4		@ alloca(X[16])
122.Loop:
123# if __ARM_ARCH__>=7
124	ldr	r2,[r1],#4
125# else
126	ldrb	r2,[r1,#3]
127# endif
128	eor	r3,r5,r6		@ magic
129	eor	r12,r12,r12
130#if __ARM_ARCH__>=7
131	@ ldr	r2,[r1],#4			@ 0
132# if 0==15
133	str	r1,[sp,#17*4]			@ make room for r1
134# endif
135	eor	r0,r8,r8,ror#5
136	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
137	eor	r0,r0,r8,ror#19	@ Sigma1(e)
138# ifndef __ARMEB__
139	rev	r2,r2
140# endif
141#else
142	@ ldrb	r2,[r1,#3]			@ 0
143	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
144	ldrb	r12,[r1,#2]
145	ldrb	r0,[r1,#1]
146	orr	r2,r2,r12,lsl#8
147	ldrb	r12,[r1],#4
148	orr	r2,r2,r0,lsl#16
149# if 0==15
150	str	r1,[sp,#17*4]			@ make room for r1
151# endif
152	eor	r0,r8,r8,ror#5
153	orr	r2,r2,r12,lsl#24
154	eor	r0,r0,r8,ror#19	@ Sigma1(e)
155#endif
156	ldr	r12,[r14],#4			@ *K256++
157	add	r11,r11,r2			@ h+=X[i]
158	str	r2,[sp,#0*4]
159	eor	r2,r9,r10
160	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
161	and	r2,r2,r8
162	add	r11,r11,r12			@ h+=K256[i]
163	eor	r2,r2,r10			@ Ch(e,f,g)
164	eor	r0,r4,r4,ror#11
165	add	r11,r11,r2			@ h+=Ch(e,f,g)
166#if 0==31
167	and	r12,r12,#0xff
168	cmp	r12,#0xf2			@ done?
169#endif
170#if 0<15
171# if __ARM_ARCH__>=7
172	ldr	r2,[r1],#4			@ prefetch
173# else
174	ldrb	r2,[r1,#3]
175# endif
176	eor	r12,r4,r5			@ a^b, b^c in next round
177#else
178	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
179	eor	r12,r4,r5			@ a^b, b^c in next round
180	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
181#endif
182	eor	r0,r0,r4,ror#20	@ Sigma0(a)
183	and	r3,r3,r12			@ (b^c)&=(a^b)
184	add	r7,r7,r11			@ d+=h
185	eor	r3,r3,r5			@ Maj(a,b,c)
186	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
187	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
188#if __ARM_ARCH__>=7
189	@ ldr	r2,[r1],#4			@ 1
190# if 1==15
191	str	r1,[sp,#17*4]			@ make room for r1
192# endif
193	eor	r0,r7,r7,ror#5
194	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
195	eor	r0,r0,r7,ror#19	@ Sigma1(e)
196# ifndef __ARMEB__
197	rev	r2,r2
198# endif
199#else
200	@ ldrb	r2,[r1,#3]			@ 1
201	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
202	ldrb	r3,[r1,#2]
203	ldrb	r0,[r1,#1]
204	orr	r2,r2,r3,lsl#8
205	ldrb	r3,[r1],#4
206	orr	r2,r2,r0,lsl#16
207# if 1==15
208	str	r1,[sp,#17*4]			@ make room for r1
209# endif
210	eor	r0,r7,r7,ror#5
211	orr	r2,r2,r3,lsl#24
212	eor	r0,r0,r7,ror#19	@ Sigma1(e)
213#endif
214	ldr	r3,[r14],#4			@ *K256++
215	add	r10,r10,r2			@ h+=X[i]
216	str	r2,[sp,#1*4]
217	eor	r2,r8,r9
218	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
219	and	r2,r2,r7
220	add	r10,r10,r3			@ h+=K256[i]
221	eor	r2,r2,r9			@ Ch(e,f,g)
222	eor	r0,r11,r11,ror#11
223	add	r10,r10,r2			@ h+=Ch(e,f,g)
224#if 1==31
225	and	r3,r3,#0xff
226	cmp	r3,#0xf2			@ done?
227#endif
228#if 1<15
229# if __ARM_ARCH__>=7
230	ldr	r2,[r1],#4			@ prefetch
231# else
232	ldrb	r2,[r1,#3]
233# endif
234	eor	r3,r11,r4			@ a^b, b^c in next round
235#else
236	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
237	eor	r3,r11,r4			@ a^b, b^c in next round
238	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
239#endif
240	eor	r0,r0,r11,ror#20	@ Sigma0(a)
241	and	r12,r12,r3			@ (b^c)&=(a^b)
242	add	r6,r6,r10			@ d+=h
243	eor	r12,r12,r4			@ Maj(a,b,c)
244	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
245	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
246#if __ARM_ARCH__>=7
247	@ ldr	r2,[r1],#4			@ 2
248# if 2==15
249	str	r1,[sp,#17*4]			@ make room for r1
250# endif
251	eor	r0,r6,r6,ror#5
252	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
253	eor	r0,r0,r6,ror#19	@ Sigma1(e)
254# ifndef __ARMEB__
255	rev	r2,r2
256# endif
257#else
258	@ ldrb	r2,[r1,#3]			@ 2
259	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
260	ldrb	r12,[r1,#2]
261	ldrb	r0,[r1,#1]
262	orr	r2,r2,r12,lsl#8
263	ldrb	r12,[r1],#4
264	orr	r2,r2,r0,lsl#16
265# if 2==15
266	str	r1,[sp,#17*4]			@ make room for r1
267# endif
268	eor	r0,r6,r6,ror#5
269	orr	r2,r2,r12,lsl#24
270	eor	r0,r0,r6,ror#19	@ Sigma1(e)
271#endif
272	ldr	r12,[r14],#4			@ *K256++
273	add	r9,r9,r2			@ h+=X[i]
274	str	r2,[sp,#2*4]
275	eor	r2,r7,r8
276	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
277	and	r2,r2,r6
278	add	r9,r9,r12			@ h+=K256[i]
279	eor	r2,r2,r8			@ Ch(e,f,g)
280	eor	r0,r10,r10,ror#11
281	add	r9,r9,r2			@ h+=Ch(e,f,g)
282#if 2==31
283	and	r12,r12,#0xff
284	cmp	r12,#0xf2			@ done?
285#endif
286#if 2<15
287# if __ARM_ARCH__>=7
288	ldr	r2,[r1],#4			@ prefetch
289# else
290	ldrb	r2,[r1,#3]
291# endif
292	eor	r12,r10,r11			@ a^b, b^c in next round
293#else
294	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
295	eor	r12,r10,r11			@ a^b, b^c in next round
296	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
297#endif
298	eor	r0,r0,r10,ror#20	@ Sigma0(a)
299	and	r3,r3,r12			@ (b^c)&=(a^b)
300	add	r5,r5,r9			@ d+=h
301	eor	r3,r3,r11			@ Maj(a,b,c)
302	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
303	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
304#if __ARM_ARCH__>=7
305	@ ldr	r2,[r1],#4			@ 3
306# if 3==15
307	str	r1,[sp,#17*4]			@ make room for r1
308# endif
309	eor	r0,r5,r5,ror#5
310	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
311	eor	r0,r0,r5,ror#19	@ Sigma1(e)
312# ifndef __ARMEB__
313	rev	r2,r2
314# endif
315#else
316	@ ldrb	r2,[r1,#3]			@ 3
317	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
318	ldrb	r3,[r1,#2]
319	ldrb	r0,[r1,#1]
320	orr	r2,r2,r3,lsl#8
321	ldrb	r3,[r1],#4
322	orr	r2,r2,r0,lsl#16
323# if 3==15
324	str	r1,[sp,#17*4]			@ make room for r1
325# endif
326	eor	r0,r5,r5,ror#5
327	orr	r2,r2,r3,lsl#24
328	eor	r0,r0,r5,ror#19	@ Sigma1(e)
329#endif
330	ldr	r3,[r14],#4			@ *K256++
331	add	r8,r8,r2			@ h+=X[i]
332	str	r2,[sp,#3*4]
333	eor	r2,r6,r7
334	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
335	and	r2,r2,r5
336	add	r8,r8,r3			@ h+=K256[i]
337	eor	r2,r2,r7			@ Ch(e,f,g)
338	eor	r0,r9,r9,ror#11
339	add	r8,r8,r2			@ h+=Ch(e,f,g)
340#if 3==31
341	and	r3,r3,#0xff
342	cmp	r3,#0xf2			@ done?
343#endif
344#if 3<15
345# if __ARM_ARCH__>=7
346	ldr	r2,[r1],#4			@ prefetch
347# else
348	ldrb	r2,[r1,#3]
349# endif
350	eor	r3,r9,r10			@ a^b, b^c in next round
351#else
352	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
353	eor	r3,r9,r10			@ a^b, b^c in next round
354	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
355#endif
356	eor	r0,r0,r9,ror#20	@ Sigma0(a)
357	and	r12,r12,r3			@ (b^c)&=(a^b)
358	add	r4,r4,r8			@ d+=h
359	eor	r12,r12,r10			@ Maj(a,b,c)
360	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
361	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
362#if __ARM_ARCH__>=7
363	@ ldr	r2,[r1],#4			@ 4
364# if 4==15
365	str	r1,[sp,#17*4]			@ make room for r1
366# endif
367	eor	r0,r4,r4,ror#5
368	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
369	eor	r0,r0,r4,ror#19	@ Sigma1(e)
370# ifndef __ARMEB__
371	rev	r2,r2
372# endif
373#else
374	@ ldrb	r2,[r1,#3]			@ 4
375	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
376	ldrb	r12,[r1,#2]
377	ldrb	r0,[r1,#1]
378	orr	r2,r2,r12,lsl#8
379	ldrb	r12,[r1],#4
380	orr	r2,r2,r0,lsl#16
381# if 4==15
382	str	r1,[sp,#17*4]			@ make room for r1
383# endif
384	eor	r0,r4,r4,ror#5
385	orr	r2,r2,r12,lsl#24
386	eor	r0,r0,r4,ror#19	@ Sigma1(e)
387#endif
388	ldr	r12,[r14],#4			@ *K256++
389	add	r7,r7,r2			@ h+=X[i]
390	str	r2,[sp,#4*4]
391	eor	r2,r5,r6
392	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
393	and	r2,r2,r4
394	add	r7,r7,r12			@ h+=K256[i]
395	eor	r2,r2,r6			@ Ch(e,f,g)
396	eor	r0,r8,r8,ror#11
397	add	r7,r7,r2			@ h+=Ch(e,f,g)
398#if 4==31
399	and	r12,r12,#0xff
400	cmp	r12,#0xf2			@ done?
401#endif
402#if 4<15
403# if __ARM_ARCH__>=7
404	ldr	r2,[r1],#4			@ prefetch
405# else
406	ldrb	r2,[r1,#3]
407# endif
408	eor	r12,r8,r9			@ a^b, b^c in next round
409#else
410	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
411	eor	r12,r8,r9			@ a^b, b^c in next round
412	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
413#endif
414	eor	r0,r0,r8,ror#20	@ Sigma0(a)
415	and	r3,r3,r12			@ (b^c)&=(a^b)
416	add	r11,r11,r7			@ d+=h
417	eor	r3,r3,r9			@ Maj(a,b,c)
418	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
419	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
420#if __ARM_ARCH__>=7
421	@ ldr	r2,[r1],#4			@ 5
422# if 5==15
423	str	r1,[sp,#17*4]			@ make room for r1
424# endif
425	eor	r0,r11,r11,ror#5
426	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
427	eor	r0,r0,r11,ror#19	@ Sigma1(e)
428# ifndef __ARMEB__
429	rev	r2,r2
430# endif
431#else
432	@ ldrb	r2,[r1,#3]			@ 5
433	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
434	ldrb	r3,[r1,#2]
435	ldrb	r0,[r1,#1]
436	orr	r2,r2,r3,lsl#8
437	ldrb	r3,[r1],#4
438	orr	r2,r2,r0,lsl#16
439# if 5==15
440	str	r1,[sp,#17*4]			@ make room for r1
441# endif
442	eor	r0,r11,r11,ror#5
443	orr	r2,r2,r3,lsl#24
444	eor	r0,r0,r11,ror#19	@ Sigma1(e)
445#endif
446	ldr	r3,[r14],#4			@ *K256++
447	add	r6,r6,r2			@ h+=X[i]
448	str	r2,[sp,#5*4]
449	eor	r2,r4,r5
450	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
451	and	r2,r2,r11
452	add	r6,r6,r3			@ h+=K256[i]
453	eor	r2,r2,r5			@ Ch(e,f,g)
454	eor	r0,r7,r7,ror#11
455	add	r6,r6,r2			@ h+=Ch(e,f,g)
456#if 5==31
457	and	r3,r3,#0xff
458	cmp	r3,#0xf2			@ done?
459#endif
460#if 5<15
461# if __ARM_ARCH__>=7
462	ldr	r2,[r1],#4			@ prefetch
463# else
464	ldrb	r2,[r1,#3]
465# endif
466	eor	r3,r7,r8			@ a^b, b^c in next round
467#else
468	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
469	eor	r3,r7,r8			@ a^b, b^c in next round
470	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
471#endif
472	eor	r0,r0,r7,ror#20	@ Sigma0(a)
473	and	r12,r12,r3			@ (b^c)&=(a^b)
474	add	r10,r10,r6			@ d+=h
475	eor	r12,r12,r8			@ Maj(a,b,c)
476	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
477	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
478#if __ARM_ARCH__>=7
479	@ ldr	r2,[r1],#4			@ 6
480# if 6==15
481	str	r1,[sp,#17*4]			@ make room for r1
482# endif
483	eor	r0,r10,r10,ror#5
484	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
485	eor	r0,r0,r10,ror#19	@ Sigma1(e)
486# ifndef __ARMEB__
487	rev	r2,r2
488# endif
489#else
490	@ ldrb	r2,[r1,#3]			@ 6
491	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
492	ldrb	r12,[r1,#2]
493	ldrb	r0,[r1,#1]
494	orr	r2,r2,r12,lsl#8
495	ldrb	r12,[r1],#4
496	orr	r2,r2,r0,lsl#16
497# if 6==15
498	str	r1,[sp,#17*4]			@ make room for r1
499# endif
500	eor	r0,r10,r10,ror#5
501	orr	r2,r2,r12,lsl#24
502	eor	r0,r0,r10,ror#19	@ Sigma1(e)
503#endif
504	ldr	r12,[r14],#4			@ *K256++
505	add	r5,r5,r2			@ h+=X[i]
506	str	r2,[sp,#6*4]
507	eor	r2,r11,r4
508	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
509	and	r2,r2,r10
510	add	r5,r5,r12			@ h+=K256[i]
511	eor	r2,r2,r4			@ Ch(e,f,g)
512	eor	r0,r6,r6,ror#11
513	add	r5,r5,r2			@ h+=Ch(e,f,g)
514#if 6==31
515	and	r12,r12,#0xff
516	cmp	r12,#0xf2			@ done?
517#endif
518#if 6<15
519# if __ARM_ARCH__>=7
520	ldr	r2,[r1],#4			@ prefetch
521# else
522	ldrb	r2,[r1,#3]
523# endif
524	eor	r12,r6,r7			@ a^b, b^c in next round
525#else
526	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
527	eor	r12,r6,r7			@ a^b, b^c in next round
528	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
529#endif
530	eor	r0,r0,r6,ror#20	@ Sigma0(a)
531	and	r3,r3,r12			@ (b^c)&=(a^b)
532	add	r9,r9,r5			@ d+=h
533	eor	r3,r3,r7			@ Maj(a,b,c)
534	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
535	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
536#if __ARM_ARCH__>=7
537	@ ldr	r2,[r1],#4			@ 7
538# if 7==15
539	str	r1,[sp,#17*4]			@ make room for r1
540# endif
541	eor	r0,r9,r9,ror#5
542	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
543	eor	r0,r0,r9,ror#19	@ Sigma1(e)
544# ifndef __ARMEB__
545	rev	r2,r2
546# endif
547#else
548	@ ldrb	r2,[r1,#3]			@ 7
549	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
550	ldrb	r3,[r1,#2]
551	ldrb	r0,[r1,#1]
552	orr	r2,r2,r3,lsl#8
553	ldrb	r3,[r1],#4
554	orr	r2,r2,r0,lsl#16
555# if 7==15
556	str	r1,[sp,#17*4]			@ make room for r1
557# endif
558	eor	r0,r9,r9,ror#5
559	orr	r2,r2,r3,lsl#24
560	eor	r0,r0,r9,ror#19	@ Sigma1(e)
561#endif
562	ldr	r3,[r14],#4			@ *K256++
563	add	r4,r4,r2			@ h+=X[i]
564	str	r2,[sp,#7*4]
565	eor	r2,r10,r11
566	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
567	and	r2,r2,r9
568	add	r4,r4,r3			@ h+=K256[i]
569	eor	r2,r2,r11			@ Ch(e,f,g)
570	eor	r0,r5,r5,ror#11
571	add	r4,r4,r2			@ h+=Ch(e,f,g)
572#if 7==31
573	and	r3,r3,#0xff
574	cmp	r3,#0xf2			@ done?
575#endif
576#if 7<15
577# if __ARM_ARCH__>=7
578	ldr	r2,[r1],#4			@ prefetch
579# else
580	ldrb	r2,[r1,#3]
581# endif
582	eor	r3,r5,r6			@ a^b, b^c in next round
583#else
584	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
585	eor	r3,r5,r6			@ a^b, b^c in next round
586	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
587#endif
588	eor	r0,r0,r5,ror#20	@ Sigma0(a)
589	and	r12,r12,r3			@ (b^c)&=(a^b)
590	add	r8,r8,r4			@ d+=h
591	eor	r12,r12,r6			@ Maj(a,b,c)
592	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
593	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
594#if __ARM_ARCH__>=7
595	@ ldr	r2,[r1],#4			@ 8
596# if 8==15
597	str	r1,[sp,#17*4]			@ make room for r1
598# endif
599	eor	r0,r8,r8,ror#5
600	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
601	eor	r0,r0,r8,ror#19	@ Sigma1(e)
602# ifndef __ARMEB__
603	rev	r2,r2
604# endif
605#else
606	@ ldrb	r2,[r1,#3]			@ 8
607	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
608	ldrb	r12,[r1,#2]
609	ldrb	r0,[r1,#1]
610	orr	r2,r2,r12,lsl#8
611	ldrb	r12,[r1],#4
612	orr	r2,r2,r0,lsl#16
613# if 8==15
614	str	r1,[sp,#17*4]			@ make room for r1
615# endif
616	eor	r0,r8,r8,ror#5
617	orr	r2,r2,r12,lsl#24
618	eor	r0,r0,r8,ror#19	@ Sigma1(e)
619#endif
620	ldr	r12,[r14],#4			@ *K256++
621	add	r11,r11,r2			@ h+=X[i]
622	str	r2,[sp,#8*4]
623	eor	r2,r9,r10
624	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
625	and	r2,r2,r8
626	add	r11,r11,r12			@ h+=K256[i]
627	eor	r2,r2,r10			@ Ch(e,f,g)
628	eor	r0,r4,r4,ror#11
629	add	r11,r11,r2			@ h+=Ch(e,f,g)
630#if 8==31
631	and	r12,r12,#0xff
632	cmp	r12,#0xf2			@ done?
633#endif
634#if 8<15
635# if __ARM_ARCH__>=7
636	ldr	r2,[r1],#4			@ prefetch
637# else
638	ldrb	r2,[r1,#3]
639# endif
640	eor	r12,r4,r5			@ a^b, b^c in next round
641#else
642	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
643	eor	r12,r4,r5			@ a^b, b^c in next round
644	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
645#endif
646	eor	r0,r0,r4,ror#20	@ Sigma0(a)
647	and	r3,r3,r12			@ (b^c)&=(a^b)
648	add	r7,r7,r11			@ d+=h
649	eor	r3,r3,r5			@ Maj(a,b,c)
650	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
651	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
652#if __ARM_ARCH__>=7
653	@ ldr	r2,[r1],#4			@ 9
654# if 9==15
655	str	r1,[sp,#17*4]			@ make room for r1
656# endif
657	eor	r0,r7,r7,ror#5
658	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
659	eor	r0,r0,r7,ror#19	@ Sigma1(e)
660# ifndef __ARMEB__
661	rev	r2,r2
662# endif
663#else
664	@ ldrb	r2,[r1,#3]			@ 9
665	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
666	ldrb	r3,[r1,#2]
667	ldrb	r0,[r1,#1]
668	orr	r2,r2,r3,lsl#8
669	ldrb	r3,[r1],#4
670	orr	r2,r2,r0,lsl#16
671# if 9==15
672	str	r1,[sp,#17*4]			@ make room for r1
673# endif
674	eor	r0,r7,r7,ror#5
675	orr	r2,r2,r3,lsl#24
676	eor	r0,r0,r7,ror#19	@ Sigma1(e)
677#endif
678	ldr	r3,[r14],#4			@ *K256++
679	add	r10,r10,r2			@ h+=X[i]
680	str	r2,[sp,#9*4]
681	eor	r2,r8,r9
682	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
683	and	r2,r2,r7
684	add	r10,r10,r3			@ h+=K256[i]
685	eor	r2,r2,r9			@ Ch(e,f,g)
686	eor	r0,r11,r11,ror#11
687	add	r10,r10,r2			@ h+=Ch(e,f,g)
688#if 9==31
689	and	r3,r3,#0xff
690	cmp	r3,#0xf2			@ done?
691#endif
692#if 9<15
693# if __ARM_ARCH__>=7
694	ldr	r2,[r1],#4			@ prefetch
695# else
696	ldrb	r2,[r1,#3]
697# endif
698	eor	r3,r11,r4			@ a^b, b^c in next round
699#else
700	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
701	eor	r3,r11,r4			@ a^b, b^c in next round
702	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
703#endif
704	eor	r0,r0,r11,ror#20	@ Sigma0(a)
705	and	r12,r12,r3			@ (b^c)&=(a^b)
706	add	r6,r6,r10			@ d+=h
707	eor	r12,r12,r4			@ Maj(a,b,c)
708	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
709	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
710#if __ARM_ARCH__>=7
711	@ ldr	r2,[r1],#4			@ 10
712# if 10==15
713	str	r1,[sp,#17*4]			@ make room for r1
714# endif
715	eor	r0,r6,r6,ror#5
716	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
717	eor	r0,r0,r6,ror#19	@ Sigma1(e)
718# ifndef __ARMEB__
719	rev	r2,r2
720# endif
721#else
722	@ ldrb	r2,[r1,#3]			@ 10
723	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
724	ldrb	r12,[r1,#2]
725	ldrb	r0,[r1,#1]
726	orr	r2,r2,r12,lsl#8
727	ldrb	r12,[r1],#4
728	orr	r2,r2,r0,lsl#16
729# if 10==15
730	str	r1,[sp,#17*4]			@ make room for r1
731# endif
732	eor	r0,r6,r6,ror#5
733	orr	r2,r2,r12,lsl#24
734	eor	r0,r0,r6,ror#19	@ Sigma1(e)
735#endif
736	ldr	r12,[r14],#4			@ *K256++
737	add	r9,r9,r2			@ h+=X[i]
738	str	r2,[sp,#10*4]
739	eor	r2,r7,r8
740	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
741	and	r2,r2,r6
742	add	r9,r9,r12			@ h+=K256[i]
743	eor	r2,r2,r8			@ Ch(e,f,g)
744	eor	r0,r10,r10,ror#11
745	add	r9,r9,r2			@ h+=Ch(e,f,g)
746#if 10==31
747	and	r12,r12,#0xff
748	cmp	r12,#0xf2			@ done?
749#endif
750#if 10<15
751# if __ARM_ARCH__>=7
752	ldr	r2,[r1],#4			@ prefetch
753# else
754	ldrb	r2,[r1,#3]
755# endif
756	eor	r12,r10,r11			@ a^b, b^c in next round
757#else
758	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
759	eor	r12,r10,r11			@ a^b, b^c in next round
760	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
761#endif
762	eor	r0,r0,r10,ror#20	@ Sigma0(a)
763	and	r3,r3,r12			@ (b^c)&=(a^b)
764	add	r5,r5,r9			@ d+=h
765	eor	r3,r3,r11			@ Maj(a,b,c)
766	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
767	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
768#if __ARM_ARCH__>=7
769	@ ldr	r2,[r1],#4			@ 11
770# if 11==15
771	str	r1,[sp,#17*4]			@ make room for r1
772# endif
773	eor	r0,r5,r5,ror#5
774	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
775	eor	r0,r0,r5,ror#19	@ Sigma1(e)
776# ifndef __ARMEB__
777	rev	r2,r2
778# endif
779#else
780	@ ldrb	r2,[r1,#3]			@ 11
781	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
782	ldrb	r3,[r1,#2]
783	ldrb	r0,[r1,#1]
784	orr	r2,r2,r3,lsl#8
785	ldrb	r3,[r1],#4
786	orr	r2,r2,r0,lsl#16
787# if 11==15
788	str	r1,[sp,#17*4]			@ make room for r1
789# endif
790	eor	r0,r5,r5,ror#5
791	orr	r2,r2,r3,lsl#24
792	eor	r0,r0,r5,ror#19	@ Sigma1(e)
793#endif
794	ldr	r3,[r14],#4			@ *K256++
795	add	r8,r8,r2			@ h+=X[i]
796	str	r2,[sp,#11*4]
797	eor	r2,r6,r7
798	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
799	and	r2,r2,r5
800	add	r8,r8,r3			@ h+=K256[i]
801	eor	r2,r2,r7			@ Ch(e,f,g)
802	eor	r0,r9,r9,ror#11
803	add	r8,r8,r2			@ h+=Ch(e,f,g)
804#if 11==31
805	and	r3,r3,#0xff
806	cmp	r3,#0xf2			@ done?
807#endif
808#if 11<15
809# if __ARM_ARCH__>=7
810	ldr	r2,[r1],#4			@ prefetch
811# else
812	ldrb	r2,[r1,#3]
813# endif
814	eor	r3,r9,r10			@ a^b, b^c in next round
815#else
816	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
817	eor	r3,r9,r10			@ a^b, b^c in next round
818	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
819#endif
820	eor	r0,r0,r9,ror#20	@ Sigma0(a)
821	and	r12,r12,r3			@ (b^c)&=(a^b)
822	add	r4,r4,r8			@ d+=h
823	eor	r12,r12,r10			@ Maj(a,b,c)
824	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
825	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
826#if __ARM_ARCH__>=7
827	@ ldr	r2,[r1],#4			@ 12
828# if 12==15
829	str	r1,[sp,#17*4]			@ make room for r1
830# endif
831	eor	r0,r4,r4,ror#5
832	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
833	eor	r0,r0,r4,ror#19	@ Sigma1(e)
834# ifndef __ARMEB__
835	rev	r2,r2
836# endif
837#else
838	@ ldrb	r2,[r1,#3]			@ 12
839	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
840	ldrb	r12,[r1,#2]
841	ldrb	r0,[r1,#1]
842	orr	r2,r2,r12,lsl#8
843	ldrb	r12,[r1],#4
844	orr	r2,r2,r0,lsl#16
845# if 12==15
846	str	r1,[sp,#17*4]			@ make room for r1
847# endif
848	eor	r0,r4,r4,ror#5
849	orr	r2,r2,r12,lsl#24
850	eor	r0,r0,r4,ror#19	@ Sigma1(e)
851#endif
852	ldr	r12,[r14],#4			@ *K256++
853	add	r7,r7,r2			@ h+=X[i]
854	str	r2,[sp,#12*4]
855	eor	r2,r5,r6
856	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
857	and	r2,r2,r4
858	add	r7,r7,r12			@ h+=K256[i]
859	eor	r2,r2,r6			@ Ch(e,f,g)
860	eor	r0,r8,r8,ror#11
861	add	r7,r7,r2			@ h+=Ch(e,f,g)
862#if 12==31
863	and	r12,r12,#0xff
864	cmp	r12,#0xf2			@ done?
865#endif
866#if 12<15
867# if __ARM_ARCH__>=7
868	ldr	r2,[r1],#4			@ prefetch
869# else
870	ldrb	r2,[r1,#3]
871# endif
872	eor	r12,r8,r9			@ a^b, b^c in next round
873#else
874	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
875	eor	r12,r8,r9			@ a^b, b^c in next round
876	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
877#endif
878	eor	r0,r0,r8,ror#20	@ Sigma0(a)
879	and	r3,r3,r12			@ (b^c)&=(a^b)
880	add	r11,r11,r7			@ d+=h
881	eor	r3,r3,r9			@ Maj(a,b,c)
882	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
883	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
884#if __ARM_ARCH__>=7
885	@ ldr	r2,[r1],#4			@ 13
886# if 13==15
887	str	r1,[sp,#17*4]			@ make room for r1
888# endif
889	eor	r0,r11,r11,ror#5
890	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
891	eor	r0,r0,r11,ror#19	@ Sigma1(e)
892# ifndef __ARMEB__
893	rev	r2,r2
894# endif
895#else
896	@ ldrb	r2,[r1,#3]			@ 13
897	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
898	ldrb	r3,[r1,#2]
899	ldrb	r0,[r1,#1]
900	orr	r2,r2,r3,lsl#8
901	ldrb	r3,[r1],#4
902	orr	r2,r2,r0,lsl#16
903# if 13==15
904	str	r1,[sp,#17*4]			@ make room for r1
905# endif
906	eor	r0,r11,r11,ror#5
907	orr	r2,r2,r3,lsl#24
908	eor	r0,r0,r11,ror#19	@ Sigma1(e)
909#endif
910	ldr	r3,[r14],#4			@ *K256++
911	add	r6,r6,r2			@ h+=X[i]
912	str	r2,[sp,#13*4]
913	eor	r2,r4,r5
914	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
915	and	r2,r2,r11
916	add	r6,r6,r3			@ h+=K256[i]
917	eor	r2,r2,r5			@ Ch(e,f,g)
918	eor	r0,r7,r7,ror#11
919	add	r6,r6,r2			@ h+=Ch(e,f,g)
920#if 13==31
921	and	r3,r3,#0xff
922	cmp	r3,#0xf2			@ done?
923#endif
924#if 13<15
925# if __ARM_ARCH__>=7
926	ldr	r2,[r1],#4			@ prefetch
927# else
928	ldrb	r2,[r1,#3]
929# endif
930	eor	r3,r7,r8			@ a^b, b^c in next round
931#else
932	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
933	eor	r3,r7,r8			@ a^b, b^c in next round
934	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
935#endif
936	eor	r0,r0,r7,ror#20	@ Sigma0(a)
937	and	r12,r12,r3			@ (b^c)&=(a^b)
938	add	r10,r10,r6			@ d+=h
939	eor	r12,r12,r8			@ Maj(a,b,c)
940	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
941	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
942#if __ARM_ARCH__>=7
943	@ ldr	r2,[r1],#4			@ 14
944# if 14==15
945	str	r1,[sp,#17*4]			@ make room for r1
946# endif
947	eor	r0,r10,r10,ror#5
948	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
949	eor	r0,r0,r10,ror#19	@ Sigma1(e)
950# ifndef __ARMEB__
951	rev	r2,r2
952# endif
953#else
954	@ ldrb	r2,[r1,#3]			@ 14
955	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
956	ldrb	r12,[r1,#2]
957	ldrb	r0,[r1,#1]
958	orr	r2,r2,r12,lsl#8
959	ldrb	r12,[r1],#4
960	orr	r2,r2,r0,lsl#16
961# if 14==15
962	str	r1,[sp,#17*4]			@ make room for r1
963# endif
964	eor	r0,r10,r10,ror#5
965	orr	r2,r2,r12,lsl#24
966	eor	r0,r0,r10,ror#19	@ Sigma1(e)
967#endif
968	ldr	r12,[r14],#4			@ *K256++
969	add	r5,r5,r2			@ h+=X[i]
970	str	r2,[sp,#14*4]
971	eor	r2,r11,r4
972	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
973	and	r2,r2,r10
974	add	r5,r5,r12			@ h+=K256[i]
975	eor	r2,r2,r4			@ Ch(e,f,g)
976	eor	r0,r6,r6,ror#11
977	add	r5,r5,r2			@ h+=Ch(e,f,g)
978#if 14==31
979	and	r12,r12,#0xff
980	cmp	r12,#0xf2			@ done?
981#endif
982#if 14<15
983# if __ARM_ARCH__>=7
984	ldr	r2,[r1],#4			@ prefetch
985# else
986	ldrb	r2,[r1,#3]
987# endif
988	eor	r12,r6,r7			@ a^b, b^c in next round
989#else
990	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
991	eor	r12,r6,r7			@ a^b, b^c in next round
992	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
993#endif
994	eor	r0,r0,r6,ror#20	@ Sigma0(a)
995	and	r3,r3,r12			@ (b^c)&=(a^b)
996	add	r9,r9,r5			@ d+=h
997	eor	r3,r3,r7			@ Maj(a,b,c)
998	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
999	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1000#if __ARM_ARCH__>=7
1001	@ ldr	r2,[r1],#4			@ 15
1002# if 15==15
1003	str	r1,[sp,#17*4]			@ make room for r1
1004# endif
1005	eor	r0,r9,r9,ror#5
1006	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1007	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1008# ifndef __ARMEB__
1009	rev	r2,r2
1010# endif
1011#else
1012	@ ldrb	r2,[r1,#3]			@ 15
1013	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1014	ldrb	r3,[r1,#2]
1015	ldrb	r0,[r1,#1]
1016	orr	r2,r2,r3,lsl#8
1017	ldrb	r3,[r1],#4
1018	orr	r2,r2,r0,lsl#16
1019# if 15==15
1020	str	r1,[sp,#17*4]			@ make room for r1
1021# endif
1022	eor	r0,r9,r9,ror#5
1023	orr	r2,r2,r3,lsl#24
1024	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1025#endif
1026	ldr	r3,[r14],#4			@ *K256++
1027	add	r4,r4,r2			@ h+=X[i]
1028	str	r2,[sp,#15*4]
1029	eor	r2,r10,r11
1030	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1031	and	r2,r2,r9
1032	add	r4,r4,r3			@ h+=K256[i]
1033	eor	r2,r2,r11			@ Ch(e,f,g)
1034	eor	r0,r5,r5,ror#11
1035	add	r4,r4,r2			@ h+=Ch(e,f,g)
1036#if 15==31
1037	and	r3,r3,#0xff
1038	cmp	r3,#0xf2			@ done?
1039#endif
1040#if 15<15
1041# if __ARM_ARCH__>=7
1042	ldr	r2,[r1],#4			@ prefetch
1043# else
1044	ldrb	r2,[r1,#3]
1045# endif
1046	eor	r3,r5,r6			@ a^b, b^c in next round
1047#else
1048	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1049	eor	r3,r5,r6			@ a^b, b^c in next round
1050	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1051#endif
1052	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1053	and	r12,r12,r3			@ (b^c)&=(a^b)
1054	add	r8,r8,r4			@ d+=h
1055	eor	r12,r12,r6			@ Maj(a,b,c)
1056	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1057	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1058.Lrounds_16_xx:
1059	@ ldr	r2,[sp,#1*4]		@ 16
1060	@ ldr	r1,[sp,#14*4]
1061	mov	r0,r2,ror#7
1062	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1063	mov	r12,r1,ror#17
1064	eor	r0,r0,r2,ror#18
1065	eor	r12,r12,r1,ror#19
1066	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1067	ldr	r2,[sp,#0*4]
1068	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1069	ldr	r1,[sp,#9*4]
1070
1071	add	r12,r12,r0
1072	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1073	add	r2,r2,r12
1074	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1075	add	r2,r2,r1			@ X[i]
1076	ldr	r12,[r14],#4			@ *K256++
1077	add	r11,r11,r2			@ h+=X[i]
1078	str	r2,[sp,#0*4]
1079	eor	r2,r9,r10
1080	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1081	and	r2,r2,r8
1082	add	r11,r11,r12			@ h+=K256[i]
1083	eor	r2,r2,r10			@ Ch(e,f,g)
1084	eor	r0,r4,r4,ror#11
1085	add	r11,r11,r2			@ h+=Ch(e,f,g)
1086#if 16==31
1087	and	r12,r12,#0xff
1088	cmp	r12,#0xf2			@ done?
1089#endif
1090#if 16<15
1091# if __ARM_ARCH__>=7
1092	ldr	r2,[r1],#4			@ prefetch
1093# else
1094	ldrb	r2,[r1,#3]
1095# endif
1096	eor	r12,r4,r5			@ a^b, b^c in next round
1097#else
1098	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1099	eor	r12,r4,r5			@ a^b, b^c in next round
1100	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1101#endif
1102	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1103	and	r3,r3,r12			@ (b^c)&=(a^b)
1104	add	r7,r7,r11			@ d+=h
1105	eor	r3,r3,r5			@ Maj(a,b,c)
1106	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1107	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1108	@ ldr	r2,[sp,#2*4]		@ 17
1109	@ ldr	r1,[sp,#15*4]
1110	mov	r0,r2,ror#7
1111	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1112	mov	r3,r1,ror#17
1113	eor	r0,r0,r2,ror#18
1114	eor	r3,r3,r1,ror#19
1115	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1116	ldr	r2,[sp,#1*4]
1117	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1118	ldr	r1,[sp,#10*4]
1119
1120	add	r3,r3,r0
1121	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1122	add	r2,r2,r3
1123	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1124	add	r2,r2,r1			@ X[i]
1125	ldr	r3,[r14],#4			@ *K256++
1126	add	r10,r10,r2			@ h+=X[i]
1127	str	r2,[sp,#1*4]
1128	eor	r2,r8,r9
1129	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1130	and	r2,r2,r7
1131	add	r10,r10,r3			@ h+=K256[i]
1132	eor	r2,r2,r9			@ Ch(e,f,g)
1133	eor	r0,r11,r11,ror#11
1134	add	r10,r10,r2			@ h+=Ch(e,f,g)
1135#if 17==31
1136	and	r3,r3,#0xff
1137	cmp	r3,#0xf2			@ done?
1138#endif
1139#if 17<15
1140# if __ARM_ARCH__>=7
1141	ldr	r2,[r1],#4			@ prefetch
1142# else
1143	ldrb	r2,[r1,#3]
1144# endif
1145	eor	r3,r11,r4			@ a^b, b^c in next round
1146#else
1147	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1148	eor	r3,r11,r4			@ a^b, b^c in next round
1149	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1150#endif
1151	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1152	and	r12,r12,r3			@ (b^c)&=(a^b)
1153	add	r6,r6,r10			@ d+=h
1154	eor	r12,r12,r4			@ Maj(a,b,c)
1155	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1156	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1157	@ ldr	r2,[sp,#3*4]		@ 18
1158	@ ldr	r1,[sp,#0*4]
1159	mov	r0,r2,ror#7
1160	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1161	mov	r12,r1,ror#17
1162	eor	r0,r0,r2,ror#18
1163	eor	r12,r12,r1,ror#19
1164	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1165	ldr	r2,[sp,#2*4]
1166	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1167	ldr	r1,[sp,#11*4]
1168
1169	add	r12,r12,r0
1170	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1171	add	r2,r2,r12
1172	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1173	add	r2,r2,r1			@ X[i]
1174	ldr	r12,[r14],#4			@ *K256++
1175	add	r9,r9,r2			@ h+=X[i]
1176	str	r2,[sp,#2*4]
1177	eor	r2,r7,r8
1178	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1179	and	r2,r2,r6
1180	add	r9,r9,r12			@ h+=K256[i]
1181	eor	r2,r2,r8			@ Ch(e,f,g)
1182	eor	r0,r10,r10,ror#11
1183	add	r9,r9,r2			@ h+=Ch(e,f,g)
1184#if 18==31
1185	and	r12,r12,#0xff
1186	cmp	r12,#0xf2			@ done?
1187#endif
1188#if 18<15
1189# if __ARM_ARCH__>=7
1190	ldr	r2,[r1],#4			@ prefetch
1191# else
1192	ldrb	r2,[r1,#3]
1193# endif
1194	eor	r12,r10,r11			@ a^b, b^c in next round
1195#else
1196	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1197	eor	r12,r10,r11			@ a^b, b^c in next round
1198	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1199#endif
1200	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1201	and	r3,r3,r12			@ (b^c)&=(a^b)
1202	add	r5,r5,r9			@ d+=h
1203	eor	r3,r3,r11			@ Maj(a,b,c)
1204	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1205	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1206	@ ldr	r2,[sp,#4*4]		@ 19
1207	@ ldr	r1,[sp,#1*4]
1208	mov	r0,r2,ror#7
1209	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1210	mov	r3,r1,ror#17
1211	eor	r0,r0,r2,ror#18
1212	eor	r3,r3,r1,ror#19
1213	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1214	ldr	r2,[sp,#3*4]
1215	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1216	ldr	r1,[sp,#12*4]
1217
1218	add	r3,r3,r0
1219	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1220	add	r2,r2,r3
1221	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1222	add	r2,r2,r1			@ X[i]
1223	ldr	r3,[r14],#4			@ *K256++
1224	add	r8,r8,r2			@ h+=X[i]
1225	str	r2,[sp,#3*4]
1226	eor	r2,r6,r7
1227	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1228	and	r2,r2,r5
1229	add	r8,r8,r3			@ h+=K256[i]
1230	eor	r2,r2,r7			@ Ch(e,f,g)
1231	eor	r0,r9,r9,ror#11
1232	add	r8,r8,r2			@ h+=Ch(e,f,g)
1233#if 19==31
1234	and	r3,r3,#0xff
1235	cmp	r3,#0xf2			@ done?
1236#endif
1237#if 19<15
1238# if __ARM_ARCH__>=7
1239	ldr	r2,[r1],#4			@ prefetch
1240# else
1241	ldrb	r2,[r1,#3]
1242# endif
1243	eor	r3,r9,r10			@ a^b, b^c in next round
1244#else
1245	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1246	eor	r3,r9,r10			@ a^b, b^c in next round
1247	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1248#endif
1249	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1250	and	r12,r12,r3			@ (b^c)&=(a^b)
1251	add	r4,r4,r8			@ d+=h
1252	eor	r12,r12,r10			@ Maj(a,b,c)
1253	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1254	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1255	@ ldr	r2,[sp,#5*4]		@ 20
1256	@ ldr	r1,[sp,#2*4]
1257	mov	r0,r2,ror#7
1258	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1259	mov	r12,r1,ror#17
1260	eor	r0,r0,r2,ror#18
1261	eor	r12,r12,r1,ror#19
1262	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1263	ldr	r2,[sp,#4*4]
1264	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1265	ldr	r1,[sp,#13*4]
1266
1267	add	r12,r12,r0
1268	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1269	add	r2,r2,r12
1270	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1271	add	r2,r2,r1			@ X[i]
1272	ldr	r12,[r14],#4			@ *K256++
1273	add	r7,r7,r2			@ h+=X[i]
1274	str	r2,[sp,#4*4]
1275	eor	r2,r5,r6
1276	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1277	and	r2,r2,r4
1278	add	r7,r7,r12			@ h+=K256[i]
1279	eor	r2,r2,r6			@ Ch(e,f,g)
1280	eor	r0,r8,r8,ror#11
1281	add	r7,r7,r2			@ h+=Ch(e,f,g)
1282#if 20==31
1283	and	r12,r12,#0xff
1284	cmp	r12,#0xf2			@ done?
1285#endif
1286#if 20<15
1287# if __ARM_ARCH__>=7
1288	ldr	r2,[r1],#4			@ prefetch
1289# else
1290	ldrb	r2,[r1,#3]
1291# endif
1292	eor	r12,r8,r9			@ a^b, b^c in next round
1293#else
1294	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1295	eor	r12,r8,r9			@ a^b, b^c in next round
1296	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1297#endif
1298	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1299	and	r3,r3,r12			@ (b^c)&=(a^b)
1300	add	r11,r11,r7			@ d+=h
1301	eor	r3,r3,r9			@ Maj(a,b,c)
1302	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1303	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1304	@ ldr	r2,[sp,#6*4]		@ 21
1305	@ ldr	r1,[sp,#3*4]
1306	mov	r0,r2,ror#7
1307	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1308	mov	r3,r1,ror#17
1309	eor	r0,r0,r2,ror#18
1310	eor	r3,r3,r1,ror#19
1311	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1312	ldr	r2,[sp,#5*4]
1313	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1314	ldr	r1,[sp,#14*4]
1315
1316	add	r3,r3,r0
1317	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1318	add	r2,r2,r3
1319	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1320	add	r2,r2,r1			@ X[i]
1321	ldr	r3,[r14],#4			@ *K256++
1322	add	r6,r6,r2			@ h+=X[i]
1323	str	r2,[sp,#5*4]
1324	eor	r2,r4,r5
1325	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1326	and	r2,r2,r11
1327	add	r6,r6,r3			@ h+=K256[i]
1328	eor	r2,r2,r5			@ Ch(e,f,g)
1329	eor	r0,r7,r7,ror#11
1330	add	r6,r6,r2			@ h+=Ch(e,f,g)
1331#if 21==31
1332	and	r3,r3,#0xff
1333	cmp	r3,#0xf2			@ done?
1334#endif
1335#if 21<15
1336# if __ARM_ARCH__>=7
1337	ldr	r2,[r1],#4			@ prefetch
1338# else
1339	ldrb	r2,[r1,#3]
1340# endif
1341	eor	r3,r7,r8			@ a^b, b^c in next round
1342#else
1343	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1344	eor	r3,r7,r8			@ a^b, b^c in next round
1345	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1346#endif
1347	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1348	and	r12,r12,r3			@ (b^c)&=(a^b)
1349	add	r10,r10,r6			@ d+=h
1350	eor	r12,r12,r8			@ Maj(a,b,c)
1351	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1352	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1353	@ ldr	r2,[sp,#7*4]		@ 22
1354	@ ldr	r1,[sp,#4*4]
1355	mov	r0,r2,ror#7
1356	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1357	mov	r12,r1,ror#17
1358	eor	r0,r0,r2,ror#18
1359	eor	r12,r12,r1,ror#19
1360	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1361	ldr	r2,[sp,#6*4]
1362	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1363	ldr	r1,[sp,#15*4]
1364
1365	add	r12,r12,r0
1366	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1367	add	r2,r2,r12
1368	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1369	add	r2,r2,r1			@ X[i]
1370	ldr	r12,[r14],#4			@ *K256++
1371	add	r5,r5,r2			@ h+=X[i]
1372	str	r2,[sp,#6*4]
1373	eor	r2,r11,r4
1374	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1375	and	r2,r2,r10
1376	add	r5,r5,r12			@ h+=K256[i]
1377	eor	r2,r2,r4			@ Ch(e,f,g)
1378	eor	r0,r6,r6,ror#11
1379	add	r5,r5,r2			@ h+=Ch(e,f,g)
1380#if 22==31
1381	and	r12,r12,#0xff
1382	cmp	r12,#0xf2			@ done?
1383#endif
1384#if 22<15
1385# if __ARM_ARCH__>=7
1386	ldr	r2,[r1],#4			@ prefetch
1387# else
1388	ldrb	r2,[r1,#3]
1389# endif
1390	eor	r12,r6,r7			@ a^b, b^c in next round
1391#else
1392	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1393	eor	r12,r6,r7			@ a^b, b^c in next round
1394	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1395#endif
1396	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1397	and	r3,r3,r12			@ (b^c)&=(a^b)
1398	add	r9,r9,r5			@ d+=h
1399	eor	r3,r3,r7			@ Maj(a,b,c)
1400	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1401	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1402	@ ldr	r2,[sp,#8*4]		@ 23
1403	@ ldr	r1,[sp,#5*4]
1404	mov	r0,r2,ror#7
1405	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1406	mov	r3,r1,ror#17
1407	eor	r0,r0,r2,ror#18
1408	eor	r3,r3,r1,ror#19
1409	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1410	ldr	r2,[sp,#7*4]
1411	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1412	ldr	r1,[sp,#0*4]
1413
1414	add	r3,r3,r0
1415	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1416	add	r2,r2,r3
1417	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1418	add	r2,r2,r1			@ X[i]
1419	ldr	r3,[r14],#4			@ *K256++
1420	add	r4,r4,r2			@ h+=X[i]
1421	str	r2,[sp,#7*4]
1422	eor	r2,r10,r11
1423	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1424	and	r2,r2,r9
1425	add	r4,r4,r3			@ h+=K256[i]
1426	eor	r2,r2,r11			@ Ch(e,f,g)
1427	eor	r0,r5,r5,ror#11
1428	add	r4,r4,r2			@ h+=Ch(e,f,g)
1429#if 23==31
1430	and	r3,r3,#0xff
1431	cmp	r3,#0xf2			@ done?
1432#endif
1433#if 23<15
1434# if __ARM_ARCH__>=7
1435	ldr	r2,[r1],#4			@ prefetch
1436# else
1437	ldrb	r2,[r1,#3]
1438# endif
1439	eor	r3,r5,r6			@ a^b, b^c in next round
1440#else
1441	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1442	eor	r3,r5,r6			@ a^b, b^c in next round
1443	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1444#endif
1445	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1446	and	r12,r12,r3			@ (b^c)&=(a^b)
1447	add	r8,r8,r4			@ d+=h
1448	eor	r12,r12,r6			@ Maj(a,b,c)
1449	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1450	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1451	@ ldr	r2,[sp,#9*4]		@ 24
1452	@ ldr	r1,[sp,#6*4]
1453	mov	r0,r2,ror#7
1454	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1455	mov	r12,r1,ror#17
1456	eor	r0,r0,r2,ror#18
1457	eor	r12,r12,r1,ror#19
1458	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1459	ldr	r2,[sp,#8*4]
1460	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1461	ldr	r1,[sp,#1*4]
1462
1463	add	r12,r12,r0
1464	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1465	add	r2,r2,r12
1466	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1467	add	r2,r2,r1			@ X[i]
1468	ldr	r12,[r14],#4			@ *K256++
1469	add	r11,r11,r2			@ h+=X[i]
1470	str	r2,[sp,#8*4]
1471	eor	r2,r9,r10
1472	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1473	and	r2,r2,r8
1474	add	r11,r11,r12			@ h+=K256[i]
1475	eor	r2,r2,r10			@ Ch(e,f,g)
1476	eor	r0,r4,r4,ror#11
1477	add	r11,r11,r2			@ h+=Ch(e,f,g)
1478#if 24==31
1479	and	r12,r12,#0xff
1480	cmp	r12,#0xf2			@ done?
1481#endif
1482#if 24<15
1483# if __ARM_ARCH__>=7
1484	ldr	r2,[r1],#4			@ prefetch
1485# else
1486	ldrb	r2,[r1,#3]
1487# endif
1488	eor	r12,r4,r5			@ a^b, b^c in next round
1489#else
1490	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1491	eor	r12,r4,r5			@ a^b, b^c in next round
1492	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1493#endif
1494	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1495	and	r3,r3,r12			@ (b^c)&=(a^b)
1496	add	r7,r7,r11			@ d+=h
1497	eor	r3,r3,r5			@ Maj(a,b,c)
1498	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1499	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1500	@ ldr	r2,[sp,#10*4]		@ 25
1501	@ ldr	r1,[sp,#7*4]
1502	mov	r0,r2,ror#7
1503	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1504	mov	r3,r1,ror#17
1505	eor	r0,r0,r2,ror#18
1506	eor	r3,r3,r1,ror#19
1507	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1508	ldr	r2,[sp,#9*4]
1509	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1510	ldr	r1,[sp,#2*4]
1511
1512	add	r3,r3,r0
1513	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1514	add	r2,r2,r3
1515	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1516	add	r2,r2,r1			@ X[i]
1517	ldr	r3,[r14],#4			@ *K256++
1518	add	r10,r10,r2			@ h+=X[i]
1519	str	r2,[sp,#9*4]
1520	eor	r2,r8,r9
1521	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1522	and	r2,r2,r7
1523	add	r10,r10,r3			@ h+=K256[i]
1524	eor	r2,r2,r9			@ Ch(e,f,g)
1525	eor	r0,r11,r11,ror#11
1526	add	r10,r10,r2			@ h+=Ch(e,f,g)
1527#if 25==31
1528	and	r3,r3,#0xff
1529	cmp	r3,#0xf2			@ done?
1530#endif
1531#if 25<15
1532# if __ARM_ARCH__>=7
1533	ldr	r2,[r1],#4			@ prefetch
1534# else
1535	ldrb	r2,[r1,#3]
1536# endif
1537	eor	r3,r11,r4			@ a^b, b^c in next round
1538#else
1539	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1540	eor	r3,r11,r4			@ a^b, b^c in next round
1541	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1542#endif
1543	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1544	and	r12,r12,r3			@ (b^c)&=(a^b)
1545	add	r6,r6,r10			@ d+=h
1546	eor	r12,r12,r4			@ Maj(a,b,c)
1547	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1548	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1549	@ ldr	r2,[sp,#11*4]		@ 26
1550	@ ldr	r1,[sp,#8*4]
1551	mov	r0,r2,ror#7
1552	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1553	mov	r12,r1,ror#17
1554	eor	r0,r0,r2,ror#18
1555	eor	r12,r12,r1,ror#19
1556	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1557	ldr	r2,[sp,#10*4]
1558	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1559	ldr	r1,[sp,#3*4]
1560
1561	add	r12,r12,r0
1562	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1563	add	r2,r2,r12
1564	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1565	add	r2,r2,r1			@ X[i]
1566	ldr	r12,[r14],#4			@ *K256++
1567	add	r9,r9,r2			@ h+=X[i]
1568	str	r2,[sp,#10*4]
1569	eor	r2,r7,r8
1570	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1571	and	r2,r2,r6
1572	add	r9,r9,r12			@ h+=K256[i]
1573	eor	r2,r2,r8			@ Ch(e,f,g)
1574	eor	r0,r10,r10,ror#11
1575	add	r9,r9,r2			@ h+=Ch(e,f,g)
1576#if 26==31
1577	and	r12,r12,#0xff
1578	cmp	r12,#0xf2			@ done?
1579#endif
1580#if 26<15
1581# if __ARM_ARCH__>=7
1582	ldr	r2,[r1],#4			@ prefetch
1583# else
1584	ldrb	r2,[r1,#3]
1585# endif
1586	eor	r12,r10,r11			@ a^b, b^c in next round
1587#else
1588	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1589	eor	r12,r10,r11			@ a^b, b^c in next round
1590	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1591#endif
1592	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1593	and	r3,r3,r12			@ (b^c)&=(a^b)
1594	add	r5,r5,r9			@ d+=h
1595	eor	r3,r3,r11			@ Maj(a,b,c)
1596	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1597	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1598	@ ldr	r2,[sp,#12*4]		@ 27
1599	@ ldr	r1,[sp,#9*4]
1600	mov	r0,r2,ror#7
1601	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1602	mov	r3,r1,ror#17
1603	eor	r0,r0,r2,ror#18
1604	eor	r3,r3,r1,ror#19
1605	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1606	ldr	r2,[sp,#11*4]
1607	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1608	ldr	r1,[sp,#4*4]
1609
1610	add	r3,r3,r0
1611	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1612	add	r2,r2,r3
1613	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1614	add	r2,r2,r1			@ X[i]
1615	ldr	r3,[r14],#4			@ *K256++
1616	add	r8,r8,r2			@ h+=X[i]
1617	str	r2,[sp,#11*4]
1618	eor	r2,r6,r7
1619	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1620	and	r2,r2,r5
1621	add	r8,r8,r3			@ h+=K256[i]
1622	eor	r2,r2,r7			@ Ch(e,f,g)
1623	eor	r0,r9,r9,ror#11
1624	add	r8,r8,r2			@ h+=Ch(e,f,g)
1625#if 27==31
1626	and	r3,r3,#0xff
1627	cmp	r3,#0xf2			@ done?
1628#endif
1629#if 27<15
1630# if __ARM_ARCH__>=7
1631	ldr	r2,[r1],#4			@ prefetch
1632# else
1633	ldrb	r2,[r1,#3]
1634# endif
1635	eor	r3,r9,r10			@ a^b, b^c in next round
1636#else
1637	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1638	eor	r3,r9,r10			@ a^b, b^c in next round
1639	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1640#endif
1641	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1642	and	r12,r12,r3			@ (b^c)&=(a^b)
1643	add	r4,r4,r8			@ d+=h
1644	eor	r12,r12,r10			@ Maj(a,b,c)
1645	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1646	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1647	@ ldr	r2,[sp,#13*4]		@ 28
1648	@ ldr	r1,[sp,#10*4]
1649	mov	r0,r2,ror#7
1650	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1651	mov	r12,r1,ror#17
1652	eor	r0,r0,r2,ror#18
1653	eor	r12,r12,r1,ror#19
1654	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1655	ldr	r2,[sp,#12*4]
1656	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1657	ldr	r1,[sp,#5*4]
1658
1659	add	r12,r12,r0
1660	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1661	add	r2,r2,r12
1662	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1663	add	r2,r2,r1			@ X[i]
1664	ldr	r12,[r14],#4			@ *K256++
1665	add	r7,r7,r2			@ h+=X[i]
1666	str	r2,[sp,#12*4]
1667	eor	r2,r5,r6
1668	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1669	and	r2,r2,r4
1670	add	r7,r7,r12			@ h+=K256[i]
1671	eor	r2,r2,r6			@ Ch(e,f,g)
1672	eor	r0,r8,r8,ror#11
1673	add	r7,r7,r2			@ h+=Ch(e,f,g)
1674#if 28==31
1675	and	r12,r12,#0xff
1676	cmp	r12,#0xf2			@ done?
1677#endif
1678#if 28<15
1679# if __ARM_ARCH__>=7
1680	ldr	r2,[r1],#4			@ prefetch
1681# else
1682	ldrb	r2,[r1,#3]
1683# endif
1684	eor	r12,r8,r9			@ a^b, b^c in next round
1685#else
1686	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1687	eor	r12,r8,r9			@ a^b, b^c in next round
1688	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1689#endif
1690	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1691	and	r3,r3,r12			@ (b^c)&=(a^b)
1692	add	r11,r11,r7			@ d+=h
1693	eor	r3,r3,r9			@ Maj(a,b,c)
1694	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1695	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1696	@ ldr	r2,[sp,#14*4]		@ 29
1697	@ ldr	r1,[sp,#11*4]
1698	mov	r0,r2,ror#7
1699	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1700	mov	r3,r1,ror#17
1701	eor	r0,r0,r2,ror#18
1702	eor	r3,r3,r1,ror#19
1703	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1704	ldr	r2,[sp,#13*4]
1705	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1706	ldr	r1,[sp,#6*4]
1707
1708	add	r3,r3,r0
1709	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1710	add	r2,r2,r3
1711	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1712	add	r2,r2,r1			@ X[i]
1713	ldr	r3,[r14],#4			@ *K256++
1714	add	r6,r6,r2			@ h+=X[i]
1715	str	r2,[sp,#13*4]
1716	eor	r2,r4,r5
1717	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1718	and	r2,r2,r11
1719	add	r6,r6,r3			@ h+=K256[i]
1720	eor	r2,r2,r5			@ Ch(e,f,g)
1721	eor	r0,r7,r7,ror#11
1722	add	r6,r6,r2			@ h+=Ch(e,f,g)
1723#if 29==31
1724	and	r3,r3,#0xff
1725	cmp	r3,#0xf2			@ done?
1726#endif
1727#if 29<15
1728# if __ARM_ARCH__>=7
1729	ldr	r2,[r1],#4			@ prefetch
1730# else
1731	ldrb	r2,[r1,#3]
1732# endif
1733	eor	r3,r7,r8			@ a^b, b^c in next round
1734#else
1735	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1736	eor	r3,r7,r8			@ a^b, b^c in next round
1737	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1738#endif
1739	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1740	and	r12,r12,r3			@ (b^c)&=(a^b)
1741	add	r10,r10,r6			@ d+=h
1742	eor	r12,r12,r8			@ Maj(a,b,c)
1743	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1744	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1745	@ ldr	r2,[sp,#15*4]		@ 30
1746	@ ldr	r1,[sp,#12*4]
1747	mov	r0,r2,ror#7
1748	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1749	mov	r12,r1,ror#17
1750	eor	r0,r0,r2,ror#18
1751	eor	r12,r12,r1,ror#19
1752	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1753	ldr	r2,[sp,#14*4]
1754	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1755	ldr	r1,[sp,#7*4]
1756
1757	add	r12,r12,r0
1758	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1759	add	r2,r2,r12
1760	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1761	add	r2,r2,r1			@ X[i]
1762	ldr	r12,[r14],#4			@ *K256++
1763	add	r5,r5,r2			@ h+=X[i]
1764	str	r2,[sp,#14*4]
1765	eor	r2,r11,r4
1766	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1767	and	r2,r2,r10
1768	add	r5,r5,r12			@ h+=K256[i]
1769	eor	r2,r2,r4			@ Ch(e,f,g)
1770	eor	r0,r6,r6,ror#11
1771	add	r5,r5,r2			@ h+=Ch(e,f,g)
1772#if 30==31
1773	and	r12,r12,#0xff
1774	cmp	r12,#0xf2			@ done?
1775#endif
1776#if 30<15
1777# if __ARM_ARCH__>=7
1778	ldr	r2,[r1],#4			@ prefetch
1779# else
1780	ldrb	r2,[r1,#3]
1781# endif
1782	eor	r12,r6,r7			@ a^b, b^c in next round
1783#else
1784	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1785	eor	r12,r6,r7			@ a^b, b^c in next round
1786	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1787#endif
1788	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1789	and	r3,r3,r12			@ (b^c)&=(a^b)
1790	add	r9,r9,r5			@ d+=h
1791	eor	r3,r3,r7			@ Maj(a,b,c)
1792	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1793	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1794	@ ldr	r2,[sp,#0*4]		@ 31
1795	@ ldr	r1,[sp,#13*4]
1796	mov	r0,r2,ror#7
1797	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1798	mov	r3,r1,ror#17
1799	eor	r0,r0,r2,ror#18
1800	eor	r3,r3,r1,ror#19
1801	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1802	ldr	r2,[sp,#15*4]
1803	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1804	ldr	r1,[sp,#8*4]
1805
1806	add	r3,r3,r0
1807	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1808	add	r2,r2,r3
1809	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1810	add	r2,r2,r1			@ X[i]
1811	ldr	r3,[r14],#4			@ *K256++
1812	add	r4,r4,r2			@ h+=X[i]
1813	str	r2,[sp,#15*4]
1814	eor	r2,r10,r11
1815	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1816	and	r2,r2,r9
1817	add	r4,r4,r3			@ h+=K256[i]
1818	eor	r2,r2,r11			@ Ch(e,f,g)
1819	eor	r0,r5,r5,ror#11
1820	add	r4,r4,r2			@ h+=Ch(e,f,g)
1821#if 31==31
1822	and	r3,r3,#0xff
1823	cmp	r3,#0xf2			@ done?
1824#endif
1825#if 31<15
1826# if __ARM_ARCH__>=7
1827	ldr	r2,[r1],#4			@ prefetch
1828# else
1829	ldrb	r2,[r1,#3]
1830# endif
1831	eor	r3,r5,r6			@ a^b, b^c in next round
1832#else
1833	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1834	eor	r3,r5,r6			@ a^b, b^c in next round
1835	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1836#endif
1837	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1838	and	r12,r12,r3			@ (b^c)&=(a^b)
1839	add	r8,r8,r4			@ d+=h
1840	eor	r12,r12,r6			@ Maj(a,b,c)
1841	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1842	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
@ End of the 16..63 round loop (reached when the K256 terminator test in
@ round 31 set the flags).  EQ => all 64 rounds done: pull the ctx
@ pointer; NE => two more 16-round batches remain.
1843#ifdef	__thumb2__
1844	ite	eq			@ Thumb2 thing, sanity check in ARM
1845#endif
1846	ldreq	r3,[sp,#16*4]		@ pull ctx
1847	bne	.Lrounds_16_xx
1848
@ Block finished: fold the working variables a..h (r4..r11) back into
@ the hash state at ctx (r3).  Loads and adds are interleaved in pairs
@ for dual-issue scheduling.  The deferred Maj from the last round is
@ applied first.
1849	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1850	ldr	r0,[r3,#0]
1851	ldr	r2,[r3,#4]
1852	ldr	r12,[r3,#8]
1853	add	r4,r4,r0
1854	ldr	r0,[r3,#12]
1855	add	r5,r5,r2
1856	ldr	r2,[r3,#16]
1857	add	r6,r6,r12
1858	ldr	r12,[r3,#20]
1859	add	r7,r7,r0
1860	ldr	r0,[r3,#24]
1861	add	r8,r8,r2
1862	ldr	r2,[r3,#28]
1863	add	r9,r9,r12
1864	ldr	r1,[sp,#17*4]		@ pull inp
1865	ldr	r12,[sp,#18*4]		@ pull inp+len
1866	add	r10,r10,r0
1867	add	r11,r11,r2
1868	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
@ More input left?  K table pointer is rewound by 64 words (256 bytes)
@ before looping back for the next 64-byte block.
1869	cmp	r1,r12
1870	sub	r14,r14,#256	@ rewind Ktbl
1871	bne	.Loop
1872
@ Epilogue.  ARMv5+: pop straight into pc (interworking pop).  Pre-v5:
@ pop into lr and either fall back to mov pc,lr (ARM caller) or execute
@ bx lr, hand-encoded as .word 0xe12fff1e so the file still assembles
@ with ARMv4-only toolchains, for Thumb callers.
1873	add	sp,sp,#19*4	@ destroy frame
1874#if __ARM_ARCH__>=5
1875	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1876#else
1877	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1878	tst	lr,#1
1879	moveq	pc,lr			@ be binary compatible with V4, yet
1880.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1881#endif
1882.size	sha256_block_data_order,.-sha256_block_data_order
@ NEON implementation (ARMv7-A and later).  Entry/setup portion:
@   r0 = ctx, r1 = inp, r2 = number of 64-byte blocks.
1883#if __ARM_MAX_ARCH__>=7
1884.arch	armv7-a
1885.fpu	neon
1886
1887.globl	sha256_block_data_order_neon
1888.type	sha256_block_data_order_neon,%function
1889.align	5
1890.skip	16
1891sha256_block_data_order_neon:
1892.LNEON:
1893	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1894
@ Carve a 16-byte-aligned scratch frame (16 X words + 16 spare bytes)
@ below the current stack; the original sp is kept in r12 and saved in
@ the frame so it can be restored on exit.
1895	sub	r11,sp,#16*4+16
1896	adr	r14,K256
1897	bic	r11,r11,#15		@ align for 128-bit stores
1898	mov	r12,sp
1899	mov	sp,r11			@ alloca
1900	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1901
@ Load the first 64-byte block (q0-q3) and the first 16 round constants
@ (q8-q11), byte-swap the input to big-endian 32-bit words, and stash
@ ctx/inp/inp_end/original-sp at fixed frame offsets 64/68/72/76.
1902	vld1.8	{q0},[r1]!
1903	vld1.8	{q1},[r1]!
1904	vld1.8	{q2},[r1]!
1905	vld1.8	{q3},[r1]!
1906	vld1.32	{q8},[r14,:128]!
1907	vld1.32	{q9},[r14,:128]!
1908	vld1.32	{q10},[r14,:128]!
1909	vld1.32	{q11},[r14,:128]!
1910	vrev32.8	q0,q0		@ yes, even on
1911	str	r0,[sp,#64]
1912	vrev32.8	q1,q1		@ big-endian
1913	str	r1,[sp,#68]
1914	mov	r1,sp
1915	vrev32.8	q2,q2
1916	str	r2,[sp,#72]
1917	vrev32.8	q3,q3
1918	str	r12,[sp,#76]		@ save original sp
@ Precompute X[i]+K256[i] for i=0..15 into the stack scratch; the
@ integer round code then consumes these sums with plain ldr.
1919	vadd.i32	q8,q8,q0
1920	vadd.i32	q9,q9,q1
1921	vst1.32	{q8},[r1,:128]!
1922	vadd.i32	q10,q10,q2
1923	vst1.32	{q9},[r1,:128]!
1924	vadd.i32	q11,q11,q3
1925	vst1.32	{q10},[r1,:128]!
1926	vst1.32	{q11},[r1,:128]!
1927
@ Load hash state a..h into r4..r11, prime r2 with X[0]+K[0], clear the
@ deferred-Maj accumulator (r12) and seed a^b (r3), then enter the
@ combined NEON-schedule / integer-round loop.
1928	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1929	sub	r1,r1,#64
1930	ldr	r2,[sp,#0]
1931	eor	r12,r12,r12
1932	eor	r3,r5,r6
1933	b	.L_00_48
1934
1935.align	4
1936.L_00_48:
1937	vext.8	q8,q0,q1,#4
1938	add	r11,r11,r2
1939	eor	r2,r9,r10
1940	eor	r0,r8,r8,ror#5
1941	vext.8	q9,q2,q3,#4
1942	add	r4,r4,r12
1943	and	r2,r2,r8
1944	eor	r12,r0,r8,ror#19
1945	vshr.u32	q10,q8,#7
1946	eor	r0,r4,r4,ror#11
1947	eor	r2,r2,r10
1948	vadd.i32	q0,q0,q9
1949	add	r11,r11,r12,ror#6
1950	eor	r12,r4,r5
1951	vshr.u32	q9,q8,#3
1952	eor	r0,r0,r4,ror#20
1953	add	r11,r11,r2
1954	vsli.32	q10,q8,#25
1955	ldr	r2,[sp,#4]
1956	and	r3,r3,r12
1957	vshr.u32	q11,q8,#18
1958	add	r7,r7,r11
1959	add	r11,r11,r0,ror#2
1960	eor	r3,r3,r5
1961	veor	q9,q9,q10
1962	add	r10,r10,r2
1963	vsli.32	q11,q8,#14
1964	eor	r2,r8,r9
1965	eor	r0,r7,r7,ror#5
1966	vshr.u32	d24,d7,#17
1967	add	r11,r11,r3
1968	and	r2,r2,r7
1969	veor	q9,q9,q11
1970	eor	r3,r0,r7,ror#19
1971	eor	r0,r11,r11,ror#11
1972	vsli.32	d24,d7,#15
1973	eor	r2,r2,r9
1974	add	r10,r10,r3,ror#6
1975	vshr.u32	d25,d7,#10
1976	eor	r3,r11,r4
1977	eor	r0,r0,r11,ror#20
1978	vadd.i32	q0,q0,q9
1979	add	r10,r10,r2
1980	ldr	r2,[sp,#8]
1981	veor	d25,d25,d24
1982	and	r12,r12,r3
1983	add	r6,r6,r10
1984	vshr.u32	d24,d7,#19
1985	add	r10,r10,r0,ror#2
1986	eor	r12,r12,r4
1987	vsli.32	d24,d7,#13
1988	add	r9,r9,r2
1989	eor	r2,r7,r8
1990	veor	d25,d25,d24
1991	eor	r0,r6,r6,ror#5
1992	add	r10,r10,r12
1993	vadd.i32	d0,d0,d25
1994	and	r2,r2,r6
1995	eor	r12,r0,r6,ror#19
1996	vshr.u32	d24,d0,#17
1997	eor	r0,r10,r10,ror#11
1998	eor	r2,r2,r8
1999	vsli.32	d24,d0,#15
2000	add	r9,r9,r12,ror#6
2001	eor	r12,r10,r11
2002	vshr.u32	d25,d0,#10
2003	eor	r0,r0,r10,ror#20
2004	add	r9,r9,r2
2005	veor	d25,d25,d24
2006	ldr	r2,[sp,#12]
2007	and	r3,r3,r12
2008	vshr.u32	d24,d0,#19
2009	add	r5,r5,r9
2010	add	r9,r9,r0,ror#2
2011	eor	r3,r3,r11
2012	vld1.32	{q8},[r14,:128]!
2013	add	r8,r8,r2
2014	vsli.32	d24,d0,#13
2015	eor	r2,r6,r7
2016	eor	r0,r5,r5,ror#5
2017	veor	d25,d25,d24
2018	add	r9,r9,r3
2019	and	r2,r2,r5
2020	vadd.i32	d1,d1,d25
2021	eor	r3,r0,r5,ror#19
2022	eor	r0,r9,r9,ror#11
2023	vadd.i32	q8,q8,q0
2024	eor	r2,r2,r7
2025	add	r8,r8,r3,ror#6
2026	eor	r3,r9,r10
2027	eor	r0,r0,r9,ror#20
2028	add	r8,r8,r2
2029	ldr	r2,[sp,#16]
2030	and	r12,r12,r3
2031	add	r4,r4,r8
2032	vst1.32	{q8},[r1,:128]!
2033	add	r8,r8,r0,ror#2
2034	eor	r12,r12,r10
2035	vext.8	q8,q1,q2,#4
2036	add	r7,r7,r2
2037	eor	r2,r5,r6
2038	eor	r0,r4,r4,ror#5
2039	vext.8	q9,q3,q0,#4
2040	add	r8,r8,r12
2041	and	r2,r2,r4
2042	eor	r12,r0,r4,ror#19
2043	vshr.u32	q10,q8,#7
2044	eor	r0,r8,r8,ror#11
2045	eor	r2,r2,r6
2046	vadd.i32	q1,q1,q9
2047	add	r7,r7,r12,ror#6
2048	eor	r12,r8,r9
2049	vshr.u32	q9,q8,#3
2050	eor	r0,r0,r8,ror#20
2051	add	r7,r7,r2
2052	vsli.32	q10,q8,#25
2053	ldr	r2,[sp,#20]
2054	and	r3,r3,r12
2055	vshr.u32	q11,q8,#18
2056	add	r11,r11,r7
2057	add	r7,r7,r0,ror#2
2058	eor	r3,r3,r9
2059	veor	q9,q9,q10
2060	add	r6,r6,r2
2061	vsli.32	q11,q8,#14
2062	eor	r2,r4,r5
2063	eor	r0,r11,r11,ror#5
2064	vshr.u32	d24,d1,#17
2065	add	r7,r7,r3
2066	and	r2,r2,r11
2067	veor	q9,q9,q11
2068	eor	r3,r0,r11,ror#19
2069	eor	r0,r7,r7,ror#11
2070	vsli.32	d24,d1,#15
2071	eor	r2,r2,r5
2072	add	r6,r6,r3,ror#6
2073	vshr.u32	d25,d1,#10
2074	eor	r3,r7,r8
2075	eor	r0,r0,r7,ror#20
2076	vadd.i32	q1,q1,q9
2077	add	r6,r6,r2
2078	ldr	r2,[sp,#24]
2079	veor	d25,d25,d24
2080	and	r12,r12,r3
2081	add	r10,r10,r6
2082	vshr.u32	d24,d1,#19
2083	add	r6,r6,r0,ror#2
2084	eor	r12,r12,r8
2085	vsli.32	d24,d1,#13
2086	add	r5,r5,r2
2087	eor	r2,r11,r4
2088	veor	d25,d25,d24
2089	eor	r0,r10,r10,ror#5
2090	add	r6,r6,r12
2091	vadd.i32	d2,d2,d25
2092	and	r2,r2,r10
2093	eor	r12,r0,r10,ror#19
2094	vshr.u32	d24,d2,#17
2095	eor	r0,r6,r6,ror#11
2096	eor	r2,r2,r4
2097	vsli.32	d24,d2,#15
2098	add	r5,r5,r12,ror#6
2099	eor	r12,r6,r7
2100	vshr.u32	d25,d2,#10
2101	eor	r0,r0,r6,ror#20
2102	add	r5,r5,r2
2103	veor	d25,d25,d24
2104	ldr	r2,[sp,#28]
2105	and	r3,r3,r12
2106	vshr.u32	d24,d2,#19
2107	add	r9,r9,r5
2108	add	r5,r5,r0,ror#2
2109	eor	r3,r3,r7
2110	vld1.32	{q8},[r14,:128]!
2111	add	r4,r4,r2
2112	vsli.32	d24,d2,#13
2113	eor	r2,r10,r11
2114	eor	r0,r9,r9,ror#5
2115	veor	d25,d25,d24
2116	add	r5,r5,r3
2117	and	r2,r2,r9
2118	vadd.i32	d3,d3,d25
2119	eor	r3,r0,r9,ror#19
2120	eor	r0,r5,r5,ror#11
2121	vadd.i32	q8,q8,q1
2122	eor	r2,r2,r11
2123	add	r4,r4,r3,ror#6
2124	eor	r3,r5,r6
2125	eor	r0,r0,r5,ror#20
2126	add	r4,r4,r2
2127	ldr	r2,[sp,#32]
2128	and	r12,r12,r3
2129	add	r8,r8,r4
2130	vst1.32	{q8},[r1,:128]!
2131	add	r4,r4,r0,ror#2
2132	eor	r12,r12,r6
2133	vext.8	q8,q2,q3,#4
2134	add	r11,r11,r2
2135	eor	r2,r9,r10
2136	eor	r0,r8,r8,ror#5
2137	vext.8	q9,q0,q1,#4
2138	add	r4,r4,r12
2139	and	r2,r2,r8
2140	eor	r12,r0,r8,ror#19
2141	vshr.u32	q10,q8,#7
2142	eor	r0,r4,r4,ror#11
2143	eor	r2,r2,r10
2144	vadd.i32	q2,q2,q9
2145	add	r11,r11,r12,ror#6
2146	eor	r12,r4,r5
2147	vshr.u32	q9,q8,#3
2148	eor	r0,r0,r4,ror#20
2149	add	r11,r11,r2
2150	vsli.32	q10,q8,#25
2151	ldr	r2,[sp,#36]
2152	and	r3,r3,r12
2153	vshr.u32	q11,q8,#18
2154	add	r7,r7,r11
2155	add	r11,r11,r0,ror#2
2156	eor	r3,r3,r5
2157	veor	q9,q9,q10
2158	add	r10,r10,r2
2159	vsli.32	q11,q8,#14
2160	eor	r2,r8,r9
2161	eor	r0,r7,r7,ror#5
2162	vshr.u32	d24,d3,#17
2163	add	r11,r11,r3
2164	and	r2,r2,r7
2165	veor	q9,q9,q11
2166	eor	r3,r0,r7,ror#19
2167	eor	r0,r11,r11,ror#11
2168	vsli.32	d24,d3,#15
2169	eor	r2,r2,r9
2170	add	r10,r10,r3,ror#6
2171	vshr.u32	d25,d3,#10
2172	eor	r3,r11,r4
2173	eor	r0,r0,r11,ror#20
2174	vadd.i32	q2,q2,q9
2175	add	r10,r10,r2
2176	ldr	r2,[sp,#40]
2177	veor	d25,d25,d24
2178	and	r12,r12,r3
2179	add	r6,r6,r10
2180	vshr.u32	d24,d3,#19
2181	add	r10,r10,r0,ror#2
2182	eor	r12,r12,r4
2183	vsli.32	d24,d3,#13
2184	add	r9,r9,r2
2185	eor	r2,r7,r8
2186	veor	d25,d25,d24
2187	eor	r0,r6,r6,ror#5
2188	add	r10,r10,r12
2189	vadd.i32	d4,d4,d25
2190	and	r2,r2,r6
2191	eor	r12,r0,r6,ror#19
2192	vshr.u32	d24,d4,#17
2193	eor	r0,r10,r10,ror#11
2194	eor	r2,r2,r8
2195	vsli.32	d24,d4,#15
2196	add	r9,r9,r12,ror#6
2197	eor	r12,r10,r11
2198	vshr.u32	d25,d4,#10
2199	eor	r0,r0,r10,ror#20
2200	add	r9,r9,r2
2201	veor	d25,d25,d24
2202	ldr	r2,[sp,#44]
2203	and	r3,r3,r12
2204	vshr.u32	d24,d4,#19
2205	add	r5,r5,r9
2206	add	r9,r9,r0,ror#2
2207	eor	r3,r3,r11
2208	vld1.32	{q8},[r14,:128]!
2209	add	r8,r8,r2
2210	vsli.32	d24,d4,#13
2211	eor	r2,r6,r7
2212	eor	r0,r5,r5,ror#5
2213	veor	d25,d25,d24
2214	add	r9,r9,r3
2215	and	r2,r2,r5
2216	vadd.i32	d5,d5,d25
2217	eor	r3,r0,r5,ror#19
2218	eor	r0,r9,r9,ror#11
2219	vadd.i32	q8,q8,q2
2220	eor	r2,r2,r7
2221	add	r8,r8,r3,ror#6
2222	eor	r3,r9,r10
2223	eor	r0,r0,r9,ror#20
2224	add	r8,r8,r2
2225	ldr	r2,[sp,#48]
2226	and	r12,r12,r3
2227	add	r4,r4,r8
2228	vst1.32	{q8},[r1,:128]!
2229	add	r8,r8,r0,ror#2
2230	eor	r12,r12,r10
2231	vext.8	q8,q3,q0,#4
2232	add	r7,r7,r2
2233	eor	r2,r5,r6
2234	eor	r0,r4,r4,ror#5
2235	vext.8	q9,q1,q2,#4
2236	add	r8,r8,r12
2237	and	r2,r2,r4
2238	eor	r12,r0,r4,ror#19
2239	vshr.u32	q10,q8,#7
2240	eor	r0,r8,r8,ror#11
2241	eor	r2,r2,r6
2242	vadd.i32	q3,q3,q9
2243	add	r7,r7,r12,ror#6
2244	eor	r12,r8,r9
2245	vshr.u32	q9,q8,#3
2246	eor	r0,r0,r8,ror#20
2247	add	r7,r7,r2
2248	vsli.32	q10,q8,#25
2249	ldr	r2,[sp,#52]
2250	and	r3,r3,r12
2251	vshr.u32	q11,q8,#18
2252	add	r11,r11,r7
2253	add	r7,r7,r0,ror#2
2254	eor	r3,r3,r9
2255	veor	q9,q9,q10
2256	add	r6,r6,r2
2257	vsli.32	q11,q8,#14
2258	eor	r2,r4,r5
2259	eor	r0,r11,r11,ror#5
2260	vshr.u32	d24,d5,#17
2261	add	r7,r7,r3
2262	and	r2,r2,r11
2263	veor	q9,q9,q11
2264	eor	r3,r0,r11,ror#19
2265	eor	r0,r7,r7,ror#11
2266	vsli.32	d24,d5,#15
2267	eor	r2,r2,r5
2268	add	r6,r6,r3,ror#6
2269	vshr.u32	d25,d5,#10
2270	eor	r3,r7,r8
2271	eor	r0,r0,r7,ror#20
2272	vadd.i32	q3,q3,q9
2273	add	r6,r6,r2
2274	ldr	r2,[sp,#56]
2275	veor	d25,d25,d24
2276	and	r12,r12,r3
2277	add	r10,r10,r6
2278	vshr.u32	d24,d5,#19
2279	add	r6,r6,r0,ror#2
2280	eor	r12,r12,r8
2281	vsli.32	d24,d5,#13
2282	add	r5,r5,r2
2283	eor	r2,r11,r4
2284	veor	d25,d25,d24
2285	eor	r0,r10,r10,ror#5
2286	add	r6,r6,r12
2287	vadd.i32	d6,d6,d25
2288	and	r2,r2,r10
2289	eor	r12,r0,r10,ror#19
2290	vshr.u32	d24,d6,#17
2291	eor	r0,r6,r6,ror#11
2292	eor	r2,r2,r4
2293	vsli.32	d24,d6,#15
2294	add	r5,r5,r12,ror#6
2295	eor	r12,r6,r7
2296	vshr.u32	d25,d6,#10
2297	eor	r0,r0,r6,ror#20
2298	add	r5,r5,r2
2299	veor	d25,d25,d24
2300	ldr	r2,[sp,#60]
2301	and	r3,r3,r12
2302	vshr.u32	d24,d6,#19
2303	add	r9,r9,r5
2304	add	r5,r5,r0,ror#2
2305	eor	r3,r3,r7
2306	vld1.32	{q8},[r14,:128]!
2307	add	r4,r4,r2
2308	vsli.32	d24,d6,#13
2309	eor	r2,r10,r11
2310	eor	r0,r9,r9,ror#5
2311	veor	d25,d25,d24
2312	add	r5,r5,r3
2313	and	r2,r2,r9
2314	vadd.i32	d7,d7,d25
2315	eor	r3,r0,r9,ror#19
2316	eor	r0,r5,r5,ror#11
2317	vadd.i32	q8,q8,q3
2318	eor	r2,r2,r11
2319	add	r4,r4,r3,ror#6
2320	eor	r3,r5,r6
2321	eor	r0,r0,r5,ror#20
2322	add	r4,r4,r2
2323	ldr	r2,[r14]
2324	and	r12,r12,r3
2325	add	r8,r8,r4
2326	vst1.32	{q8},[r1,:128]!
2327	add	r4,r4,r0,ror#2
2328	eor	r12,r12,r6
2329	teq	r2,#0				@ check for K256 terminator
2330	ldr	r2,[sp,#0]
2331	sub	r1,r1,#64
2332	bne	.L_00_48
2333
2334	ldr	r1,[sp,#68]
2335	ldr	r0,[sp,#72]
2336	sub	r14,r14,#256	@ rewind r14
2337	teq	r1,r0
2338	it	eq
2339	subeq	r1,r1,#64		@ avoid SEGV
2340	vld1.8	{q0},[r1]!		@ load next input block
2341	vld1.8	{q1},[r1]!
2342	vld1.8	{q2},[r1]!
2343	vld1.8	{q3},[r1]!
2344	it	ne
2345	strne	r1,[sp,#68]
2346	mov	r1,sp
2347	add	r11,r11,r2
2348	eor	r2,r9,r10
2349	eor	r0,r8,r8,ror#5
2350	add	r4,r4,r12
2351	vld1.32	{q8},[r14,:128]!
2352	and	r2,r2,r8
2353	eor	r12,r0,r8,ror#19
2354	eor	r0,r4,r4,ror#11
2355	eor	r2,r2,r10
2356	vrev32.8	q0,q0
2357	add	r11,r11,r12,ror#6
2358	eor	r12,r4,r5
2359	eor	r0,r0,r4,ror#20
2360	add	r11,r11,r2
2361	vadd.i32	q8,q8,q0
2362	ldr	r2,[sp,#4]
2363	and	r3,r3,r12
2364	add	r7,r7,r11
2365	add	r11,r11,r0,ror#2
2366	eor	r3,r3,r5
2367	add	r10,r10,r2
2368	eor	r2,r8,r9
2369	eor	r0,r7,r7,ror#5
2370	add	r11,r11,r3
2371	and	r2,r2,r7
2372	eor	r3,r0,r7,ror#19
2373	eor	r0,r11,r11,ror#11
2374	eor	r2,r2,r9
2375	add	r10,r10,r3,ror#6
2376	eor	r3,r11,r4
2377	eor	r0,r0,r11,ror#20
2378	add	r10,r10,r2
2379	ldr	r2,[sp,#8]
2380	and	r12,r12,r3
2381	add	r6,r6,r10
2382	add	r10,r10,r0,ror#2
2383	eor	r12,r12,r4
2384	add	r9,r9,r2
2385	eor	r2,r7,r8
2386	eor	r0,r6,r6,ror#5
2387	add	r10,r10,r12
2388	and	r2,r2,r6
2389	eor	r12,r0,r6,ror#19
2390	eor	r0,r10,r10,ror#11
2391	eor	r2,r2,r8
2392	add	r9,r9,r12,ror#6
2393	eor	r12,r10,r11
2394	eor	r0,r0,r10,ror#20
2395	add	r9,r9,r2
2396	ldr	r2,[sp,#12]
2397	and	r3,r3,r12
2398	add	r5,r5,r9
2399	add	r9,r9,r0,ror#2
2400	eor	r3,r3,r11
2401	add	r8,r8,r2
2402	eor	r2,r6,r7
2403	eor	r0,r5,r5,ror#5
2404	add	r9,r9,r3
2405	and	r2,r2,r5
2406	eor	r3,r0,r5,ror#19
2407	eor	r0,r9,r9,ror#11
2408	eor	r2,r2,r7
2409	add	r8,r8,r3,ror#6
2410	eor	r3,r9,r10
2411	eor	r0,r0,r9,ror#20
2412	add	r8,r8,r2
2413	ldr	r2,[sp,#16]
2414	and	r12,r12,r3
2415	add	r4,r4,r8
2416	add	r8,r8,r0,ror#2
2417	eor	r12,r12,r10
2418	vst1.32	{q8},[r1,:128]!
2419	add	r7,r7,r2
2420	eor	r2,r5,r6
2421	eor	r0,r4,r4,ror#5
2422	add	r8,r8,r12
2423	vld1.32	{q8},[r14,:128]!
2424	and	r2,r2,r4
2425	eor	r12,r0,r4,ror#19
2426	eor	r0,r8,r8,ror#11
2427	eor	r2,r2,r6
2428	vrev32.8	q1,q1
2429	add	r7,r7,r12,ror#6
2430	eor	r12,r8,r9
2431	eor	r0,r0,r8,ror#20
2432	add	r7,r7,r2
2433	vadd.i32	q8,q8,q1
2434	ldr	r2,[sp,#20]
2435	and	r3,r3,r12
2436	add	r11,r11,r7
2437	add	r7,r7,r0,ror#2
2438	eor	r3,r3,r9
2439	add	r6,r6,r2
2440	eor	r2,r4,r5
2441	eor	r0,r11,r11,ror#5
2442	add	r7,r7,r3
2443	and	r2,r2,r11
2444	eor	r3,r0,r11,ror#19
2445	eor	r0,r7,r7,ror#11
2446	eor	r2,r2,r5
2447	add	r6,r6,r3,ror#6
2448	eor	r3,r7,r8
2449	eor	r0,r0,r7,ror#20
2450	add	r6,r6,r2
2451	ldr	r2,[sp,#24]
2452	and	r12,r12,r3
2453	add	r10,r10,r6
2454	add	r6,r6,r0,ror#2
2455	eor	r12,r12,r8
2456	add	r5,r5,r2
2457	eor	r2,r11,r4
2458	eor	r0,r10,r10,ror#5
2459	add	r6,r6,r12
2460	and	r2,r2,r10
2461	eor	r12,r0,r10,ror#19
2462	eor	r0,r6,r6,ror#11
2463	eor	r2,r2,r4
2464	add	r5,r5,r12,ror#6
2465	eor	r12,r6,r7
2466	eor	r0,r0,r6,ror#20
2467	add	r5,r5,r2
2468	ldr	r2,[sp,#28]
2469	and	r3,r3,r12
2470	add	r9,r9,r5
2471	add	r5,r5,r0,ror#2
2472	eor	r3,r3,r7
2473	add	r4,r4,r2
2474	eor	r2,r10,r11
2475	eor	r0,r9,r9,ror#5
2476	add	r5,r5,r3
2477	and	r2,r2,r9
2478	eor	r3,r0,r9,ror#19
2479	eor	r0,r5,r5,ror#11
2480	eor	r2,r2,r11
2481	add	r4,r4,r3,ror#6
2482	eor	r3,r5,r6
2483	eor	r0,r0,r5,ror#20
2484	add	r4,r4,r2
2485	ldr	r2,[sp,#32]
2486	and	r12,r12,r3
2487	add	r8,r8,r4
2488	add	r4,r4,r0,ror#2
2489	eor	r12,r12,r6
2490	vst1.32	{q8},[r1,:128]!
2491	add	r11,r11,r2
2492	eor	r2,r9,r10
2493	eor	r0,r8,r8,ror#5
2494	add	r4,r4,r12
2495	vld1.32	{q8},[r14,:128]!
2496	and	r2,r2,r8
2497	eor	r12,r0,r8,ror#19
2498	eor	r0,r4,r4,ror#11
2499	eor	r2,r2,r10
2500	vrev32.8	q2,q2
2501	add	r11,r11,r12,ror#6
2502	eor	r12,r4,r5
2503	eor	r0,r0,r4,ror#20
2504	add	r11,r11,r2
2505	vadd.i32	q8,q8,q2
2506	ldr	r2,[sp,#36]
2507	and	r3,r3,r12
2508	add	r7,r7,r11
2509	add	r11,r11,r0,ror#2
2510	eor	r3,r3,r5
2511	add	r10,r10,r2
2512	eor	r2,r8,r9
2513	eor	r0,r7,r7,ror#5
2514	add	r11,r11,r3
2515	and	r2,r2,r7
2516	eor	r3,r0,r7,ror#19
2517	eor	r0,r11,r11,ror#11
2518	eor	r2,r2,r9
2519	add	r10,r10,r3,ror#6
2520	eor	r3,r11,r4
2521	eor	r0,r0,r11,ror#20
2522	add	r10,r10,r2
2523	ldr	r2,[sp,#40]
2524	and	r12,r12,r3
2525	add	r6,r6,r10
2526	add	r10,r10,r0,ror#2
2527	eor	r12,r12,r4
2528	add	r9,r9,r2
2529	eor	r2,r7,r8
2530	eor	r0,r6,r6,ror#5
2531	add	r10,r10,r12
2532	and	r2,r2,r6
2533	eor	r12,r0,r6,ror#19
2534	eor	r0,r10,r10,ror#11
2535	eor	r2,r2,r8
2536	add	r9,r9,r12,ror#6
2537	eor	r12,r10,r11
2538	eor	r0,r0,r10,ror#20
2539	add	r9,r9,r2
2540	ldr	r2,[sp,#44]
2541	and	r3,r3,r12
2542	add	r5,r5,r9
2543	add	r9,r9,r0,ror#2
2544	eor	r3,r3,r11
2545	add	r8,r8,r2
2546	eor	r2,r6,r7
2547	eor	r0,r5,r5,ror#5
2548	add	r9,r9,r3
2549	and	r2,r2,r5
2550	eor	r3,r0,r5,ror#19
2551	eor	r0,r9,r9,ror#11
2552	eor	r2,r2,r7
2553	add	r8,r8,r3,ror#6
2554	eor	r3,r9,r10
2555	eor	r0,r0,r9,ror#20
2556	add	r8,r8,r2
2557	ldr	r2,[sp,#48]
2558	and	r12,r12,r3
2559	add	r4,r4,r8
2560	add	r8,r8,r0,ror#2
2561	eor	r12,r12,r10
2562	vst1.32	{q8},[r1,:128]!
2563	add	r7,r7,r2
2564	eor	r2,r5,r6
2565	eor	r0,r4,r4,ror#5
2566	add	r8,r8,r12
2567	vld1.32	{q8},[r14,:128]!
2568	and	r2,r2,r4
2569	eor	r12,r0,r4,ror#19
2570	eor	r0,r8,r8,ror#11
2571	eor	r2,r2,r6
2572	vrev32.8	q3,q3
2573	add	r7,r7,r12,ror#6
2574	eor	r12,r8,r9
2575	eor	r0,r0,r8,ror#20
2576	add	r7,r7,r2
2577	vadd.i32	q8,q8,q3
2578	ldr	r2,[sp,#52]
2579	and	r3,r3,r12
2580	add	r11,r11,r7
2581	add	r7,r7,r0,ror#2
2582	eor	r3,r3,r9
2583	add	r6,r6,r2
2584	eor	r2,r4,r5
2585	eor	r0,r11,r11,ror#5
2586	add	r7,r7,r3
2587	and	r2,r2,r11
2588	eor	r3,r0,r11,ror#19
2589	eor	r0,r7,r7,ror#11
2590	eor	r2,r2,r5
2591	add	r6,r6,r3,ror#6
2592	eor	r3,r7,r8
2593	eor	r0,r0,r7,ror#20
2594	add	r6,r6,r2
2595	ldr	r2,[sp,#56]
2596	and	r12,r12,r3
2597	add	r10,r10,r6
2598	add	r6,r6,r0,ror#2
2599	eor	r12,r12,r8
2600	add	r5,r5,r2
2601	eor	r2,r11,r4
2602	eor	r0,r10,r10,ror#5
2603	add	r6,r6,r12
2604	and	r2,r2,r10
2605	eor	r12,r0,r10,ror#19
2606	eor	r0,r6,r6,ror#11
2607	eor	r2,r2,r4
2608	add	r5,r5,r12,ror#6
2609	eor	r12,r6,r7
2610	eor	r0,r0,r6,ror#20
2611	add	r5,r5,r2
2612	ldr	r2,[sp,#60]
2613	and	r3,r3,r12
2614	add	r9,r9,r5
2615	add	r5,r5,r0,ror#2
2616	eor	r3,r3,r7
2617	add	r4,r4,r2
2618	eor	r2,r10,r11
2619	eor	r0,r9,r9,ror#5
2620	add	r5,r5,r3
2621	and	r2,r2,r9
2622	eor	r3,r0,r9,ror#19
2623	eor	r0,r5,r5,ror#11
2624	eor	r2,r2,r11
2625	add	r4,r4,r3,ror#6
2626	eor	r3,r5,r6
2627	eor	r0,r0,r5,ror#20
2628	add	r4,r4,r2
2629	ldr	r2,[sp,#64]
2630	and	r12,r12,r3
2631	add	r8,r8,r4
2632	add	r4,r4,r0,ror#2
2633	eor	r12,r12,r6
2634	vst1.32	{q8},[r1,:128]!
2635	ldr	r0,[r2,#0]
2636	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2637	ldr	r12,[r2,#4]
2638	ldr	r3,[r2,#8]
2639	ldr	r1,[r2,#12]
2640	add	r4,r4,r0			@ accumulate
2641	ldr	r0,[r2,#16]
2642	add	r5,r5,r12
2643	ldr	r12,[r2,#20]
2644	add	r6,r6,r3
2645	ldr	r3,[r2,#24]
2646	add	r7,r7,r1
2647	ldr	r1,[r2,#28]
2648	add	r8,r8,r0
2649	str	r4,[r2],#4
2650	add	r9,r9,r12
2651	str	r5,[r2],#4
2652	add	r10,r10,r3
2653	str	r6,[r2],#4
2654	add	r11,r11,r1
2655	str	r7,[r2],#4
2656	stmia	r2,{r8,r9,r10,r11}
2657
2658	ittte	ne
2659	movne	r1,sp
2660	ldrne	r2,[sp,#0]
2661	eorne	r12,r12,r12
2662	ldreq	sp,[sp,#76]			@ restore original sp
2663	itt	ne
2664	eorne	r3,r5,r6
2665	bne	.L_00_48
2666
2667	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2668.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2669#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

@ ARMv8 Cryptography Extension code path.  The SHA-256 instructions
@ (sha256h, sha256h2, sha256su0, sha256su1) are emitted as raw bytes
@ through INST() so this file still assembles with toolchains that
@ predate those mnemonics.  In Thumb-2 mode the two halfwords of the
@ 32-bit encoding are stored in the opposite order (and the top bits
@ of the second byte adjusted), hence the different byte layout.
# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

@ sha256_block_data_order_armv8(ctx, inp, num)
@ In:  r0 = pointer to the eight 32-bit hash state words,
@      r1 = input data, r2 = number of 64-byte blocks to process.
@ NOTE(review): on entry r3 is expected to point #256+32 bytes past
@ the start of the K256 constant table (set up by the common entry
@ point, which is outside this view) — confirm against the dispatcher.
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]		@ load state: q0 = H[0..3], q1 = H[4..7]
	sub	r3,r3,#256+32		@ rewind r3 to the start of the K256 table
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	4
.Loop_v8:
	@ Load one 64-byte input block into q8-q11 and byte-swap each
	@ 32-bit word to big-endian as SHA-256 requires.
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!		@ K[0..3]
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2			@ last block? (flags consumed by bne below)
	@ Rounds 0-47: twelve groups of four rounds.  Each group adds the
	@ next four K constants to a message-word vector, performs the two
	@ compression steps (sha256h/sha256h2), and expands the message
	@ schedule for a later group (sha256su0/sha256su1).  Loads of the
	@ next K vector are interleaved to hide latency.
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Rounds 48-63: four final groups — the message schedule is fully
	@ consumed, so no more sha256su0/sha256su1 updates are needed.
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14	@ add back the state saved before this block
	vadd.i32	q1,q1,q15
	it	ne			@ IT block for Thumb-2; harmless in ARM mode
	bne	.Loop_v8		@ more input blocks remain (flags from teq above)

	vst1.32	{q0,q1},[r0]	@ store updated hash state

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
2817.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2818.align	2
2819.align	2
2820#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2821.comm	OPENSSL_armcap_P,4,4
2822#endif
2823