• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#if defined(__aarch64__)
2// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
3//
4// Licensed under the OpenSSL license (the "License").  You may not use
5// this file except in compliance with the License.  You can obtain a copy
6// in the file LICENSE in the source distribution or at
7// https://www.openssl.org/source/license.html
8
9// ====================================================================
10// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11// project. The module is, however, dual licensed under OpenSSL and
12// CRYPTOGAMS licenses depending on where you obtain it. For further
13// details see http://www.openssl.org/~appro/cryptogams/.
14//
15// Permission to use under GPLv2 terms is granted.
16// ====================================================================
17//
18// SHA256/512 for ARMv8.
19//
20// Performance in cycles per processed byte and improvement coefficient
21// over code generated with "default" compiler:
22//
23//		SHA256-hw	SHA256(*)	SHA512
24// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
25// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
26// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
27// Denver	2.01		10.5 (+26%)	6.70 (+8%)
28// X-Gene			20.0 (+100%)	12.8 (+300%(***))
29// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
30//
31// (*)	Software SHA256 results are of lesser relevance, presented
32//	mostly for informational purposes.
33// (**)	The result is a trade-off: it's possible to improve it by
34//	10% (or by 1 cycle per round), but at the cost of 20% loss
35//	on Cortex-A53 (or by 4 cycles per round).
36// (***)	Super-impressive coefficients over gcc-generated code are
37//	indication of some compiler "pathology", most notably code
38//	generated with -mgeneral-regs-only is significantly faster
39//	and the gap is only 40-90%.
40
41#ifndef	__KERNEL__
42# include <openssl/arm_arch.h>
43#endif
44
45.text
46
47
// sha512_block_data_order: SHA-512 compression over consecutive 128-byte
// blocks.
//
// Register use on entry, as established by the code below:
//   x0 = pointer to eight 64-bit chaining words (loaded into x20-x27 here,
//        added to and stored back after every block)
//   x1 = input pointer
//   x2 = number of 128-byte blocks (turned into an end pointer: x1 + x2*128)
// These are presumably the first three AAPCS64 arguments of a C prototype
// along the lines of (state, in, num) -- confirm against the declaration.
//
// NOTE(review): the block loop compares against the end pointer only after a
// block has been processed (cmp / b.ne at the bottom of the function), so a
// block count of zero is presumably never passed -- confirm with callers.
//
// Stack frame (x29 = sp right after the first push):
//   [x29,#0]        saved x29, x30
//   [x29,#16..#95]  saved x19-x28
//   [x29,#96]       x0 (state pointer)
//   [x29,#104]      end-of-input pointer
//   [x29,#112]      input cursor, written by .Loop
//   [sp,#0..#31]    32 bytes below x29: scratch slots for message words
48.globl	sha512_block_data_order
49.hidden	sha512_block_data_order
50.type	sha512_block_data_order,%function
51.align	6
52sha512_block_data_order:
	// Allocate the 128-byte frame and save the frame pointer/link register.
53	stp	x29,x30,[sp,#-128]!
54	add	x29,sp,#0
55
	// Save x19-x28; all of them are overwritten below.
56	stp	x19,x20,[sp,#16]
57	stp	x21,x22,[sp,#32]
58	stp	x23,x24,[sp,#48]
59	stp	x25,x26,[sp,#64]
60	stp	x27,x28,[sp,#80]
	// Scratch area for spilled message-schedule words.
61	sub	sp,sp,#4*8
62
63	ldp	x20,x21,[x0]				// load context
64	ldp	x22,x23,[x0,#2*8]
65	ldp	x24,x25,[x0,#4*8]
66	add	x2,x1,x2,lsl#7	// end of input
67	ldp	x26,x27,[x0,#6*8]
	// x30 was saved above, so it is free to serve as the K[] table pointer.
68	adr	x30,.LK512
	// x0 and x2 are reused for message words later; keep copies in the frame.
69	stp	x0,x2,[x29,#96]
70
// .Loop: entered once per 128-byte input block. This part covers rounds 0-15,
// which consume the sixteen input words directly.
//
// Working variables a..h live in x20..x27 (round 0: a=x20 ... h=x27). Values
// are never moved between registers; instead every round shifts the roles
// down by one register (round 1: a=x27, h=x26, ...), so after eight rounds
// the assignment is back where it started.
// Message words X[0..15] are loaded pairwise into x3-x15, x0, x1, x2. The
// number in the comment on each `rev` is the index of that word/round; the
// `rev` byte-swaps the word and is skipped when __ARMEB__ is defined.
// x16/x17 are per-round temporaries. x19 and x28 swap jobs every round: one
// holds K[i], then the (g & ~e) term of Ch, then a^b; the other carries b^c
// from the previous round and is turned into Maj(a,b,c).
// The final "h+=Sigma0(a)" of each round is issued near the top of the next
// round; the commented-out add marks where it logically belongs.
71.Loop:
72	ldp	x3,x4,[x1],#2*8
73	ldr	x19,[x30],#8			// *K++
74	eor	x28,x21,x22				// magic seed
	// Remember the input cursor (already advanced by 16 bytes): x1 is
	// overwritten with a message word in round 13.
75	str	x1,[x29,#112]
76#ifndef	__ARMEB__
77	rev	x3,x3			// 0
78#endif
79	ror	x16,x24,#14
80	add	x27,x27,x19			// h+=K[i]
	// x6 = e ^ ror(e,23); rotated by 18 below this yields ror(e,18)^ror(e,41).
81	eor	x6,x24,x24,ror#23
82	and	x17,x25,x24
83	bic	x19,x26,x24
84	add	x27,x27,x3			// h+=X[i]
85	orr	x17,x17,x19			// Ch(e,f,g)
86	eor	x19,x20,x21			// a^b, b^c in next round
87	eor	x16,x16,x6,ror#18	// Sigma1(e)
88	ror	x6,x20,#28
89	add	x27,x27,x17			// h+=Ch(e,f,g)
	// x17 = a ^ ror(a,5); rotated by 34 below this yields ror(a,34)^ror(a,39).
90	eor	x17,x20,x20,ror#5
91	add	x27,x27,x16			// h+=Sigma1(e)
92	and	x28,x28,x19			// (b^c)&=(a^b)
93	add	x23,x23,x27			// d+=h
94	eor	x28,x28,x21			// Maj(a,b,c)
95	eor	x17,x6,x17,ror#34	// Sigma0(a)
96	add	x27,x27,x28			// h+=Maj(a,b,c)
97	ldr	x28,[x30],#8		// *K++, x19 in next round
98	//add	x27,x27,x17			// h+=Sigma0(a)
99#ifndef	__ARMEB__
100	rev	x4,x4			// 1
101#endif
102	ldp	x5,x6,[x1],#2*8
103	add	x27,x27,x17			// h+=Sigma0(a)
104	ror	x16,x23,#14
105	add	x26,x26,x28			// h+=K[i]
106	eor	x7,x23,x23,ror#23
107	and	x17,x24,x23
108	bic	x28,x25,x23
109	add	x26,x26,x4			// h+=X[i]
110	orr	x17,x17,x28			// Ch(e,f,g)
111	eor	x28,x27,x20			// a^b, b^c in next round
112	eor	x16,x16,x7,ror#18	// Sigma1(e)
113	ror	x7,x27,#28
114	add	x26,x26,x17			// h+=Ch(e,f,g)
115	eor	x17,x27,x27,ror#5
116	add	x26,x26,x16			// h+=Sigma1(e)
117	and	x19,x19,x28			// (b^c)&=(a^b)
118	add	x22,x22,x26			// d+=h
119	eor	x19,x19,x20			// Maj(a,b,c)
120	eor	x17,x7,x17,ror#34	// Sigma0(a)
121	add	x26,x26,x19			// h+=Maj(a,b,c)
122	ldr	x19,[x30],#8		// *K++, x28 in next round
123	//add	x26,x26,x17			// h+=Sigma0(a)
124#ifndef	__ARMEB__
125	rev	x5,x5			// 2
126#endif
127	add	x26,x26,x17			// h+=Sigma0(a)
128	ror	x16,x22,#14
129	add	x25,x25,x19			// h+=K[i]
130	eor	x8,x22,x22,ror#23
131	and	x17,x23,x22
132	bic	x19,x24,x22
133	add	x25,x25,x5			// h+=X[i]
134	orr	x17,x17,x19			// Ch(e,f,g)
135	eor	x19,x26,x27			// a^b, b^c in next round
136	eor	x16,x16,x8,ror#18	// Sigma1(e)
137	ror	x8,x26,#28
138	add	x25,x25,x17			// h+=Ch(e,f,g)
139	eor	x17,x26,x26,ror#5
140	add	x25,x25,x16			// h+=Sigma1(e)
141	and	x28,x28,x19			// (b^c)&=(a^b)
142	add	x21,x21,x25			// d+=h
143	eor	x28,x28,x27			// Maj(a,b,c)
144	eor	x17,x8,x17,ror#34	// Sigma0(a)
145	add	x25,x25,x28			// h+=Maj(a,b,c)
146	ldr	x28,[x30],#8		// *K++, x19 in next round
147	//add	x25,x25,x17			// h+=Sigma0(a)
148#ifndef	__ARMEB__
149	rev	x6,x6			// 3
150#endif
151	ldp	x7,x8,[x1],#2*8
152	add	x25,x25,x17			// h+=Sigma0(a)
153	ror	x16,x21,#14
154	add	x24,x24,x28			// h+=K[i]
155	eor	x9,x21,x21,ror#23
156	and	x17,x22,x21
157	bic	x28,x23,x21
158	add	x24,x24,x6			// h+=X[i]
159	orr	x17,x17,x28			// Ch(e,f,g)
160	eor	x28,x25,x26			// a^b, b^c in next round
161	eor	x16,x16,x9,ror#18	// Sigma1(e)
162	ror	x9,x25,#28
163	add	x24,x24,x17			// h+=Ch(e,f,g)
164	eor	x17,x25,x25,ror#5
165	add	x24,x24,x16			// h+=Sigma1(e)
166	and	x19,x19,x28			// (b^c)&=(a^b)
167	add	x20,x20,x24			// d+=h
168	eor	x19,x19,x26			// Maj(a,b,c)
169	eor	x17,x9,x17,ror#34	// Sigma0(a)
170	add	x24,x24,x19			// h+=Maj(a,b,c)
171	ldr	x19,[x30],#8		// *K++, x28 in next round
172	//add	x24,x24,x17			// h+=Sigma0(a)
173#ifndef	__ARMEB__
174	rev	x7,x7			// 4
175#endif
176	add	x24,x24,x17			// h+=Sigma0(a)
177	ror	x16,x20,#14
178	add	x23,x23,x19			// h+=K[i]
179	eor	x10,x20,x20,ror#23
180	and	x17,x21,x20
181	bic	x19,x22,x20
182	add	x23,x23,x7			// h+=X[i]
183	orr	x17,x17,x19			// Ch(e,f,g)
184	eor	x19,x24,x25			// a^b, b^c in next round
185	eor	x16,x16,x10,ror#18	// Sigma1(e)
186	ror	x10,x24,#28
187	add	x23,x23,x17			// h+=Ch(e,f,g)
188	eor	x17,x24,x24,ror#5
189	add	x23,x23,x16			// h+=Sigma1(e)
190	and	x28,x28,x19			// (b^c)&=(a^b)
191	add	x27,x27,x23			// d+=h
192	eor	x28,x28,x25			// Maj(a,b,c)
193	eor	x17,x10,x17,ror#34	// Sigma0(a)
194	add	x23,x23,x28			// h+=Maj(a,b,c)
195	ldr	x28,[x30],#8		// *K++, x19 in next round
196	//add	x23,x23,x17			// h+=Sigma0(a)
197#ifndef	__ARMEB__
198	rev	x8,x8			// 5
199#endif
200	ldp	x9,x10,[x1],#2*8
201	add	x23,x23,x17			// h+=Sigma0(a)
202	ror	x16,x27,#14
203	add	x22,x22,x28			// h+=K[i]
204	eor	x11,x27,x27,ror#23
205	and	x17,x20,x27
206	bic	x28,x21,x27
207	add	x22,x22,x8			// h+=X[i]
208	orr	x17,x17,x28			// Ch(e,f,g)
209	eor	x28,x23,x24			// a^b, b^c in next round
210	eor	x16,x16,x11,ror#18	// Sigma1(e)
211	ror	x11,x23,#28
212	add	x22,x22,x17			// h+=Ch(e,f,g)
213	eor	x17,x23,x23,ror#5
214	add	x22,x22,x16			// h+=Sigma1(e)
215	and	x19,x19,x28			// (b^c)&=(a^b)
216	add	x26,x26,x22			// d+=h
217	eor	x19,x19,x24			// Maj(a,b,c)
218	eor	x17,x11,x17,ror#34	// Sigma0(a)
219	add	x22,x22,x19			// h+=Maj(a,b,c)
220	ldr	x19,[x30],#8		// *K++, x28 in next round
221	//add	x22,x22,x17			// h+=Sigma0(a)
222#ifndef	__ARMEB__
223	rev	x9,x9			// 6
224#endif
225	add	x22,x22,x17			// h+=Sigma0(a)
226	ror	x16,x26,#14
227	add	x21,x21,x19			// h+=K[i]
228	eor	x12,x26,x26,ror#23
229	and	x17,x27,x26
230	bic	x19,x20,x26
231	add	x21,x21,x9			// h+=X[i]
232	orr	x17,x17,x19			// Ch(e,f,g)
233	eor	x19,x22,x23			// a^b, b^c in next round
234	eor	x16,x16,x12,ror#18	// Sigma1(e)
235	ror	x12,x22,#28
236	add	x21,x21,x17			// h+=Ch(e,f,g)
237	eor	x17,x22,x22,ror#5
238	add	x21,x21,x16			// h+=Sigma1(e)
239	and	x28,x28,x19			// (b^c)&=(a^b)
240	add	x25,x25,x21			// d+=h
241	eor	x28,x28,x23			// Maj(a,b,c)
242	eor	x17,x12,x17,ror#34	// Sigma0(a)
243	add	x21,x21,x28			// h+=Maj(a,b,c)
244	ldr	x28,[x30],#8		// *K++, x19 in next round
245	//add	x21,x21,x17			// h+=Sigma0(a)
246#ifndef	__ARMEB__
247	rev	x10,x10			// 7
248#endif
249	ldp	x11,x12,[x1],#2*8
250	add	x21,x21,x17			// h+=Sigma0(a)
251	ror	x16,x25,#14
252	add	x20,x20,x28			// h+=K[i]
253	eor	x13,x25,x25,ror#23
254	and	x17,x26,x25
255	bic	x28,x27,x25
256	add	x20,x20,x10			// h+=X[i]
257	orr	x17,x17,x28			// Ch(e,f,g)
258	eor	x28,x21,x22			// a^b, b^c in next round
259	eor	x16,x16,x13,ror#18	// Sigma1(e)
260	ror	x13,x21,#28
261	add	x20,x20,x17			// h+=Ch(e,f,g)
262	eor	x17,x21,x21,ror#5
263	add	x20,x20,x16			// h+=Sigma1(e)
264	and	x19,x19,x28			// (b^c)&=(a^b)
265	add	x24,x24,x20			// d+=h
266	eor	x19,x19,x22			// Maj(a,b,c)
267	eor	x17,x13,x17,ror#34	// Sigma0(a)
268	add	x20,x20,x19			// h+=Maj(a,b,c)
269	ldr	x19,[x30],#8		// *K++, x28 in next round
270	//add	x20,x20,x17			// h+=Sigma0(a)
	// Round 8: the a..h register assignment is the same as in round 0 again.
271#ifndef	__ARMEB__
272	rev	x11,x11			// 8
273#endif
274	add	x20,x20,x17			// h+=Sigma0(a)
275	ror	x16,x24,#14
276	add	x27,x27,x19			// h+=K[i]
277	eor	x14,x24,x24,ror#23
278	and	x17,x25,x24
279	bic	x19,x26,x24
280	add	x27,x27,x11			// h+=X[i]
281	orr	x17,x17,x19			// Ch(e,f,g)
282	eor	x19,x20,x21			// a^b, b^c in next round
283	eor	x16,x16,x14,ror#18	// Sigma1(e)
284	ror	x14,x20,#28
285	add	x27,x27,x17			// h+=Ch(e,f,g)
286	eor	x17,x20,x20,ror#5
287	add	x27,x27,x16			// h+=Sigma1(e)
288	and	x28,x28,x19			// (b^c)&=(a^b)
289	add	x23,x23,x27			// d+=h
290	eor	x28,x28,x21			// Maj(a,b,c)
291	eor	x17,x14,x17,ror#34	// Sigma0(a)
292	add	x27,x27,x28			// h+=Maj(a,b,c)
293	ldr	x28,[x30],#8		// *K++, x19 in next round
294	//add	x27,x27,x17			// h+=Sigma0(a)
295#ifndef	__ARMEB__
296	rev	x12,x12			// 9
297#endif
298	ldp	x13,x14,[x1],#2*8
299	add	x27,x27,x17			// h+=Sigma0(a)
300	ror	x16,x23,#14
301	add	x26,x26,x28			// h+=K[i]
302	eor	x15,x23,x23,ror#23
303	and	x17,x24,x23
304	bic	x28,x25,x23
305	add	x26,x26,x12			// h+=X[i]
306	orr	x17,x17,x28			// Ch(e,f,g)
307	eor	x28,x27,x20			// a^b, b^c in next round
308	eor	x16,x16,x15,ror#18	// Sigma1(e)
309	ror	x15,x27,#28
310	add	x26,x26,x17			// h+=Ch(e,f,g)
311	eor	x17,x27,x27,ror#5
312	add	x26,x26,x16			// h+=Sigma1(e)
313	and	x19,x19,x28			// (b^c)&=(a^b)
314	add	x22,x22,x26			// d+=h
315	eor	x19,x19,x20			// Maj(a,b,c)
316	eor	x17,x15,x17,ror#34	// Sigma0(a)
317	add	x26,x26,x19			// h+=Maj(a,b,c)
318	ldr	x19,[x30],#8		// *K++, x28 in next round
319	//add	x26,x26,x17			// h+=Sigma0(a)
320#ifndef	__ARMEB__
321	rev	x13,x13			// 10
322#endif
323	add	x26,x26,x17			// h+=Sigma0(a)
324	ror	x16,x22,#14
325	add	x25,x25,x19			// h+=K[i]
	// x0 (state pointer, saved at [x29,#96]) is used as a temporary here and
	// receives a message word in the next round.
326	eor	x0,x22,x22,ror#23
327	and	x17,x23,x22
328	bic	x19,x24,x22
329	add	x25,x25,x13			// h+=X[i]
330	orr	x17,x17,x19			// Ch(e,f,g)
331	eor	x19,x26,x27			// a^b, b^c in next round
332	eor	x16,x16,x0,ror#18	// Sigma1(e)
333	ror	x0,x26,#28
334	add	x25,x25,x17			// h+=Ch(e,f,g)
335	eor	x17,x26,x26,ror#5
336	add	x25,x25,x16			// h+=Sigma1(e)
337	and	x28,x28,x19			// (b^c)&=(a^b)
338	add	x21,x21,x25			// d+=h
339	eor	x28,x28,x27			// Maj(a,b,c)
340	eor	x17,x0,x17,ror#34	// Sigma0(a)
341	add	x25,x25,x28			// h+=Maj(a,b,c)
342	ldr	x28,[x30],#8		// *K++, x19 in next round
343	//add	x25,x25,x17			// h+=Sigma0(a)
344#ifndef	__ARMEB__
345	rev	x14,x14			// 11
346#endif
347	ldp	x15,x0,[x1],#2*8
348	add	x25,x25,x17			// h+=Sigma0(a)
	// Every remaining register holds a live message word, so from this round
	// on the register used as the Sigma temporary is first parked in the
	// scratch area: X[3] (x6) goes to [sp,#24].
349	str	x6,[sp,#24]
350	ror	x16,x21,#14
351	add	x24,x24,x28			// h+=K[i]
352	eor	x6,x21,x21,ror#23
353	and	x17,x22,x21
354	bic	x28,x23,x21
355	add	x24,x24,x14			// h+=X[i]
356	orr	x17,x17,x28			// Ch(e,f,g)
357	eor	x28,x25,x26			// a^b, b^c in next round
358	eor	x16,x16,x6,ror#18	// Sigma1(e)
359	ror	x6,x25,#28
360	add	x24,x24,x17			// h+=Ch(e,f,g)
361	eor	x17,x25,x25,ror#5
362	add	x24,x24,x16			// h+=Sigma1(e)
363	and	x19,x19,x28			// (b^c)&=(a^b)
364	add	x20,x20,x24			// d+=h
365	eor	x19,x19,x26			// Maj(a,b,c)
366	eor	x17,x6,x17,ror#34	// Sigma0(a)
367	add	x24,x24,x19			// h+=Maj(a,b,c)
368	ldr	x19,[x30],#8		// *K++, x28 in next round
369	//add	x24,x24,x17			// h+=Sigma0(a)
370#ifndef	__ARMEB__
371	rev	x15,x15			// 12
372#endif
373	add	x24,x24,x17			// h+=Sigma0(a)
	// Park X[4] (x7) at [sp,#0]; x7 is the temporary of this round.
374	str	x7,[sp,#0]
375	ror	x16,x20,#14
376	add	x23,x23,x19			// h+=K[i]
377	eor	x7,x20,x20,ror#23
378	and	x17,x21,x20
379	bic	x19,x22,x20
380	add	x23,x23,x15			// h+=X[i]
381	orr	x17,x17,x19			// Ch(e,f,g)
382	eor	x19,x24,x25			// a^b, b^c in next round
383	eor	x16,x16,x7,ror#18	// Sigma1(e)
384	ror	x7,x24,#28
385	add	x23,x23,x17			// h+=Ch(e,f,g)
386	eor	x17,x24,x24,ror#5
387	add	x23,x23,x16			// h+=Sigma1(e)
388	and	x28,x28,x19			// (b^c)&=(a^b)
389	add	x27,x27,x23			// d+=h
390	eor	x28,x28,x25			// Maj(a,b,c)
391	eor	x17,x7,x17,ror#34	// Sigma0(a)
392	add	x23,x23,x28			// h+=Maj(a,b,c)
393	ldr	x28,[x30],#8		// *K++, x19 in next round
394	//add	x23,x23,x17			// h+=Sigma0(a)
395#ifndef	__ARMEB__
396	rev	x0,x0			// 13
397#endif
	// Last two input words. x1 stops being the input cursor here; its value
	// was saved at [x29,#112] at the top of .Loop. x2 (end pointer) is
	// likewise overwritten; its copy is at [x29,#104].
398	ldp	x1,x2,[x1]
399	add	x23,x23,x17			// h+=Sigma0(a)
	// Park X[5] (x8) at [sp,#8].
400	str	x8,[sp,#8]
401	ror	x16,x27,#14
402	add	x22,x22,x28			// h+=K[i]
403	eor	x8,x27,x27,ror#23
404	and	x17,x20,x27
405	bic	x28,x21,x27
406	add	x22,x22,x0			// h+=X[i]
407	orr	x17,x17,x28			// Ch(e,f,g)
408	eor	x28,x23,x24			// a^b, b^c in next round
409	eor	x16,x16,x8,ror#18	// Sigma1(e)
410	ror	x8,x23,#28
411	add	x22,x22,x17			// h+=Ch(e,f,g)
412	eor	x17,x23,x23,ror#5
413	add	x22,x22,x16			// h+=Sigma1(e)
414	and	x19,x19,x28			// (b^c)&=(a^b)
415	add	x26,x26,x22			// d+=h
416	eor	x19,x19,x24			// Maj(a,b,c)
417	eor	x17,x8,x17,ror#34	// Sigma0(a)
418	add	x22,x22,x19			// h+=Maj(a,b,c)
419	ldr	x19,[x30],#8		// *K++, x28 in next round
420	//add	x22,x22,x17			// h+=Sigma0(a)
421#ifndef	__ARMEB__
422	rev	x1,x1			// 14
423#endif
	// Bring X[3] back into x6 and park X[6] (x9) at [sp,#16].
424	ldr	x6,[sp,#24]
425	add	x22,x22,x17			// h+=Sigma0(a)
426	str	x9,[sp,#16]
427	ror	x16,x26,#14
428	add	x21,x21,x19			// h+=K[i]
429	eor	x9,x26,x26,ror#23
430	and	x17,x27,x26
431	bic	x19,x20,x26
432	add	x21,x21,x1			// h+=X[i]
433	orr	x17,x17,x19			// Ch(e,f,g)
434	eor	x19,x22,x23			// a^b, b^c in next round
435	eor	x16,x16,x9,ror#18	// Sigma1(e)
436	ror	x9,x22,#28
437	add	x21,x21,x17			// h+=Ch(e,f,g)
438	eor	x17,x22,x22,ror#5
439	add	x21,x21,x16			// h+=Sigma1(e)
440	and	x28,x28,x19			// (b^c)&=(a^b)
441	add	x25,x25,x21			// d+=h
442	eor	x28,x28,x23			// Maj(a,b,c)
443	eor	x17,x9,x17,ror#34	// Sigma0(a)
444	add	x21,x21,x28			// h+=Maj(a,b,c)
445	ldr	x28,[x30],#8		// *K++, x19 in next round
446	//add	x21,x21,x17			// h+=Sigma0(a)
	// Round 15 has the same shape as the rounds in .Loop_16_xx: Sigma1 and
	// Sigma0 are built from three separate rotations (14/18/41 and 28/34/39),
	// and the message schedule starts: x3 = X[0] + sigma0(X[1]) + X[9] +
	// sigma1(X[14]) is the word used by round 16. The temporaries are
	// x9, x8 and x10; X[7] (x10) is parked at [sp,#24], X[4] returns to x7.
447#ifndef	__ARMEB__
448	rev	x2,x2			// 15
449#endif
450	ldr	x7,[sp,#0]
451	add	x21,x21,x17			// h+=Sigma0(a)
452	str	x10,[sp,#24]
453	ror	x16,x25,#14
454	add	x20,x20,x28			// h+=K[i]
455	ror	x9,x4,#1
456	and	x17,x26,x25
457	ror	x8,x1,#19
458	bic	x28,x27,x25
459	ror	x10,x21,#28
460	add	x20,x20,x2			// h+=X[i]
461	eor	x16,x16,x25,ror#18
462	eor	x9,x9,x4,ror#8
463	orr	x17,x17,x28			// Ch(e,f,g)
464	eor	x28,x21,x22			// a^b, b^c in next round
465	eor	x16,x16,x25,ror#41	// Sigma1(e)
466	eor	x10,x10,x21,ror#34
467	add	x20,x20,x17			// h+=Ch(e,f,g)
468	and	x19,x19,x28			// (b^c)&=(a^b)
469	eor	x8,x8,x1,ror#61
470	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
471	add	x20,x20,x16			// h+=Sigma1(e)
472	eor	x19,x19,x22			// Maj(a,b,c)
473	eor	x17,x10,x21,ror#39	// Sigma0(a)
474	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
475	add	x3,x3,x12
476	add	x24,x24,x20			// d+=h
477	add	x20,x20,x19			// h+=Maj(a,b,c)
478	ldr	x19,[x30],#8		// *K++, x28 in next round
479	add	x3,x3,x9
480	add	x20,x20,x17			// h+=Sigma0(a)
481	add	x3,x3,x8
// .Loop_16_xx: rounds 16-79, sixteen rounds per pass.
// Each round interleaves two computations:
//   * a SHA-512 round that consumes the message word completed in the
//     previous round (same a..h role rotation as in rounds 0-15);
//   * the in-place schedule update that prepares the next word:
//       X[i] += sigma0(X[i+1]) + X[i+9] + sigma1(X[i+14])
//     with  sigma0(x) = ror(x,1)  ^ ror(x,8)  ^ (x >> 7)
//           sigma1(x) = ror(x,19) ^ ror(x,61) ^ (x >> 6)
// The sixteen words rotate through x3-x15, x0, x1, x2. Three of those
// registers are borrowed as temporaries in every round, so each round starts
// by reloading one word from the 32-byte scratch area at sp and parking
// another one there.
// The pass repeats until the K value fetched in its last round is zero, i.e.
// the terminator at the end of .LK512.
482.Loop_16_xx:
	// consume x3; prepare x4 += sigma0(x5) + x13 + sigma1(x2)
483	ldr	x8,[sp,#8]
484	str	x11,[sp,#0]
485	ror	x16,x24,#14
486	add	x27,x27,x19			// h+=K[i]
487	ror	x10,x5,#1
488	and	x17,x25,x24
489	ror	x9,x2,#19
490	bic	x19,x26,x24
491	ror	x11,x20,#28
492	add	x27,x27,x3			// h+=X[i]
493	eor	x16,x16,x24,ror#18
494	eor	x10,x10,x5,ror#8
495	orr	x17,x17,x19			// Ch(e,f,g)
496	eor	x19,x20,x21			// a^b, b^c in next round
497	eor	x16,x16,x24,ror#41	// Sigma1(e)
498	eor	x11,x11,x20,ror#34
499	add	x27,x27,x17			// h+=Ch(e,f,g)
500	and	x28,x28,x19			// (b^c)&=(a^b)
501	eor	x9,x9,x2,ror#61
502	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
503	add	x27,x27,x16			// h+=Sigma1(e)
504	eor	x28,x28,x21			// Maj(a,b,c)
505	eor	x17,x11,x20,ror#39	// Sigma0(a)
506	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
507	add	x4,x4,x13
508	add	x23,x23,x27			// d+=h
509	add	x27,x27,x28			// h+=Maj(a,b,c)
510	ldr	x28,[x30],#8		// *K++, x19 in next round
511	add	x4,x4,x10
512	add	x27,x27,x17			// h+=Sigma0(a)
513	add	x4,x4,x9
	// consume x4; prepare x5 += sigma0(x6) + x14 + sigma1(x3)
514	ldr	x9,[sp,#16]
515	str	x12,[sp,#8]
516	ror	x16,x23,#14
517	add	x26,x26,x28			// h+=K[i]
518	ror	x11,x6,#1
519	and	x17,x24,x23
520	ror	x10,x3,#19
521	bic	x28,x25,x23
522	ror	x12,x27,#28
523	add	x26,x26,x4			// h+=X[i]
524	eor	x16,x16,x23,ror#18
525	eor	x11,x11,x6,ror#8
526	orr	x17,x17,x28			// Ch(e,f,g)
527	eor	x28,x27,x20			// a^b, b^c in next round
528	eor	x16,x16,x23,ror#41	// Sigma1(e)
529	eor	x12,x12,x27,ror#34
530	add	x26,x26,x17			// h+=Ch(e,f,g)
531	and	x19,x19,x28			// (b^c)&=(a^b)
532	eor	x10,x10,x3,ror#61
533	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
534	add	x26,x26,x16			// h+=Sigma1(e)
535	eor	x19,x19,x20			// Maj(a,b,c)
536	eor	x17,x12,x27,ror#39	// Sigma0(a)
537	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
538	add	x5,x5,x14
539	add	x22,x22,x26			// d+=h
540	add	x26,x26,x19			// h+=Maj(a,b,c)
541	ldr	x19,[x30],#8		// *K++, x28 in next round
542	add	x5,x5,x11
543	add	x26,x26,x17			// h+=Sigma0(a)
544	add	x5,x5,x10
	// consume x5; prepare x6 += sigma0(x7) + x15 + sigma1(x4)
545	ldr	x10,[sp,#24]
546	str	x13,[sp,#16]
547	ror	x16,x22,#14
548	add	x25,x25,x19			// h+=K[i]
549	ror	x12,x7,#1
550	and	x17,x23,x22
551	ror	x11,x4,#19
552	bic	x19,x24,x22
553	ror	x13,x26,#28
554	add	x25,x25,x5			// h+=X[i]
555	eor	x16,x16,x22,ror#18
556	eor	x12,x12,x7,ror#8
557	orr	x17,x17,x19			// Ch(e,f,g)
558	eor	x19,x26,x27			// a^b, b^c in next round
559	eor	x16,x16,x22,ror#41	// Sigma1(e)
560	eor	x13,x13,x26,ror#34
561	add	x25,x25,x17			// h+=Ch(e,f,g)
562	and	x28,x28,x19			// (b^c)&=(a^b)
563	eor	x11,x11,x4,ror#61
564	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
565	add	x25,x25,x16			// h+=Sigma1(e)
566	eor	x28,x28,x27			// Maj(a,b,c)
567	eor	x17,x13,x26,ror#39	// Sigma0(a)
568	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
569	add	x6,x6,x15
570	add	x21,x21,x25			// d+=h
571	add	x25,x25,x28			// h+=Maj(a,b,c)
572	ldr	x28,[x30],#8		// *K++, x19 in next round
573	add	x6,x6,x12
574	add	x25,x25,x17			// h+=Sigma0(a)
575	add	x6,x6,x11
	// consume x6; prepare x7 += sigma0(x8) + x0 + sigma1(x5)
576	ldr	x11,[sp,#0]
577	str	x14,[sp,#24]
578	ror	x16,x21,#14
579	add	x24,x24,x28			// h+=K[i]
580	ror	x13,x8,#1
581	and	x17,x22,x21
582	ror	x12,x5,#19
583	bic	x28,x23,x21
584	ror	x14,x25,#28
585	add	x24,x24,x6			// h+=X[i]
586	eor	x16,x16,x21,ror#18
587	eor	x13,x13,x8,ror#8
588	orr	x17,x17,x28			// Ch(e,f,g)
589	eor	x28,x25,x26			// a^b, b^c in next round
590	eor	x16,x16,x21,ror#41	// Sigma1(e)
591	eor	x14,x14,x25,ror#34
592	add	x24,x24,x17			// h+=Ch(e,f,g)
593	and	x19,x19,x28			// (b^c)&=(a^b)
594	eor	x12,x12,x5,ror#61
595	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
596	add	x24,x24,x16			// h+=Sigma1(e)
597	eor	x19,x19,x26			// Maj(a,b,c)
598	eor	x17,x14,x25,ror#39	// Sigma0(a)
599	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
600	add	x7,x7,x0
601	add	x20,x20,x24			// d+=h
602	add	x24,x24,x19			// h+=Maj(a,b,c)
603	ldr	x19,[x30],#8		// *K++, x28 in next round
604	add	x7,x7,x13
605	add	x24,x24,x17			// h+=Sigma0(a)
606	add	x7,x7,x12
	// consume x7; prepare x8 += sigma0(x9) + x1 + sigma1(x6)
607	ldr	x12,[sp,#8]
608	str	x15,[sp,#0]
609	ror	x16,x20,#14
610	add	x23,x23,x19			// h+=K[i]
611	ror	x14,x9,#1
612	and	x17,x21,x20
613	ror	x13,x6,#19
614	bic	x19,x22,x20
615	ror	x15,x24,#28
616	add	x23,x23,x7			// h+=X[i]
617	eor	x16,x16,x20,ror#18
618	eor	x14,x14,x9,ror#8
619	orr	x17,x17,x19			// Ch(e,f,g)
620	eor	x19,x24,x25			// a^b, b^c in next round
621	eor	x16,x16,x20,ror#41	// Sigma1(e)
622	eor	x15,x15,x24,ror#34
623	add	x23,x23,x17			// h+=Ch(e,f,g)
624	and	x28,x28,x19			// (b^c)&=(a^b)
625	eor	x13,x13,x6,ror#61
626	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
627	add	x23,x23,x16			// h+=Sigma1(e)
628	eor	x28,x28,x25			// Maj(a,b,c)
629	eor	x17,x15,x24,ror#39	// Sigma0(a)
630	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
631	add	x8,x8,x1
632	add	x27,x27,x23			// d+=h
633	add	x23,x23,x28			// h+=Maj(a,b,c)
634	ldr	x28,[x30],#8		// *K++, x19 in next round
635	add	x8,x8,x14
636	add	x23,x23,x17			// h+=Sigma0(a)
637	add	x8,x8,x13
	// consume x8; prepare x9 += sigma0(x10) + x2 + sigma1(x7)
638	ldr	x13,[sp,#16]
639	str	x0,[sp,#8]
640	ror	x16,x27,#14
641	add	x22,x22,x28			// h+=K[i]
642	ror	x15,x10,#1
643	and	x17,x20,x27
644	ror	x14,x7,#19
645	bic	x28,x21,x27
646	ror	x0,x23,#28
647	add	x22,x22,x8			// h+=X[i]
648	eor	x16,x16,x27,ror#18
649	eor	x15,x15,x10,ror#8
650	orr	x17,x17,x28			// Ch(e,f,g)
651	eor	x28,x23,x24			// a^b, b^c in next round
652	eor	x16,x16,x27,ror#41	// Sigma1(e)
653	eor	x0,x0,x23,ror#34
654	add	x22,x22,x17			// h+=Ch(e,f,g)
655	and	x19,x19,x28			// (b^c)&=(a^b)
656	eor	x14,x14,x7,ror#61
657	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
658	add	x22,x22,x16			// h+=Sigma1(e)
659	eor	x19,x19,x24			// Maj(a,b,c)
660	eor	x17,x0,x23,ror#39	// Sigma0(a)
661	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
662	add	x9,x9,x2
663	add	x26,x26,x22			// d+=h
664	add	x22,x22,x19			// h+=Maj(a,b,c)
665	ldr	x19,[x30],#8		// *K++, x28 in next round
666	add	x9,x9,x15
667	add	x22,x22,x17			// h+=Sigma0(a)
668	add	x9,x9,x14
	// consume x9; prepare x10 += sigma0(x11) + x3 + sigma1(x8)
669	ldr	x14,[sp,#24]
670	str	x1,[sp,#16]
671	ror	x16,x26,#14
672	add	x21,x21,x19			// h+=K[i]
673	ror	x0,x11,#1
674	and	x17,x27,x26
675	ror	x15,x8,#19
676	bic	x19,x20,x26
677	ror	x1,x22,#28
678	add	x21,x21,x9			// h+=X[i]
679	eor	x16,x16,x26,ror#18
680	eor	x0,x0,x11,ror#8
681	orr	x17,x17,x19			// Ch(e,f,g)
682	eor	x19,x22,x23			// a^b, b^c in next round
683	eor	x16,x16,x26,ror#41	// Sigma1(e)
684	eor	x1,x1,x22,ror#34
685	add	x21,x21,x17			// h+=Ch(e,f,g)
686	and	x28,x28,x19			// (b^c)&=(a^b)
687	eor	x15,x15,x8,ror#61
688	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
689	add	x21,x21,x16			// h+=Sigma1(e)
690	eor	x28,x28,x23			// Maj(a,b,c)
691	eor	x17,x1,x22,ror#39	// Sigma0(a)
692	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
693	add	x10,x10,x3
694	add	x25,x25,x21			// d+=h
695	add	x21,x21,x28			// h+=Maj(a,b,c)
696	ldr	x28,[x30],#8		// *K++, x19 in next round
697	add	x10,x10,x0
698	add	x21,x21,x17			// h+=Sigma0(a)
699	add	x10,x10,x15
	// consume x10; prepare x11 += sigma0(x12) + x4 + sigma1(x9)
700	ldr	x15,[sp,#0]
701	str	x2,[sp,#24]
702	ror	x16,x25,#14
703	add	x20,x20,x28			// h+=K[i]
704	ror	x1,x12,#1
705	and	x17,x26,x25
706	ror	x0,x9,#19
707	bic	x28,x27,x25
708	ror	x2,x21,#28
709	add	x20,x20,x10			// h+=X[i]
710	eor	x16,x16,x25,ror#18
711	eor	x1,x1,x12,ror#8
712	orr	x17,x17,x28			// Ch(e,f,g)
713	eor	x28,x21,x22			// a^b, b^c in next round
714	eor	x16,x16,x25,ror#41	// Sigma1(e)
715	eor	x2,x2,x21,ror#34
716	add	x20,x20,x17			// h+=Ch(e,f,g)
717	and	x19,x19,x28			// (b^c)&=(a^b)
718	eor	x0,x0,x9,ror#61
719	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
720	add	x20,x20,x16			// h+=Sigma1(e)
721	eor	x19,x19,x22			// Maj(a,b,c)
722	eor	x17,x2,x21,ror#39	// Sigma0(a)
723	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
724	add	x11,x11,x4
725	add	x24,x24,x20			// d+=h
726	add	x20,x20,x19			// h+=Maj(a,b,c)
727	ldr	x19,[x30],#8		// *K++, x28 in next round
728	add	x11,x11,x1
729	add	x20,x20,x17			// h+=Sigma0(a)
730	add	x11,x11,x0
	// consume x11; prepare x12 += sigma0(x13) + x5 + sigma1(x10)
731	ldr	x0,[sp,#8]
732	str	x3,[sp,#0]
733	ror	x16,x24,#14
734	add	x27,x27,x19			// h+=K[i]
735	ror	x2,x13,#1
736	and	x17,x25,x24
737	ror	x1,x10,#19
738	bic	x19,x26,x24
739	ror	x3,x20,#28
740	add	x27,x27,x11			// h+=X[i]
741	eor	x16,x16,x24,ror#18
742	eor	x2,x2,x13,ror#8
743	orr	x17,x17,x19			// Ch(e,f,g)
744	eor	x19,x20,x21			// a^b, b^c in next round
745	eor	x16,x16,x24,ror#41	// Sigma1(e)
746	eor	x3,x3,x20,ror#34
747	add	x27,x27,x17			// h+=Ch(e,f,g)
748	and	x28,x28,x19			// (b^c)&=(a^b)
749	eor	x1,x1,x10,ror#61
750	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
751	add	x27,x27,x16			// h+=Sigma1(e)
752	eor	x28,x28,x21			// Maj(a,b,c)
753	eor	x17,x3,x20,ror#39	// Sigma0(a)
754	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
755	add	x12,x12,x5
756	add	x23,x23,x27			// d+=h
757	add	x27,x27,x28			// h+=Maj(a,b,c)
758	ldr	x28,[x30],#8		// *K++, x19 in next round
759	add	x12,x12,x2
760	add	x27,x27,x17			// h+=Sigma0(a)
761	add	x12,x12,x1
	// consume x12; prepare x13 += sigma0(x14) + x6 + sigma1(x11)
762	ldr	x1,[sp,#16]
763	str	x4,[sp,#8]
764	ror	x16,x23,#14
765	add	x26,x26,x28			// h+=K[i]
766	ror	x3,x14,#1
767	and	x17,x24,x23
768	ror	x2,x11,#19
769	bic	x28,x25,x23
770	ror	x4,x27,#28
771	add	x26,x26,x12			// h+=X[i]
772	eor	x16,x16,x23,ror#18
773	eor	x3,x3,x14,ror#8
774	orr	x17,x17,x28			// Ch(e,f,g)
775	eor	x28,x27,x20			// a^b, b^c in next round
776	eor	x16,x16,x23,ror#41	// Sigma1(e)
777	eor	x4,x4,x27,ror#34
778	add	x26,x26,x17			// h+=Ch(e,f,g)
779	and	x19,x19,x28			// (b^c)&=(a^b)
780	eor	x2,x2,x11,ror#61
781	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
782	add	x26,x26,x16			// h+=Sigma1(e)
783	eor	x19,x19,x20			// Maj(a,b,c)
784	eor	x17,x4,x27,ror#39	// Sigma0(a)
785	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
786	add	x13,x13,x6
787	add	x22,x22,x26			// d+=h
788	add	x26,x26,x19			// h+=Maj(a,b,c)
789	ldr	x19,[x30],#8		// *K++, x28 in next round
790	add	x13,x13,x3
791	add	x26,x26,x17			// h+=Sigma0(a)
792	add	x13,x13,x2
	// consume x13; prepare x14 += sigma0(x15) + x7 + sigma1(x12)
793	ldr	x2,[sp,#24]
794	str	x5,[sp,#16]
795	ror	x16,x22,#14
796	add	x25,x25,x19			// h+=K[i]
797	ror	x4,x15,#1
798	and	x17,x23,x22
799	ror	x3,x12,#19
800	bic	x19,x24,x22
801	ror	x5,x26,#28
802	add	x25,x25,x13			// h+=X[i]
803	eor	x16,x16,x22,ror#18
804	eor	x4,x4,x15,ror#8
805	orr	x17,x17,x19			// Ch(e,f,g)
806	eor	x19,x26,x27			// a^b, b^c in next round
807	eor	x16,x16,x22,ror#41	// Sigma1(e)
808	eor	x5,x5,x26,ror#34
809	add	x25,x25,x17			// h+=Ch(e,f,g)
810	and	x28,x28,x19			// (b^c)&=(a^b)
811	eor	x3,x3,x12,ror#61
812	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
813	add	x25,x25,x16			// h+=Sigma1(e)
814	eor	x28,x28,x27			// Maj(a,b,c)
815	eor	x17,x5,x26,ror#39	// Sigma0(a)
816	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
817	add	x14,x14,x7
818	add	x21,x21,x25			// d+=h
819	add	x25,x25,x28			// h+=Maj(a,b,c)
820	ldr	x28,[x30],#8		// *K++, x19 in next round
821	add	x14,x14,x4
822	add	x25,x25,x17			// h+=Sigma0(a)
823	add	x14,x14,x3
	// consume x14; prepare x15 += sigma0(x0) + x8 + sigma1(x13)
824	ldr	x3,[sp,#0]
825	str	x6,[sp,#24]
826	ror	x16,x21,#14
827	add	x24,x24,x28			// h+=K[i]
828	ror	x5,x0,#1
829	and	x17,x22,x21
830	ror	x4,x13,#19
831	bic	x28,x23,x21
832	ror	x6,x25,#28
833	add	x24,x24,x14			// h+=X[i]
834	eor	x16,x16,x21,ror#18
835	eor	x5,x5,x0,ror#8
836	orr	x17,x17,x28			// Ch(e,f,g)
837	eor	x28,x25,x26			// a^b, b^c in next round
838	eor	x16,x16,x21,ror#41	// Sigma1(e)
839	eor	x6,x6,x25,ror#34
840	add	x24,x24,x17			// h+=Ch(e,f,g)
841	and	x19,x19,x28			// (b^c)&=(a^b)
842	eor	x4,x4,x13,ror#61
843	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
844	add	x24,x24,x16			// h+=Sigma1(e)
845	eor	x19,x19,x26			// Maj(a,b,c)
846	eor	x17,x6,x25,ror#39	// Sigma0(a)
847	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
848	add	x15,x15,x8
849	add	x20,x20,x24			// d+=h
850	add	x24,x24,x19			// h+=Maj(a,b,c)
851	ldr	x19,[x30],#8		// *K++, x28 in next round
852	add	x15,x15,x5
853	add	x24,x24,x17			// h+=Sigma0(a)
854	add	x15,x15,x4
	// consume x15; prepare x0 += sigma0(x1) + x9 + sigma1(x14)
855	ldr	x4,[sp,#8]
856	str	x7,[sp,#0]
857	ror	x16,x20,#14
858	add	x23,x23,x19			// h+=K[i]
859	ror	x6,x1,#1
860	and	x17,x21,x20
861	ror	x5,x14,#19
862	bic	x19,x22,x20
863	ror	x7,x24,#28
864	add	x23,x23,x15			// h+=X[i]
865	eor	x16,x16,x20,ror#18
866	eor	x6,x6,x1,ror#8
867	orr	x17,x17,x19			// Ch(e,f,g)
868	eor	x19,x24,x25			// a^b, b^c in next round
869	eor	x16,x16,x20,ror#41	// Sigma1(e)
870	eor	x7,x7,x24,ror#34
871	add	x23,x23,x17			// h+=Ch(e,f,g)
872	and	x28,x28,x19			// (b^c)&=(a^b)
873	eor	x5,x5,x14,ror#61
874	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
875	add	x23,x23,x16			// h+=Sigma1(e)
876	eor	x28,x28,x25			// Maj(a,b,c)
877	eor	x17,x7,x24,ror#39	// Sigma0(a)
878	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
879	add	x0,x0,x9
880	add	x27,x27,x23			// d+=h
881	add	x23,x23,x28			// h+=Maj(a,b,c)
882	ldr	x28,[x30],#8		// *K++, x19 in next round
883	add	x0,x0,x6
884	add	x23,x23,x17			// h+=Sigma0(a)
885	add	x0,x0,x5
	// consume x0; prepare x1 += sigma0(x2) + x10 + sigma1(x15)
886	ldr	x5,[sp,#16]
887	str	x8,[sp,#8]
888	ror	x16,x27,#14
889	add	x22,x22,x28			// h+=K[i]
890	ror	x7,x2,#1
891	and	x17,x20,x27
892	ror	x6,x15,#19
893	bic	x28,x21,x27
894	ror	x8,x23,#28
895	add	x22,x22,x0			// h+=X[i]
896	eor	x16,x16,x27,ror#18
897	eor	x7,x7,x2,ror#8
898	orr	x17,x17,x28			// Ch(e,f,g)
899	eor	x28,x23,x24			// a^b, b^c in next round
900	eor	x16,x16,x27,ror#41	// Sigma1(e)
901	eor	x8,x8,x23,ror#34
902	add	x22,x22,x17			// h+=Ch(e,f,g)
903	and	x19,x19,x28			// (b^c)&=(a^b)
904	eor	x6,x6,x15,ror#61
905	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
906	add	x22,x22,x16			// h+=Sigma1(e)
907	eor	x19,x19,x24			// Maj(a,b,c)
908	eor	x17,x8,x23,ror#39	// Sigma0(a)
909	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
910	add	x1,x1,x10
911	add	x26,x26,x22			// d+=h
912	add	x22,x22,x19			// h+=Maj(a,b,c)
913	ldr	x19,[x30],#8		// *K++, x28 in next round
914	add	x1,x1,x7
915	add	x22,x22,x17			// h+=Sigma0(a)
916	add	x1,x1,x6
	// consume x1; prepare x2 += sigma0(x3) + x11 + sigma1(x0)
917	ldr	x6,[sp,#24]
918	str	x9,[sp,#16]
919	ror	x16,x26,#14
920	add	x21,x21,x19			// h+=K[i]
921	ror	x8,x3,#1
922	and	x17,x27,x26
923	ror	x7,x0,#19
924	bic	x19,x20,x26
925	ror	x9,x22,#28
926	add	x21,x21,x1			// h+=X[i]
927	eor	x16,x16,x26,ror#18
928	eor	x8,x8,x3,ror#8
929	orr	x17,x17,x19			// Ch(e,f,g)
930	eor	x19,x22,x23			// a^b, b^c in next round
931	eor	x16,x16,x26,ror#41	// Sigma1(e)
932	eor	x9,x9,x22,ror#34
933	add	x21,x21,x17			// h+=Ch(e,f,g)
934	and	x28,x28,x19			// (b^c)&=(a^b)
935	eor	x7,x7,x0,ror#61
936	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
937	add	x21,x21,x16			// h+=Sigma1(e)
938	eor	x28,x28,x23			// Maj(a,b,c)
939	eor	x17,x9,x22,ror#39	// Sigma0(a)
940	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
941	add	x2,x2,x11
942	add	x25,x25,x21			// d+=h
943	add	x21,x21,x28			// h+=Maj(a,b,c)
944	ldr	x28,[x30],#8		// *K++, x19 in next round
945	add	x2,x2,x8
946	add	x21,x21,x17			// h+=Sigma0(a)
947	add	x2,x2,x7
	// consume x2; prepare x3 += sigma0(x4) + x12 + sigma1(x1) for the first
	// round of the next pass
948	ldr	x7,[sp,#0]
949	str	x10,[sp,#24]
950	ror	x16,x25,#14
951	add	x20,x20,x28			// h+=K[i]
952	ror	x9,x4,#1
953	and	x17,x26,x25
954	ror	x8,x1,#19
955	bic	x28,x27,x25
956	ror	x10,x21,#28
957	add	x20,x20,x2			// h+=X[i]
958	eor	x16,x16,x25,ror#18
959	eor	x9,x9,x4,ror#8
960	orr	x17,x17,x28			// Ch(e,f,g)
961	eor	x28,x21,x22			// a^b, b^c in next round
962	eor	x16,x16,x25,ror#41	// Sigma1(e)
963	eor	x10,x10,x21,ror#34
964	add	x20,x20,x17			// h+=Ch(e,f,g)
965	and	x19,x19,x28			// (b^c)&=(a^b)
966	eor	x8,x8,x1,ror#61
967	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
968	add	x20,x20,x16			// h+=Sigma1(e)
969	eor	x19,x19,x22			// Maj(a,b,c)
970	eor	x17,x10,x21,ror#39	// Sigma0(a)
971	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
972	add	x3,x3,x12
973	add	x24,x24,x20			// d+=h
974	add	x20,x20,x19			// h+=Maj(a,b,c)
975	ldr	x19,[x30],#8		// *K++, x28 in next round
976	add	x3,x3,x9
977	add	x20,x20,x17			// h+=Sigma0(a)
978	add	x3,x3,x8
	// x19 is the K value just fetched: non-zero means more rounds remain,
	// zero is the table terminator.
979	cbnz	x19,.Loop_16_xx
980
// All rounds of this block are done: restore the pointers that were
// overwritten by message words, add the working variables x20-x27 to the
// previous chaining value, store the result, and loop while input remains.
	// x0 = state pointer, x2 = end-of-input pointer, x1 = saved input cursor.
981	ldp	x0,x2,[x29,#96]
982	ldr	x1,[x29,#112]
	// 648 = 81 quads * 8 bytes (80 constants + terminator): x30 points at the
	// start of .LK512 again.
983	sub	x30,x30,#648		// rewind
984
985	ldp	x3,x4,[x0]
986	ldp	x5,x6,[x0,#2*8]
	// The cursor was saved after the first 16 bytes of the block had been
	// read, so the remaining 14 words are skipped here.
987	add	x1,x1,#14*8			// advance input pointer
988	ldp	x7,x8,[x0,#4*8]
989	add	x20,x20,x3
990	ldp	x9,x10,[x0,#6*8]
991	add	x21,x21,x4
992	add	x22,x22,x5
993	add	x23,x23,x6
994	stp	x20,x21,[x0]
995	add	x24,x24,x7
996	add	x25,x25,x8
997	stp	x22,x23,[x0,#2*8]
998	add	x26,x26,x9
999	add	x27,x27,x10
	// Flags set here are consumed by b.ne below; the stp instructions in
	// between do not modify them.
1000	cmp	x1,x2
1001	stp	x24,x25,[x0,#4*8]
1002	stp	x26,x27,[x0,#6*8]
1003	b.ne	.Loop
1004
	// Epilogue: restore x19-x28 relative to x29, release the 32-byte scratch
	// area, then pop x29/x30 together with the 128-byte frame. Reloading x30
	// puts the return address back after its use as the K pointer.
1005	ldp	x19,x20,[x29,#16]
1006	add	sp,sp,#4*8
1007	ldp	x21,x22,[x29,#32]
1008	ldp	x23,x24,[x29,#48]
1009	ldp	x25,x26,[x29,#64]
1010	ldp	x27,x28,[x29,#80]
1011	ldp	x29,x30,[sp],#128
1012	ret
1013.size	sha512_block_data_order,.-sha512_block_data_order
1014
// .LK512: the 80 64-bit SHA-512 round constants K[0..79], in round order,
// followed by a single zero quad. The code above walks this table with
// post-incremented loads through x30; the zero quad is what ends
// .Loop_16_xx (cbnz on the value just loaded), and the 81 entries (648
// bytes) match the "rewind" subtraction at the end of each block.
1015.align	6
1016.type	.LK512,%object
1017.LK512:
1018.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1019.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1020.quad	0x3956c25bf348b538,0x59f111f1b605d019
1021.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1022.quad	0xd807aa98a3030242,0x12835b0145706fbe
1023.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1024.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1025.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1026.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1027.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1028.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1029.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1030.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1031.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1032.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1033.quad	0x06ca6351e003826f,0x142929670a0e6e70
1034.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1035.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1036.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1037.quad	0x81c2c92e47edaee6,0x92722c851482353b
1038.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1039.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1040.quad	0xd192e819d6ef5218,0xd69906245565a910
1041.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1042.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1043.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1044.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1045.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1046.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1047.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1048.quad	0x90befffa23631e28,0xa4506cebde82bde9
1049.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1050.quad	0xca273eceea26619c,0xd186b8c721c0c207
1051.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1052.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1053.quad	0x113f9804bef90dae,0x1b710b35131c471b
1054.quad	0x28db77f523047d84,0x32caab7b40c72493
1055.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1056.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1057.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1058.quad	0	// terminator
1059.size	.LK512,.-.LK512
// .LOPENSSL_armcap_P: distance from this location to the OPENSSL_armcap_P
// symbol, emitted as a 32-bit value under __ILP32__ and as a 64-bit value
// otherwise. Omitted when __KERNEL__ is defined. It is not referenced by the
// code visible in this file.
1060#ifndef	__KERNEL__
1061.align	3
1062.LOPENSSL_armcap_P:
1063# ifdef	__ILP32__
1064.long	OPENSSL_armcap_P-.
1065# else
1066.quad	OPENSSL_armcap_P-.
1067# endif
1068#endif
// NUL-terminated ASCII identification string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1069.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1070.align	2
1071.align	2
// Common-symbol declaration of OPENSSL_armcap_P (4 bytes, alignment operand
// 4), again only outside __KERNEL__ builds.
1072#ifndef	__KERNEL__
1073.comm	OPENSSL_armcap_P,4,4
1074#endif
1075#endif
1076