• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__aarch64__)
13#if defined(BORINGSSL_PREFIX)
14#include <boringssl_prefix_symbols_asm.h>
15#endif
16// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
17//
18// Licensed under the OpenSSL license (the "License").  You may not use
19// this file except in compliance with the License.  You can obtain a copy
20// in the file LICENSE in the source distribution or at
21// https://www.openssl.org/source/license.html
22
23// ====================================================================
24// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
25// project. The module is, however, dual licensed under OpenSSL and
26// CRYPTOGAMS licenses depending on where you obtain it. For further
27// details see http://www.openssl.org/~appro/cryptogams/.
28//
29// Permission to use under GPLv2 terms is granted.
30// ====================================================================
31//
32// SHA256/512 for ARMv8.
33//
34// Performance in cycles per processed byte and improvement coefficient
35// over code generated with "default" compiler:
36//
37//		SHA256-hw	SHA256(*)	SHA512
38// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
39// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
40// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
41// Denver	2.01		10.5 (+26%)	6.70 (+8%)
42// X-Gene			20.0 (+100%)	12.8 (+300%(***))
43// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
44//
45// (*)	Software SHA256 results are of lesser relevance, presented
46//	mostly for informational purposes.
47// (**)	The result is a trade-off: it's possible to improve it by
48//	10% (or by 1 cycle per round), but at the cost of 20% loss
49//	on Cortex-A53 (or by 4 cycles per round).
50// (***)	Super-impressive coefficients over gcc-generated code are
51//	indication of some compiler "pathology", most notably code
52//	generated with -mgeneral-regs-only is significantly faster
53//	and the gap is only 40-90%.
54
55#ifndef	__KERNEL__
56# include <openssl/arm_arch.h>
57#endif
58
59.text
60
61
62.hidden	OPENSSL_armcap_P
// sha512_block_data_order: entry point, frame setup and context load.
//
// Arguments, as the code below reads them:
//   x0  pointer to eight 64-bit state words (loaded into x20..x27)
//   x1  input pointer
//   x2  block count; it is scaled by 128 (lsl #7) to form the end pointer
// NOTE(review): the C prototype is not visible in this file; presumably
// (uint64_t state[8], const uint8_t *in, size_t num_blocks) -- confirm
// against the caller's declaration.
//
// Frame (128 bytes, addressed through x29):
//   [x29,#0]        saved x29, x30
//   [x29,#16..#95]  saved x19..x28
//   [x29,#96]       x0, the state pointer
//   [x29,#104]      end-of-input pointer
//   [x29,#112]      input pointer (written at the top of .Loop)
// A further 32 bytes are reserved below the frame; the round code uses
// them as four 8-byte slots at [sp,#0], [sp,#8], [sp,#16] and [sp,#24].
//
// x30 is reused as the pointer into .LK512 once it has been saved; the
// epilogue reloads the original value from the frame.
63.globl	sha512_block_data_order
64.hidden	sha512_block_data_order
65.type	sha512_block_data_order,%function
66.align	6
67sha512_block_data_order:
// AARCH64_SIGN_LINK_REGISTER is a macro defined outside this file.
// NOTE(review): presumably it comes from <openssl/arm_arch.h> and signs
// x30 for pointer authentication -- confirm.
68	AARCH64_SIGN_LINK_REGISTER
69	stp	x29,x30,[sp,#-128]!
70	add	x29,sp,#0
71
72	stp	x19,x20,[sp,#16]
73	stp	x21,x22,[sp,#32]
74	stp	x23,x24,[sp,#48]
75	stp	x25,x26,[sp,#64]
76	stp	x27,x28,[sp,#80]
// Four 8-byte scratch slots below the frame.
77	sub	sp,sp,#4*8
78
79	ldp	x20,x21,[x0]				// load context
80	ldp	x22,x23,[x0,#2*8]
81	ldp	x24,x25,[x0,#4*8]
82	add	x2,x1,x2,lsl#7	// end of input
83	ldp	x26,x27,[x0,#6*8]
// x30 = address of .LK512 (page address plus low 12 bits).
84	adrp	x30,.LK512
85	add	x30,x30,:lo12:.LK512
// x0 and x2 are overwritten by the round code, so keep copies in the frame.
86	stp	x0,x2,[x29,#96]
87
// .Loop: outer loop, one pass per 128-byte input block. This part runs
// rounds 0..15, which consume the sixteen input words directly.
//
// Conventions visible in the code below:
//  * x20..x27 hold the working variables a..h. The roles rotate by one
//    register every round instead of the values being moved (round 0
//    uses x27 as h, round 1 uses x26, and so on).
//  * x30 walks the constant table; every round does one post-incremented
//    8-byte load. x19 and x28 alternate between holding that constant and
//    carrying a^b into the next round, where it is reused as b^c for Maj.
//  * x16 accumulates Sigma1(e); x17 holds Ch(e,f,g) and later Sigma0(a).
//  * Sigma1(e) = ror14 ^ ror18 ^ ror41 is built here as
//    ror(e,14) ^ ror(e ^ ror(e,23), 18), and Sigma0(a) = ror28 ^ ror34 ^
//    ror39 as ror(a,28) ^ ror(a ^ ror(a,5), 34).
//  * The last "h+=Sigma0(a)" of each round is left commented out at the
//    end of the round and issued a few instructions into the next one.
//  * Input words are byte-reversed with rev unless __ARMEB__ is defined.
//  * Rounds 0..10 borrow a register whose input word has not been loaded
//    yet as an extra temporary (x6 in round 0, x7 in round 1, ...).
88.Loop:
// ---- round 0 ----
89	ldp	x3,x4,[x1],#2*8
90	ldr	x19,[x30],#8			// *K++
91	eor	x28,x21,x22				// magic seed
// Save the input pointer (already advanced by 16); x1 is reused later.
92	str	x1,[x29,#112]
93#ifndef	__ARMEB__
94	rev	x3,x3			// 0
95#endif
96	ror	x16,x24,#14
97	add	x27,x27,x19			// h+=K[i]
98	eor	x6,x24,x24,ror#23
99	and	x17,x25,x24
100	bic	x19,x26,x24
101	add	x27,x27,x3			// h+=X[i]
102	orr	x17,x17,x19			// Ch(e,f,g)
103	eor	x19,x20,x21			// a^b, b^c in next round
104	eor	x16,x16,x6,ror#18	// Sigma1(e)
105	ror	x6,x20,#28
106	add	x27,x27,x17			// h+=Ch(e,f,g)
107	eor	x17,x20,x20,ror#5
108	add	x27,x27,x16			// h+=Sigma1(e)
109	and	x28,x28,x19			// (b^c)&=(a^b)
110	add	x23,x23,x27			// d+=h
111	eor	x28,x28,x21			// Maj(a,b,c)
112	eor	x17,x6,x17,ror#34	// Sigma0(a)
113	add	x27,x27,x28			// h+=Maj(a,b,c)
114	ldr	x28,[x30],#8		// *K++, x19 in next round
115	//add	x27,x27,x17			// h+=Sigma0(a)
// ---- round 1 ----
116#ifndef	__ARMEB__
117	rev	x4,x4			// 1
118#endif
119	ldp	x5,x6,[x1],#2*8
120	add	x27,x27,x17			// h+=Sigma0(a)
121	ror	x16,x23,#14
122	add	x26,x26,x28			// h+=K[i]
123	eor	x7,x23,x23,ror#23
124	and	x17,x24,x23
125	bic	x28,x25,x23
126	add	x26,x26,x4			// h+=X[i]
127	orr	x17,x17,x28			// Ch(e,f,g)
128	eor	x28,x27,x20			// a^b, b^c in next round
129	eor	x16,x16,x7,ror#18	// Sigma1(e)
130	ror	x7,x27,#28
131	add	x26,x26,x17			// h+=Ch(e,f,g)
132	eor	x17,x27,x27,ror#5
133	add	x26,x26,x16			// h+=Sigma1(e)
134	and	x19,x19,x28			// (b^c)&=(a^b)
135	add	x22,x22,x26			// d+=h
136	eor	x19,x19,x20			// Maj(a,b,c)
137	eor	x17,x7,x17,ror#34	// Sigma0(a)
138	add	x26,x26,x19			// h+=Maj(a,b,c)
139	ldr	x19,[x30],#8		// *K++, x28 in next round
140	//add	x26,x26,x17			// h+=Sigma0(a)
// ---- round 2 ----
141#ifndef	__ARMEB__
142	rev	x5,x5			// 2
143#endif
144	add	x26,x26,x17			// h+=Sigma0(a)
145	ror	x16,x22,#14
146	add	x25,x25,x19			// h+=K[i]
147	eor	x8,x22,x22,ror#23
148	and	x17,x23,x22
149	bic	x19,x24,x22
150	add	x25,x25,x5			// h+=X[i]
151	orr	x17,x17,x19			// Ch(e,f,g)
152	eor	x19,x26,x27			// a^b, b^c in next round
153	eor	x16,x16,x8,ror#18	// Sigma1(e)
154	ror	x8,x26,#28
155	add	x25,x25,x17			// h+=Ch(e,f,g)
156	eor	x17,x26,x26,ror#5
157	add	x25,x25,x16			// h+=Sigma1(e)
158	and	x28,x28,x19			// (b^c)&=(a^b)
159	add	x21,x21,x25			// d+=h
160	eor	x28,x28,x27			// Maj(a,b,c)
161	eor	x17,x8,x17,ror#34	// Sigma0(a)
162	add	x25,x25,x28			// h+=Maj(a,b,c)
163	ldr	x28,[x30],#8		// *K++, x19 in next round
164	//add	x25,x25,x17			// h+=Sigma0(a)
// ---- round 3 ----
165#ifndef	__ARMEB__
166	rev	x6,x6			// 3
167#endif
168	ldp	x7,x8,[x1],#2*8
169	add	x25,x25,x17			// h+=Sigma0(a)
170	ror	x16,x21,#14
171	add	x24,x24,x28			// h+=K[i]
172	eor	x9,x21,x21,ror#23
173	and	x17,x22,x21
174	bic	x28,x23,x21
175	add	x24,x24,x6			// h+=X[i]
176	orr	x17,x17,x28			// Ch(e,f,g)
177	eor	x28,x25,x26			// a^b, b^c in next round
178	eor	x16,x16,x9,ror#18	// Sigma1(e)
179	ror	x9,x25,#28
180	add	x24,x24,x17			// h+=Ch(e,f,g)
181	eor	x17,x25,x25,ror#5
182	add	x24,x24,x16			// h+=Sigma1(e)
183	and	x19,x19,x28			// (b^c)&=(a^b)
184	add	x20,x20,x24			// d+=h
185	eor	x19,x19,x26			// Maj(a,b,c)
186	eor	x17,x9,x17,ror#34	// Sigma0(a)
187	add	x24,x24,x19			// h+=Maj(a,b,c)
188	ldr	x19,[x30],#8		// *K++, x28 in next round
189	//add	x24,x24,x17			// h+=Sigma0(a)
// ---- round 4 ----
190#ifndef	__ARMEB__
191	rev	x7,x7			// 4
192#endif
193	add	x24,x24,x17			// h+=Sigma0(a)
194	ror	x16,x20,#14
195	add	x23,x23,x19			// h+=K[i]
196	eor	x10,x20,x20,ror#23
197	and	x17,x21,x20
198	bic	x19,x22,x20
199	add	x23,x23,x7			// h+=X[i]
200	orr	x17,x17,x19			// Ch(e,f,g)
201	eor	x19,x24,x25			// a^b, b^c in next round
202	eor	x16,x16,x10,ror#18	// Sigma1(e)
203	ror	x10,x24,#28
204	add	x23,x23,x17			// h+=Ch(e,f,g)
205	eor	x17,x24,x24,ror#5
206	add	x23,x23,x16			// h+=Sigma1(e)
207	and	x28,x28,x19			// (b^c)&=(a^b)
208	add	x27,x27,x23			// d+=h
209	eor	x28,x28,x25			// Maj(a,b,c)
210	eor	x17,x10,x17,ror#34	// Sigma0(a)
211	add	x23,x23,x28			// h+=Maj(a,b,c)
212	ldr	x28,[x30],#8		// *K++, x19 in next round
213	//add	x23,x23,x17			// h+=Sigma0(a)
// ---- round 5 ----
214#ifndef	__ARMEB__
215	rev	x8,x8			// 5
216#endif
217	ldp	x9,x10,[x1],#2*8
218	add	x23,x23,x17			// h+=Sigma0(a)
219	ror	x16,x27,#14
220	add	x22,x22,x28			// h+=K[i]
221	eor	x11,x27,x27,ror#23
222	and	x17,x20,x27
223	bic	x28,x21,x27
224	add	x22,x22,x8			// h+=X[i]
225	orr	x17,x17,x28			// Ch(e,f,g)
226	eor	x28,x23,x24			// a^b, b^c in next round
227	eor	x16,x16,x11,ror#18	// Sigma1(e)
228	ror	x11,x23,#28
229	add	x22,x22,x17			// h+=Ch(e,f,g)
230	eor	x17,x23,x23,ror#5
231	add	x22,x22,x16			// h+=Sigma1(e)
232	and	x19,x19,x28			// (b^c)&=(a^b)
233	add	x26,x26,x22			// d+=h
234	eor	x19,x19,x24			// Maj(a,b,c)
235	eor	x17,x11,x17,ror#34	// Sigma0(a)
236	add	x22,x22,x19			// h+=Maj(a,b,c)
237	ldr	x19,[x30],#8		// *K++, x28 in next round
238	//add	x22,x22,x17			// h+=Sigma0(a)
// ---- round 6 ----
239#ifndef	__ARMEB__
240	rev	x9,x9			// 6
241#endif
242	add	x22,x22,x17			// h+=Sigma0(a)
243	ror	x16,x26,#14
244	add	x21,x21,x19			// h+=K[i]
245	eor	x12,x26,x26,ror#23
246	and	x17,x27,x26
247	bic	x19,x20,x26
248	add	x21,x21,x9			// h+=X[i]
249	orr	x17,x17,x19			// Ch(e,f,g)
250	eor	x19,x22,x23			// a^b, b^c in next round
251	eor	x16,x16,x12,ror#18	// Sigma1(e)
252	ror	x12,x22,#28
253	add	x21,x21,x17			// h+=Ch(e,f,g)
254	eor	x17,x22,x22,ror#5
255	add	x21,x21,x16			// h+=Sigma1(e)
256	and	x28,x28,x19			// (b^c)&=(a^b)
257	add	x25,x25,x21			// d+=h
258	eor	x28,x28,x23			// Maj(a,b,c)
259	eor	x17,x12,x17,ror#34	// Sigma0(a)
260	add	x21,x21,x28			// h+=Maj(a,b,c)
261	ldr	x28,[x30],#8		// *K++, x19 in next round
262	//add	x21,x21,x17			// h+=Sigma0(a)
// ---- round 7 ----
263#ifndef	__ARMEB__
264	rev	x10,x10			// 7
265#endif
266	ldp	x11,x12,[x1],#2*8
267	add	x21,x21,x17			// h+=Sigma0(a)
268	ror	x16,x25,#14
269	add	x20,x20,x28			// h+=K[i]
270	eor	x13,x25,x25,ror#23
271	and	x17,x26,x25
272	bic	x28,x27,x25
273	add	x20,x20,x10			// h+=X[i]
274	orr	x17,x17,x28			// Ch(e,f,g)
275	eor	x28,x21,x22			// a^b, b^c in next round
276	eor	x16,x16,x13,ror#18	// Sigma1(e)
277	ror	x13,x21,#28
278	add	x20,x20,x17			// h+=Ch(e,f,g)
279	eor	x17,x21,x21,ror#5
280	add	x20,x20,x16			// h+=Sigma1(e)
281	and	x19,x19,x28			// (b^c)&=(a^b)
282	add	x24,x24,x20			// d+=h
283	eor	x19,x19,x22			// Maj(a,b,c)
284	eor	x17,x13,x17,ror#34	// Sigma0(a)
285	add	x20,x20,x19			// h+=Maj(a,b,c)
286	ldr	x19,[x30],#8		// *K++, x28 in next round
287	//add	x20,x20,x17			// h+=Sigma0(a)
// ---- round 8 ----
288#ifndef	__ARMEB__
289	rev	x11,x11			// 8
290#endif
291	add	x20,x20,x17			// h+=Sigma0(a)
292	ror	x16,x24,#14
293	add	x27,x27,x19			// h+=K[i]
294	eor	x14,x24,x24,ror#23
295	and	x17,x25,x24
296	bic	x19,x26,x24
297	add	x27,x27,x11			// h+=X[i]
298	orr	x17,x17,x19			// Ch(e,f,g)
299	eor	x19,x20,x21			// a^b, b^c in next round
300	eor	x16,x16,x14,ror#18	// Sigma1(e)
301	ror	x14,x20,#28
302	add	x27,x27,x17			// h+=Ch(e,f,g)
303	eor	x17,x20,x20,ror#5
304	add	x27,x27,x16			// h+=Sigma1(e)
305	and	x28,x28,x19			// (b^c)&=(a^b)
306	add	x23,x23,x27			// d+=h
307	eor	x28,x28,x21			// Maj(a,b,c)
308	eor	x17,x14,x17,ror#34	// Sigma0(a)
309	add	x27,x27,x28			// h+=Maj(a,b,c)
310	ldr	x28,[x30],#8		// *K++, x19 in next round
311	//add	x27,x27,x17			// h+=Sigma0(a)
// ---- round 9 ----
312#ifndef	__ARMEB__
313	rev	x12,x12			// 9
314#endif
315	ldp	x13,x14,[x1],#2*8
316	add	x27,x27,x17			// h+=Sigma0(a)
317	ror	x16,x23,#14
318	add	x26,x26,x28			// h+=K[i]
319	eor	x15,x23,x23,ror#23
320	and	x17,x24,x23
321	bic	x28,x25,x23
322	add	x26,x26,x12			// h+=X[i]
323	orr	x17,x17,x28			// Ch(e,f,g)
324	eor	x28,x27,x20			// a^b, b^c in next round
325	eor	x16,x16,x15,ror#18	// Sigma1(e)
326	ror	x15,x27,#28
327	add	x26,x26,x17			// h+=Ch(e,f,g)
328	eor	x17,x27,x27,ror#5
329	add	x26,x26,x16			// h+=Sigma1(e)
330	and	x19,x19,x28			// (b^c)&=(a^b)
331	add	x22,x22,x26			// d+=h
332	eor	x19,x19,x20			// Maj(a,b,c)
333	eor	x17,x15,x17,ror#34	// Sigma0(a)
334	add	x26,x26,x19			// h+=Maj(a,b,c)
335	ldr	x19,[x30],#8		// *K++, x28 in next round
336	//add	x26,x26,x17			// h+=Sigma0(a)
// ---- round 10 ----
// x0 (the state pointer, already saved at [x29,#96]) becomes the temporary.
337#ifndef	__ARMEB__
338	rev	x13,x13			// 10
339#endif
340	add	x26,x26,x17			// h+=Sigma0(a)
341	ror	x16,x22,#14
342	add	x25,x25,x19			// h+=K[i]
343	eor	x0,x22,x22,ror#23
344	and	x17,x23,x22
345	bic	x19,x24,x22
346	add	x25,x25,x13			// h+=X[i]
347	orr	x17,x17,x19			// Ch(e,f,g)
348	eor	x19,x26,x27			// a^b, b^c in next round
349	eor	x16,x16,x0,ror#18	// Sigma1(e)
350	ror	x0,x26,#28
351	add	x25,x25,x17			// h+=Ch(e,f,g)
352	eor	x17,x26,x26,ror#5
353	add	x25,x25,x16			// h+=Sigma1(e)
354	and	x28,x28,x19			// (b^c)&=(a^b)
355	add	x21,x21,x25			// d+=h
356	eor	x28,x28,x27			// Maj(a,b,c)
357	eor	x17,x0,x17,ror#34	// Sigma0(a)
358	add	x25,x25,x28			// h+=Maj(a,b,c)
359	ldr	x28,[x30],#8		// *K++, x19 in next round
360	//add	x25,x25,x17			// h+=Sigma0(a)
// ---- round 11 ----
// From here on no unloaded X register is left to borrow, so each round
// spills one already-consumed input word to a stack slot and uses its
// register as the temporary (x6, holding word 3, goes to [sp,#24] here).
361#ifndef	__ARMEB__
362	rev	x14,x14			// 11
363#endif
364	ldp	x15,x0,[x1],#2*8
365	add	x25,x25,x17			// h+=Sigma0(a)
366	str	x6,[sp,#24]
367	ror	x16,x21,#14
368	add	x24,x24,x28			// h+=K[i]
369	eor	x6,x21,x21,ror#23
370	and	x17,x22,x21
371	bic	x28,x23,x21
372	add	x24,x24,x14			// h+=X[i]
373	orr	x17,x17,x28			// Ch(e,f,g)
374	eor	x28,x25,x26			// a^b, b^c in next round
375	eor	x16,x16,x6,ror#18	// Sigma1(e)
376	ror	x6,x25,#28
377	add	x24,x24,x17			// h+=Ch(e,f,g)
378	eor	x17,x25,x25,ror#5
379	add	x24,x24,x16			// h+=Sigma1(e)
380	and	x19,x19,x28			// (b^c)&=(a^b)
381	add	x20,x20,x24			// d+=h
382	eor	x19,x19,x26			// Maj(a,b,c)
383	eor	x17,x6,x17,ror#34	// Sigma0(a)
384	add	x24,x24,x19			// h+=Maj(a,b,c)
385	ldr	x19,[x30],#8		// *K++, x28 in next round
386	//add	x24,x24,x17			// h+=Sigma0(a)
// ---- round 12 ----
387#ifndef	__ARMEB__
388	rev	x15,x15			// 12
389#endif
390	add	x24,x24,x17			// h+=Sigma0(a)
391	str	x7,[sp,#0]
392	ror	x16,x20,#14
393	add	x23,x23,x19			// h+=K[i]
394	eor	x7,x20,x20,ror#23
395	and	x17,x21,x20
396	bic	x19,x22,x20
397	add	x23,x23,x15			// h+=X[i]
398	orr	x17,x17,x19			// Ch(e,f,g)
399	eor	x19,x24,x25			// a^b, b^c in next round
400	eor	x16,x16,x7,ror#18	// Sigma1(e)
401	ror	x7,x24,#28
402	add	x23,x23,x17			// h+=Ch(e,f,g)
403	eor	x17,x24,x24,ror#5
404	add	x23,x23,x16			// h+=Sigma1(e)
405	and	x28,x28,x19			// (b^c)&=(a^b)
406	add	x27,x27,x23			// d+=h
407	eor	x28,x28,x25			// Maj(a,b,c)
408	eor	x17,x7,x17,ror#34	// Sigma0(a)
409	add	x23,x23,x28			// h+=Maj(a,b,c)
410	ldr	x28,[x30],#8		// *K++, x19 in next round
411	//add	x23,x23,x17			// h+=Sigma0(a)
// ---- round 13 ----
// The last two input words are loaded into x1/x2, overwriting the input
// pointer and the end pointer; both have copies in the frame
// ([x29,#112] and [x29,#104]).
412#ifndef	__ARMEB__
413	rev	x0,x0			// 13
414#endif
415	ldp	x1,x2,[x1]
416	add	x23,x23,x17			// h+=Sigma0(a)
417	str	x8,[sp,#8]
418	ror	x16,x27,#14
419	add	x22,x22,x28			// h+=K[i]
420	eor	x8,x27,x27,ror#23
421	and	x17,x20,x27
422	bic	x28,x21,x27
423	add	x22,x22,x0			// h+=X[i]
424	orr	x17,x17,x28			// Ch(e,f,g)
425	eor	x28,x23,x24			// a^b, b^c in next round
426	eor	x16,x16,x8,ror#18	// Sigma1(e)
427	ror	x8,x23,#28
428	add	x22,x22,x17			// h+=Ch(e,f,g)
429	eor	x17,x23,x23,ror#5
430	add	x22,x22,x16			// h+=Sigma1(e)
431	and	x19,x19,x28			// (b^c)&=(a^b)
432	add	x26,x26,x22			// d+=h
433	eor	x19,x19,x24			// Maj(a,b,c)
434	eor	x17,x8,x17,ror#34	// Sigma0(a)
435	add	x22,x22,x19			// h+=Maj(a,b,c)
436	ldr	x19,[x30],#8		// *K++, x28 in next round
437	//add	x22,x22,x17			// h+=Sigma0(a)
// ---- round 14 ----
// Word 3 comes back into x6 from [sp,#24]; x9 is spilled in its place.
438#ifndef	__ARMEB__
439	rev	x1,x1			// 14
440#endif
441	ldr	x6,[sp,#24]
442	add	x22,x22,x17			// h+=Sigma0(a)
443	str	x9,[sp,#16]
444	ror	x16,x26,#14
445	add	x21,x21,x19			// h+=K[i]
446	eor	x9,x26,x26,ror#23
447	and	x17,x27,x26
448	bic	x19,x20,x26
449	add	x21,x21,x1			// h+=X[i]
450	orr	x17,x17,x19			// Ch(e,f,g)
451	eor	x19,x22,x23			// a^b, b^c in next round
452	eor	x16,x16,x9,ror#18	// Sigma1(e)
453	ror	x9,x22,#28
454	add	x21,x21,x17			// h+=Ch(e,f,g)
455	eor	x17,x22,x22,ror#5
456	add	x21,x21,x16			// h+=Sigma1(e)
457	and	x28,x28,x19			// (b^c)&=(a^b)
458	add	x25,x25,x21			// d+=h
459	eor	x28,x28,x23			// Maj(a,b,c)
460	eor	x17,x9,x17,ror#34	// Sigma0(a)
461	add	x21,x21,x28			// h+=Maj(a,b,c)
462	ldr	x28,[x30],#8		// *K++, x19 in next round
463	//add	x21,x21,x17			// h+=Sigma0(a)
// ---- round 15 ----
// This round uses the three-term Sigma forms directly, adds Sigma0(a)
// within the round, and interleaves the first message-schedule update:
//   x3 (word 0) += x12 (word 9) + sigma0(x4, word 1) + sigma1(x1, word 14)
// The result is the word consumed by the first step of .Loop_16_xx.
464#ifndef	__ARMEB__
465	rev	x2,x2			// 15
466#endif
467	ldr	x7,[sp,#0]
468	add	x21,x21,x17			// h+=Sigma0(a)
469	str	x10,[sp,#24]
470	ror	x16,x25,#14
471	add	x20,x20,x28			// h+=K[i]
472	ror	x9,x4,#1
473	and	x17,x26,x25
474	ror	x8,x1,#19
475	bic	x28,x27,x25
476	ror	x10,x21,#28
477	add	x20,x20,x2			// h+=X[i]
478	eor	x16,x16,x25,ror#18
479	eor	x9,x9,x4,ror#8
480	orr	x17,x17,x28			// Ch(e,f,g)
481	eor	x28,x21,x22			// a^b, b^c in next round
482	eor	x16,x16,x25,ror#41	// Sigma1(e)
483	eor	x10,x10,x21,ror#34
484	add	x20,x20,x17			// h+=Ch(e,f,g)
485	and	x19,x19,x28			// (b^c)&=(a^b)
486	eor	x8,x8,x1,ror#61
487	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
488	add	x20,x20,x16			// h+=Sigma1(e)
489	eor	x19,x19,x22			// Maj(a,b,c)
490	eor	x17,x10,x21,ror#39	// Sigma0(a)
491	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
492	add	x3,x3,x12
493	add	x24,x24,x20			// d+=h
494	add	x20,x20,x19			// h+=Maj(a,b,c)
495	ldr	x19,[x30],#8		// *K++, x28 in next round
496	add	x3,x3,x9
497	add	x20,x20,x17			// h+=Sigma0(a)
498	add	x3,x3,x8
// .Loop_16_xx: rounds 16..79, sixteen unrolled steps per pass.
//
// Each step performs one round with the schedule word prepared by the
// previous step and, interleaved with it, prepares the word for the
// following step:
//     X[j] += X[j+9] + sigma0(X[j+1]) + sigma1(X[j+14])   (indices mod 16)
// where j is the window slot the following step consumes, and
//     sigma0(x) = ror(x,1)  ^ ror(x,8)  ^ (x >> 7)
//     sigma1(x) = ror(x,19) ^ ror(x,61) ^ (x >> 6)
// The round function itself is
//     Sigma1(e) = ror(e,14) ^ ror(e,18) ^ ror(e,41)
//     Sigma0(a) = ror(a,28) ^ ror(a,34) ^ ror(a,39)
// with Ch, Maj and the x19/x28 alternation exactly as in rounds 0..15.
//
// Every step starts by reloading one window word from a stack slot and
// spilling another, which frees the spilled word's register to act as
// the Sigma0 temporary for that step.
//
// The pass repeats until the constant just loaded into x19 is zero, which
// is the terminator quad stored after the last table entry.
499.Loop_16_xx:
// ---- step 0: consumes x3 ----
500	ldr	x8,[sp,#8]
501	str	x11,[sp,#0]
502	ror	x16,x24,#14
503	add	x27,x27,x19			// h+=K[i]
504	ror	x10,x5,#1
505	and	x17,x25,x24
506	ror	x9,x2,#19
507	bic	x19,x26,x24
508	ror	x11,x20,#28
509	add	x27,x27,x3			// h+=X[i]
510	eor	x16,x16,x24,ror#18
511	eor	x10,x10,x5,ror#8
512	orr	x17,x17,x19			// Ch(e,f,g)
513	eor	x19,x20,x21			// a^b, b^c in next round
514	eor	x16,x16,x24,ror#41	// Sigma1(e)
515	eor	x11,x11,x20,ror#34
516	add	x27,x27,x17			// h+=Ch(e,f,g)
517	and	x28,x28,x19			// (b^c)&=(a^b)
518	eor	x9,x9,x2,ror#61
519	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
520	add	x27,x27,x16			// h+=Sigma1(e)
521	eor	x28,x28,x21			// Maj(a,b,c)
522	eor	x17,x11,x20,ror#39	// Sigma0(a)
523	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
524	add	x4,x4,x13
525	add	x23,x23,x27			// d+=h
526	add	x27,x27,x28			// h+=Maj(a,b,c)
527	ldr	x28,[x30],#8		// *K++, x19 in next round
528	add	x4,x4,x10
529	add	x27,x27,x17			// h+=Sigma0(a)
530	add	x4,x4,x9
// ---- step 1: consumes x4 ----
531	ldr	x9,[sp,#16]
532	str	x12,[sp,#8]
533	ror	x16,x23,#14
534	add	x26,x26,x28			// h+=K[i]
535	ror	x11,x6,#1
536	and	x17,x24,x23
537	ror	x10,x3,#19
538	bic	x28,x25,x23
539	ror	x12,x27,#28
540	add	x26,x26,x4			// h+=X[i]
541	eor	x16,x16,x23,ror#18
542	eor	x11,x11,x6,ror#8
543	orr	x17,x17,x28			// Ch(e,f,g)
544	eor	x28,x27,x20			// a^b, b^c in next round
545	eor	x16,x16,x23,ror#41	// Sigma1(e)
546	eor	x12,x12,x27,ror#34
547	add	x26,x26,x17			// h+=Ch(e,f,g)
548	and	x19,x19,x28			// (b^c)&=(a^b)
549	eor	x10,x10,x3,ror#61
550	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
551	add	x26,x26,x16			// h+=Sigma1(e)
552	eor	x19,x19,x20			// Maj(a,b,c)
553	eor	x17,x12,x27,ror#39	// Sigma0(a)
554	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
555	add	x5,x5,x14
556	add	x22,x22,x26			// d+=h
557	add	x26,x26,x19			// h+=Maj(a,b,c)
558	ldr	x19,[x30],#8		// *K++, x28 in next round
559	add	x5,x5,x11
560	add	x26,x26,x17			// h+=Sigma0(a)
561	add	x5,x5,x10
// ---- step 2: consumes x5 ----
562	ldr	x10,[sp,#24]
563	str	x13,[sp,#16]
564	ror	x16,x22,#14
565	add	x25,x25,x19			// h+=K[i]
566	ror	x12,x7,#1
567	and	x17,x23,x22
568	ror	x11,x4,#19
569	bic	x19,x24,x22
570	ror	x13,x26,#28
571	add	x25,x25,x5			// h+=X[i]
572	eor	x16,x16,x22,ror#18
573	eor	x12,x12,x7,ror#8
574	orr	x17,x17,x19			// Ch(e,f,g)
575	eor	x19,x26,x27			// a^b, b^c in next round
576	eor	x16,x16,x22,ror#41	// Sigma1(e)
577	eor	x13,x13,x26,ror#34
578	add	x25,x25,x17			// h+=Ch(e,f,g)
579	and	x28,x28,x19			// (b^c)&=(a^b)
580	eor	x11,x11,x4,ror#61
581	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
582	add	x25,x25,x16			// h+=Sigma1(e)
583	eor	x28,x28,x27			// Maj(a,b,c)
584	eor	x17,x13,x26,ror#39	// Sigma0(a)
585	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
586	add	x6,x6,x15
587	add	x21,x21,x25			// d+=h
588	add	x25,x25,x28			// h+=Maj(a,b,c)
589	ldr	x28,[x30],#8		// *K++, x19 in next round
590	add	x6,x6,x12
591	add	x25,x25,x17			// h+=Sigma0(a)
592	add	x6,x6,x11
// ---- step 3: consumes x6 ----
593	ldr	x11,[sp,#0]
594	str	x14,[sp,#24]
595	ror	x16,x21,#14
596	add	x24,x24,x28			// h+=K[i]
597	ror	x13,x8,#1
598	and	x17,x22,x21
599	ror	x12,x5,#19
600	bic	x28,x23,x21
601	ror	x14,x25,#28
602	add	x24,x24,x6			// h+=X[i]
603	eor	x16,x16,x21,ror#18
604	eor	x13,x13,x8,ror#8
605	orr	x17,x17,x28			// Ch(e,f,g)
606	eor	x28,x25,x26			// a^b, b^c in next round
607	eor	x16,x16,x21,ror#41	// Sigma1(e)
608	eor	x14,x14,x25,ror#34
609	add	x24,x24,x17			// h+=Ch(e,f,g)
610	and	x19,x19,x28			// (b^c)&=(a^b)
611	eor	x12,x12,x5,ror#61
612	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
613	add	x24,x24,x16			// h+=Sigma1(e)
614	eor	x19,x19,x26			// Maj(a,b,c)
615	eor	x17,x14,x25,ror#39	// Sigma0(a)
616	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
617	add	x7,x7,x0
618	add	x20,x20,x24			// d+=h
619	add	x24,x24,x19			// h+=Maj(a,b,c)
620	ldr	x19,[x30],#8		// *K++, x28 in next round
621	add	x7,x7,x13
622	add	x24,x24,x17			// h+=Sigma0(a)
623	add	x7,x7,x12
// ---- step 4: consumes x7 ----
624	ldr	x12,[sp,#8]
625	str	x15,[sp,#0]
626	ror	x16,x20,#14
627	add	x23,x23,x19			// h+=K[i]
628	ror	x14,x9,#1
629	and	x17,x21,x20
630	ror	x13,x6,#19
631	bic	x19,x22,x20
632	ror	x15,x24,#28
633	add	x23,x23,x7			// h+=X[i]
634	eor	x16,x16,x20,ror#18
635	eor	x14,x14,x9,ror#8
636	orr	x17,x17,x19			// Ch(e,f,g)
637	eor	x19,x24,x25			// a^b, b^c in next round
638	eor	x16,x16,x20,ror#41	// Sigma1(e)
639	eor	x15,x15,x24,ror#34
640	add	x23,x23,x17			// h+=Ch(e,f,g)
641	and	x28,x28,x19			// (b^c)&=(a^b)
642	eor	x13,x13,x6,ror#61
643	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
644	add	x23,x23,x16			// h+=Sigma1(e)
645	eor	x28,x28,x25			// Maj(a,b,c)
646	eor	x17,x15,x24,ror#39	// Sigma0(a)
647	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
648	add	x8,x8,x1
649	add	x27,x27,x23			// d+=h
650	add	x23,x23,x28			// h+=Maj(a,b,c)
651	ldr	x28,[x30],#8		// *K++, x19 in next round
652	add	x8,x8,x14
653	add	x23,x23,x17			// h+=Sigma0(a)
654	add	x8,x8,x13
// ---- step 5: consumes x8 ----
655	ldr	x13,[sp,#16]
656	str	x0,[sp,#8]
657	ror	x16,x27,#14
658	add	x22,x22,x28			// h+=K[i]
659	ror	x15,x10,#1
660	and	x17,x20,x27
661	ror	x14,x7,#19
662	bic	x28,x21,x27
663	ror	x0,x23,#28
664	add	x22,x22,x8			// h+=X[i]
665	eor	x16,x16,x27,ror#18
666	eor	x15,x15,x10,ror#8
667	orr	x17,x17,x28			// Ch(e,f,g)
668	eor	x28,x23,x24			// a^b, b^c in next round
669	eor	x16,x16,x27,ror#41	// Sigma1(e)
670	eor	x0,x0,x23,ror#34
671	add	x22,x22,x17			// h+=Ch(e,f,g)
672	and	x19,x19,x28			// (b^c)&=(a^b)
673	eor	x14,x14,x7,ror#61
674	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
675	add	x22,x22,x16			// h+=Sigma1(e)
676	eor	x19,x19,x24			// Maj(a,b,c)
677	eor	x17,x0,x23,ror#39	// Sigma0(a)
678	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
679	add	x9,x9,x2
680	add	x26,x26,x22			// d+=h
681	add	x22,x22,x19			// h+=Maj(a,b,c)
682	ldr	x19,[x30],#8		// *K++, x28 in next round
683	add	x9,x9,x15
684	add	x22,x22,x17			// h+=Sigma0(a)
685	add	x9,x9,x14
// ---- step 6: consumes x9 ----
686	ldr	x14,[sp,#24]
687	str	x1,[sp,#16]
688	ror	x16,x26,#14
689	add	x21,x21,x19			// h+=K[i]
690	ror	x0,x11,#1
691	and	x17,x27,x26
692	ror	x15,x8,#19
693	bic	x19,x20,x26
694	ror	x1,x22,#28
695	add	x21,x21,x9			// h+=X[i]
696	eor	x16,x16,x26,ror#18
697	eor	x0,x0,x11,ror#8
698	orr	x17,x17,x19			// Ch(e,f,g)
699	eor	x19,x22,x23			// a^b, b^c in next round
700	eor	x16,x16,x26,ror#41	// Sigma1(e)
701	eor	x1,x1,x22,ror#34
702	add	x21,x21,x17			// h+=Ch(e,f,g)
703	and	x28,x28,x19			// (b^c)&=(a^b)
704	eor	x15,x15,x8,ror#61
705	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
706	add	x21,x21,x16			// h+=Sigma1(e)
707	eor	x28,x28,x23			// Maj(a,b,c)
708	eor	x17,x1,x22,ror#39	// Sigma0(a)
709	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
710	add	x10,x10,x3
711	add	x25,x25,x21			// d+=h
712	add	x21,x21,x28			// h+=Maj(a,b,c)
713	ldr	x28,[x30],#8		// *K++, x19 in next round
714	add	x10,x10,x0
715	add	x21,x21,x17			// h+=Sigma0(a)
716	add	x10,x10,x15
// ---- step 7: consumes x10 ----
717	ldr	x15,[sp,#0]
718	str	x2,[sp,#24]
719	ror	x16,x25,#14
720	add	x20,x20,x28			// h+=K[i]
721	ror	x1,x12,#1
722	and	x17,x26,x25
723	ror	x0,x9,#19
724	bic	x28,x27,x25
725	ror	x2,x21,#28
726	add	x20,x20,x10			// h+=X[i]
727	eor	x16,x16,x25,ror#18
728	eor	x1,x1,x12,ror#8
729	orr	x17,x17,x28			// Ch(e,f,g)
730	eor	x28,x21,x22			// a^b, b^c in next round
731	eor	x16,x16,x25,ror#41	// Sigma1(e)
732	eor	x2,x2,x21,ror#34
733	add	x20,x20,x17			// h+=Ch(e,f,g)
734	and	x19,x19,x28			// (b^c)&=(a^b)
735	eor	x0,x0,x9,ror#61
736	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
737	add	x20,x20,x16			// h+=Sigma1(e)
738	eor	x19,x19,x22			// Maj(a,b,c)
739	eor	x17,x2,x21,ror#39	// Sigma0(a)
740	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
741	add	x11,x11,x4
742	add	x24,x24,x20			// d+=h
743	add	x20,x20,x19			// h+=Maj(a,b,c)
744	ldr	x19,[x30],#8		// *K++, x28 in next round
745	add	x11,x11,x1
746	add	x20,x20,x17			// h+=Sigma0(a)
747	add	x11,x11,x0
// ---- step 8: consumes x11 ----
748	ldr	x0,[sp,#8]
749	str	x3,[sp,#0]
750	ror	x16,x24,#14
751	add	x27,x27,x19			// h+=K[i]
752	ror	x2,x13,#1
753	and	x17,x25,x24
754	ror	x1,x10,#19
755	bic	x19,x26,x24
756	ror	x3,x20,#28
757	add	x27,x27,x11			// h+=X[i]
758	eor	x16,x16,x24,ror#18
759	eor	x2,x2,x13,ror#8
760	orr	x17,x17,x19			// Ch(e,f,g)
761	eor	x19,x20,x21			// a^b, b^c in next round
762	eor	x16,x16,x24,ror#41	// Sigma1(e)
763	eor	x3,x3,x20,ror#34
764	add	x27,x27,x17			// h+=Ch(e,f,g)
765	and	x28,x28,x19			// (b^c)&=(a^b)
766	eor	x1,x1,x10,ror#61
767	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
768	add	x27,x27,x16			// h+=Sigma1(e)
769	eor	x28,x28,x21			// Maj(a,b,c)
770	eor	x17,x3,x20,ror#39	// Sigma0(a)
771	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
772	add	x12,x12,x5
773	add	x23,x23,x27			// d+=h
774	add	x27,x27,x28			// h+=Maj(a,b,c)
775	ldr	x28,[x30],#8		// *K++, x19 in next round
776	add	x12,x12,x2
777	add	x27,x27,x17			// h+=Sigma0(a)
778	add	x12,x12,x1
// ---- step 9: consumes x12 ----
779	ldr	x1,[sp,#16]
780	str	x4,[sp,#8]
781	ror	x16,x23,#14
782	add	x26,x26,x28			// h+=K[i]
783	ror	x3,x14,#1
784	and	x17,x24,x23
785	ror	x2,x11,#19
786	bic	x28,x25,x23
787	ror	x4,x27,#28
788	add	x26,x26,x12			// h+=X[i]
789	eor	x16,x16,x23,ror#18
790	eor	x3,x3,x14,ror#8
791	orr	x17,x17,x28			// Ch(e,f,g)
792	eor	x28,x27,x20			// a^b, b^c in next round
793	eor	x16,x16,x23,ror#41	// Sigma1(e)
794	eor	x4,x4,x27,ror#34
795	add	x26,x26,x17			// h+=Ch(e,f,g)
796	and	x19,x19,x28			// (b^c)&=(a^b)
797	eor	x2,x2,x11,ror#61
798	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
799	add	x26,x26,x16			// h+=Sigma1(e)
800	eor	x19,x19,x20			// Maj(a,b,c)
801	eor	x17,x4,x27,ror#39	// Sigma0(a)
802	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
803	add	x13,x13,x6
804	add	x22,x22,x26			// d+=h
805	add	x26,x26,x19			// h+=Maj(a,b,c)
806	ldr	x19,[x30],#8		// *K++, x28 in next round
807	add	x13,x13,x3
808	add	x26,x26,x17			// h+=Sigma0(a)
809	add	x13,x13,x2
// ---- step 10: consumes x13 ----
810	ldr	x2,[sp,#24]
811	str	x5,[sp,#16]
812	ror	x16,x22,#14
813	add	x25,x25,x19			// h+=K[i]
814	ror	x4,x15,#1
815	and	x17,x23,x22
816	ror	x3,x12,#19
817	bic	x19,x24,x22
818	ror	x5,x26,#28
819	add	x25,x25,x13			// h+=X[i]
820	eor	x16,x16,x22,ror#18
821	eor	x4,x4,x15,ror#8
822	orr	x17,x17,x19			// Ch(e,f,g)
823	eor	x19,x26,x27			// a^b, b^c in next round
824	eor	x16,x16,x22,ror#41	// Sigma1(e)
825	eor	x5,x5,x26,ror#34
826	add	x25,x25,x17			// h+=Ch(e,f,g)
827	and	x28,x28,x19			// (b^c)&=(a^b)
828	eor	x3,x3,x12,ror#61
829	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
830	add	x25,x25,x16			// h+=Sigma1(e)
831	eor	x28,x28,x27			// Maj(a,b,c)
832	eor	x17,x5,x26,ror#39	// Sigma0(a)
833	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
834	add	x14,x14,x7
835	add	x21,x21,x25			// d+=h
836	add	x25,x25,x28			// h+=Maj(a,b,c)
837	ldr	x28,[x30],#8		// *K++, x19 in next round
838	add	x14,x14,x4
839	add	x25,x25,x17			// h+=Sigma0(a)
840	add	x14,x14,x3
// ---- step 11: consumes x14 ----
841	ldr	x3,[sp,#0]
842	str	x6,[sp,#24]
843	ror	x16,x21,#14
844	add	x24,x24,x28			// h+=K[i]
845	ror	x5,x0,#1
846	and	x17,x22,x21
847	ror	x4,x13,#19
848	bic	x28,x23,x21
849	ror	x6,x25,#28
850	add	x24,x24,x14			// h+=X[i]
851	eor	x16,x16,x21,ror#18
852	eor	x5,x5,x0,ror#8
853	orr	x17,x17,x28			// Ch(e,f,g)
854	eor	x28,x25,x26			// a^b, b^c in next round
855	eor	x16,x16,x21,ror#41	// Sigma1(e)
856	eor	x6,x6,x25,ror#34
857	add	x24,x24,x17			// h+=Ch(e,f,g)
858	and	x19,x19,x28			// (b^c)&=(a^b)
859	eor	x4,x4,x13,ror#61
860	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
861	add	x24,x24,x16			// h+=Sigma1(e)
862	eor	x19,x19,x26			// Maj(a,b,c)
863	eor	x17,x6,x25,ror#39	// Sigma0(a)
864	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
865	add	x15,x15,x8
866	add	x20,x20,x24			// d+=h
867	add	x24,x24,x19			// h+=Maj(a,b,c)
868	ldr	x19,[x30],#8		// *K++, x28 in next round
869	add	x15,x15,x5
870	add	x24,x24,x17			// h+=Sigma0(a)
871	add	x15,x15,x4
// ---- step 12: consumes x15 ----
872	ldr	x4,[sp,#8]
873	str	x7,[sp,#0]
874	ror	x16,x20,#14
875	add	x23,x23,x19			// h+=K[i]
876	ror	x6,x1,#1
877	and	x17,x21,x20
878	ror	x5,x14,#19
879	bic	x19,x22,x20
880	ror	x7,x24,#28
881	add	x23,x23,x15			// h+=X[i]
882	eor	x16,x16,x20,ror#18
883	eor	x6,x6,x1,ror#8
884	orr	x17,x17,x19			// Ch(e,f,g)
885	eor	x19,x24,x25			// a^b, b^c in next round
886	eor	x16,x16,x20,ror#41	// Sigma1(e)
887	eor	x7,x7,x24,ror#34
888	add	x23,x23,x17			// h+=Ch(e,f,g)
889	and	x28,x28,x19			// (b^c)&=(a^b)
890	eor	x5,x5,x14,ror#61
891	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
892	add	x23,x23,x16			// h+=Sigma1(e)
893	eor	x28,x28,x25			// Maj(a,b,c)
894	eor	x17,x7,x24,ror#39	// Sigma0(a)
895	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
896	add	x0,x0,x9
897	add	x27,x27,x23			// d+=h
898	add	x23,x23,x28			// h+=Maj(a,b,c)
899	ldr	x28,[x30],#8		// *K++, x19 in next round
900	add	x0,x0,x6
901	add	x23,x23,x17			// h+=Sigma0(a)
902	add	x0,x0,x5
// ---- step 13: consumes x0 ----
903	ldr	x5,[sp,#16]
904	str	x8,[sp,#8]
905	ror	x16,x27,#14
906	add	x22,x22,x28			// h+=K[i]
907	ror	x7,x2,#1
908	and	x17,x20,x27
909	ror	x6,x15,#19
910	bic	x28,x21,x27
911	ror	x8,x23,#28
912	add	x22,x22,x0			// h+=X[i]
913	eor	x16,x16,x27,ror#18
914	eor	x7,x7,x2,ror#8
915	orr	x17,x17,x28			// Ch(e,f,g)
916	eor	x28,x23,x24			// a^b, b^c in next round
917	eor	x16,x16,x27,ror#41	// Sigma1(e)
918	eor	x8,x8,x23,ror#34
919	add	x22,x22,x17			// h+=Ch(e,f,g)
920	and	x19,x19,x28			// (b^c)&=(a^b)
921	eor	x6,x6,x15,ror#61
922	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
923	add	x22,x22,x16			// h+=Sigma1(e)
924	eor	x19,x19,x24			// Maj(a,b,c)
925	eor	x17,x8,x23,ror#39	// Sigma0(a)
926	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
927	add	x1,x1,x10
928	add	x26,x26,x22			// d+=h
929	add	x22,x22,x19			// h+=Maj(a,b,c)
930	ldr	x19,[x30],#8		// *K++, x28 in next round
931	add	x1,x1,x7
932	add	x22,x22,x17			// h+=Sigma0(a)
933	add	x1,x1,x6
// ---- step 14: consumes x1 ----
934	ldr	x6,[sp,#24]
935	str	x9,[sp,#16]
936	ror	x16,x26,#14
937	add	x21,x21,x19			// h+=K[i]
938	ror	x8,x3,#1
939	and	x17,x27,x26
940	ror	x7,x0,#19
941	bic	x19,x20,x26
942	ror	x9,x22,#28
943	add	x21,x21,x1			// h+=X[i]
944	eor	x16,x16,x26,ror#18
945	eor	x8,x8,x3,ror#8
946	orr	x17,x17,x19			// Ch(e,f,g)
947	eor	x19,x22,x23			// a^b, b^c in next round
948	eor	x16,x16,x26,ror#41	// Sigma1(e)
949	eor	x9,x9,x22,ror#34
950	add	x21,x21,x17			// h+=Ch(e,f,g)
951	and	x28,x28,x19			// (b^c)&=(a^b)
952	eor	x7,x7,x0,ror#61
953	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
954	add	x21,x21,x16			// h+=Sigma1(e)
955	eor	x28,x28,x23			// Maj(a,b,c)
956	eor	x17,x9,x22,ror#39	// Sigma0(a)
957	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
958	add	x2,x2,x11
959	add	x25,x25,x21			// d+=h
960	add	x21,x21,x28			// h+=Maj(a,b,c)
961	ldr	x28,[x30],#8		// *K++, x19 in next round
962	add	x2,x2,x8
963	add	x21,x21,x17			// h+=Sigma0(a)
964	add	x2,x2,x7
// ---- step 15: consumes x2 ----
// This step leaves the registers and stack slots arranged as step 0
// expects, so the body can be re-entered at .Loop_16_xx.
965	ldr	x7,[sp,#0]
966	str	x10,[sp,#24]
967	ror	x16,x25,#14
968	add	x20,x20,x28			// h+=K[i]
969	ror	x9,x4,#1
970	and	x17,x26,x25
971	ror	x8,x1,#19
972	bic	x28,x27,x25
973	ror	x10,x21,#28
974	add	x20,x20,x2			// h+=X[i]
975	eor	x16,x16,x25,ror#18
976	eor	x9,x9,x4,ror#8
977	orr	x17,x17,x28			// Ch(e,f,g)
978	eor	x28,x21,x22			// a^b, b^c in next round
979	eor	x16,x16,x25,ror#41	// Sigma1(e)
980	eor	x10,x10,x21,ror#34
981	add	x20,x20,x17			// h+=Ch(e,f,g)
982	and	x19,x19,x28			// (b^c)&=(a^b)
983	eor	x8,x8,x1,ror#61
984	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
985	add	x20,x20,x16			// h+=Sigma1(e)
986	eor	x19,x19,x22			// Maj(a,b,c)
987	eor	x17,x10,x21,ror#39	// Sigma0(a)
988	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
989	add	x3,x3,x12
990	add	x24,x24,x20			// d+=h
991	add	x20,x20,x19			// h+=Maj(a,b,c)
992	ldr	x19,[x30],#8		// *K++, x28 in next round
993	add	x3,x3,x9
994	add	x20,x20,x17			// h+=Sigma0(a)
995	add	x3,x3,x8
// x19 holds the constant for the next round; zero means the terminator
// after the last table entry was just read, i.e. all rounds are done.
996	cbnz	x19,.Loop_16_xx
997
// End of one 128-byte block: fold the working variables back into the
// state, advance the input pointer, and loop while input remains.
//
// x0, x1 and x2 were reused as schedule words, so reload the state
// pointer, the end-of-input pointer and the saved input pointer.
998	ldp	x0,x2,[x29,#96]
999	ldr	x1,[x29,#112]
// 648 = 81*8: eighty constants plus the zero terminator were read through
// x30 with post-increment, so this puts x30 back at .LK512.
1000	sub	x30,x30,#648		// rewind
1001
1002	ldp	x3,x4,[x0]
1003	ldp	x5,x6,[x0,#2*8]
// x1 was saved right after the first 16-byte post-increment, so adding
// 14*8 makes the total advance 128 bytes.
1004	add	x1,x1,#14*8			// advance input pointer
1005	ldp	x7,x8,[x0,#4*8]
// state[k] = previous state[k] + working variable k, for k = 0..7.
1006	add	x20,x20,x3
1007	ldp	x9,x10,[x0,#6*8]
1008	add	x21,x21,x4
1009	add	x22,x22,x5
1010	add	x23,x23,x6
1011	stp	x20,x21,[x0]
1012	add	x24,x24,x7
1013	add	x25,x25,x8
1014	stp	x22,x23,[x0,#2*8]
1015	add	x26,x26,x9
1016	add	x27,x27,x10
// The end test runs only after a block has been processed and uses b.ne,
// so at least one block is always hashed.
// NOTE(review): this implies callers must pass a non-zero block count --
// confirm against the callers.
1017	cmp	x1,x2
1018	stp	x24,x25,[x0,#4*8]
1019	stp	x26,x27,[x0,#6*8]
1020	b.ne	.Loop
1021
// Epilogue: release the 32-byte scratch area, restore x19..x28 from the
// frame, then pop x29/x30 together with the 128-byte frame.
1022	ldp	x19,x20,[x29,#16]
1023	add	sp,sp,#4*8
1024	ldp	x21,x22,[x29,#32]
1025	ldp	x23,x24,[x29,#48]
1026	ldp	x25,x26,[x29,#64]
1027	ldp	x27,x28,[x29,#80]
1028	ldp	x29,x30,[sp],#128
// AARCH64_VALIDATE_LINK_REGISTER is a macro defined outside this file.
// NOTE(review): presumably the counterpart of AARCH64_SIGN_LINK_REGISTER
// at function entry -- confirm.
1029	AARCH64_VALIDATE_LINK_REGISTER
1030	ret
1031.size	sha512_block_data_order,.-sha512_block_data_order
1032
// .LK512: round-constant table read through x30 by
// sha512_block_data_order, one 64-bit entry per round.
//
// Layout: 80 constants (40 lines of two quads) followed by one zero quad.
// The zero is never used as a constant; the round loop tests each value
// it loads and leaves when it reads zero. Table plus terminator is
// 81*8 = 648 bytes, which is the amount the function subtracts from x30
// to rewind it after each block.
1033.section	.rodata
1034.align	6
1035.type	.LK512,%object
1036.LK512:
1037.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1038.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1039.quad	0x3956c25bf348b538,0x59f111f1b605d019
1040.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1041.quad	0xd807aa98a3030242,0x12835b0145706fbe
1042.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1043.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1044.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1045.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1046.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1047.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1048.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1049.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1050.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1051.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1052.quad	0x06ca6351e003826f,0x142929670a0e6e70
1053.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1054.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1055.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1056.quad	0x81c2c92e47edaee6,0x92722c851482353b
1057.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1058.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1059.quad	0xd192e819d6ef5218,0xd69906245565a910
1060.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1061.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1062.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1063.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1064.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1065.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1066.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1067.quad	0x90befffa23631e28,0xa4506cebde82bde9
1068.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1069.quad	0xca273eceea26619c,0xd186b8c721c0c207
1070.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1071.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1072.quad	0x113f9804bef90dae,0x1b710b35131c471b
1073.quad	0x28db77f523047d84,0x32caab7b40c72493
1074.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1075.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1076.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1077.quad	0	// terminator
1078.size	.LK512,.-.LK512
// NUL-terminated ASCII identification string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1079.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1080.align	2
1081.align	2
1082#endif
1083#endif  // !OPENSSL_NO_ASM
1084.section	.note.GNU-stack,"",%progbits
1085