• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__aarch64__)
13#include "ring_core_generated/prefix_symbols_asm.h"
14// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
15//
16// Licensed under the OpenSSL license (the "License").  You may not use
17// this file except in compliance with the License.  You can obtain a copy
18// in the file LICENSE in the source distribution or at
19// https://www.openssl.org/source/license.html
20
21// ====================================================================
22// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
23// project. The module is, however, dual licensed under OpenSSL and
24// CRYPTOGAMS licenses depending on where you obtain it. For further
25// details see http://www.openssl.org/~appro/cryptogams/.
26//
27// Permission to use under GPLv2 terms is granted.
28// ====================================================================
29//
30// SHA256/512 for ARMv8.
31//
32// Performance in cycles per processed byte and improvement coefficient
33// over code generated with "default" compiler:
34//
35//		SHA256-hw	SHA256(*)	SHA512
36// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
37// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
38// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
39// Denver	2.01		10.5 (+26%)	6.70 (+8%)
40// X-Gene			20.0 (+100%)	12.8 (+300%(***))
41// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
42//
43// (*)	Software SHA256 results are of lesser relevance, presented
44//	mostly for informational purposes.
45// (**)	The result is a trade-off: it's possible to improve it by
46//	10% (or by 1 cycle per round), but at the cost of 20% loss
47//	on Cortex-A53 (or by 4 cycles per round).
48// (***)	Super-impressive coefficients over gcc-generated code are
49//	indication of some compiler "pathology", most notably code
50//	generated with -mgeneral-regs-only is significantly faster
51//	and the gap is only 40-90%.
52
53#ifndef	__KERNEL__
54# include <ring-core/arm_arch.h>
55#endif
56
57.text
58
59
60.hidden	OPENSSL_armcap_P
// sha512_block_data_order - SHA-512 compression over a run of 128-byte blocks.
//
// Register inputs, as used by the code below:
//   x0  pointer to eight 64-bit state words (loaded here, updated per block)
//   x1  pointer to the input data
//   x2  block count; shifted left by 7 (x128) and added to x1 to form the
//       end-of-input pointer
// NOTE(review): the matching C prototype is not visible in this file -
// confirm the argument types against the declaration.
// NOTE(review): the end-of-input test (cmp/b.ne at the bottom of .Loop) runs
// only after a block has been processed, so a zero block count is not handled
// here - presumably callers guarantee x2 >= 1; confirm.
//
// Stack frame (x29 = sp after the first stp):
//   [x29,#0]            x29, x30
//   [x29,#16]-[x29,#88] x19-x28
//   [x29,#96]           x0 (state pointer)
//   [x29,#104]          end-of-input pointer
//   [x29,#112]          input pointer (stored at the top of .Loop)
//   [sp,#0]-[sp,#24]    four 8-byte slots below x29, used to spill message words
//
// Working registers:
//   x20-x27  a..h; the roles rotate by one register each round instead of
//            the values being moved
//   x30      walks the .LK512 table (the link register is saved in the frame
//            and restored before ret)
//   x19,x28  alternate between the current K value and the a^b term
//   x16,x17  scratch
61.globl	sha512_block_data_order
62.hidden	sha512_block_data_order
63.type	sha512_block_data_order,%function
64.align	6
65sha512_block_data_order:
	// Macro supplied from outside this file (presumably arm_arch.h, included
	// above) - TODO confirm its expansion.
66	AARCH64_SIGN_LINK_REGISTER
	// Allocate the 128-byte frame and save x29/x30 at its base.
67	stp	x29,x30,[sp,#-128]!
68	add	x29,sp,#0
69
	// Save x19-x28; all of them are overwritten below.
70	stp	x19,x20,[sp,#16]
71	stp	x21,x22,[sp,#32]
72	stp	x23,x24,[sp,#48]
73	stp	x25,x26,[sp,#64]
74	stp	x27,x28,[sp,#80]
	// Extra 32 bytes below the frame: the message-word spill slots.
75	sub	sp,sp,#4*8
76
77	ldp	x20,x21,[x0]				// load context
78	ldp	x22,x23,[x0,#2*8]
79	ldp	x24,x25,[x0,#4*8]
80	add	x2,x1,x2,lsl#7	// end of input
81	ldp	x26,x27,[x0,#6*8]
	// x30 = address of .LK512 (page address, then low 12 bits).
82	adrp	x30,.LK512
83	add	x30,x30,:lo12:.LK512
	// x0 and x2 are reused for message words inside .Loop, so keep the state
	// pointer and the end pointer in the frame.
84	stp	x0,x2,[x29,#96]
85
// .Loop - entered once per 128-byte input block; this part covers rounds 0-15.
//
// Message words are loaded pairwise from the input (ldp with post-increment)
// and, unless __ARMEB__ is defined, byte-reversed with rev. They end up in
// x3-x15, x0, x1, x2 (in that order). x0 and x2 were saved at [x29,#96] before
// this label and x1 is saved at [x29,#112] below, because all three are
// overwritten with message words.
//
// Registers that have not yet received their message word serve as scratch
// in the early rounds; from round 11 on, x6-x10 are spilled to [sp,#0..24]
// before being reused as scratch.
//
// Each "//add ...,x17  // h+=Sigma0(a)" line is commented out in the original:
// it marks where that addition logically belongs. The live instruction is
// issued a few lines later, inside the next round's instruction stream.
//
// Round 15 additionally prepares the message word for round 16
// (x3 += x12 + sigma0(x4) + sigma1(x1)).
86.Loop:
	// -- round 0: message word in x3, h in x27 --
87	ldp	x3,x4,[x1],#2*8
88	ldr	x19,[x30],#8			// *K++
89	eor	x28,x21,x22				// magic seed
	// x1 has already advanced by 16 here; it is reused for a message word
	// later, so keep a copy in the frame.
90	str	x1,[x29,#112]
91#ifndef	__ARMEB__
92	rev	x3,x3			// 0
93#endif
94	ror	x16,x24,#14
95	add	x27,x27,x19			// h+=K[i]
96	eor	x6,x24,x24,ror#23
97	and	x17,x25,x24
98	bic	x19,x26,x24
99	add	x27,x27,x3			// h+=X[i]
100	orr	x17,x17,x19			// Ch(e,f,g)
101	eor	x19,x20,x21			// a^b, b^c in next round
102	eor	x16,x16,x6,ror#18	// Sigma1(e)
103	ror	x6,x20,#28
104	add	x27,x27,x17			// h+=Ch(e,f,g)
105	eor	x17,x20,x20,ror#5
106	add	x27,x27,x16			// h+=Sigma1(e)
107	and	x28,x28,x19			// (b^c)&=(a^b)
108	add	x23,x23,x27			// d+=h
109	eor	x28,x28,x21			// Maj(a,b,c)
110	eor	x17,x6,x17,ror#34	// Sigma0(a)
111	add	x27,x27,x28			// h+=Maj(a,b,c)
112	ldr	x28,[x30],#8		// *K++, x19 in next round
113	//add	x27,x27,x17			// h+=Sigma0(a)
	// -- round 1: message word in x4, h in x26 --
114#ifndef	__ARMEB__
115	rev	x4,x4			// 1
116#endif
117	ldp	x5,x6,[x1],#2*8
118	add	x27,x27,x17			// h+=Sigma0(a)
119	ror	x16,x23,#14
120	add	x26,x26,x28			// h+=K[i]
121	eor	x7,x23,x23,ror#23
122	and	x17,x24,x23
123	bic	x28,x25,x23
124	add	x26,x26,x4			// h+=X[i]
125	orr	x17,x17,x28			// Ch(e,f,g)
126	eor	x28,x27,x20			// a^b, b^c in next round
127	eor	x16,x16,x7,ror#18	// Sigma1(e)
128	ror	x7,x27,#28
129	add	x26,x26,x17			// h+=Ch(e,f,g)
130	eor	x17,x27,x27,ror#5
131	add	x26,x26,x16			// h+=Sigma1(e)
132	and	x19,x19,x28			// (b^c)&=(a^b)
133	add	x22,x22,x26			// d+=h
134	eor	x19,x19,x20			// Maj(a,b,c)
135	eor	x17,x7,x17,ror#34	// Sigma0(a)
136	add	x26,x26,x19			// h+=Maj(a,b,c)
137	ldr	x19,[x30],#8		// *K++, x28 in next round
138	//add	x26,x26,x17			// h+=Sigma0(a)
	// -- round 2: message word in x5, h in x25 --
139#ifndef	__ARMEB__
140	rev	x5,x5			// 2
141#endif
142	add	x26,x26,x17			// h+=Sigma0(a)
143	ror	x16,x22,#14
144	add	x25,x25,x19			// h+=K[i]
145	eor	x8,x22,x22,ror#23
146	and	x17,x23,x22
147	bic	x19,x24,x22
148	add	x25,x25,x5			// h+=X[i]
149	orr	x17,x17,x19			// Ch(e,f,g)
150	eor	x19,x26,x27			// a^b, b^c in next round
151	eor	x16,x16,x8,ror#18	// Sigma1(e)
152	ror	x8,x26,#28
153	add	x25,x25,x17			// h+=Ch(e,f,g)
154	eor	x17,x26,x26,ror#5
155	add	x25,x25,x16			// h+=Sigma1(e)
156	and	x28,x28,x19			// (b^c)&=(a^b)
157	add	x21,x21,x25			// d+=h
158	eor	x28,x28,x27			// Maj(a,b,c)
159	eor	x17,x8,x17,ror#34	// Sigma0(a)
160	add	x25,x25,x28			// h+=Maj(a,b,c)
161	ldr	x28,[x30],#8		// *K++, x19 in next round
162	//add	x25,x25,x17			// h+=Sigma0(a)
	// -- round 3: message word in x6, h in x24 --
163#ifndef	__ARMEB__
164	rev	x6,x6			// 3
165#endif
166	ldp	x7,x8,[x1],#2*8
167	add	x25,x25,x17			// h+=Sigma0(a)
168	ror	x16,x21,#14
169	add	x24,x24,x28			// h+=K[i]
170	eor	x9,x21,x21,ror#23
171	and	x17,x22,x21
172	bic	x28,x23,x21
173	add	x24,x24,x6			// h+=X[i]
174	orr	x17,x17,x28			// Ch(e,f,g)
175	eor	x28,x25,x26			// a^b, b^c in next round
176	eor	x16,x16,x9,ror#18	// Sigma1(e)
177	ror	x9,x25,#28
178	add	x24,x24,x17			// h+=Ch(e,f,g)
179	eor	x17,x25,x25,ror#5
180	add	x24,x24,x16			// h+=Sigma1(e)
181	and	x19,x19,x28			// (b^c)&=(a^b)
182	add	x20,x20,x24			// d+=h
183	eor	x19,x19,x26			// Maj(a,b,c)
184	eor	x17,x9,x17,ror#34	// Sigma0(a)
185	add	x24,x24,x19			// h+=Maj(a,b,c)
186	ldr	x19,[x30],#8		// *K++, x28 in next round
187	//add	x24,x24,x17			// h+=Sigma0(a)
	// -- round 4: message word in x7, h in x23 --
188#ifndef	__ARMEB__
189	rev	x7,x7			// 4
190#endif
191	add	x24,x24,x17			// h+=Sigma0(a)
192	ror	x16,x20,#14
193	add	x23,x23,x19			// h+=K[i]
194	eor	x10,x20,x20,ror#23
195	and	x17,x21,x20
196	bic	x19,x22,x20
197	add	x23,x23,x7			// h+=X[i]
198	orr	x17,x17,x19			// Ch(e,f,g)
199	eor	x19,x24,x25			// a^b, b^c in next round
200	eor	x16,x16,x10,ror#18	// Sigma1(e)
201	ror	x10,x24,#28
202	add	x23,x23,x17			// h+=Ch(e,f,g)
203	eor	x17,x24,x24,ror#5
204	add	x23,x23,x16			// h+=Sigma1(e)
205	and	x28,x28,x19			// (b^c)&=(a^b)
206	add	x27,x27,x23			// d+=h
207	eor	x28,x28,x25			// Maj(a,b,c)
208	eor	x17,x10,x17,ror#34	// Sigma0(a)
209	add	x23,x23,x28			// h+=Maj(a,b,c)
210	ldr	x28,[x30],#8		// *K++, x19 in next round
211	//add	x23,x23,x17			// h+=Sigma0(a)
	// -- round 5: message word in x8, h in x22 --
212#ifndef	__ARMEB__
213	rev	x8,x8			// 5
214#endif
215	ldp	x9,x10,[x1],#2*8
216	add	x23,x23,x17			// h+=Sigma0(a)
217	ror	x16,x27,#14
218	add	x22,x22,x28			// h+=K[i]
219	eor	x11,x27,x27,ror#23
220	and	x17,x20,x27
221	bic	x28,x21,x27
222	add	x22,x22,x8			// h+=X[i]
223	orr	x17,x17,x28			// Ch(e,f,g)
224	eor	x28,x23,x24			// a^b, b^c in next round
225	eor	x16,x16,x11,ror#18	// Sigma1(e)
226	ror	x11,x23,#28
227	add	x22,x22,x17			// h+=Ch(e,f,g)
228	eor	x17,x23,x23,ror#5
229	add	x22,x22,x16			// h+=Sigma1(e)
230	and	x19,x19,x28			// (b^c)&=(a^b)
231	add	x26,x26,x22			// d+=h
232	eor	x19,x19,x24			// Maj(a,b,c)
233	eor	x17,x11,x17,ror#34	// Sigma0(a)
234	add	x22,x22,x19			// h+=Maj(a,b,c)
235	ldr	x19,[x30],#8		// *K++, x28 in next round
236	//add	x22,x22,x17			// h+=Sigma0(a)
	// -- round 6: message word in x9, h in x21 --
237#ifndef	__ARMEB__
238	rev	x9,x9			// 6
239#endif
240	add	x22,x22,x17			// h+=Sigma0(a)
241	ror	x16,x26,#14
242	add	x21,x21,x19			// h+=K[i]
243	eor	x12,x26,x26,ror#23
244	and	x17,x27,x26
245	bic	x19,x20,x26
246	add	x21,x21,x9			// h+=X[i]
247	orr	x17,x17,x19			// Ch(e,f,g)
248	eor	x19,x22,x23			// a^b, b^c in next round
249	eor	x16,x16,x12,ror#18	// Sigma1(e)
250	ror	x12,x22,#28
251	add	x21,x21,x17			// h+=Ch(e,f,g)
252	eor	x17,x22,x22,ror#5
253	add	x21,x21,x16			// h+=Sigma1(e)
254	and	x28,x28,x19			// (b^c)&=(a^b)
255	add	x25,x25,x21			// d+=h
256	eor	x28,x28,x23			// Maj(a,b,c)
257	eor	x17,x12,x17,ror#34	// Sigma0(a)
258	add	x21,x21,x28			// h+=Maj(a,b,c)
259	ldr	x28,[x30],#8		// *K++, x19 in next round
260	//add	x21,x21,x17			// h+=Sigma0(a)
	// -- round 7: message word in x10, h in x20 --
261#ifndef	__ARMEB__
262	rev	x10,x10			// 7
263#endif
264	ldp	x11,x12,[x1],#2*8
265	add	x21,x21,x17			// h+=Sigma0(a)
266	ror	x16,x25,#14
267	add	x20,x20,x28			// h+=K[i]
268	eor	x13,x25,x25,ror#23
269	and	x17,x26,x25
270	bic	x28,x27,x25
271	add	x20,x20,x10			// h+=X[i]
272	orr	x17,x17,x28			// Ch(e,f,g)
273	eor	x28,x21,x22			// a^b, b^c in next round
274	eor	x16,x16,x13,ror#18	// Sigma1(e)
275	ror	x13,x21,#28
276	add	x20,x20,x17			// h+=Ch(e,f,g)
277	eor	x17,x21,x21,ror#5
278	add	x20,x20,x16			// h+=Sigma1(e)
279	and	x19,x19,x28			// (b^c)&=(a^b)
280	add	x24,x24,x20			// d+=h
281	eor	x19,x19,x22			// Maj(a,b,c)
282	eor	x17,x13,x17,ror#34	// Sigma0(a)
283	add	x20,x20,x19			// h+=Maj(a,b,c)
284	ldr	x19,[x30],#8		// *K++, x28 in next round
285	//add	x20,x20,x17			// h+=Sigma0(a)
	// -- round 8: message word in x11, h in x27 --
286#ifndef	__ARMEB__
287	rev	x11,x11			// 8
288#endif
289	add	x20,x20,x17			// h+=Sigma0(a)
290	ror	x16,x24,#14
291	add	x27,x27,x19			// h+=K[i]
292	eor	x14,x24,x24,ror#23
293	and	x17,x25,x24
294	bic	x19,x26,x24
295	add	x27,x27,x11			// h+=X[i]
296	orr	x17,x17,x19			// Ch(e,f,g)
297	eor	x19,x20,x21			// a^b, b^c in next round
298	eor	x16,x16,x14,ror#18	// Sigma1(e)
299	ror	x14,x20,#28
300	add	x27,x27,x17			// h+=Ch(e,f,g)
301	eor	x17,x20,x20,ror#5
302	add	x27,x27,x16			// h+=Sigma1(e)
303	and	x28,x28,x19			// (b^c)&=(a^b)
304	add	x23,x23,x27			// d+=h
305	eor	x28,x28,x21			// Maj(a,b,c)
306	eor	x17,x14,x17,ror#34	// Sigma0(a)
307	add	x27,x27,x28			// h+=Maj(a,b,c)
308	ldr	x28,[x30],#8		// *K++, x19 in next round
309	//add	x27,x27,x17			// h+=Sigma0(a)
	// -- round 9: message word in x12, h in x26 --
310#ifndef	__ARMEB__
311	rev	x12,x12			// 9
312#endif
313	ldp	x13,x14,[x1],#2*8
314	add	x27,x27,x17			// h+=Sigma0(a)
315	ror	x16,x23,#14
316	add	x26,x26,x28			// h+=K[i]
317	eor	x15,x23,x23,ror#23
318	and	x17,x24,x23
319	bic	x28,x25,x23
320	add	x26,x26,x12			// h+=X[i]
321	orr	x17,x17,x28			// Ch(e,f,g)
322	eor	x28,x27,x20			// a^b, b^c in next round
323	eor	x16,x16,x15,ror#18	// Sigma1(e)
324	ror	x15,x27,#28
325	add	x26,x26,x17			// h+=Ch(e,f,g)
326	eor	x17,x27,x27,ror#5
327	add	x26,x26,x16			// h+=Sigma1(e)
328	and	x19,x19,x28			// (b^c)&=(a^b)
329	add	x22,x22,x26			// d+=h
330	eor	x19,x19,x20			// Maj(a,b,c)
331	eor	x17,x15,x17,ror#34	// Sigma0(a)
332	add	x26,x26,x19			// h+=Maj(a,b,c)
333	ldr	x19,[x30],#8		// *K++, x28 in next round
334	//add	x26,x26,x17			// h+=Sigma0(a)
	// -- round 10: message word in x13, h in x25 --
335#ifndef	__ARMEB__
336	rev	x13,x13			// 10
337#endif
338	add	x26,x26,x17			// h+=Sigma0(a)
339	ror	x16,x22,#14
340	add	x25,x25,x19			// h+=K[i]
341	eor	x0,x22,x22,ror#23
342	and	x17,x23,x22
343	bic	x19,x24,x22
344	add	x25,x25,x13			// h+=X[i]
345	orr	x17,x17,x19			// Ch(e,f,g)
346	eor	x19,x26,x27			// a^b, b^c in next round
347	eor	x16,x16,x0,ror#18	// Sigma1(e)
348	ror	x0,x26,#28
349	add	x25,x25,x17			// h+=Ch(e,f,g)
350	eor	x17,x26,x26,ror#5
351	add	x25,x25,x16			// h+=Sigma1(e)
352	and	x28,x28,x19			// (b^c)&=(a^b)
353	add	x21,x21,x25			// d+=h
354	eor	x28,x28,x27			// Maj(a,b,c)
355	eor	x17,x0,x17,ror#34	// Sigma0(a)
356	add	x25,x25,x28			// h+=Maj(a,b,c)
357	ldr	x28,[x30],#8		// *K++, x19 in next round
358	//add	x25,x25,x17			// h+=Sigma0(a)
	// -- round 11: message word in x14, h in x24 --
359#ifndef	__ARMEB__
360	rev	x14,x14			// 11
361#endif
362	ldp	x15,x0,[x1],#2*8
363	add	x25,x25,x17			// h+=Sigma0(a)
	// Park x6 (message word 3) so x6 can be scratch; reloaded in round 14.
364	str	x6,[sp,#24]
365	ror	x16,x21,#14
366	add	x24,x24,x28			// h+=K[i]
367	eor	x6,x21,x21,ror#23
368	and	x17,x22,x21
369	bic	x28,x23,x21
370	add	x24,x24,x14			// h+=X[i]
371	orr	x17,x17,x28			// Ch(e,f,g)
372	eor	x28,x25,x26			// a^b, b^c in next round
373	eor	x16,x16,x6,ror#18	// Sigma1(e)
374	ror	x6,x25,#28
375	add	x24,x24,x17			// h+=Ch(e,f,g)
376	eor	x17,x25,x25,ror#5
377	add	x24,x24,x16			// h+=Sigma1(e)
378	and	x19,x19,x28			// (b^c)&=(a^b)
379	add	x20,x20,x24			// d+=h
380	eor	x19,x19,x26			// Maj(a,b,c)
381	eor	x17,x6,x17,ror#34	// Sigma0(a)
382	add	x24,x24,x19			// h+=Maj(a,b,c)
383	ldr	x19,[x30],#8		// *K++, x28 in next round
384	//add	x24,x24,x17			// h+=Sigma0(a)
	// -- round 12: message word in x15, h in x23 --
385#ifndef	__ARMEB__
386	rev	x15,x15			// 12
387#endif
388	add	x24,x24,x17			// h+=Sigma0(a)
	// Park x7 (message word 4); reloaded in round 15.
389	str	x7,[sp,#0]
390	ror	x16,x20,#14
391	add	x23,x23,x19			// h+=K[i]
392	eor	x7,x20,x20,ror#23
393	and	x17,x21,x20
394	bic	x19,x22,x20
395	add	x23,x23,x15			// h+=X[i]
396	orr	x17,x17,x19			// Ch(e,f,g)
397	eor	x19,x24,x25			// a^b, b^c in next round
398	eor	x16,x16,x7,ror#18	// Sigma1(e)
399	ror	x7,x24,#28
400	add	x23,x23,x17			// h+=Ch(e,f,g)
401	eor	x17,x24,x24,ror#5
402	add	x23,x23,x16			// h+=Sigma1(e)
403	and	x28,x28,x19			// (b^c)&=(a^b)
404	add	x27,x27,x23			// d+=h
405	eor	x28,x28,x25			// Maj(a,b,c)
406	eor	x17,x7,x17,ror#34	// Sigma0(a)
407	add	x23,x23,x28			// h+=Maj(a,b,c)
408	ldr	x28,[x30],#8		// *K++, x19 in next round
409	//add	x23,x23,x17			// h+=Sigma0(a)
	// -- round 13: message word in x0, h in x22 --
410#ifndef	__ARMEB__
411	rev	x0,x0			// 13
412#endif
	// Last two words of the block. This overwrites the input pointer (copy at
	// [x29,#112]) and the end pointer (copy at [x29,#104]); no post-increment.
413	ldp	x1,x2,[x1]
414	add	x23,x23,x17			// h+=Sigma0(a)
	// Park x8 (message word 5); reloaded at the top of .Loop_16_xx.
415	str	x8,[sp,#8]
416	ror	x16,x27,#14
417	add	x22,x22,x28			// h+=K[i]
418	eor	x8,x27,x27,ror#23
419	and	x17,x20,x27
420	bic	x28,x21,x27
421	add	x22,x22,x0			// h+=X[i]
422	orr	x17,x17,x28			// Ch(e,f,g)
423	eor	x28,x23,x24			// a^b, b^c in next round
424	eor	x16,x16,x8,ror#18	// Sigma1(e)
425	ror	x8,x23,#28
426	add	x22,x22,x17			// h+=Ch(e,f,g)
427	eor	x17,x23,x23,ror#5
428	add	x22,x22,x16			// h+=Sigma1(e)
429	and	x19,x19,x28			// (b^c)&=(a^b)
430	add	x26,x26,x22			// d+=h
431	eor	x19,x19,x24			// Maj(a,b,c)
432	eor	x17,x8,x17,ror#34	// Sigma0(a)
433	add	x22,x22,x19			// h+=Maj(a,b,c)
434	ldr	x19,[x30],#8		// *K++, x28 in next round
435	//add	x22,x22,x17			// h+=Sigma0(a)
	// -- round 14: message word in x1, h in x21 --
436#ifndef	__ARMEB__
437	rev	x1,x1			// 14
438#endif
	// Bring message word 3 back into x6; its slot is reused for x10 in round 15.
439	ldr	x6,[sp,#24]
440	add	x22,x22,x17			// h+=Sigma0(a)
	// Park x9 (message word 6).
441	str	x9,[sp,#16]
442	ror	x16,x26,#14
443	add	x21,x21,x19			// h+=K[i]
444	eor	x9,x26,x26,ror#23
445	and	x17,x27,x26
446	bic	x19,x20,x26
447	add	x21,x21,x1			// h+=X[i]
448	orr	x17,x17,x19			// Ch(e,f,g)
449	eor	x19,x22,x23			// a^b, b^c in next round
450	eor	x16,x16,x9,ror#18	// Sigma1(e)
451	ror	x9,x22,#28
452	add	x21,x21,x17			// h+=Ch(e,f,g)
453	eor	x17,x22,x22,ror#5
454	add	x21,x21,x16			// h+=Sigma1(e)
455	and	x28,x28,x19			// (b^c)&=(a^b)
456	add	x25,x25,x21			// d+=h
457	eor	x28,x28,x23			// Maj(a,b,c)
458	eor	x17,x9,x17,ror#34	// Sigma0(a)
459	add	x21,x21,x28			// h+=Maj(a,b,c)
460	ldr	x28,[x30],#8		// *K++, x19 in next round
461	//add	x21,x21,x17			// h+=Sigma0(a)
	// -- round 15: message word in x2, h in x20; also builds the word for
	//    round 16 in x3 (x9 = sigma0(x4), x8 = sigma1(x1)) --
462#ifndef	__ARMEB__
463	rev	x2,x2			// 15
464#endif
	// Bring message word 4 back into x7.
465	ldr	x7,[sp,#0]
466	add	x21,x21,x17			// h+=Sigma0(a)
	// Park x10 (message word 7); x10 is scratch for Sigma0(a) below.
467	str	x10,[sp,#24]
468	ror	x16,x25,#14
469	add	x20,x20,x28			// h+=K[i]
470	ror	x9,x4,#1
471	and	x17,x26,x25
472	ror	x8,x1,#19
473	bic	x28,x27,x25
474	ror	x10,x21,#28
475	add	x20,x20,x2			// h+=X[i]
476	eor	x16,x16,x25,ror#18
477	eor	x9,x9,x4,ror#8
478	orr	x17,x17,x28			// Ch(e,f,g)
479	eor	x28,x21,x22			// a^b, b^c in next round
480	eor	x16,x16,x25,ror#41	// Sigma1(e)
481	eor	x10,x10,x21,ror#34
482	add	x20,x20,x17			// h+=Ch(e,f,g)
483	and	x19,x19,x28			// (b^c)&=(a^b)
484	eor	x8,x8,x1,ror#61
485	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
486	add	x20,x20,x16			// h+=Sigma1(e)
487	eor	x19,x19,x22			// Maj(a,b,c)
488	eor	x17,x10,x21,ror#39	// Sigma0(a)
489	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
490	add	x3,x3,x12
491	add	x24,x24,x20			// d+=h
492	add	x20,x20,x19			// h+=Maj(a,b,c)
493	ldr	x19,[x30],#8		// *K++, x28 in next round
494	add	x3,x3,x9
495	add	x20,x20,x17			// h+=Sigma0(a)
496	add	x3,x3,x8
// .Loop_16_xx - rounds 16 and up, sixteen rounds per pass.
//
// Each round adds the message-schedule word prepared by the previous round
// into h and builds the following word in place. For example, the first round
// below consumes x3 and builds x4 += x13 + sigma0(x5) + sigma1(x2).
// The sixteen schedule words rotate through x3-x15, x0, x1, x2.
//
// Every round starts by reloading one previously spilled word from the
// [sp,#0..24] slots and spilling another, which frees that register for use
// as scratch (sigma0/sigma1/Sigma0 temporaries) inside the round.
//
// The pass repeats until the K value just fetched into x19 is zero, i.e. the
// zero quad that ends .LK512 has been read (cbnz at the bottom).
497.Loop_16_xx:
	// -- h(x27) += x3; build x4 --
498	ldr	x8,[sp,#8]
499	str	x11,[sp,#0]
500	ror	x16,x24,#14
501	add	x27,x27,x19			// h+=K[i]
502	ror	x10,x5,#1
503	and	x17,x25,x24
504	ror	x9,x2,#19
505	bic	x19,x26,x24
506	ror	x11,x20,#28
507	add	x27,x27,x3			// h+=X[i]
508	eor	x16,x16,x24,ror#18
509	eor	x10,x10,x5,ror#8
510	orr	x17,x17,x19			// Ch(e,f,g)
511	eor	x19,x20,x21			// a^b, b^c in next round
512	eor	x16,x16,x24,ror#41	// Sigma1(e)
513	eor	x11,x11,x20,ror#34
514	add	x27,x27,x17			// h+=Ch(e,f,g)
515	and	x28,x28,x19			// (b^c)&=(a^b)
516	eor	x9,x9,x2,ror#61
517	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
518	add	x27,x27,x16			// h+=Sigma1(e)
519	eor	x28,x28,x21			// Maj(a,b,c)
520	eor	x17,x11,x20,ror#39	// Sigma0(a)
521	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
522	add	x4,x4,x13
523	add	x23,x23,x27			// d+=h
524	add	x27,x27,x28			// h+=Maj(a,b,c)
525	ldr	x28,[x30],#8		// *K++, x19 in next round
526	add	x4,x4,x10
527	add	x27,x27,x17			// h+=Sigma0(a)
528	add	x4,x4,x9
	// -- h(x26) += x4; build x5 --
529	ldr	x9,[sp,#16]
530	str	x12,[sp,#8]
531	ror	x16,x23,#14
532	add	x26,x26,x28			// h+=K[i]
533	ror	x11,x6,#1
534	and	x17,x24,x23
535	ror	x10,x3,#19
536	bic	x28,x25,x23
537	ror	x12,x27,#28
538	add	x26,x26,x4			// h+=X[i]
539	eor	x16,x16,x23,ror#18
540	eor	x11,x11,x6,ror#8
541	orr	x17,x17,x28			// Ch(e,f,g)
542	eor	x28,x27,x20			// a^b, b^c in next round
543	eor	x16,x16,x23,ror#41	// Sigma1(e)
544	eor	x12,x12,x27,ror#34
545	add	x26,x26,x17			// h+=Ch(e,f,g)
546	and	x19,x19,x28			// (b^c)&=(a^b)
547	eor	x10,x10,x3,ror#61
548	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
549	add	x26,x26,x16			// h+=Sigma1(e)
550	eor	x19,x19,x20			// Maj(a,b,c)
551	eor	x17,x12,x27,ror#39	// Sigma0(a)
552	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
553	add	x5,x5,x14
554	add	x22,x22,x26			// d+=h
555	add	x26,x26,x19			// h+=Maj(a,b,c)
556	ldr	x19,[x30],#8		// *K++, x28 in next round
557	add	x5,x5,x11
558	add	x26,x26,x17			// h+=Sigma0(a)
559	add	x5,x5,x10
	// -- h(x25) += x5; build x6 --
560	ldr	x10,[sp,#24]
561	str	x13,[sp,#16]
562	ror	x16,x22,#14
563	add	x25,x25,x19			// h+=K[i]
564	ror	x12,x7,#1
565	and	x17,x23,x22
566	ror	x11,x4,#19
567	bic	x19,x24,x22
568	ror	x13,x26,#28
569	add	x25,x25,x5			// h+=X[i]
570	eor	x16,x16,x22,ror#18
571	eor	x12,x12,x7,ror#8
572	orr	x17,x17,x19			// Ch(e,f,g)
573	eor	x19,x26,x27			// a^b, b^c in next round
574	eor	x16,x16,x22,ror#41	// Sigma1(e)
575	eor	x13,x13,x26,ror#34
576	add	x25,x25,x17			// h+=Ch(e,f,g)
577	and	x28,x28,x19			// (b^c)&=(a^b)
578	eor	x11,x11,x4,ror#61
579	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
580	add	x25,x25,x16			// h+=Sigma1(e)
581	eor	x28,x28,x27			// Maj(a,b,c)
582	eor	x17,x13,x26,ror#39	// Sigma0(a)
583	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
584	add	x6,x6,x15
585	add	x21,x21,x25			// d+=h
586	add	x25,x25,x28			// h+=Maj(a,b,c)
587	ldr	x28,[x30],#8		// *K++, x19 in next round
588	add	x6,x6,x12
589	add	x25,x25,x17			// h+=Sigma0(a)
590	add	x6,x6,x11
	// -- h(x24) += x6; build x7 --
591	ldr	x11,[sp,#0]
592	str	x14,[sp,#24]
593	ror	x16,x21,#14
594	add	x24,x24,x28			// h+=K[i]
595	ror	x13,x8,#1
596	and	x17,x22,x21
597	ror	x12,x5,#19
598	bic	x28,x23,x21
599	ror	x14,x25,#28
600	add	x24,x24,x6			// h+=X[i]
601	eor	x16,x16,x21,ror#18
602	eor	x13,x13,x8,ror#8
603	orr	x17,x17,x28			// Ch(e,f,g)
604	eor	x28,x25,x26			// a^b, b^c in next round
605	eor	x16,x16,x21,ror#41	// Sigma1(e)
606	eor	x14,x14,x25,ror#34
607	add	x24,x24,x17			// h+=Ch(e,f,g)
608	and	x19,x19,x28			// (b^c)&=(a^b)
609	eor	x12,x12,x5,ror#61
610	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
611	add	x24,x24,x16			// h+=Sigma1(e)
612	eor	x19,x19,x26			// Maj(a,b,c)
613	eor	x17,x14,x25,ror#39	// Sigma0(a)
614	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
615	add	x7,x7,x0
616	add	x20,x20,x24			// d+=h
617	add	x24,x24,x19			// h+=Maj(a,b,c)
618	ldr	x19,[x30],#8		// *K++, x28 in next round
619	add	x7,x7,x13
620	add	x24,x24,x17			// h+=Sigma0(a)
621	add	x7,x7,x12
	// -- h(x23) += x7; build x8 --
622	ldr	x12,[sp,#8]
623	str	x15,[sp,#0]
624	ror	x16,x20,#14
625	add	x23,x23,x19			// h+=K[i]
626	ror	x14,x9,#1
627	and	x17,x21,x20
628	ror	x13,x6,#19
629	bic	x19,x22,x20
630	ror	x15,x24,#28
631	add	x23,x23,x7			// h+=X[i]
632	eor	x16,x16,x20,ror#18
633	eor	x14,x14,x9,ror#8
634	orr	x17,x17,x19			// Ch(e,f,g)
635	eor	x19,x24,x25			// a^b, b^c in next round
636	eor	x16,x16,x20,ror#41	// Sigma1(e)
637	eor	x15,x15,x24,ror#34
638	add	x23,x23,x17			// h+=Ch(e,f,g)
639	and	x28,x28,x19			// (b^c)&=(a^b)
640	eor	x13,x13,x6,ror#61
641	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
642	add	x23,x23,x16			// h+=Sigma1(e)
643	eor	x28,x28,x25			// Maj(a,b,c)
644	eor	x17,x15,x24,ror#39	// Sigma0(a)
645	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
646	add	x8,x8,x1
647	add	x27,x27,x23			// d+=h
648	add	x23,x23,x28			// h+=Maj(a,b,c)
649	ldr	x28,[x30],#8		// *K++, x19 in next round
650	add	x8,x8,x14
651	add	x23,x23,x17			// h+=Sigma0(a)
652	add	x8,x8,x13
	// -- h(x22) += x8; build x9 --
653	ldr	x13,[sp,#16]
654	str	x0,[sp,#8]
655	ror	x16,x27,#14
656	add	x22,x22,x28			// h+=K[i]
657	ror	x15,x10,#1
658	and	x17,x20,x27
659	ror	x14,x7,#19
660	bic	x28,x21,x27
661	ror	x0,x23,#28
662	add	x22,x22,x8			// h+=X[i]
663	eor	x16,x16,x27,ror#18
664	eor	x15,x15,x10,ror#8
665	orr	x17,x17,x28			// Ch(e,f,g)
666	eor	x28,x23,x24			// a^b, b^c in next round
667	eor	x16,x16,x27,ror#41	// Sigma1(e)
668	eor	x0,x0,x23,ror#34
669	add	x22,x22,x17			// h+=Ch(e,f,g)
670	and	x19,x19,x28			// (b^c)&=(a^b)
671	eor	x14,x14,x7,ror#61
672	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
673	add	x22,x22,x16			// h+=Sigma1(e)
674	eor	x19,x19,x24			// Maj(a,b,c)
675	eor	x17,x0,x23,ror#39	// Sigma0(a)
676	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
677	add	x9,x9,x2
678	add	x26,x26,x22			// d+=h
679	add	x22,x22,x19			// h+=Maj(a,b,c)
680	ldr	x19,[x30],#8		// *K++, x28 in next round
681	add	x9,x9,x15
682	add	x22,x22,x17			// h+=Sigma0(a)
683	add	x9,x9,x14
	// -- h(x21) += x9; build x10 --
684	ldr	x14,[sp,#24]
685	str	x1,[sp,#16]
686	ror	x16,x26,#14
687	add	x21,x21,x19			// h+=K[i]
688	ror	x0,x11,#1
689	and	x17,x27,x26
690	ror	x15,x8,#19
691	bic	x19,x20,x26
692	ror	x1,x22,#28
693	add	x21,x21,x9			// h+=X[i]
694	eor	x16,x16,x26,ror#18
695	eor	x0,x0,x11,ror#8
696	orr	x17,x17,x19			// Ch(e,f,g)
697	eor	x19,x22,x23			// a^b, b^c in next round
698	eor	x16,x16,x26,ror#41	// Sigma1(e)
699	eor	x1,x1,x22,ror#34
700	add	x21,x21,x17			// h+=Ch(e,f,g)
701	and	x28,x28,x19			// (b^c)&=(a^b)
702	eor	x15,x15,x8,ror#61
703	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
704	add	x21,x21,x16			// h+=Sigma1(e)
705	eor	x28,x28,x23			// Maj(a,b,c)
706	eor	x17,x1,x22,ror#39	// Sigma0(a)
707	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
708	add	x10,x10,x3
709	add	x25,x25,x21			// d+=h
710	add	x21,x21,x28			// h+=Maj(a,b,c)
711	ldr	x28,[x30],#8		// *K++, x19 in next round
712	add	x10,x10,x0
713	add	x21,x21,x17			// h+=Sigma0(a)
714	add	x10,x10,x15
	// -- h(x20) += x10; build x11 --
715	ldr	x15,[sp,#0]
716	str	x2,[sp,#24]
717	ror	x16,x25,#14
718	add	x20,x20,x28			// h+=K[i]
719	ror	x1,x12,#1
720	and	x17,x26,x25
721	ror	x0,x9,#19
722	bic	x28,x27,x25
723	ror	x2,x21,#28
724	add	x20,x20,x10			// h+=X[i]
725	eor	x16,x16,x25,ror#18
726	eor	x1,x1,x12,ror#8
727	orr	x17,x17,x28			// Ch(e,f,g)
728	eor	x28,x21,x22			// a^b, b^c in next round
729	eor	x16,x16,x25,ror#41	// Sigma1(e)
730	eor	x2,x2,x21,ror#34
731	add	x20,x20,x17			// h+=Ch(e,f,g)
732	and	x19,x19,x28			// (b^c)&=(a^b)
733	eor	x0,x0,x9,ror#61
734	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
735	add	x20,x20,x16			// h+=Sigma1(e)
736	eor	x19,x19,x22			// Maj(a,b,c)
737	eor	x17,x2,x21,ror#39	// Sigma0(a)
738	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
739	add	x11,x11,x4
740	add	x24,x24,x20			// d+=h
741	add	x20,x20,x19			// h+=Maj(a,b,c)
742	ldr	x19,[x30],#8		// *K++, x28 in next round
743	add	x11,x11,x1
744	add	x20,x20,x17			// h+=Sigma0(a)
745	add	x11,x11,x0
	// -- h(x27) += x11; build x12 --
746	ldr	x0,[sp,#8]
747	str	x3,[sp,#0]
748	ror	x16,x24,#14
749	add	x27,x27,x19			// h+=K[i]
750	ror	x2,x13,#1
751	and	x17,x25,x24
752	ror	x1,x10,#19
753	bic	x19,x26,x24
754	ror	x3,x20,#28
755	add	x27,x27,x11			// h+=X[i]
756	eor	x16,x16,x24,ror#18
757	eor	x2,x2,x13,ror#8
758	orr	x17,x17,x19			// Ch(e,f,g)
759	eor	x19,x20,x21			// a^b, b^c in next round
760	eor	x16,x16,x24,ror#41	// Sigma1(e)
761	eor	x3,x3,x20,ror#34
762	add	x27,x27,x17			// h+=Ch(e,f,g)
763	and	x28,x28,x19			// (b^c)&=(a^b)
764	eor	x1,x1,x10,ror#61
765	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
766	add	x27,x27,x16			// h+=Sigma1(e)
767	eor	x28,x28,x21			// Maj(a,b,c)
768	eor	x17,x3,x20,ror#39	// Sigma0(a)
769	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
770	add	x12,x12,x5
771	add	x23,x23,x27			// d+=h
772	add	x27,x27,x28			// h+=Maj(a,b,c)
773	ldr	x28,[x30],#8		// *K++, x19 in next round
774	add	x12,x12,x2
775	add	x27,x27,x17			// h+=Sigma0(a)
776	add	x12,x12,x1
	// -- h(x26) += x12; build x13 --
777	ldr	x1,[sp,#16]
778	str	x4,[sp,#8]
779	ror	x16,x23,#14
780	add	x26,x26,x28			// h+=K[i]
781	ror	x3,x14,#1
782	and	x17,x24,x23
783	ror	x2,x11,#19
784	bic	x28,x25,x23
785	ror	x4,x27,#28
786	add	x26,x26,x12			// h+=X[i]
787	eor	x16,x16,x23,ror#18
788	eor	x3,x3,x14,ror#8
789	orr	x17,x17,x28			// Ch(e,f,g)
790	eor	x28,x27,x20			// a^b, b^c in next round
791	eor	x16,x16,x23,ror#41	// Sigma1(e)
792	eor	x4,x4,x27,ror#34
793	add	x26,x26,x17			// h+=Ch(e,f,g)
794	and	x19,x19,x28			// (b^c)&=(a^b)
795	eor	x2,x2,x11,ror#61
796	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
797	add	x26,x26,x16			// h+=Sigma1(e)
798	eor	x19,x19,x20			// Maj(a,b,c)
799	eor	x17,x4,x27,ror#39	// Sigma0(a)
800	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
801	add	x13,x13,x6
802	add	x22,x22,x26			// d+=h
803	add	x26,x26,x19			// h+=Maj(a,b,c)
804	ldr	x19,[x30],#8		// *K++, x28 in next round
805	add	x13,x13,x3
806	add	x26,x26,x17			// h+=Sigma0(a)
807	add	x13,x13,x2
	// -- h(x25) += x13; build x14 --
808	ldr	x2,[sp,#24]
809	str	x5,[sp,#16]
810	ror	x16,x22,#14
811	add	x25,x25,x19			// h+=K[i]
812	ror	x4,x15,#1
813	and	x17,x23,x22
814	ror	x3,x12,#19
815	bic	x19,x24,x22
816	ror	x5,x26,#28
817	add	x25,x25,x13			// h+=X[i]
818	eor	x16,x16,x22,ror#18
819	eor	x4,x4,x15,ror#8
820	orr	x17,x17,x19			// Ch(e,f,g)
821	eor	x19,x26,x27			// a^b, b^c in next round
822	eor	x16,x16,x22,ror#41	// Sigma1(e)
823	eor	x5,x5,x26,ror#34
824	add	x25,x25,x17			// h+=Ch(e,f,g)
825	and	x28,x28,x19			// (b^c)&=(a^b)
826	eor	x3,x3,x12,ror#61
827	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
828	add	x25,x25,x16			// h+=Sigma1(e)
829	eor	x28,x28,x27			// Maj(a,b,c)
830	eor	x17,x5,x26,ror#39	// Sigma0(a)
831	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
832	add	x14,x14,x7
833	add	x21,x21,x25			// d+=h
834	add	x25,x25,x28			// h+=Maj(a,b,c)
835	ldr	x28,[x30],#8		// *K++, x19 in next round
836	add	x14,x14,x4
837	add	x25,x25,x17			// h+=Sigma0(a)
838	add	x14,x14,x3
	// -- h(x24) += x14; build x15 --
839	ldr	x3,[sp,#0]
840	str	x6,[sp,#24]
841	ror	x16,x21,#14
842	add	x24,x24,x28			// h+=K[i]
843	ror	x5,x0,#1
844	and	x17,x22,x21
845	ror	x4,x13,#19
846	bic	x28,x23,x21
847	ror	x6,x25,#28
848	add	x24,x24,x14			// h+=X[i]
849	eor	x16,x16,x21,ror#18
850	eor	x5,x5,x0,ror#8
851	orr	x17,x17,x28			// Ch(e,f,g)
852	eor	x28,x25,x26			// a^b, b^c in next round
853	eor	x16,x16,x21,ror#41	// Sigma1(e)
854	eor	x6,x6,x25,ror#34
855	add	x24,x24,x17			// h+=Ch(e,f,g)
856	and	x19,x19,x28			// (b^c)&=(a^b)
857	eor	x4,x4,x13,ror#61
858	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
859	add	x24,x24,x16			// h+=Sigma1(e)
860	eor	x19,x19,x26			// Maj(a,b,c)
861	eor	x17,x6,x25,ror#39	// Sigma0(a)
862	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
863	add	x15,x15,x8
864	add	x20,x20,x24			// d+=h
865	add	x24,x24,x19			// h+=Maj(a,b,c)
866	ldr	x19,[x30],#8		// *K++, x28 in next round
867	add	x15,x15,x5
868	add	x24,x24,x17			// h+=Sigma0(a)
869	add	x15,x15,x4
	// -- h(x23) += x15; build x0 --
870	ldr	x4,[sp,#8]
871	str	x7,[sp,#0]
872	ror	x16,x20,#14
873	add	x23,x23,x19			// h+=K[i]
874	ror	x6,x1,#1
875	and	x17,x21,x20
876	ror	x5,x14,#19
877	bic	x19,x22,x20
878	ror	x7,x24,#28
879	add	x23,x23,x15			// h+=X[i]
880	eor	x16,x16,x20,ror#18
881	eor	x6,x6,x1,ror#8
882	orr	x17,x17,x19			// Ch(e,f,g)
883	eor	x19,x24,x25			// a^b, b^c in next round
884	eor	x16,x16,x20,ror#41	// Sigma1(e)
885	eor	x7,x7,x24,ror#34
886	add	x23,x23,x17			// h+=Ch(e,f,g)
887	and	x28,x28,x19			// (b^c)&=(a^b)
888	eor	x5,x5,x14,ror#61
889	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
890	add	x23,x23,x16			// h+=Sigma1(e)
891	eor	x28,x28,x25			// Maj(a,b,c)
892	eor	x17,x7,x24,ror#39	// Sigma0(a)
893	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
894	add	x0,x0,x9
895	add	x27,x27,x23			// d+=h
896	add	x23,x23,x28			// h+=Maj(a,b,c)
897	ldr	x28,[x30],#8		// *K++, x19 in next round
898	add	x0,x0,x6
899	add	x23,x23,x17			// h+=Sigma0(a)
900	add	x0,x0,x5
	// -- h(x22) += x0; build x1 --
901	ldr	x5,[sp,#16]
902	str	x8,[sp,#8]
903	ror	x16,x27,#14
904	add	x22,x22,x28			// h+=K[i]
905	ror	x7,x2,#1
906	and	x17,x20,x27
907	ror	x6,x15,#19
908	bic	x28,x21,x27
909	ror	x8,x23,#28
910	add	x22,x22,x0			// h+=X[i]
911	eor	x16,x16,x27,ror#18
912	eor	x7,x7,x2,ror#8
913	orr	x17,x17,x28			// Ch(e,f,g)
914	eor	x28,x23,x24			// a^b, b^c in next round
915	eor	x16,x16,x27,ror#41	// Sigma1(e)
916	eor	x8,x8,x23,ror#34
917	add	x22,x22,x17			// h+=Ch(e,f,g)
918	and	x19,x19,x28			// (b^c)&=(a^b)
919	eor	x6,x6,x15,ror#61
920	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
921	add	x22,x22,x16			// h+=Sigma1(e)
922	eor	x19,x19,x24			// Maj(a,b,c)
923	eor	x17,x8,x23,ror#39	// Sigma0(a)
924	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
925	add	x1,x1,x10
926	add	x26,x26,x22			// d+=h
927	add	x22,x22,x19			// h+=Maj(a,b,c)
928	ldr	x19,[x30],#8		// *K++, x28 in next round
929	add	x1,x1,x7
930	add	x22,x22,x17			// h+=Sigma0(a)
931	add	x1,x1,x6
	// -- h(x21) += x1; build x2 --
932	ldr	x6,[sp,#24]
933	str	x9,[sp,#16]
934	ror	x16,x26,#14
935	add	x21,x21,x19			// h+=K[i]
936	ror	x8,x3,#1
937	and	x17,x27,x26
938	ror	x7,x0,#19
939	bic	x19,x20,x26
940	ror	x9,x22,#28
941	add	x21,x21,x1			// h+=X[i]
942	eor	x16,x16,x26,ror#18
943	eor	x8,x8,x3,ror#8
944	orr	x17,x17,x19			// Ch(e,f,g)
945	eor	x19,x22,x23			// a^b, b^c in next round
946	eor	x16,x16,x26,ror#41	// Sigma1(e)
947	eor	x9,x9,x22,ror#34
948	add	x21,x21,x17			// h+=Ch(e,f,g)
949	and	x28,x28,x19			// (b^c)&=(a^b)
950	eor	x7,x7,x0,ror#61
951	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
952	add	x21,x21,x16			// h+=Sigma1(e)
953	eor	x28,x28,x23			// Maj(a,b,c)
954	eor	x17,x9,x22,ror#39	// Sigma0(a)
955	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
956	add	x2,x2,x11
957	add	x25,x25,x21			// d+=h
958	add	x21,x21,x28			// h+=Maj(a,b,c)
959	ldr	x28,[x30],#8		// *K++, x19 in next round
960	add	x2,x2,x8
961	add	x21,x21,x17			// h+=Sigma0(a)
962	add	x2,x2,x7
	// -- h(x20) += x2; build x3 for the next pass --
963	ldr	x7,[sp,#0]
964	str	x10,[sp,#24]
965	ror	x16,x25,#14
966	add	x20,x20,x28			// h+=K[i]
967	ror	x9,x4,#1
968	and	x17,x26,x25
969	ror	x8,x1,#19
970	bic	x28,x27,x25
971	ror	x10,x21,#28
972	add	x20,x20,x2			// h+=X[i]
973	eor	x16,x16,x25,ror#18
974	eor	x9,x9,x4,ror#8
975	orr	x17,x17,x28			// Ch(e,f,g)
976	eor	x28,x21,x22			// a^b, b^c in next round
977	eor	x16,x16,x25,ror#41	// Sigma1(e)
978	eor	x10,x10,x21,ror#34
979	add	x20,x20,x17			// h+=Ch(e,f,g)
980	and	x19,x19,x28			// (b^c)&=(a^b)
981	eor	x8,x8,x1,ror#61
982	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
983	add	x20,x20,x16			// h+=Sigma1(e)
984	eor	x19,x19,x22			// Maj(a,b,c)
985	eor	x17,x10,x21,ror#39	// Sigma0(a)
986	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
987	add	x3,x3,x12
988	add	x24,x24,x20			// d+=h
989	add	x20,x20,x19			// h+=Maj(a,b,c)
990	ldr	x19,[x30],#8		// *K++, x28 in next round
991	add	x3,x3,x9
992	add	x20,x20,x17			// h+=Sigma0(a)
993	add	x3,x3,x8
	// x19 holds the K value just fetched; zero means the table terminator
	// was reached and all rounds for this block are done.
994	cbnz	x19,.Loop_16_xx
995
	// End of one 128-byte block: add the working variables x20-x27 into the
	// stored state, then either loop for the next block or return.
	//
	// Recover the state pointer (x0) and end-of-input pointer (x2) saved in the
	// prologue, and the input pointer saved at the top of .Loop.
996	ldp	x0,x2,[x29,#96]
997	ldr	x1,[x29,#112]
	// 648 = 81*8: x30 advanced by one quad per K fetch - the 80 constants
	// plus the zero terminator - so this puts it back at .LK512.
998	sub	x30,x30,#648		// rewind
999
1000	ldp	x3,x4,[x0]
1001	ldp	x5,x6,[x0,#2*8]
	// The saved pointer was already 16 bytes into the block, so +14*8 lands on
	// the start of the next 128-byte block.
1002	add	x1,x1,#14*8			// advance input pointer
1003	ldp	x7,x8,[x0,#4*8]
1004	add	x20,x20,x3
1005	ldp	x9,x10,[x0,#6*8]
1006	add	x21,x21,x4
1007	add	x22,x22,x5
1008	add	x23,x23,x6
1009	stp	x20,x21,[x0]
1010	add	x24,x24,x7
1011	add	x25,x25,x8
1012	stp	x22,x23,[x0,#2*8]
1013	add	x26,x26,x9
1014	add	x27,x27,x10
	// Compare against the end pointer; the two stores below do not touch the
	// flags, so b.ne still sees this result.
1015	cmp	x1,x2
1016	stp	x24,x25,[x0,#4*8]
1017	stp	x26,x27,[x0,#6*8]
1018	b.ne	.Loop
1019
	// Epilogue: restore x19-x28 via x29, drop the 32-byte spill area, then
	// pop the 128-byte frame together with x29/x30.
1020	ldp	x19,x20,[x29,#16]
1021	add	sp,sp,#4*8
1022	ldp	x21,x22,[x29,#32]
1023	ldp	x23,x24,[x29,#48]
1024	ldp	x25,x26,[x29,#64]
1025	ldp	x27,x28,[x29,#80]
1026	ldp	x29,x30,[sp],#128
	// Macro supplied from outside this file (presumably arm_arch.h); by name
	// the counterpart of AARCH64_SIGN_LINK_REGISTER - TODO confirm.
1027	AARCH64_VALIDATE_LINK_REGISTER
1028	ret
1029.size	sha512_block_data_order,.-sha512_block_data_order
1030
// .LK512 - constant table that sha512_block_data_order walks through x30:
// 80 64-bit round constants (two per line) followed by one zero quad. The
// zero is what the cbnz at the end of .Loop_16_xx tests to leave the round
// loop, and it is why the function rewinds x30 by 81*8 = 648 bytes per block.
1031.section	.rodata
1032.align	6
1033.type	.LK512,%object
1034.LK512:
1035.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1036.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1037.quad	0x3956c25bf348b538,0x59f111f1b605d019
1038.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1039.quad	0xd807aa98a3030242,0x12835b0145706fbe
1040.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1041.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1042.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1043.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1044.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1045.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1046.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1047.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1048.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1049.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1050.quad	0x06ca6351e003826f,0x142929670a0e6e70
1051.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1052.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1053.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1054.quad	0x81c2c92e47edaee6,0x92722c851482353b
1055.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1056.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1057.quad	0xd192e819d6ef5218,0xd69906245565a910
1058.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1059.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1060.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1061.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1062.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1063.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1064.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1065.quad	0x90befffa23631e28,0xa4506cebde82bde9
1066.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1067.quad	0xca273eceea26619c,0xd186b8c721c0c207
1068.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1069.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1070.quad	0x113f9804bef90dae,0x1b710b35131c471b
1071.quad	0x28db77f523047d84,0x32caab7b40c72493
1072.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1073.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1074.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1075.quad	0	// terminator
1076.size	.LK512,.-.LK512
// NUL-terminated ASCII identification string, placed after the .size above
// and therefore not counted in .LK512's size:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1077.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// The .align directive appears twice in the generated output; the second
// has no further effect.
1078.align	2
1079.align	2
1080#endif
1081#endif  // !OPENSSL_NO_ASM
1082.section	.note.GNU-stack,"",%progbits
1083