• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__aarch64__)
13#if defined(BORINGSSL_PREFIX)
14#include <boringssl_prefix_symbols_asm.h>
15#endif
16// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
17//
18// Licensed under the OpenSSL license (the "License").  You may not use
19// this file except in compliance with the License.  You can obtain a copy
20// in the file LICENSE in the source distribution or at
21// https://www.openssl.org/source/license.html
22
23// ====================================================================
24// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
25// project. The module is, however, dual licensed under OpenSSL and
26// CRYPTOGAMS licenses depending on where you obtain it. For further
27// details see http://www.openssl.org/~appro/cryptogams/.
28//
29// Permission to use under GPLv2 terms is granted.
30// ====================================================================
31//
32// SHA256/512 for ARMv8.
33//
34// Performance in cycles per processed byte and improvement coefficient
35// over code generated with "default" compiler:
36//
37//		SHA256-hw	SHA256(*)	SHA512
38// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
39// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
40// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
41// Denver	2.01		10.5 (+26%)	6.70 (+8%)
42// X-Gene			20.0 (+100%)	12.8 (+300%(***))
43// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
44// Kryo		1.92		17.4 (+30%)	11.2 (+8%)
45//
46// (*)	Software SHA256 results are of lesser relevance, presented
47//	mostly for informational purposes.
48// (**)	The result is a trade-off: it's possible to improve it by
49//	10% (or by 1 cycle per round), but at the cost of 20% loss
50//	on Cortex-A53 (or by 4 cycles per round).
51// (***)	Super-impressive coefficients over gcc-generated code are
52//	indication of some compiler "pathology", most notably code
53//	generated with -mgeneral-regs-only is significantly faster
54//	and the gap is only 40-90%.
55
56#ifndef	__KERNEL__
57# include <openssl/arm_arch.h>
58#endif
59
60.text
61
62
// sha512_block_data_order: SHA-512 block transform using only the
// general-purpose registers.
//
// Inputs, as consumed by the code below:
//   x0 = pointer to eight 64-bit state words (loaded into x20-x27)
//   x1 = pointer to the input data
//   x2 = block count; the end of input is computed as x1 + (x2 << 7),
//        i.e. 128 bytes per block
//
// Unless __KERNEL__ is defined, OPENSSL_armcap_P is tested first and control
// branches to .Lv8_entry when the ARMV8_SHA512 bit is set (that label is not
// defined in this part of the file).
//
// Stack frame (128 bytes, addressed through x29):
//   [x29,#0]        x29,x30
//   [x29,#16..#95]  x19-x28
//   [x29,#96]       x0 (state pointer), x2 (end of input)
//   [x29,#112]      input pointer, stored inside .Loop
// A further 32 bytes below the frame ([sp,#0..#31]) hold spilled message words.
63.hidden	OPENSSL_armcap_P
64.globl	sha512_block_data_order
65.hidden	sha512_block_data_order
66.type	sha512_block_data_order,%function
67.align	6
68sha512_block_data_order:
69	AARCH64_VALID_CALL_TARGET
70#ifndef	__KERNEL__
71#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
72	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
73#else
74	adrp	x16,OPENSSL_armcap_P
75#endif
76	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]	// w16 = capability word
77	tst	w16,#ARMV8_SHA512	// ARMV8_SHA512 bit set?
78	b.ne	.Lv8_entry	// yes: use the code at .Lv8_entry instead
79#endif
80	AARCH64_SIGN_LINK_REGISTER
81	stp	x29,x30,[sp,#-128]!	// allocate the 128-byte frame
82	add	x29,sp,#0	// x29 = frame base
83
84	stp	x19,x20,[sp,#16]	// save x19-x28, all of which are used below
85	stp	x21,x22,[sp,#32]
86	stp	x23,x24,[sp,#48]
87	stp	x25,x26,[sp,#64]
88	stp	x27,x28,[sp,#80]
89	sub	sp,sp,#4*8	// 32 bytes of scratch for spilled X[] words
90
91	ldp	x20,x21,[x0]				// load context
92	ldp	x22,x23,[x0,#2*8]
93	ldp	x24,x25,[x0,#4*8]
94	add	x2,x1,x2,lsl#7	// end of input
95	ldp	x26,x27,[x0,#6*8]
96	adrp	x30,.LK512	// x30 = address of the K table (page part)
97	add	x30,x30,:lo12:.LK512	// ... plus the low 12 bits
98	stp	x0,x2,[x29,#96]	// keep state pointer and end pointer; x0/x2 are reused below
99
// Per-block loop, rounds 0-15.
//
// The eight working variables live in x20-x27 and are never moved: each round
// is emitted with the register roles rotated by one (round 0 uses x20 as "a"
// and x27 as "h", round 1 uses x27 as "a" and x26 as "h", and so on).
// x30 walks the K table with post-indexed loads. x16 and x17 are scratch.
// x19 and x28 alternate between holding the next K value and the a^b term
// that the following round reuses as b^c when forming Maj.
//
// Message words are loaded two at a time from [x1] and byte-swapped with rev
// unless __AARCH64EB__ is defined. They end up as X[0..15] in
// x3-x15, x0, x1, x2 (in that order). The register that will hold a later X
// word is borrowed as a temporary until that word is loaded; from round 11 on
// x6-x10 are spilled to [sp,#0..#24] to free temporaries.
//
// The "h+=Sigma0(a)" add of each round is deferred to the start of the next
// round; the commented-out add lines mark where it logically belongs.
100.Loop:
101	ldp	x3,x4,[x1],#2*8	// X[0],X[1]; x1 += 16
102	ldr	x19,[x30],#8			// *K++
103	eor	x28,x21,x22				// magic seed
104	str	x1,[x29,#112]	// save input pointer (already advanced by 16)
105#ifndef	__AARCH64EB__
106	rev	x3,x3			// 0
107#endif
108	ror	x16,x24,#14
109	add	x27,x27,x19			// h+=K[i]
110	eor	x6,x24,x24,ror#23
111	and	x17,x25,x24
112	bic	x19,x26,x24
113	add	x27,x27,x3			// h+=X[i]
114	orr	x17,x17,x19			// Ch(e,f,g)
115	eor	x19,x20,x21			// a^b, b^c in next round
116	eor	x16,x16,x6,ror#18	// Sigma1(e)
117	ror	x6,x20,#28
118	add	x27,x27,x17			// h+=Ch(e,f,g)
119	eor	x17,x20,x20,ror#5
120	add	x27,x27,x16			// h+=Sigma1(e)
121	and	x28,x28,x19			// (b^c)&=(a^b)
122	add	x23,x23,x27			// d+=h
123	eor	x28,x28,x21			// Maj(a,b,c)
124	eor	x17,x6,x17,ror#34	// Sigma0(a)
125	add	x27,x27,x28			// h+=Maj(a,b,c)
126	ldr	x28,[x30],#8		// *K++, x19 in next round
127	//add	x27,x27,x17			// h+=Sigma0(a)
128#ifndef	__AARCH64EB__
129	rev	x4,x4			// 1
130#endif
131	ldp	x5,x6,[x1],#2*8	// X[2],X[3]
132	add	x27,x27,x17			// h+=Sigma0(a)
133	ror	x16,x23,#14
134	add	x26,x26,x28			// h+=K[i]
135	eor	x7,x23,x23,ror#23
136	and	x17,x24,x23
137	bic	x28,x25,x23
138	add	x26,x26,x4			// h+=X[i]
139	orr	x17,x17,x28			// Ch(e,f,g)
140	eor	x28,x27,x20			// a^b, b^c in next round
141	eor	x16,x16,x7,ror#18	// Sigma1(e)
142	ror	x7,x27,#28
143	add	x26,x26,x17			// h+=Ch(e,f,g)
144	eor	x17,x27,x27,ror#5
145	add	x26,x26,x16			// h+=Sigma1(e)
146	and	x19,x19,x28			// (b^c)&=(a^b)
147	add	x22,x22,x26			// d+=h
148	eor	x19,x19,x20			// Maj(a,b,c)
149	eor	x17,x7,x17,ror#34	// Sigma0(a)
150	add	x26,x26,x19			// h+=Maj(a,b,c)
151	ldr	x19,[x30],#8		// *K++, x28 in next round
152	//add	x26,x26,x17			// h+=Sigma0(a)
153#ifndef	__AARCH64EB__
154	rev	x5,x5			// 2
155#endif
156	add	x26,x26,x17			// h+=Sigma0(a)
157	ror	x16,x22,#14
158	add	x25,x25,x19			// h+=K[i]
159	eor	x8,x22,x22,ror#23
160	and	x17,x23,x22
161	bic	x19,x24,x22
162	add	x25,x25,x5			// h+=X[i]
163	orr	x17,x17,x19			// Ch(e,f,g)
164	eor	x19,x26,x27			// a^b, b^c in next round
165	eor	x16,x16,x8,ror#18	// Sigma1(e)
166	ror	x8,x26,#28
167	add	x25,x25,x17			// h+=Ch(e,f,g)
168	eor	x17,x26,x26,ror#5
169	add	x25,x25,x16			// h+=Sigma1(e)
170	and	x28,x28,x19			// (b^c)&=(a^b)
171	add	x21,x21,x25			// d+=h
172	eor	x28,x28,x27			// Maj(a,b,c)
173	eor	x17,x8,x17,ror#34	// Sigma0(a)
174	add	x25,x25,x28			// h+=Maj(a,b,c)
175	ldr	x28,[x30],#8		// *K++, x19 in next round
176	//add	x25,x25,x17			// h+=Sigma0(a)
177#ifndef	__AARCH64EB__
178	rev	x6,x6			// 3
179#endif
180	ldp	x7,x8,[x1],#2*8	// X[4],X[5]
181	add	x25,x25,x17			// h+=Sigma0(a)
182	ror	x16,x21,#14
183	add	x24,x24,x28			// h+=K[i]
184	eor	x9,x21,x21,ror#23
185	and	x17,x22,x21
186	bic	x28,x23,x21
187	add	x24,x24,x6			// h+=X[i]
188	orr	x17,x17,x28			// Ch(e,f,g)
189	eor	x28,x25,x26			// a^b, b^c in next round
190	eor	x16,x16,x9,ror#18	// Sigma1(e)
191	ror	x9,x25,#28
192	add	x24,x24,x17			// h+=Ch(e,f,g)
193	eor	x17,x25,x25,ror#5
194	add	x24,x24,x16			// h+=Sigma1(e)
195	and	x19,x19,x28			// (b^c)&=(a^b)
196	add	x20,x20,x24			// d+=h
197	eor	x19,x19,x26			// Maj(a,b,c)
198	eor	x17,x9,x17,ror#34	// Sigma0(a)
199	add	x24,x24,x19			// h+=Maj(a,b,c)
200	ldr	x19,[x30],#8		// *K++, x28 in next round
201	//add	x24,x24,x17			// h+=Sigma0(a)
202#ifndef	__AARCH64EB__
203	rev	x7,x7			// 4
204#endif
205	add	x24,x24,x17			// h+=Sigma0(a)
206	ror	x16,x20,#14
207	add	x23,x23,x19			// h+=K[i]
208	eor	x10,x20,x20,ror#23
209	and	x17,x21,x20
210	bic	x19,x22,x20
211	add	x23,x23,x7			// h+=X[i]
212	orr	x17,x17,x19			// Ch(e,f,g)
213	eor	x19,x24,x25			// a^b, b^c in next round
214	eor	x16,x16,x10,ror#18	// Sigma1(e)
215	ror	x10,x24,#28
216	add	x23,x23,x17			// h+=Ch(e,f,g)
217	eor	x17,x24,x24,ror#5
218	add	x23,x23,x16			// h+=Sigma1(e)
219	and	x28,x28,x19			// (b^c)&=(a^b)
220	add	x27,x27,x23			// d+=h
221	eor	x28,x28,x25			// Maj(a,b,c)
222	eor	x17,x10,x17,ror#34	// Sigma0(a)
223	add	x23,x23,x28			// h+=Maj(a,b,c)
224	ldr	x28,[x30],#8		// *K++, x19 in next round
225	//add	x23,x23,x17			// h+=Sigma0(a)
226#ifndef	__AARCH64EB__
227	rev	x8,x8			// 5
228#endif
229	ldp	x9,x10,[x1],#2*8	// X[6],X[7]
230	add	x23,x23,x17			// h+=Sigma0(a)
231	ror	x16,x27,#14
232	add	x22,x22,x28			// h+=K[i]
233	eor	x11,x27,x27,ror#23
234	and	x17,x20,x27
235	bic	x28,x21,x27
236	add	x22,x22,x8			// h+=X[i]
237	orr	x17,x17,x28			// Ch(e,f,g)
238	eor	x28,x23,x24			// a^b, b^c in next round
239	eor	x16,x16,x11,ror#18	// Sigma1(e)
240	ror	x11,x23,#28
241	add	x22,x22,x17			// h+=Ch(e,f,g)
242	eor	x17,x23,x23,ror#5
243	add	x22,x22,x16			// h+=Sigma1(e)
244	and	x19,x19,x28			// (b^c)&=(a^b)
245	add	x26,x26,x22			// d+=h
246	eor	x19,x19,x24			// Maj(a,b,c)
247	eor	x17,x11,x17,ror#34	// Sigma0(a)
248	add	x22,x22,x19			// h+=Maj(a,b,c)
249	ldr	x19,[x30],#8		// *K++, x28 in next round
250	//add	x22,x22,x17			// h+=Sigma0(a)
251#ifndef	__AARCH64EB__
252	rev	x9,x9			// 6
253#endif
254	add	x22,x22,x17			// h+=Sigma0(a)
255	ror	x16,x26,#14
256	add	x21,x21,x19			// h+=K[i]
257	eor	x12,x26,x26,ror#23
258	and	x17,x27,x26
259	bic	x19,x20,x26
260	add	x21,x21,x9			// h+=X[i]
261	orr	x17,x17,x19			// Ch(e,f,g)
262	eor	x19,x22,x23			// a^b, b^c in next round
263	eor	x16,x16,x12,ror#18	// Sigma1(e)
264	ror	x12,x22,#28
265	add	x21,x21,x17			// h+=Ch(e,f,g)
266	eor	x17,x22,x22,ror#5
267	add	x21,x21,x16			// h+=Sigma1(e)
268	and	x28,x28,x19			// (b^c)&=(a^b)
269	add	x25,x25,x21			// d+=h
270	eor	x28,x28,x23			// Maj(a,b,c)
271	eor	x17,x12,x17,ror#34	// Sigma0(a)
272	add	x21,x21,x28			// h+=Maj(a,b,c)
273	ldr	x28,[x30],#8		// *K++, x19 in next round
274	//add	x21,x21,x17			// h+=Sigma0(a)
275#ifndef	__AARCH64EB__
276	rev	x10,x10			// 7
277#endif
278	ldp	x11,x12,[x1],#2*8	// X[8],X[9]
279	add	x21,x21,x17			// h+=Sigma0(a)
280	ror	x16,x25,#14
281	add	x20,x20,x28			// h+=K[i]
282	eor	x13,x25,x25,ror#23
283	and	x17,x26,x25
284	bic	x28,x27,x25
285	add	x20,x20,x10			// h+=X[i]
286	orr	x17,x17,x28			// Ch(e,f,g)
287	eor	x28,x21,x22			// a^b, b^c in next round
288	eor	x16,x16,x13,ror#18	// Sigma1(e)
289	ror	x13,x21,#28
290	add	x20,x20,x17			// h+=Ch(e,f,g)
291	eor	x17,x21,x21,ror#5
292	add	x20,x20,x16			// h+=Sigma1(e)
293	and	x19,x19,x28			// (b^c)&=(a^b)
294	add	x24,x24,x20			// d+=h
295	eor	x19,x19,x22			// Maj(a,b,c)
296	eor	x17,x13,x17,ror#34	// Sigma0(a)
297	add	x20,x20,x19			// h+=Maj(a,b,c)
298	ldr	x19,[x30],#8		// *K++, x28 in next round
299	//add	x20,x20,x17			// h+=Sigma0(a)
300#ifndef	__AARCH64EB__
301	rev	x11,x11			// 8
302#endif
303	add	x20,x20,x17			// h+=Sigma0(a)
304	ror	x16,x24,#14
305	add	x27,x27,x19			// h+=K[i]
306	eor	x14,x24,x24,ror#23
307	and	x17,x25,x24
308	bic	x19,x26,x24
309	add	x27,x27,x11			// h+=X[i]
310	orr	x17,x17,x19			// Ch(e,f,g)
311	eor	x19,x20,x21			// a^b, b^c in next round
312	eor	x16,x16,x14,ror#18	// Sigma1(e)
313	ror	x14,x20,#28
314	add	x27,x27,x17			// h+=Ch(e,f,g)
315	eor	x17,x20,x20,ror#5
316	add	x27,x27,x16			// h+=Sigma1(e)
317	and	x28,x28,x19			// (b^c)&=(a^b)
318	add	x23,x23,x27			// d+=h
319	eor	x28,x28,x21			// Maj(a,b,c)
320	eor	x17,x14,x17,ror#34	// Sigma0(a)
321	add	x27,x27,x28			// h+=Maj(a,b,c)
322	ldr	x28,[x30],#8		// *K++, x19 in next round
323	//add	x27,x27,x17			// h+=Sigma0(a)
324#ifndef	__AARCH64EB__
325	rev	x12,x12			// 9
326#endif
327	ldp	x13,x14,[x1],#2*8	// X[10],X[11]
328	add	x27,x27,x17			// h+=Sigma0(a)
329	ror	x16,x23,#14
330	add	x26,x26,x28			// h+=K[i]
331	eor	x15,x23,x23,ror#23
332	and	x17,x24,x23
333	bic	x28,x25,x23
334	add	x26,x26,x12			// h+=X[i]
335	orr	x17,x17,x28			// Ch(e,f,g)
336	eor	x28,x27,x20			// a^b, b^c in next round
337	eor	x16,x16,x15,ror#18	// Sigma1(e)
338	ror	x15,x27,#28
339	add	x26,x26,x17			// h+=Ch(e,f,g)
340	eor	x17,x27,x27,ror#5
341	add	x26,x26,x16			// h+=Sigma1(e)
342	and	x19,x19,x28			// (b^c)&=(a^b)
343	add	x22,x22,x26			// d+=h
344	eor	x19,x19,x20			// Maj(a,b,c)
345	eor	x17,x15,x17,ror#34	// Sigma0(a)
346	add	x26,x26,x19			// h+=Maj(a,b,c)
347	ldr	x19,[x30],#8		// *K++, x28 in next round
348	//add	x26,x26,x17			// h+=Sigma0(a)
349#ifndef	__AARCH64EB__
350	rev	x13,x13			// 10
351#endif
352	add	x26,x26,x17			// h+=Sigma0(a)
353	ror	x16,x22,#14
354	add	x25,x25,x19			// h+=K[i]
355	eor	x0,x22,x22,ror#23
356	and	x17,x23,x22
357	bic	x19,x24,x22
358	add	x25,x25,x13			// h+=X[i]
359	orr	x17,x17,x19			// Ch(e,f,g)
360	eor	x19,x26,x27			// a^b, b^c in next round
361	eor	x16,x16,x0,ror#18	// Sigma1(e)
362	ror	x0,x26,#28
363	add	x25,x25,x17			// h+=Ch(e,f,g)
364	eor	x17,x26,x26,ror#5
365	add	x25,x25,x16			// h+=Sigma1(e)
366	and	x28,x28,x19			// (b^c)&=(a^b)
367	add	x21,x21,x25			// d+=h
368	eor	x28,x28,x27			// Maj(a,b,c)
369	eor	x17,x0,x17,ror#34	// Sigma0(a)
370	add	x25,x25,x28			// h+=Maj(a,b,c)
371	ldr	x28,[x30],#8		// *K++, x19 in next round
372	//add	x25,x25,x17			// h+=Sigma0(a)
373#ifndef	__AARCH64EB__
374	rev	x14,x14			// 11
375#endif
376	ldp	x15,x0,[x1],#2*8	// X[12],X[13]
377	add	x25,x25,x17			// h+=Sigma0(a)
378	str	x6,[sp,#24]	// spill X[3]; x6 is used as scratch below
379	ror	x16,x21,#14
380	add	x24,x24,x28			// h+=K[i]
381	eor	x6,x21,x21,ror#23
382	and	x17,x22,x21
383	bic	x28,x23,x21
384	add	x24,x24,x14			// h+=X[i]
385	orr	x17,x17,x28			// Ch(e,f,g)
386	eor	x28,x25,x26			// a^b, b^c in next round
387	eor	x16,x16,x6,ror#18	// Sigma1(e)
388	ror	x6,x25,#28
389	add	x24,x24,x17			// h+=Ch(e,f,g)
390	eor	x17,x25,x25,ror#5
391	add	x24,x24,x16			// h+=Sigma1(e)
392	and	x19,x19,x28			// (b^c)&=(a^b)
393	add	x20,x20,x24			// d+=h
394	eor	x19,x19,x26			// Maj(a,b,c)
395	eor	x17,x6,x17,ror#34	// Sigma0(a)
396	add	x24,x24,x19			// h+=Maj(a,b,c)
397	ldr	x19,[x30],#8		// *K++, x28 in next round
398	//add	x24,x24,x17			// h+=Sigma0(a)
399#ifndef	__AARCH64EB__
400	rev	x15,x15			// 12
401#endif
402	add	x24,x24,x17			// h+=Sigma0(a)
403	str	x7,[sp,#0]	// spill X[4]; x7 is used as scratch below
404	ror	x16,x20,#14
405	add	x23,x23,x19			// h+=K[i]
406	eor	x7,x20,x20,ror#23
407	and	x17,x21,x20
408	bic	x19,x22,x20
409	add	x23,x23,x15			// h+=X[i]
410	orr	x17,x17,x19			// Ch(e,f,g)
411	eor	x19,x24,x25			// a^b, b^c in next round
412	eor	x16,x16,x7,ror#18	// Sigma1(e)
413	ror	x7,x24,#28
414	add	x23,x23,x17			// h+=Ch(e,f,g)
415	eor	x17,x24,x24,ror#5
416	add	x23,x23,x16			// h+=Sigma1(e)
417	and	x28,x28,x19			// (b^c)&=(a^b)
418	add	x27,x27,x23			// d+=h
419	eor	x28,x28,x25			// Maj(a,b,c)
420	eor	x17,x7,x17,ror#34	// Sigma0(a)
421	add	x23,x23,x28			// h+=Maj(a,b,c)
422	ldr	x28,[x30],#8		// *K++, x19 in next round
423	//add	x23,x23,x17			// h+=Sigma0(a)
424#ifndef	__AARCH64EB__
425	rev	x0,x0			// 13
426#endif
427	ldp	x1,x2,[x1]	// X[14],X[15]; overwrites the input pointer (copy is at [x29,#112])
428	add	x23,x23,x17			// h+=Sigma0(a)
429	str	x8,[sp,#8]	// spill X[5]; x8 is used as scratch below
430	ror	x16,x27,#14
431	add	x22,x22,x28			// h+=K[i]
432	eor	x8,x27,x27,ror#23
433	and	x17,x20,x27
434	bic	x28,x21,x27
435	add	x22,x22,x0			// h+=X[i]
436	orr	x17,x17,x28			// Ch(e,f,g)
437	eor	x28,x23,x24			// a^b, b^c in next round
438	eor	x16,x16,x8,ror#18	// Sigma1(e)
439	ror	x8,x23,#28
440	add	x22,x22,x17			// h+=Ch(e,f,g)
441	eor	x17,x23,x23,ror#5
442	add	x22,x22,x16			// h+=Sigma1(e)
443	and	x19,x19,x28			// (b^c)&=(a^b)
444	add	x26,x26,x22			// d+=h
445	eor	x19,x19,x24			// Maj(a,b,c)
446	eor	x17,x8,x17,ror#34	// Sigma0(a)
447	add	x22,x22,x19			// h+=Maj(a,b,c)
448	ldr	x19,[x30],#8		// *K++, x28 in next round
449	//add	x22,x22,x17			// h+=Sigma0(a)
450#ifndef	__AARCH64EB__
451	rev	x1,x1			// 14
452#endif
453	ldr	x6,[sp,#24]	// reload X[3]
454	add	x22,x22,x17			// h+=Sigma0(a)
455	str	x9,[sp,#16]	// spill X[6]; x9 is used as scratch below
456	ror	x16,x26,#14
457	add	x21,x21,x19			// h+=K[i]
458	eor	x9,x26,x26,ror#23
459	and	x17,x27,x26
460	bic	x19,x20,x26
461	add	x21,x21,x1			// h+=X[i]
462	orr	x17,x17,x19			// Ch(e,f,g)
463	eor	x19,x22,x23			// a^b, b^c in next round
464	eor	x16,x16,x9,ror#18	// Sigma1(e)
465	ror	x9,x22,#28
466	add	x21,x21,x17			// h+=Ch(e,f,g)
467	eor	x17,x22,x22,ror#5
468	add	x21,x21,x16			// h+=Sigma1(e)
469	and	x28,x28,x19			// (b^c)&=(a^b)
470	add	x25,x25,x21			// d+=h
471	eor	x28,x28,x23			// Maj(a,b,c)
472	eor	x17,x9,x17,ror#34	// Sigma0(a)
473	add	x21,x21,x28			// h+=Maj(a,b,c)
474	ldr	x28,[x30],#8		// *K++, x19 in next round
475	//add	x21,x21,x17			// h+=Sigma0(a)
476#ifndef	__AARCH64EB__
477	rev	x2,x2			// 15
478#endif
// Round 15 also starts the message schedule: while h is updated with X[15]
// (x2), the word for round 16 is built in x3 from X[0], X[1], X[9] and X[14].
479	ldr	x7,[sp,#0]	// reload X[4]
480	add	x21,x21,x17			// h+=Sigma0(a)
481	str	x10,[sp,#24]	// spill X[7]; x10 is used as scratch below
482	ror	x16,x25,#14
483	add	x20,x20,x28			// h+=K[i]
484	ror	x9,x4,#1
485	and	x17,x26,x25
486	ror	x8,x1,#19
487	bic	x28,x27,x25
488	ror	x10,x21,#28
489	add	x20,x20,x2			// h+=X[i]
490	eor	x16,x16,x25,ror#18
491	eor	x9,x9,x4,ror#8
492	orr	x17,x17,x28			// Ch(e,f,g)
493	eor	x28,x21,x22			// a^b, b^c in next round
494	eor	x16,x16,x25,ror#41	// Sigma1(e)
495	eor	x10,x10,x21,ror#34
496	add	x20,x20,x17			// h+=Ch(e,f,g)
497	and	x19,x19,x28			// (b^c)&=(a^b)
498	eor	x8,x8,x1,ror#61
499	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
500	add	x20,x20,x16			// h+=Sigma1(e)
501	eor	x19,x19,x22			// Maj(a,b,c)
502	eor	x17,x10,x21,ror#39	// Sigma0(a)
503	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
504	add	x3,x3,x12	// X[0] += X[9]
505	add	x24,x24,x20			// d+=h
506	add	x20,x20,x19			// h+=Maj(a,b,c)
507	ldr	x19,[x30],#8		// *K++, x28 in next round
508	add	x3,x3,x9	// ... += sigma0(X[1])
509	add	x20,x20,x17			// h+=Sigma0(a)
510	add	x3,x3,x8	// ... += sigma1(X[14]); x3 is now the word for round 16
// Rounds 16-79, sixteen rounds per pass through this loop.
//
// Each round adds the schedule word finished by the previous round into h and,
// interleaved with the compression step, finishes the next schedule word in
// place:  X[j] += X[j+9] + sigma0(X[j+1]) + sigma1(X[j+14])   (indices mod 16)
// with sigma0(x) = ror1 ^ ror8 ^ lsr7 and sigma1(x) = ror19 ^ ror61 ^ lsr6,
// as spelled out by the ror/eor/lsr operands below.
//
// The sixteen X words are held in x3-x15, x0, x1, x2. Every round begins by
// reloading one word from the scratch area at [sp,#0..#24] and spilling
// another, which frees the registers used as temporaries in that round.
// Sigma0(a) is added to h within the same round here.
//
// The loop ends when the K value just fetched into x19 is zero, which is the
// terminator quad at the end of .LK512.
511.Loop_16_xx:
// i mod 16 == 0: h += x3; next schedule word is finished in x4
512	ldr	x8,[sp,#8]
513	str	x11,[sp,#0]
514	ror	x16,x24,#14
515	add	x27,x27,x19			// h+=K[i]
516	ror	x10,x5,#1
517	and	x17,x25,x24
518	ror	x9,x2,#19
519	bic	x19,x26,x24
520	ror	x11,x20,#28
521	add	x27,x27,x3			// h+=X[i]
522	eor	x16,x16,x24,ror#18
523	eor	x10,x10,x5,ror#8
524	orr	x17,x17,x19			// Ch(e,f,g)
525	eor	x19,x20,x21			// a^b, b^c in next round
526	eor	x16,x16,x24,ror#41	// Sigma1(e)
527	eor	x11,x11,x20,ror#34
528	add	x27,x27,x17			// h+=Ch(e,f,g)
529	and	x28,x28,x19			// (b^c)&=(a^b)
530	eor	x9,x9,x2,ror#61
531	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
532	add	x27,x27,x16			// h+=Sigma1(e)
533	eor	x28,x28,x21			// Maj(a,b,c)
534	eor	x17,x11,x20,ror#39	// Sigma0(a)
535	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
536	add	x4,x4,x13
537	add	x23,x23,x27			// d+=h
538	add	x27,x27,x28			// h+=Maj(a,b,c)
539	ldr	x28,[x30],#8		// *K++, x19 in next round
540	add	x4,x4,x10
541	add	x27,x27,x17			// h+=Sigma0(a)
542	add	x4,x4,x9
// i mod 16 == 1: h += x4; next schedule word is finished in x5
543	ldr	x9,[sp,#16]
544	str	x12,[sp,#8]
545	ror	x16,x23,#14
546	add	x26,x26,x28			// h+=K[i]
547	ror	x11,x6,#1
548	and	x17,x24,x23
549	ror	x10,x3,#19
550	bic	x28,x25,x23
551	ror	x12,x27,#28
552	add	x26,x26,x4			// h+=X[i]
553	eor	x16,x16,x23,ror#18
554	eor	x11,x11,x6,ror#8
555	orr	x17,x17,x28			// Ch(e,f,g)
556	eor	x28,x27,x20			// a^b, b^c in next round
557	eor	x16,x16,x23,ror#41	// Sigma1(e)
558	eor	x12,x12,x27,ror#34
559	add	x26,x26,x17			// h+=Ch(e,f,g)
560	and	x19,x19,x28			// (b^c)&=(a^b)
561	eor	x10,x10,x3,ror#61
562	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
563	add	x26,x26,x16			// h+=Sigma1(e)
564	eor	x19,x19,x20			// Maj(a,b,c)
565	eor	x17,x12,x27,ror#39	// Sigma0(a)
566	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
567	add	x5,x5,x14
568	add	x22,x22,x26			// d+=h
569	add	x26,x26,x19			// h+=Maj(a,b,c)
570	ldr	x19,[x30],#8		// *K++, x28 in next round
571	add	x5,x5,x11
572	add	x26,x26,x17			// h+=Sigma0(a)
573	add	x5,x5,x10
// i mod 16 == 2: h += x5; next schedule word is finished in x6
574	ldr	x10,[sp,#24]
575	str	x13,[sp,#16]
576	ror	x16,x22,#14
577	add	x25,x25,x19			// h+=K[i]
578	ror	x12,x7,#1
579	and	x17,x23,x22
580	ror	x11,x4,#19
581	bic	x19,x24,x22
582	ror	x13,x26,#28
583	add	x25,x25,x5			// h+=X[i]
584	eor	x16,x16,x22,ror#18
585	eor	x12,x12,x7,ror#8
586	orr	x17,x17,x19			// Ch(e,f,g)
587	eor	x19,x26,x27			// a^b, b^c in next round
588	eor	x16,x16,x22,ror#41	// Sigma1(e)
589	eor	x13,x13,x26,ror#34
590	add	x25,x25,x17			// h+=Ch(e,f,g)
591	and	x28,x28,x19			// (b^c)&=(a^b)
592	eor	x11,x11,x4,ror#61
593	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
594	add	x25,x25,x16			// h+=Sigma1(e)
595	eor	x28,x28,x27			// Maj(a,b,c)
596	eor	x17,x13,x26,ror#39	// Sigma0(a)
597	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
598	add	x6,x6,x15
599	add	x21,x21,x25			// d+=h
600	add	x25,x25,x28			// h+=Maj(a,b,c)
601	ldr	x28,[x30],#8		// *K++, x19 in next round
602	add	x6,x6,x12
603	add	x25,x25,x17			// h+=Sigma0(a)
604	add	x6,x6,x11
// i mod 16 == 3: h += x6; next schedule word is finished in x7
605	ldr	x11,[sp,#0]
606	str	x14,[sp,#24]
607	ror	x16,x21,#14
608	add	x24,x24,x28			// h+=K[i]
609	ror	x13,x8,#1
610	and	x17,x22,x21
611	ror	x12,x5,#19
612	bic	x28,x23,x21
613	ror	x14,x25,#28
614	add	x24,x24,x6			// h+=X[i]
615	eor	x16,x16,x21,ror#18
616	eor	x13,x13,x8,ror#8
617	orr	x17,x17,x28			// Ch(e,f,g)
618	eor	x28,x25,x26			// a^b, b^c in next round
619	eor	x16,x16,x21,ror#41	// Sigma1(e)
620	eor	x14,x14,x25,ror#34
621	add	x24,x24,x17			// h+=Ch(e,f,g)
622	and	x19,x19,x28			// (b^c)&=(a^b)
623	eor	x12,x12,x5,ror#61
624	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
625	add	x24,x24,x16			// h+=Sigma1(e)
626	eor	x19,x19,x26			// Maj(a,b,c)
627	eor	x17,x14,x25,ror#39	// Sigma0(a)
628	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
629	add	x7,x7,x0
630	add	x20,x20,x24			// d+=h
631	add	x24,x24,x19			// h+=Maj(a,b,c)
632	ldr	x19,[x30],#8		// *K++, x28 in next round
633	add	x7,x7,x13
634	add	x24,x24,x17			// h+=Sigma0(a)
635	add	x7,x7,x12
// i mod 16 == 4: h += x7; next schedule word is finished in x8
636	ldr	x12,[sp,#8]
637	str	x15,[sp,#0]
638	ror	x16,x20,#14
639	add	x23,x23,x19			// h+=K[i]
640	ror	x14,x9,#1
641	and	x17,x21,x20
642	ror	x13,x6,#19
643	bic	x19,x22,x20
644	ror	x15,x24,#28
645	add	x23,x23,x7			// h+=X[i]
646	eor	x16,x16,x20,ror#18
647	eor	x14,x14,x9,ror#8
648	orr	x17,x17,x19			// Ch(e,f,g)
649	eor	x19,x24,x25			// a^b, b^c in next round
650	eor	x16,x16,x20,ror#41	// Sigma1(e)
651	eor	x15,x15,x24,ror#34
652	add	x23,x23,x17			// h+=Ch(e,f,g)
653	and	x28,x28,x19			// (b^c)&=(a^b)
654	eor	x13,x13,x6,ror#61
655	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
656	add	x23,x23,x16			// h+=Sigma1(e)
657	eor	x28,x28,x25			// Maj(a,b,c)
658	eor	x17,x15,x24,ror#39	// Sigma0(a)
659	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
660	add	x8,x8,x1
661	add	x27,x27,x23			// d+=h
662	add	x23,x23,x28			// h+=Maj(a,b,c)
663	ldr	x28,[x30],#8		// *K++, x19 in next round
664	add	x8,x8,x14
665	add	x23,x23,x17			// h+=Sigma0(a)
666	add	x8,x8,x13
// i mod 16 == 5: h += x8; next schedule word is finished in x9
667	ldr	x13,[sp,#16]
668	str	x0,[sp,#8]
669	ror	x16,x27,#14
670	add	x22,x22,x28			// h+=K[i]
671	ror	x15,x10,#1
672	and	x17,x20,x27
673	ror	x14,x7,#19
674	bic	x28,x21,x27
675	ror	x0,x23,#28
676	add	x22,x22,x8			// h+=X[i]
677	eor	x16,x16,x27,ror#18
678	eor	x15,x15,x10,ror#8
679	orr	x17,x17,x28			// Ch(e,f,g)
680	eor	x28,x23,x24			// a^b, b^c in next round
681	eor	x16,x16,x27,ror#41	// Sigma1(e)
682	eor	x0,x0,x23,ror#34
683	add	x22,x22,x17			// h+=Ch(e,f,g)
684	and	x19,x19,x28			// (b^c)&=(a^b)
685	eor	x14,x14,x7,ror#61
686	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
687	add	x22,x22,x16			// h+=Sigma1(e)
688	eor	x19,x19,x24			// Maj(a,b,c)
689	eor	x17,x0,x23,ror#39	// Sigma0(a)
690	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
691	add	x9,x9,x2
692	add	x26,x26,x22			// d+=h
693	add	x22,x22,x19			// h+=Maj(a,b,c)
694	ldr	x19,[x30],#8		// *K++, x28 in next round
695	add	x9,x9,x15
696	add	x22,x22,x17			// h+=Sigma0(a)
697	add	x9,x9,x14
// i mod 16 == 6: h += x9; next schedule word is finished in x10
698	ldr	x14,[sp,#24]
699	str	x1,[sp,#16]
700	ror	x16,x26,#14
701	add	x21,x21,x19			// h+=K[i]
702	ror	x0,x11,#1
703	and	x17,x27,x26
704	ror	x15,x8,#19
705	bic	x19,x20,x26
706	ror	x1,x22,#28
707	add	x21,x21,x9			// h+=X[i]
708	eor	x16,x16,x26,ror#18
709	eor	x0,x0,x11,ror#8
710	orr	x17,x17,x19			// Ch(e,f,g)
711	eor	x19,x22,x23			// a^b, b^c in next round
712	eor	x16,x16,x26,ror#41	// Sigma1(e)
713	eor	x1,x1,x22,ror#34
714	add	x21,x21,x17			// h+=Ch(e,f,g)
715	and	x28,x28,x19			// (b^c)&=(a^b)
716	eor	x15,x15,x8,ror#61
717	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
718	add	x21,x21,x16			// h+=Sigma1(e)
719	eor	x28,x28,x23			// Maj(a,b,c)
720	eor	x17,x1,x22,ror#39	// Sigma0(a)
721	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
722	add	x10,x10,x3
723	add	x25,x25,x21			// d+=h
724	add	x21,x21,x28			// h+=Maj(a,b,c)
725	ldr	x28,[x30],#8		// *K++, x19 in next round
726	add	x10,x10,x0
727	add	x21,x21,x17			// h+=Sigma0(a)
728	add	x10,x10,x15
// i mod 16 == 7: h += x10; next schedule word is finished in x11
729	ldr	x15,[sp,#0]
730	str	x2,[sp,#24]
731	ror	x16,x25,#14
732	add	x20,x20,x28			// h+=K[i]
733	ror	x1,x12,#1
734	and	x17,x26,x25
735	ror	x0,x9,#19
736	bic	x28,x27,x25
737	ror	x2,x21,#28
738	add	x20,x20,x10			// h+=X[i]
739	eor	x16,x16,x25,ror#18
740	eor	x1,x1,x12,ror#8
741	orr	x17,x17,x28			// Ch(e,f,g)
742	eor	x28,x21,x22			// a^b, b^c in next round
743	eor	x16,x16,x25,ror#41	// Sigma1(e)
744	eor	x2,x2,x21,ror#34
745	add	x20,x20,x17			// h+=Ch(e,f,g)
746	and	x19,x19,x28			// (b^c)&=(a^b)
747	eor	x0,x0,x9,ror#61
748	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
749	add	x20,x20,x16			// h+=Sigma1(e)
750	eor	x19,x19,x22			// Maj(a,b,c)
751	eor	x17,x2,x21,ror#39	// Sigma0(a)
752	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
753	add	x11,x11,x4
754	add	x24,x24,x20			// d+=h
755	add	x20,x20,x19			// h+=Maj(a,b,c)
756	ldr	x19,[x30],#8		// *K++, x28 in next round
757	add	x11,x11,x1
758	add	x20,x20,x17			// h+=Sigma0(a)
759	add	x11,x11,x0
// i mod 16 == 8: h += x11; next schedule word is finished in x12
760	ldr	x0,[sp,#8]
761	str	x3,[sp,#0]
762	ror	x16,x24,#14
763	add	x27,x27,x19			// h+=K[i]
764	ror	x2,x13,#1
765	and	x17,x25,x24
766	ror	x1,x10,#19
767	bic	x19,x26,x24
768	ror	x3,x20,#28
769	add	x27,x27,x11			// h+=X[i]
770	eor	x16,x16,x24,ror#18
771	eor	x2,x2,x13,ror#8
772	orr	x17,x17,x19			// Ch(e,f,g)
773	eor	x19,x20,x21			// a^b, b^c in next round
774	eor	x16,x16,x24,ror#41	// Sigma1(e)
775	eor	x3,x3,x20,ror#34
776	add	x27,x27,x17			// h+=Ch(e,f,g)
777	and	x28,x28,x19			// (b^c)&=(a^b)
778	eor	x1,x1,x10,ror#61
779	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
780	add	x27,x27,x16			// h+=Sigma1(e)
781	eor	x28,x28,x21			// Maj(a,b,c)
782	eor	x17,x3,x20,ror#39	// Sigma0(a)
783	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
784	add	x12,x12,x5
785	add	x23,x23,x27			// d+=h
786	add	x27,x27,x28			// h+=Maj(a,b,c)
787	ldr	x28,[x30],#8		// *K++, x19 in next round
788	add	x12,x12,x2
789	add	x27,x27,x17			// h+=Sigma0(a)
790	add	x12,x12,x1
// i mod 16 == 9: h += x12; next schedule word is finished in x13
791	ldr	x1,[sp,#16]
792	str	x4,[sp,#8]
793	ror	x16,x23,#14
794	add	x26,x26,x28			// h+=K[i]
795	ror	x3,x14,#1
796	and	x17,x24,x23
797	ror	x2,x11,#19
798	bic	x28,x25,x23
799	ror	x4,x27,#28
800	add	x26,x26,x12			// h+=X[i]
801	eor	x16,x16,x23,ror#18
802	eor	x3,x3,x14,ror#8
803	orr	x17,x17,x28			// Ch(e,f,g)
804	eor	x28,x27,x20			// a^b, b^c in next round
805	eor	x16,x16,x23,ror#41	// Sigma1(e)
806	eor	x4,x4,x27,ror#34
807	add	x26,x26,x17			// h+=Ch(e,f,g)
808	and	x19,x19,x28			// (b^c)&=(a^b)
809	eor	x2,x2,x11,ror#61
810	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
811	add	x26,x26,x16			// h+=Sigma1(e)
812	eor	x19,x19,x20			// Maj(a,b,c)
813	eor	x17,x4,x27,ror#39	// Sigma0(a)
814	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
815	add	x13,x13,x6
816	add	x22,x22,x26			// d+=h
817	add	x26,x26,x19			// h+=Maj(a,b,c)
818	ldr	x19,[x30],#8		// *K++, x28 in next round
819	add	x13,x13,x3
820	add	x26,x26,x17			// h+=Sigma0(a)
821	add	x13,x13,x2
// i mod 16 == 10: h += x13; next schedule word is finished in x14
822	ldr	x2,[sp,#24]
823	str	x5,[sp,#16]
824	ror	x16,x22,#14
825	add	x25,x25,x19			// h+=K[i]
826	ror	x4,x15,#1
827	and	x17,x23,x22
828	ror	x3,x12,#19
829	bic	x19,x24,x22
830	ror	x5,x26,#28
831	add	x25,x25,x13			// h+=X[i]
832	eor	x16,x16,x22,ror#18
833	eor	x4,x4,x15,ror#8
834	orr	x17,x17,x19			// Ch(e,f,g)
835	eor	x19,x26,x27			// a^b, b^c in next round
836	eor	x16,x16,x22,ror#41	// Sigma1(e)
837	eor	x5,x5,x26,ror#34
838	add	x25,x25,x17			// h+=Ch(e,f,g)
839	and	x28,x28,x19			// (b^c)&=(a^b)
840	eor	x3,x3,x12,ror#61
841	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
842	add	x25,x25,x16			// h+=Sigma1(e)
843	eor	x28,x28,x27			// Maj(a,b,c)
844	eor	x17,x5,x26,ror#39	// Sigma0(a)
845	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
846	add	x14,x14,x7
847	add	x21,x21,x25			// d+=h
848	add	x25,x25,x28			// h+=Maj(a,b,c)
849	ldr	x28,[x30],#8		// *K++, x19 in next round
850	add	x14,x14,x4
851	add	x25,x25,x17			// h+=Sigma0(a)
852	add	x14,x14,x3
// i mod 16 == 11: h += x14; next schedule word is finished in x15
853	ldr	x3,[sp,#0]
854	str	x6,[sp,#24]
855	ror	x16,x21,#14
856	add	x24,x24,x28			// h+=K[i]
857	ror	x5,x0,#1
858	and	x17,x22,x21
859	ror	x4,x13,#19
860	bic	x28,x23,x21
861	ror	x6,x25,#28
862	add	x24,x24,x14			// h+=X[i]
863	eor	x16,x16,x21,ror#18
864	eor	x5,x5,x0,ror#8
865	orr	x17,x17,x28			// Ch(e,f,g)
866	eor	x28,x25,x26			// a^b, b^c in next round
867	eor	x16,x16,x21,ror#41	// Sigma1(e)
868	eor	x6,x6,x25,ror#34
869	add	x24,x24,x17			// h+=Ch(e,f,g)
870	and	x19,x19,x28			// (b^c)&=(a^b)
871	eor	x4,x4,x13,ror#61
872	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
873	add	x24,x24,x16			// h+=Sigma1(e)
874	eor	x19,x19,x26			// Maj(a,b,c)
875	eor	x17,x6,x25,ror#39	// Sigma0(a)
876	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
877	add	x15,x15,x8
878	add	x20,x20,x24			// d+=h
879	add	x24,x24,x19			// h+=Maj(a,b,c)
880	ldr	x19,[x30],#8		// *K++, x28 in next round
881	add	x15,x15,x5
882	add	x24,x24,x17			// h+=Sigma0(a)
883	add	x15,x15,x4
// i mod 16 == 12: h += x15; next schedule word is finished in x0
884	ldr	x4,[sp,#8]
885	str	x7,[sp,#0]
886	ror	x16,x20,#14
887	add	x23,x23,x19			// h+=K[i]
888	ror	x6,x1,#1
889	and	x17,x21,x20
890	ror	x5,x14,#19
891	bic	x19,x22,x20
892	ror	x7,x24,#28
893	add	x23,x23,x15			// h+=X[i]
894	eor	x16,x16,x20,ror#18
895	eor	x6,x6,x1,ror#8
896	orr	x17,x17,x19			// Ch(e,f,g)
897	eor	x19,x24,x25			// a^b, b^c in next round
898	eor	x16,x16,x20,ror#41	// Sigma1(e)
899	eor	x7,x7,x24,ror#34
900	add	x23,x23,x17			// h+=Ch(e,f,g)
901	and	x28,x28,x19			// (b^c)&=(a^b)
902	eor	x5,x5,x14,ror#61
903	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
904	add	x23,x23,x16			// h+=Sigma1(e)
905	eor	x28,x28,x25			// Maj(a,b,c)
906	eor	x17,x7,x24,ror#39	// Sigma0(a)
907	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
908	add	x0,x0,x9
909	add	x27,x27,x23			// d+=h
910	add	x23,x23,x28			// h+=Maj(a,b,c)
911	ldr	x28,[x30],#8		// *K++, x19 in next round
912	add	x0,x0,x6
913	add	x23,x23,x17			// h+=Sigma0(a)
914	add	x0,x0,x5
// i mod 16 == 13: h += x0; next schedule word is finished in x1
915	ldr	x5,[sp,#16]
916	str	x8,[sp,#8]
917	ror	x16,x27,#14
918	add	x22,x22,x28			// h+=K[i]
919	ror	x7,x2,#1
920	and	x17,x20,x27
921	ror	x6,x15,#19
922	bic	x28,x21,x27
923	ror	x8,x23,#28
924	add	x22,x22,x0			// h+=X[i]
925	eor	x16,x16,x27,ror#18
926	eor	x7,x7,x2,ror#8
927	orr	x17,x17,x28			// Ch(e,f,g)
928	eor	x28,x23,x24			// a^b, b^c in next round
929	eor	x16,x16,x27,ror#41	// Sigma1(e)
930	eor	x8,x8,x23,ror#34
931	add	x22,x22,x17			// h+=Ch(e,f,g)
932	and	x19,x19,x28			// (b^c)&=(a^b)
933	eor	x6,x6,x15,ror#61
934	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
935	add	x22,x22,x16			// h+=Sigma1(e)
936	eor	x19,x19,x24			// Maj(a,b,c)
937	eor	x17,x8,x23,ror#39	// Sigma0(a)
938	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
939	add	x1,x1,x10
940	add	x26,x26,x22			// d+=h
941	add	x22,x22,x19			// h+=Maj(a,b,c)
942	ldr	x19,[x30],#8		// *K++, x28 in next round
943	add	x1,x1,x7
944	add	x22,x22,x17			// h+=Sigma0(a)
945	add	x1,x1,x6
// i mod 16 == 14: h += x1; next schedule word is finished in x2
946	ldr	x6,[sp,#24]
947	str	x9,[sp,#16]
948	ror	x16,x26,#14
949	add	x21,x21,x19			// h+=K[i]
950	ror	x8,x3,#1
951	and	x17,x27,x26
952	ror	x7,x0,#19
953	bic	x19,x20,x26
954	ror	x9,x22,#28
955	add	x21,x21,x1			// h+=X[i]
956	eor	x16,x16,x26,ror#18
957	eor	x8,x8,x3,ror#8
958	orr	x17,x17,x19			// Ch(e,f,g)
959	eor	x19,x22,x23			// a^b, b^c in next round
960	eor	x16,x16,x26,ror#41	// Sigma1(e)
961	eor	x9,x9,x22,ror#34
962	add	x21,x21,x17			// h+=Ch(e,f,g)
963	and	x28,x28,x19			// (b^c)&=(a^b)
964	eor	x7,x7,x0,ror#61
965	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
966	add	x21,x21,x16			// h+=Sigma1(e)
967	eor	x28,x28,x23			// Maj(a,b,c)
968	eor	x17,x9,x22,ror#39	// Sigma0(a)
969	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
970	add	x2,x2,x11
971	add	x25,x25,x21			// d+=h
972	add	x21,x21,x28			// h+=Maj(a,b,c)
973	ldr	x28,[x30],#8		// *K++, x19 in next round
974	add	x2,x2,x8
975	add	x21,x21,x17			// h+=Sigma0(a)
976	add	x2,x2,x7
// i mod 16 == 15: h += x2; next schedule word is finished in x3
977	ldr	x7,[sp,#0]
978	str	x10,[sp,#24]
979	ror	x16,x25,#14
980	add	x20,x20,x28			// h+=K[i]
981	ror	x9,x4,#1
982	and	x17,x26,x25
983	ror	x8,x1,#19
984	bic	x28,x27,x25
985	ror	x10,x21,#28
986	add	x20,x20,x2			// h+=X[i]
987	eor	x16,x16,x25,ror#18
988	eor	x9,x9,x4,ror#8
989	orr	x17,x17,x28			// Ch(e,f,g)
990	eor	x28,x21,x22			// a^b, b^c in next round
991	eor	x16,x16,x25,ror#41	// Sigma1(e)
992	eor	x10,x10,x21,ror#34
993	add	x20,x20,x17			// h+=Ch(e,f,g)
994	and	x19,x19,x28			// (b^c)&=(a^b)
995	eor	x8,x8,x1,ror#61
996	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
997	add	x20,x20,x16			// h+=Sigma1(e)
998	eor	x19,x19,x22			// Maj(a,b,c)
999	eor	x17,x10,x21,ror#39	// Sigma0(a)
1000	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
1001	add	x3,x3,x12
1002	add	x24,x24,x20			// d+=h
1003	add	x20,x20,x19			// h+=Maj(a,b,c)
1004	ldr	x19,[x30],#8		// *K++, x28 in next round
1005	add	x3,x3,x9
1006	add	x20,x20,x17			// h+=Sigma0(a)
1007	add	x3,x3,x8
1008	cbnz	x19,.Loop_16_xx	// K just loaded is nonzero: another 16 rounds; zero = table terminator
1009
// All rounds for this block are done. Reload the saved pointers, rewind the
// K pointer to the start of the table, add the previous state words to the
// working variables, store the result back through x0, and loop while input
// remains. Afterwards drop the scratch area, restore x19-x30 and return.
1010	ldp	x0,x2,[x29,#96]	// x0 = state pointer, x2 = end of input (stored before .Loop)
1011	ldr	x1,[x29,#112]	// input pointer as stored in .Loop (16 bytes into the block)
1012	sub	x30,x30,#648		// rewind: 81 quads = 80 constants + zero terminator
1013
1014	ldp	x3,x4,[x0]	// previous state words 0,1
1015	ldp	x5,x6,[x0,#2*8]	// previous state words 2,3
1016	add	x1,x1,#14*8			// advance input pointer
1017	ldp	x7,x8,[x0,#4*8]	// previous state words 4,5
1018	add	x20,x20,x3
1019	ldp	x9,x10,[x0,#6*8]	// previous state words 6,7
1020	add	x21,x21,x4
1021	add	x22,x22,x5
1022	add	x23,x23,x6
1023	stp	x20,x21,[x0]	// write back updated state
1024	add	x24,x24,x7
1025	add	x25,x25,x8
1026	stp	x22,x23,[x0,#2*8]
1027	add	x26,x26,x9
1028	add	x27,x27,x10
1029	cmp	x1,x2	// reached the end of input?
1030	stp	x24,x25,[x0,#4*8]
1031	stp	x26,x27,[x0,#6*8]
1032	b.ne	.Loop	// no: process the next block (x20-x27 already hold the new state)
1033
1034	ldp	x19,x20,[x29,#16]	// restore x19-x28 from the frame
1035	add	sp,sp,#4*8	// release the 32-byte scratch area
1036	ldp	x21,x22,[x29,#32]
1037	ldp	x23,x24,[x29,#48]
1038	ldp	x25,x26,[x29,#64]
1039	ldp	x27,x28,[x29,#80]
1040	ldp	x29,x30,[sp],#128	// restore x29/x30 and pop the 128-byte frame
1041	AARCH64_VALIDATE_LINK_REGISTER
1042	ret
1043.size	sha512_block_data_order,.-sha512_block_data_order
1044
// .LK512: read-only table of 80 64-bit constants (40 .quad lines holding two
// values each) followed by one zero .quad.
//
// Layout constraints visible in this file:
//   * The scalar code above reads the table 8 bytes at a time and leaves its
//     round loop when the loaded value is zero (cbnz), then rewinds its
//     pointer by 648 = 81*8 bytes, i.e. 80 constants plus the terminator.
//   * sha512_block_armv8 below reads exactly 80*8 bytes per block and rewinds
//     by that amount, so it never reaches the terminator.
// The order and number of entries therefore must not change.
//
// .align 6 requests a 2^6 = 64-byte boundary for the start of the table.
1045.section	.rodata
1046.align	6
1047.type	.LK512,%object
1048.LK512:
1049.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1050.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1051.quad	0x3956c25bf348b538,0x59f111f1b605d019
1052.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1053.quad	0xd807aa98a3030242,0x12835b0145706fbe
1054.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1055.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1056.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1057.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1058.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1059.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1060.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1061.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1062.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1063.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1064.quad	0x06ca6351e003826f,0x142929670a0e6e70
1065.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1066.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1067.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1068.quad	0x81c2c92e47edaee6,0x92722c851482353b
1069.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1070.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1071.quad	0xd192e819d6ef5218,0xd69906245565a910
1072.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1073.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1074.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1075.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1076.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1077.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1078.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1079.quad	0x90befffa23631e28,0xa4506cebde82bde9
1080.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1081.quad	0xca273eceea26619c,0xd186b8c721c0c207
1082.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1083.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1084.quad	0x113f9804bef90dae,0x1b710b35131c471b
1085.quad	0x28db77f523047d84,0x32caab7b40c72493
1086.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1087.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1088.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1089.quad	0	// terminator
1090.size	.LK512,.-.LK512
// Identification string placed after the table (outside the .LK512 symbol
// size). The bytes are the ASCII text
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
// followed by a NUL byte.
1091.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// NOTE(review): the second .align 2 is redundant with the first; presumably
// an artifact of the generator -- harmless.
1092.align	2
1093.align	2
1094.text
1095#ifndef	__KERNEL__
// sha512_block_armv8 -- SHA-512 block function built on the SHA512H, SHA512H2,
// SHA512SU0 and SHA512SU1 instructions. Those instructions are emitted below
// as raw .inst words with the intended mnemonic in the trailing comment
// (presumably so that assemblers without SHA-512 support can still build this
// file -- confirm against the generating Perl script).
//
// Register use, as read from the code below:
//   x0       pointer to the 64-byte hash state; loaded into v0-v3 on entry
//            and stored back from v0-v3 on exit
//   x1       input pointer; one block is 128 bytes
//   x2       number of blocks left; decremented once per .Loop_hw iteration.
//            NOTE(review): a count of 0 on entry would wrap in the subs and
//            is presumably excluded by the caller -- confirm.
//   x3       cursor into .LK512; rewound by 80*8 bytes near the end of each
//            block
//   x4       scratch for the conditional input rewind
//   v0-v4    working state; which register holds which part of the state
//            changes from one step to the next
//   v5-v7    operands assembled with ext for sha512h / sha512su1
//   v16-v23  message schedule (input with the bytes of every 64-bit lane
//            reversed)
//   v24,v25  K512 pair + schedule pair for the current step
//   v26-v29  copy of v0-v3 taken at the top of the block
//
// The prologue saves x29 and x30, but the epilogue reloads only x29: x30 is
// never written in this function (there is no bl), so ret uses the value it
// had on entry.
1096.type	sha512_block_armv8,%function
1097.align	6
1098sha512_block_armv8:
1099.Lv8_entry:
1100	stp	x29,x30,[sp,#-16]!
1101	add	x29,sp,#0
1102
// Load the first 128-byte block; the post-increments leave x1 pointing at the
// block that follows it.
1103	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64	// load input
1104	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
1105
1106	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// load context
1107	adrp	x3,.LK512
1108	add	x3,x3,:lo12:.LK512
1109
// Reverse the byte order within every 64-bit lane of the loaded block.
1110	rev64	v16.16b,v16.16b
1111	rev64	v17.16b,v17.16b
1112	rev64	v18.16b,v18.16b
1113	rev64	v19.16b,v19.16b
1114	rev64	v20.16b,v20.16b
1115	rev64	v21.16b,v21.16b
1116	rev64	v22.16b,v22.16b
1117	rev64	v23.16b,v23.16b
1118	b	.Loop_hw
1119
// One iteration of .Loop_hw processes one 128-byte block in 40 unrolled
// steps, each consuming one 16-byte pair of constants from .LK512:
//   * 32 steps that also advance the message schedule (sha512su0/sha512su1),
//   * 8 final steps without schedule updates, interleaved with loading and
//     byte-reversing the next block into v16-v23.
1120.align	4
1121.Loop_hw:
1122	ld1	{v24.2d},[x3],#16
// x2 counts remaining blocks. When it reaches 0 here (last block), the csel
// below replaces x1 with x1-128, so the "load next input" loads at the end of
// this iteration re-read the current block instead of the 128 bytes after it.
// The flags set by subs are still intact at the csel: only sub (non-flag-
// setting) and vector orr instructions sit in between.
1123	subs	x2,x2,#1
1124	sub	x4,x1,#128
1125	orr	v26.16b,v0.16b,v0.16b			// offload
1126	orr	v27.16b,v1.16b,v1.16b
1127	orr	v28.16b,v2.16b,v2.16b
1128	orr	v29.16b,v3.16b,v3.16b
1129	csel	x1,x1,x4,ne			// conditional rewind
// Step pattern (repeated with rotated registers): add the schedule pair to
// the constant pair, swap its two 64-bit halves with ext #8, add it into the
// state register, then run sha512h / sha512h2 on it.
1130	add	v24.2d,v24.2d,v16.2d
1131	ld1	{v25.2d},[x3],#16
1132	ext	v24.16b,v24.16b,v24.16b,#8
1133	ext	v5.16b,v2.16b,v3.16b,#8
1134	ext	v6.16b,v1.16b,v2.16b,#8
1135	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
1136.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
1137	ext	v7.16b,v20.16b,v21.16b,#8
1138.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
1139.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
1140	add	v4.2d,v1.2d,v3.2d		// "D + T1"
1141.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
1142	add	v25.2d,v25.2d,v17.2d
1143	ld1	{v24.2d},[x3],#16
1144	ext	v25.16b,v25.16b,v25.16b,#8
1145	ext	v5.16b,v4.16b,v2.16b,#8
1146	ext	v6.16b,v0.16b,v4.16b,#8
1147	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
1148.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
1149	ext	v7.16b,v21.16b,v22.16b,#8
1150.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
1151.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
1152	add	v1.2d,v0.2d,v2.2d		// "D + T1"
1153.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
1154	add	v24.2d,v24.2d,v18.2d
1155	ld1	{v25.2d},[x3],#16
1156	ext	v24.16b,v24.16b,v24.16b,#8
1157	ext	v5.16b,v1.16b,v4.16b,#8
1158	ext	v6.16b,v3.16b,v1.16b,#8
1159	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
1160.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
1161	ext	v7.16b,v22.16b,v23.16b,#8
1162.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
1163.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
1164	add	v0.2d,v3.2d,v4.2d		// "D + T1"
1165.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
1166	add	v25.2d,v25.2d,v19.2d
1167	ld1	{v24.2d},[x3],#16
1168	ext	v25.16b,v25.16b,v25.16b,#8
1169	ext	v5.16b,v0.16b,v1.16b,#8
1170	ext	v6.16b,v2.16b,v0.16b,#8
1171	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
1172.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
1173	ext	v7.16b,v23.16b,v16.16b,#8
1174.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
1175.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
1176	add	v3.2d,v2.2d,v1.2d		// "D + T1"
1177.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
1178	add	v24.2d,v24.2d,v20.2d
1179	ld1	{v25.2d},[x3],#16
1180	ext	v24.16b,v24.16b,v24.16b,#8
1181	ext	v5.16b,v3.16b,v0.16b,#8
1182	ext	v6.16b,v4.16b,v3.16b,#8
1183	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
1184.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
1185	ext	v7.16b,v16.16b,v17.16b,#8
1186.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
1187.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
1188	add	v2.2d,v4.2d,v0.2d		// "D + T1"
1189.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
1190	add	v25.2d,v25.2d,v21.2d
1191	ld1	{v24.2d},[x3],#16
1192	ext	v25.16b,v25.16b,v25.16b,#8
1193	ext	v5.16b,v2.16b,v3.16b,#8
1194	ext	v6.16b,v1.16b,v2.16b,#8
1195	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
1196.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
1197	ext	v7.16b,v17.16b,v18.16b,#8
1198.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
1199.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
1200	add	v4.2d,v1.2d,v3.2d		// "D + T1"
1201.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
1202	add	v24.2d,v24.2d,v22.2d
1203	ld1	{v25.2d},[x3],#16
1204	ext	v24.16b,v24.16b,v24.16b,#8
1205	ext	v5.16b,v4.16b,v2.16b,#8
1206	ext	v6.16b,v0.16b,v4.16b,#8
1207	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
1208.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
1209	ext	v7.16b,v18.16b,v19.16b,#8
1210.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
1211.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
1212	add	v1.2d,v0.2d,v2.2d		// "D + T1"
1213.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
1214	add	v25.2d,v25.2d,v23.2d
1215	ld1	{v24.2d},[x3],#16
1216	ext	v25.16b,v25.16b,v25.16b,#8
1217	ext	v5.16b,v1.16b,v4.16b,#8
1218	ext	v6.16b,v3.16b,v1.16b,#8
1219	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
1220.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
1221	ext	v7.16b,v19.16b,v20.16b,#8
1222.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
1223.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
1224	add	v0.2d,v3.2d,v4.2d		// "D + T1"
1225.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
1226	add	v24.2d,v24.2d,v16.2d
1227	ld1	{v25.2d},[x3],#16
1228	ext	v24.16b,v24.16b,v24.16b,#8
1229	ext	v5.16b,v0.16b,v1.16b,#8
1230	ext	v6.16b,v2.16b,v0.16b,#8
1231	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
1232.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
1233	ext	v7.16b,v20.16b,v21.16b,#8
1234.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
1235.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
1236	add	v3.2d,v2.2d,v1.2d		// "D + T1"
1237.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
1238	add	v25.2d,v25.2d,v17.2d
1239	ld1	{v24.2d},[x3],#16
1240	ext	v25.16b,v25.16b,v25.16b,#8
1241	ext	v5.16b,v3.16b,v0.16b,#8
1242	ext	v6.16b,v4.16b,v3.16b,#8
1243	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
1244.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
1245	ext	v7.16b,v21.16b,v22.16b,#8
1246.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
1247.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
1248	add	v2.2d,v4.2d,v0.2d		// "D + T1"
1249.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
1250	add	v24.2d,v24.2d,v18.2d
1251	ld1	{v25.2d},[x3],#16
1252	ext	v24.16b,v24.16b,v24.16b,#8
1253	ext	v5.16b,v2.16b,v3.16b,#8
1254	ext	v6.16b,v1.16b,v2.16b,#8
1255	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
1256.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
1257	ext	v7.16b,v22.16b,v23.16b,#8
1258.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
1259.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
1260	add	v4.2d,v1.2d,v3.2d		// "D + T1"
1261.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
1262	add	v25.2d,v25.2d,v19.2d
1263	ld1	{v24.2d},[x3],#16
1264	ext	v25.16b,v25.16b,v25.16b,#8
1265	ext	v5.16b,v4.16b,v2.16b,#8
1266	ext	v6.16b,v0.16b,v4.16b,#8
1267	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
1268.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
1269	ext	v7.16b,v23.16b,v16.16b,#8
1270.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
1271.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
1272	add	v1.2d,v0.2d,v2.2d		// "D + T1"
1273.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
1274	add	v24.2d,v24.2d,v20.2d
1275	ld1	{v25.2d},[x3],#16
1276	ext	v24.16b,v24.16b,v24.16b,#8
1277	ext	v5.16b,v1.16b,v4.16b,#8
1278	ext	v6.16b,v3.16b,v1.16b,#8
1279	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
1280.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
1281	ext	v7.16b,v16.16b,v17.16b,#8
1282.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
1283.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
1284	add	v0.2d,v3.2d,v4.2d		// "D + T1"
1285.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
1286	add	v25.2d,v25.2d,v21.2d
1287	ld1	{v24.2d},[x3],#16
1288	ext	v25.16b,v25.16b,v25.16b,#8
1289	ext	v5.16b,v0.16b,v1.16b,#8
1290	ext	v6.16b,v2.16b,v0.16b,#8
1291	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
1292.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
1293	ext	v7.16b,v17.16b,v18.16b,#8
1294.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
1295.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
1296	add	v3.2d,v2.2d,v1.2d		// "D + T1"
1297.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
1298	add	v24.2d,v24.2d,v22.2d
1299	ld1	{v25.2d},[x3],#16
1300	ext	v24.16b,v24.16b,v24.16b,#8
1301	ext	v5.16b,v3.16b,v0.16b,#8
1302	ext	v6.16b,v4.16b,v3.16b,#8
1303	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
1304.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
1305	ext	v7.16b,v18.16b,v19.16b,#8
1306.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
1307.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
1308	add	v2.2d,v4.2d,v0.2d		// "D + T1"
1309.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
1310	add	v25.2d,v25.2d,v23.2d
1311	ld1	{v24.2d},[x3],#16
1312	ext	v25.16b,v25.16b,v25.16b,#8
1313	ext	v5.16b,v2.16b,v3.16b,#8
1314	ext	v6.16b,v1.16b,v2.16b,#8
1315	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
1316.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
1317	ext	v7.16b,v19.16b,v20.16b,#8
1318.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
1319.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
1320	add	v4.2d,v1.2d,v3.2d		// "D + T1"
1321.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
1322	add	v24.2d,v24.2d,v16.2d
1323	ld1	{v25.2d},[x3],#16
1324	ext	v24.16b,v24.16b,v24.16b,#8
1325	ext	v5.16b,v4.16b,v2.16b,#8
1326	ext	v6.16b,v0.16b,v4.16b,#8
1327	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
1328.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
1329	ext	v7.16b,v20.16b,v21.16b,#8
1330.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
1331.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
1332	add	v1.2d,v0.2d,v2.2d		// "D + T1"
1333.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
1334	add	v25.2d,v25.2d,v17.2d
1335	ld1	{v24.2d},[x3],#16
1336	ext	v25.16b,v25.16b,v25.16b,#8
1337	ext	v5.16b,v1.16b,v4.16b,#8
1338	ext	v6.16b,v3.16b,v1.16b,#8
1339	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
1340.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
1341	ext	v7.16b,v21.16b,v22.16b,#8
1342.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
1343.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
1344	add	v0.2d,v3.2d,v4.2d		// "D + T1"
1345.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
1346	add	v24.2d,v24.2d,v18.2d
1347	ld1	{v25.2d},[x3],#16
1348	ext	v24.16b,v24.16b,v24.16b,#8
1349	ext	v5.16b,v0.16b,v1.16b,#8
1350	ext	v6.16b,v2.16b,v0.16b,#8
1351	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
1352.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
1353	ext	v7.16b,v22.16b,v23.16b,#8
1354.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
1355.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
1356	add	v3.2d,v2.2d,v1.2d		// "D + T1"
1357.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
1358	add	v25.2d,v25.2d,v19.2d
1359	ld1	{v24.2d},[x3],#16
1360	ext	v25.16b,v25.16b,v25.16b,#8
1361	ext	v5.16b,v3.16b,v0.16b,#8
1362	ext	v6.16b,v4.16b,v3.16b,#8
1363	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
1364.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
1365	ext	v7.16b,v23.16b,v16.16b,#8
1366.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
1367.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
1368	add	v2.2d,v4.2d,v0.2d		// "D + T1"
1369.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
1370	add	v24.2d,v24.2d,v20.2d
1371	ld1	{v25.2d},[x3],#16
1372	ext	v24.16b,v24.16b,v24.16b,#8
1373	ext	v5.16b,v2.16b,v3.16b,#8
1374	ext	v6.16b,v1.16b,v2.16b,#8
1375	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
1376.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
1377	ext	v7.16b,v16.16b,v17.16b,#8
1378.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
1379.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
1380	add	v4.2d,v1.2d,v3.2d		// "D + T1"
1381.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
1382	add	v25.2d,v25.2d,v21.2d
1383	ld1	{v24.2d},[x3],#16
1384	ext	v25.16b,v25.16b,v25.16b,#8
1385	ext	v5.16b,v4.16b,v2.16b,#8
1386	ext	v6.16b,v0.16b,v4.16b,#8
1387	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
1388.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
1389	ext	v7.16b,v17.16b,v18.16b,#8
1390.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
1391.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
1392	add	v1.2d,v0.2d,v2.2d		// "D + T1"
1393.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
1394	add	v24.2d,v24.2d,v22.2d
1395	ld1	{v25.2d},[x3],#16
1396	ext	v24.16b,v24.16b,v24.16b,#8
1397	ext	v5.16b,v1.16b,v4.16b,#8
1398	ext	v6.16b,v3.16b,v1.16b,#8
1399	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
1400.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
1401	ext	v7.16b,v18.16b,v19.16b,#8
1402.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
1403.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
1404	add	v0.2d,v3.2d,v4.2d		// "D + T1"
1405.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
1406	add	v25.2d,v25.2d,v23.2d
1407	ld1	{v24.2d},[x3],#16
1408	ext	v25.16b,v25.16b,v25.16b,#8
1409	ext	v5.16b,v0.16b,v1.16b,#8
1410	ext	v6.16b,v2.16b,v0.16b,#8
1411	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
1412.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
1413	ext	v7.16b,v19.16b,v20.16b,#8
1414.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
1415.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
1416	add	v3.2d,v2.2d,v1.2d		// "D + T1"
1417.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
1418	add	v24.2d,v24.2d,v16.2d
1419	ld1	{v25.2d},[x3],#16
1420	ext	v24.16b,v24.16b,v24.16b,#8
1421	ext	v5.16b,v3.16b,v0.16b,#8
1422	ext	v6.16b,v4.16b,v3.16b,#8
1423	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
1424.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
1425	ext	v7.16b,v20.16b,v21.16b,#8
1426.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
1427.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
1428	add	v2.2d,v4.2d,v0.2d		// "D + T1"
1429.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
1430	add	v25.2d,v25.2d,v17.2d
1431	ld1	{v24.2d},[x3],#16
1432	ext	v25.16b,v25.16b,v25.16b,#8
1433	ext	v5.16b,v2.16b,v3.16b,#8
1434	ext	v6.16b,v1.16b,v2.16b,#8
1435	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
1436.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
1437	ext	v7.16b,v21.16b,v22.16b,#8
1438.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
1439.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
1440	add	v4.2d,v1.2d,v3.2d		// "D + T1"
1441.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
1442	add	v24.2d,v24.2d,v18.2d
1443	ld1	{v25.2d},[x3],#16
1444	ext	v24.16b,v24.16b,v24.16b,#8
1445	ext	v5.16b,v4.16b,v2.16b,#8
1446	ext	v6.16b,v0.16b,v4.16b,#8
1447	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
1448.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
1449	ext	v7.16b,v22.16b,v23.16b,#8
1450.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
1451.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
1452	add	v1.2d,v0.2d,v2.2d		// "D + T1"
1453.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
1454	add	v25.2d,v25.2d,v19.2d
1455	ld1	{v24.2d},[x3],#16
1456	ext	v25.16b,v25.16b,v25.16b,#8
1457	ext	v5.16b,v1.16b,v4.16b,#8
1458	ext	v6.16b,v3.16b,v1.16b,#8
1459	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
1460.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
1461	ext	v7.16b,v23.16b,v16.16b,#8
1462.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
1463.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
1464	add	v0.2d,v3.2d,v4.2d		// "D + T1"
1465.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
1466	add	v24.2d,v24.2d,v20.2d
1467	ld1	{v25.2d},[x3],#16
1468	ext	v24.16b,v24.16b,v24.16b,#8
1469	ext	v5.16b,v0.16b,v1.16b,#8
1470	ext	v6.16b,v2.16b,v0.16b,#8
1471	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
1472.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
1473	ext	v7.16b,v16.16b,v17.16b,#8
1474.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
1475.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
1476	add	v3.2d,v2.2d,v1.2d		// "D + T1"
1477.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
1478	add	v25.2d,v25.2d,v21.2d
1479	ld1	{v24.2d},[x3],#16
1480	ext	v25.16b,v25.16b,v25.16b,#8
1481	ext	v5.16b,v3.16b,v0.16b,#8
1482	ext	v6.16b,v4.16b,v3.16b,#8
1483	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
1484.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
1485	ext	v7.16b,v17.16b,v18.16b,#8
1486.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
1487.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
1488	add	v2.2d,v4.2d,v0.2d		// "D + T1"
1489.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
1490	add	v24.2d,v24.2d,v22.2d
1491	ld1	{v25.2d},[x3],#16
1492	ext	v24.16b,v24.16b,v24.16b,#8
1493	ext	v5.16b,v2.16b,v3.16b,#8
1494	ext	v6.16b,v1.16b,v2.16b,#8
1495	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
1496.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
1497	ext	v7.16b,v18.16b,v19.16b,#8
1498.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
1499.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
1500	add	v4.2d,v1.2d,v3.2d		// "D + T1"
1501.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
1502	add	v25.2d,v25.2d,v23.2d
1503	ld1	{v24.2d},[x3],#16
1504	ext	v25.16b,v25.16b,v25.16b,#8
1505	ext	v5.16b,v4.16b,v2.16b,#8
1506	ext	v6.16b,v0.16b,v4.16b,#8
1507	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
1508.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
1509	ext	v7.16b,v19.16b,v20.16b,#8
1510.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
1511.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
1512	add	v1.2d,v0.2d,v2.2d		// "D + T1"
1513.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
// Final 8 steps: no more sha512su0/sha512su1. Each schedule register is
// consumed by the add and then immediately overwritten with 16 bytes of the
// next block (from x1, which was rewound above on the last block), followed by
// a rev64 once the round instruction has been issued.
1514	ld1	{v25.2d},[x3],#16
1515	add	v24.2d,v24.2d,v16.2d
1516	ld1	{v16.16b},[x1],#16		// load next input
1517	ext	v24.16b,v24.16b,v24.16b,#8
1518	ext	v5.16b,v1.16b,v4.16b,#8
1519	ext	v6.16b,v3.16b,v1.16b,#8
1520	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
1521.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
1522	rev64	v16.16b,v16.16b
1523	add	v0.2d,v3.2d,v4.2d		// "D + T1"
1524.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
1525	ld1	{v24.2d},[x3],#16
1526	add	v25.2d,v25.2d,v17.2d
1527	ld1	{v17.16b},[x1],#16		// load next input
1528	ext	v25.16b,v25.16b,v25.16b,#8
1529	ext	v5.16b,v0.16b,v1.16b,#8
1530	ext	v6.16b,v2.16b,v0.16b,#8
1531	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
1532.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
1533	rev64	v17.16b,v17.16b
1534	add	v3.2d,v2.2d,v1.2d		// "D + T1"
1535.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
1536	ld1	{v25.2d},[x3],#16
1537	add	v24.2d,v24.2d,v18.2d
1538	ld1	{v18.16b},[x1],#16		// load next input
1539	ext	v24.16b,v24.16b,v24.16b,#8
1540	ext	v5.16b,v3.16b,v0.16b,#8
1541	ext	v6.16b,v4.16b,v3.16b,#8
1542	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
1543.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
1544	rev64	v18.16b,v18.16b
1545	add	v2.2d,v4.2d,v0.2d		// "D + T1"
1546.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
1547	ld1	{v24.2d},[x3],#16
1548	add	v25.2d,v25.2d,v19.2d
1549	ld1	{v19.16b},[x1],#16		// load next input
1550	ext	v25.16b,v25.16b,v25.16b,#8
1551	ext	v5.16b,v2.16b,v3.16b,#8
1552	ext	v6.16b,v1.16b,v2.16b,#8
1553	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
1554.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
1555	rev64	v19.16b,v19.16b
1556	add	v4.2d,v1.2d,v3.2d		// "D + T1"
1557.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
1558	ld1	{v25.2d},[x3],#16
1559	add	v24.2d,v24.2d,v20.2d
1560	ld1	{v20.16b},[x1],#16		// load next input
1561	ext	v24.16b,v24.16b,v24.16b,#8
1562	ext	v5.16b,v4.16b,v2.16b,#8
1563	ext	v6.16b,v0.16b,v4.16b,#8
1564	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
1565.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
1566	rev64	v20.16b,v20.16b
1567	add	v1.2d,v0.2d,v2.2d		// "D + T1"
1568.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
1569	ld1	{v24.2d},[x3],#16
1570	add	v25.2d,v25.2d,v21.2d
1571	ld1	{v21.16b},[x1],#16		// load next input
1572	ext	v25.16b,v25.16b,v25.16b,#8
1573	ext	v5.16b,v1.16b,v4.16b,#8
1574	ext	v6.16b,v3.16b,v1.16b,#8
1575	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
1576.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
1577	rev64	v21.16b,v21.16b
1578	add	v0.2d,v3.2d,v4.2d		// "D + T1"
1579.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
1580	ld1	{v25.2d},[x3],#16
1581	add	v24.2d,v24.2d,v22.2d
1582	ld1	{v22.16b},[x1],#16		// load next input
1583	ext	v24.16b,v24.16b,v24.16b,#8
1584	ext	v5.16b,v0.16b,v1.16b,#8
1585	ext	v6.16b,v2.16b,v0.16b,#8
1586	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
1587.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
1588	rev64	v22.16b,v22.16b
1589	add	v3.2d,v2.2d,v1.2d		// "D + T1"
1590.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
// All 40 constant pairs (40 * 16 = 80*8 bytes) have been loaded by now, so x3
// is moved back to the start of .LK512 for the next block.
1591	sub	x3,x3,#80*8	// rewind
1592	add	v25.2d,v25.2d,v23.2d
1593	ld1	{v23.16b},[x1],#16		// load next input
1594	ext	v25.16b,v25.16b,v25.16b,#8
1595	ext	v5.16b,v3.16b,v0.16b,#8
1596	ext	v6.16b,v4.16b,v3.16b,#8
1597	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
1598.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
1599	rev64	v23.16b,v23.16b
1600	add	v2.2d,v4.2d,v0.2d		// "D + T1"
1601.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
// Add the state that was copied into v26-v29 at the top of this iteration.
1602	add	v0.2d,v0.2d,v26.2d			// accumulate
1603	add	v1.2d,v1.2d,v27.2d
1604	add	v2.2d,v2.2d,v28.2d
1605	add	v3.2d,v3.2d,v29.2d
1606
// x2 was already decremented by the subs at the top of the loop.
1607	cbnz	x2,.Loop_hw
1608
1609	st1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// store context
1610
// Restore x29 and release the 16-byte frame; x30 still holds its entry value.
1611	ldr	x29,[sp],#16
1612	ret
1613.size	sha512_block_armv8,.-sha512_block_armv8
1614#endif
1615#endif
1616#endif  // !OPENSSL_NO_ASM
1617.section	.note.GNU-stack,"",%progbits
1618