• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__aarch64__)
13#if defined(BORINGSSL_PREFIX)
14#include <boringssl_prefix_symbols_asm.h>
15#endif
16// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
17//
18// Licensed under the OpenSSL license (the "License").  You may not use
19// this file except in compliance with the License.  You can obtain a copy
20// in the file LICENSE in the source distribution or at
21// https://www.openssl.org/source/license.html
22
23// ====================================================================
24// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
25// project. The module is, however, dual licensed under OpenSSL and
26// CRYPTOGAMS licenses depending on where you obtain it. For further
27// details see http://www.openssl.org/~appro/cryptogams/.
28//
29// Permission to use under GPLv2 terms is granted.
30// ====================================================================
31//
32// SHA256/512 for ARMv8.
33//
34// Performance in cycles per processed byte and improvement coefficient
35// over code generated with "default" compiler:
36//
37//		SHA256-hw	SHA256(*)	SHA512
38// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
39// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
40// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
41// Denver	2.01		10.5 (+26%)	6.70 (+8%)
42// X-Gene			20.0 (+100%)	12.8 (+300%(***))
43// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
44//
45// (*)	Software SHA256 results are of lesser relevance, presented
46//	mostly for informational purposes.
47// (**)	The result is a trade-off: it's possible to improve it by
48//	10% (or by 1 cycle per round), but at the cost of 20% loss
49//	on Cortex-A53 (or by 4 cycles per round).
50// (***)	Super-impressive coefficients over gcc-generated code are
51//	indication of some compiler "pathology", most notably code
52//	generated with -mgeneral-regs-only is significantly faster
53//	and the gap is only 40-90%.
54
55#ifndef	__KERNEL__
56# include <openssl/arm_arch.h>
57#endif
58
59.text
60
61
62
// sha512_block_data_order -- SHA-512 compression over consecutive 128-byte
// input blocks.
//
// Arguments, as used by the code below:
//   x0  pointer to eight 64-bit state words (loaded into x20-x27 here)
//   x1  input pointer
//   x2  block count; converted to an end-of-input pointer (x1 + x2*128)
// NOTE(review): the matching C prototype is not visible in this file --
// confirm the argument types against the C declaration.
//
// Stack frame (128 bytes, addressed through x29):
//   [x29,#0]       saved x29/x30
//   [x29,#16..#95] saved x19-x28
//   [x29,#96]      x0 (state pointer) and the end-of-input pointer
//   [x29,#112]     input pointer (stored at the top of Loop)
// A further 32 bytes below the frame ([sp,#0]..[sp,#24]) are reserved as four
// spill slots used by the round code for message-schedule words.
63.globl	sha512_block_data_order
64
// COFF symbol-table entry for the function symbol.
65.def sha512_block_data_order
66   .type 32
67.endef
68.align	6
69sha512_block_data_order:
70	AARCH64_SIGN_LINK_REGISTER
71	stp	x29,x30,[sp,#-128]!		// allocate the 128-byte frame, save fp/lr
72	add	x29,sp,#0			// x29 = frame base
73
// Save x19-x28; all of them are overwritten by the round code.
74	stp	x19,x20,[sp,#16]
75	stp	x21,x22,[sp,#32]
76	stp	x23,x24,[sp,#48]
77	stp	x25,x26,[sp,#64]
78	stp	x27,x28,[sp,#80]
79	sub	sp,sp,#4*8			// four 8-byte spill slots for X[] words
80
81	ldp	x20,x21,[x0]				// load context
82	ldp	x22,x23,[x0,#2*8]
83	ldp	x24,x25,[x0,#4*8]
84	add	x2,x1,x2,lsl#7	// end of input
85	ldp	x26,x27,[x0,#6*8]
// x30 = address of the LK512 constant table (page address + low 12 bits).
86	adrp	x30,LK512
87	add	x30,x30,:lo12:LK512
88	stp	x0,x2,[x29,#96]			// keep state pointer and end pointer; x0/x2 are reused below
89
// Loop -- rounds 0..15 of one 128-byte block.
//
// Register roles in this section:
//   x20..x27  working variables a..h on entry.  Values are never moved; each
//             round is written with the roles shifted by one register
//             (round 0: h=x27, e=x24, d=x23, a=x20; round 1: h=x26, ...).
//   x3..x15, x0, x1, x2
//             message words X[0]..X[15] in that order, loaded two at a time
//             from the input and byte-reversed with rev unless __ARMEB__ is
//             defined.
//   x19, x28  alternate each round between the K[i] value just loaded and
//             the a^b value carried into the next round for Maj(a,b,c).
//   x16, x17  scratch for Sigma1/Ch and Sigma0.
//   x30       walks LK512 with post-indexed 8-byte loads.
//
// In rounds 0..14 Sigma1(e) is built as ror(e,14) ^ ror(e ^ ror(e,23), 18),
// i.e. ror14 ^ ror18 ^ ror41, and Sigma0(a) as ror(a,28) ^ ror(a ^ ror(a,5), 34),
// i.e. ror28 ^ ror34 ^ ror39.  The final "h+=Sigma0(a)" add of each round is
// issued a few instructions into the following round (the commented-out add
// marks where it logically belongs).
90Loop:
91	ldp	x3,x4,[x1],#2*8
92	ldr	x19,[x30],#8			// *K++
93	eor	x28,x21,x22				// magic seed
94	str	x1,[x29,#112]			// save input pointer (already advanced by 16); x1 is reused as X[14] later
95#ifndef	__ARMEB__
96	rev	x3,x3			// 0
97#endif
98	ror	x16,x24,#14
99	add	x27,x27,x19			// h+=K[i]
100	eor	x6,x24,x24,ror#23
101	and	x17,x25,x24
102	bic	x19,x26,x24
103	add	x27,x27,x3			// h+=X[i]
104	orr	x17,x17,x19			// Ch(e,f,g)
105	eor	x19,x20,x21			// a^b, b^c in next round
106	eor	x16,x16,x6,ror#18	// Sigma1(e)
107	ror	x6,x20,#28
108	add	x27,x27,x17			// h+=Ch(e,f,g)
109	eor	x17,x20,x20,ror#5
110	add	x27,x27,x16			// h+=Sigma1(e)
111	and	x28,x28,x19			// (b^c)&=(a^b)
112	add	x23,x23,x27			// d+=h
113	eor	x28,x28,x21			// Maj(a,b,c)
114	eor	x17,x6,x17,ror#34	// Sigma0(a)
115	add	x27,x27,x28			// h+=Maj(a,b,c)
116	ldr	x28,[x30],#8		// *K++, x19 in next round
117	//add	x27,x27,x17			// h+=Sigma0(a)
118#ifndef	__ARMEB__
119	rev	x4,x4			// 1
120#endif
121	ldp	x5,x6,[x1],#2*8
122	add	x27,x27,x17			// h+=Sigma0(a)
123	ror	x16,x23,#14
124	add	x26,x26,x28			// h+=K[i]
125	eor	x7,x23,x23,ror#23
126	and	x17,x24,x23
127	bic	x28,x25,x23
128	add	x26,x26,x4			// h+=X[i]
129	orr	x17,x17,x28			// Ch(e,f,g)
130	eor	x28,x27,x20			// a^b, b^c in next round
131	eor	x16,x16,x7,ror#18	// Sigma1(e)
132	ror	x7,x27,#28
133	add	x26,x26,x17			// h+=Ch(e,f,g)
134	eor	x17,x27,x27,ror#5
135	add	x26,x26,x16			// h+=Sigma1(e)
136	and	x19,x19,x28			// (b^c)&=(a^b)
137	add	x22,x22,x26			// d+=h
138	eor	x19,x19,x20			// Maj(a,b,c)
139	eor	x17,x7,x17,ror#34	// Sigma0(a)
140	add	x26,x26,x19			// h+=Maj(a,b,c)
141	ldr	x19,[x30],#8		// *K++, x28 in next round
142	//add	x26,x26,x17			// h+=Sigma0(a)
143#ifndef	__ARMEB__
144	rev	x5,x5			// 2
145#endif
146	add	x26,x26,x17			// h+=Sigma0(a)
147	ror	x16,x22,#14
148	add	x25,x25,x19			// h+=K[i]
149	eor	x8,x22,x22,ror#23
150	and	x17,x23,x22
151	bic	x19,x24,x22
152	add	x25,x25,x5			// h+=X[i]
153	orr	x17,x17,x19			// Ch(e,f,g)
154	eor	x19,x26,x27			// a^b, b^c in next round
155	eor	x16,x16,x8,ror#18	// Sigma1(e)
156	ror	x8,x26,#28
157	add	x25,x25,x17			// h+=Ch(e,f,g)
158	eor	x17,x26,x26,ror#5
159	add	x25,x25,x16			// h+=Sigma1(e)
160	and	x28,x28,x19			// (b^c)&=(a^b)
161	add	x21,x21,x25			// d+=h
162	eor	x28,x28,x27			// Maj(a,b,c)
163	eor	x17,x8,x17,ror#34	// Sigma0(a)
164	add	x25,x25,x28			// h+=Maj(a,b,c)
165	ldr	x28,[x30],#8		// *K++, x19 in next round
166	//add	x25,x25,x17			// h+=Sigma0(a)
167#ifndef	__ARMEB__
168	rev	x6,x6			// 3
169#endif
170	ldp	x7,x8,[x1],#2*8
171	add	x25,x25,x17			// h+=Sigma0(a)
172	ror	x16,x21,#14
173	add	x24,x24,x28			// h+=K[i]
174	eor	x9,x21,x21,ror#23
175	and	x17,x22,x21
176	bic	x28,x23,x21
177	add	x24,x24,x6			// h+=X[i]
178	orr	x17,x17,x28			// Ch(e,f,g)
179	eor	x28,x25,x26			// a^b, b^c in next round
180	eor	x16,x16,x9,ror#18	// Sigma1(e)
181	ror	x9,x25,#28
182	add	x24,x24,x17			// h+=Ch(e,f,g)
183	eor	x17,x25,x25,ror#5
184	add	x24,x24,x16			// h+=Sigma1(e)
185	and	x19,x19,x28			// (b^c)&=(a^b)
186	add	x20,x20,x24			// d+=h
187	eor	x19,x19,x26			// Maj(a,b,c)
188	eor	x17,x9,x17,ror#34	// Sigma0(a)
189	add	x24,x24,x19			// h+=Maj(a,b,c)
190	ldr	x19,[x30],#8		// *K++, x28 in next round
191	//add	x24,x24,x17			// h+=Sigma0(a)
192#ifndef	__ARMEB__
193	rev	x7,x7			// 4
194#endif
195	add	x24,x24,x17			// h+=Sigma0(a)
196	ror	x16,x20,#14
197	add	x23,x23,x19			// h+=K[i]
198	eor	x10,x20,x20,ror#23
199	and	x17,x21,x20
200	bic	x19,x22,x20
201	add	x23,x23,x7			// h+=X[i]
202	orr	x17,x17,x19			// Ch(e,f,g)
203	eor	x19,x24,x25			// a^b, b^c in next round
204	eor	x16,x16,x10,ror#18	// Sigma1(e)
205	ror	x10,x24,#28
206	add	x23,x23,x17			// h+=Ch(e,f,g)
207	eor	x17,x24,x24,ror#5
208	add	x23,x23,x16			// h+=Sigma1(e)
209	and	x28,x28,x19			// (b^c)&=(a^b)
210	add	x27,x27,x23			// d+=h
211	eor	x28,x28,x25			// Maj(a,b,c)
212	eor	x17,x10,x17,ror#34	// Sigma0(a)
213	add	x23,x23,x28			// h+=Maj(a,b,c)
214	ldr	x28,[x30],#8		// *K++, x19 in next round
215	//add	x23,x23,x17			// h+=Sigma0(a)
216#ifndef	__ARMEB__
217	rev	x8,x8			// 5
218#endif
219	ldp	x9,x10,[x1],#2*8
220	add	x23,x23,x17			// h+=Sigma0(a)
221	ror	x16,x27,#14
222	add	x22,x22,x28			// h+=K[i]
223	eor	x11,x27,x27,ror#23
224	and	x17,x20,x27
225	bic	x28,x21,x27
226	add	x22,x22,x8			// h+=X[i]
227	orr	x17,x17,x28			// Ch(e,f,g)
228	eor	x28,x23,x24			// a^b, b^c in next round
229	eor	x16,x16,x11,ror#18	// Sigma1(e)
230	ror	x11,x23,#28
231	add	x22,x22,x17			// h+=Ch(e,f,g)
232	eor	x17,x23,x23,ror#5
233	add	x22,x22,x16			// h+=Sigma1(e)
234	and	x19,x19,x28			// (b^c)&=(a^b)
235	add	x26,x26,x22			// d+=h
236	eor	x19,x19,x24			// Maj(a,b,c)
237	eor	x17,x11,x17,ror#34	// Sigma0(a)
238	add	x22,x22,x19			// h+=Maj(a,b,c)
239	ldr	x19,[x30],#8		// *K++, x28 in next round
240	//add	x22,x22,x17			// h+=Sigma0(a)
241#ifndef	__ARMEB__
242	rev	x9,x9			// 6
243#endif
244	add	x22,x22,x17			// h+=Sigma0(a)
245	ror	x16,x26,#14
246	add	x21,x21,x19			// h+=K[i]
247	eor	x12,x26,x26,ror#23
248	and	x17,x27,x26
249	bic	x19,x20,x26
250	add	x21,x21,x9			// h+=X[i]
251	orr	x17,x17,x19			// Ch(e,f,g)
252	eor	x19,x22,x23			// a^b, b^c in next round
253	eor	x16,x16,x12,ror#18	// Sigma1(e)
254	ror	x12,x22,#28
255	add	x21,x21,x17			// h+=Ch(e,f,g)
256	eor	x17,x22,x22,ror#5
257	add	x21,x21,x16			// h+=Sigma1(e)
258	and	x28,x28,x19			// (b^c)&=(a^b)
259	add	x25,x25,x21			// d+=h
260	eor	x28,x28,x23			// Maj(a,b,c)
261	eor	x17,x12,x17,ror#34	// Sigma0(a)
262	add	x21,x21,x28			// h+=Maj(a,b,c)
263	ldr	x28,[x30],#8		// *K++, x19 in next round
264	//add	x21,x21,x17			// h+=Sigma0(a)
265#ifndef	__ARMEB__
266	rev	x10,x10			// 7
267#endif
268	ldp	x11,x12,[x1],#2*8
269	add	x21,x21,x17			// h+=Sigma0(a)
270	ror	x16,x25,#14
271	add	x20,x20,x28			// h+=K[i]
272	eor	x13,x25,x25,ror#23
273	and	x17,x26,x25
274	bic	x28,x27,x25
275	add	x20,x20,x10			// h+=X[i]
276	orr	x17,x17,x28			// Ch(e,f,g)
277	eor	x28,x21,x22			// a^b, b^c in next round
278	eor	x16,x16,x13,ror#18	// Sigma1(e)
279	ror	x13,x21,#28
280	add	x20,x20,x17			// h+=Ch(e,f,g)
281	eor	x17,x21,x21,ror#5
282	add	x20,x20,x16			// h+=Sigma1(e)
283	and	x19,x19,x28			// (b^c)&=(a^b)
284	add	x24,x24,x20			// d+=h
285	eor	x19,x19,x22			// Maj(a,b,c)
286	eor	x17,x13,x17,ror#34	// Sigma0(a)
287	add	x20,x20,x19			// h+=Maj(a,b,c)
288	ldr	x19,[x30],#8		// *K++, x28 in next round
289	//add	x20,x20,x17			// h+=Sigma0(a)
290#ifndef	__ARMEB__
291	rev	x11,x11			// 8
292#endif
293	add	x20,x20,x17			// h+=Sigma0(a)
294	ror	x16,x24,#14
295	add	x27,x27,x19			// h+=K[i]
296	eor	x14,x24,x24,ror#23
297	and	x17,x25,x24
298	bic	x19,x26,x24
299	add	x27,x27,x11			// h+=X[i]
300	orr	x17,x17,x19			// Ch(e,f,g)
301	eor	x19,x20,x21			// a^b, b^c in next round
302	eor	x16,x16,x14,ror#18	// Sigma1(e)
303	ror	x14,x20,#28
304	add	x27,x27,x17			// h+=Ch(e,f,g)
305	eor	x17,x20,x20,ror#5
306	add	x27,x27,x16			// h+=Sigma1(e)
307	and	x28,x28,x19			// (b^c)&=(a^b)
308	add	x23,x23,x27			// d+=h
309	eor	x28,x28,x21			// Maj(a,b,c)
310	eor	x17,x14,x17,ror#34	// Sigma0(a)
311	add	x27,x27,x28			// h+=Maj(a,b,c)
312	ldr	x28,[x30],#8		// *K++, x19 in next round
313	//add	x27,x27,x17			// h+=Sigma0(a)
314#ifndef	__ARMEB__
315	rev	x12,x12			// 9
316#endif
317	ldp	x13,x14,[x1],#2*8
318	add	x27,x27,x17			// h+=Sigma0(a)
319	ror	x16,x23,#14
320	add	x26,x26,x28			// h+=K[i]
321	eor	x15,x23,x23,ror#23
322	and	x17,x24,x23
323	bic	x28,x25,x23
324	add	x26,x26,x12			// h+=X[i]
325	orr	x17,x17,x28			// Ch(e,f,g)
326	eor	x28,x27,x20			// a^b, b^c in next round
327	eor	x16,x16,x15,ror#18	// Sigma1(e)
328	ror	x15,x27,#28
329	add	x26,x26,x17			// h+=Ch(e,f,g)
330	eor	x17,x27,x27,ror#5
331	add	x26,x26,x16			// h+=Sigma1(e)
332	and	x19,x19,x28			// (b^c)&=(a^b)
333	add	x22,x22,x26			// d+=h
334	eor	x19,x19,x20			// Maj(a,b,c)
335	eor	x17,x15,x17,ror#34	// Sigma0(a)
336	add	x26,x26,x19			// h+=Maj(a,b,c)
337	ldr	x19,[x30],#8		// *K++, x28 in next round
338	//add	x26,x26,x17			// h+=Sigma0(a)
339#ifndef	__ARMEB__
340	rev	x13,x13			// 10
341#endif
342	add	x26,x26,x17			// h+=Sigma0(a)
343	ror	x16,x22,#14
344	add	x25,x25,x19			// h+=K[i]
345	eor	x0,x22,x22,ror#23
346	and	x17,x23,x22
347	bic	x19,x24,x22
348	add	x25,x25,x13			// h+=X[i]
349	orr	x17,x17,x19			// Ch(e,f,g)
350	eor	x19,x26,x27			// a^b, b^c in next round
351	eor	x16,x16,x0,ror#18	// Sigma1(e)
352	ror	x0,x26,#28
353	add	x25,x25,x17			// h+=Ch(e,f,g)
354	eor	x17,x26,x26,ror#5
355	add	x25,x25,x16			// h+=Sigma1(e)
356	and	x28,x28,x19			// (b^c)&=(a^b)
357	add	x21,x21,x25			// d+=h
358	eor	x28,x28,x27			// Maj(a,b,c)
359	eor	x17,x0,x17,ror#34	// Sigma0(a)
360	add	x25,x25,x28			// h+=Maj(a,b,c)
361	ldr	x28,[x30],#8		// *K++, x19 in next round
362	//add	x25,x25,x17			// h+=Sigma0(a)
363#ifndef	__ARMEB__
364	rev	x14,x14			// 11
365#endif
366	ldp	x15,x0,[x1],#2*8
367	add	x25,x25,x17			// h+=Sigma0(a)
// From round 11 on every register from x0..x15 holds a message word, so the
// word whose register is about to be used as scratch is spilled first.
368	str	x6,[sp,#24]			// spill X[3]; x6 is scratch in this round
369	ror	x16,x21,#14
370	add	x24,x24,x28			// h+=K[i]
371	eor	x6,x21,x21,ror#23
372	and	x17,x22,x21
373	bic	x28,x23,x21
374	add	x24,x24,x14			// h+=X[i]
375	orr	x17,x17,x28			// Ch(e,f,g)
376	eor	x28,x25,x26			// a^b, b^c in next round
377	eor	x16,x16,x6,ror#18	// Sigma1(e)
378	ror	x6,x25,#28
379	add	x24,x24,x17			// h+=Ch(e,f,g)
380	eor	x17,x25,x25,ror#5
381	add	x24,x24,x16			// h+=Sigma1(e)
382	and	x19,x19,x28			// (b^c)&=(a^b)
383	add	x20,x20,x24			// d+=h
384	eor	x19,x19,x26			// Maj(a,b,c)
385	eor	x17,x6,x17,ror#34	// Sigma0(a)
386	add	x24,x24,x19			// h+=Maj(a,b,c)
387	ldr	x19,[x30],#8		// *K++, x28 in next round
388	//add	x24,x24,x17			// h+=Sigma0(a)
389#ifndef	__ARMEB__
390	rev	x15,x15			// 12
391#endif
392	add	x24,x24,x17			// h+=Sigma0(a)
393	str	x7,[sp,#0]			// spill X[4]; x7 is scratch in this round
394	ror	x16,x20,#14
395	add	x23,x23,x19			// h+=K[i]
396	eor	x7,x20,x20,ror#23
397	and	x17,x21,x20
398	bic	x19,x22,x20
399	add	x23,x23,x15			// h+=X[i]
400	orr	x17,x17,x19			// Ch(e,f,g)
401	eor	x19,x24,x25			// a^b, b^c in next round
402	eor	x16,x16,x7,ror#18	// Sigma1(e)
403	ror	x7,x24,#28
404	add	x23,x23,x17			// h+=Ch(e,f,g)
405	eor	x17,x24,x24,ror#5
406	add	x23,x23,x16			// h+=Sigma1(e)
407	and	x28,x28,x19			// (b^c)&=(a^b)
408	add	x27,x27,x23			// d+=h
409	eor	x28,x28,x25			// Maj(a,b,c)
410	eor	x17,x7,x17,ror#34	// Sigma0(a)
411	add	x23,x23,x28			// h+=Maj(a,b,c)
412	ldr	x28,[x30],#8		// *K++, x19 in next round
413	//add	x23,x23,x17			// h+=Sigma0(a)
414#ifndef	__ARMEB__
415	rev	x0,x0			// 13
416#endif
// Last two input words, no writeback: x1 itself becomes X[14] and x2 X[15].
// The input pointer and end pointer were saved in the frame beforehand.
417	ldp	x1,x2,[x1]
418	add	x23,x23,x17			// h+=Sigma0(a)
419	str	x8,[sp,#8]			// spill X[5]; x8 is scratch in this round
420	ror	x16,x27,#14
421	add	x22,x22,x28			// h+=K[i]
422	eor	x8,x27,x27,ror#23
423	and	x17,x20,x27
424	bic	x28,x21,x27
425	add	x22,x22,x0			// h+=X[i]
426	orr	x17,x17,x28			// Ch(e,f,g)
427	eor	x28,x23,x24			// a^b, b^c in next round
428	eor	x16,x16,x8,ror#18	// Sigma1(e)
429	ror	x8,x23,#28
430	add	x22,x22,x17			// h+=Ch(e,f,g)
431	eor	x17,x23,x23,ror#5
432	add	x22,x22,x16			// h+=Sigma1(e)
433	and	x19,x19,x28			// (b^c)&=(a^b)
434	add	x26,x26,x22			// d+=h
435	eor	x19,x19,x24			// Maj(a,b,c)
436	eor	x17,x8,x17,ror#34	// Sigma0(a)
437	add	x22,x22,x19			// h+=Maj(a,b,c)
438	ldr	x19,[x30],#8		// *K++, x28 in next round
439	//add	x22,x22,x17			// h+=Sigma0(a)
440#ifndef	__ARMEB__
441	rev	x1,x1			// 14
442#endif
443	ldr	x6,[sp,#24]			// reload X[3] spilled in round 11
444	add	x22,x22,x17			// h+=Sigma0(a)
445	str	x9,[sp,#16]			// spill X[6]; x9 is scratch in this round
446	ror	x16,x26,#14
447	add	x21,x21,x19			// h+=K[i]
448	eor	x9,x26,x26,ror#23
449	and	x17,x27,x26
450	bic	x19,x20,x26
451	add	x21,x21,x1			// h+=X[i]
452	orr	x17,x17,x19			// Ch(e,f,g)
453	eor	x19,x22,x23			// a^b, b^c in next round
454	eor	x16,x16,x9,ror#18	// Sigma1(e)
455	ror	x9,x22,#28
456	add	x21,x21,x17			// h+=Ch(e,f,g)
457	eor	x17,x22,x22,ror#5
458	add	x21,x21,x16			// h+=Sigma1(e)
459	and	x28,x28,x19			// (b^c)&=(a^b)
460	add	x25,x25,x21			// d+=h
461	eor	x28,x28,x23			// Maj(a,b,c)
462	eor	x17,x9,x17,ror#34	// Sigma0(a)
463	add	x21,x21,x28			// h+=Maj(a,b,c)
464	ldr	x28,[x30],#8		// *K++, x19 in next round
465	//add	x21,x21,x17			// h+=Sigma0(a)
466#ifndef	__ARMEB__
467	rev	x2,x2			// 15
468#endif
469	ldr	x7,[sp,#0]			// reload X[4] spilled in round 12
470	add	x21,x21,x17			// h+=Sigma0(a)
471	str	x10,[sp,#24]			// spill X[7]; x10 is scratch in this round
// Round 15 uses the same instruction pattern as the Loop_16_xx rounds: Sigma1
// and Sigma0 are formed directly from three rotates, and the schedule word for
// round 16 is produced alongside:
//   x3 = X[0] + X[9] + sigma0(X[1]) + sigma1(X[14])   (x4 = X[1], x1 = X[14], x12 = X[9])
472	ror	x16,x25,#14
473	add	x20,x20,x28			// h+=K[i]
474	ror	x9,x4,#1
475	and	x17,x26,x25
476	ror	x8,x1,#19
477	bic	x28,x27,x25
478	ror	x10,x21,#28
479	add	x20,x20,x2			// h+=X[i]
480	eor	x16,x16,x25,ror#18
481	eor	x9,x9,x4,ror#8
482	orr	x17,x17,x28			// Ch(e,f,g)
483	eor	x28,x21,x22			// a^b, b^c in next round
484	eor	x16,x16,x25,ror#41	// Sigma1(e)
485	eor	x10,x10,x21,ror#34
486	add	x20,x20,x17			// h+=Ch(e,f,g)
487	and	x19,x19,x28			// (b^c)&=(a^b)
488	eor	x8,x8,x1,ror#61
489	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
490	add	x20,x20,x16			// h+=Sigma1(e)
491	eor	x19,x19,x22			// Maj(a,b,c)
492	eor	x17,x10,x21,ror#39	// Sigma0(a)
493	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
494	add	x3,x3,x12
495	add	x24,x24,x20			// d+=h
496	add	x20,x20,x19			// h+=Maj(a,b,c)
497	ldr	x19,[x30],#8		// *K++, x28 in next round
498	add	x3,x3,x9
499	add	x20,x20,x17			// h+=Sigma0(a)
500	add	x3,x3,x8
// Loop_16_xx -- rounds 16..79, sixteen rounds per iteration.
//
// Every round does two things, interleaved:
//   * the compression round for the current X[i], with
//       Sigma1(e) = ror(e,14) ^ ror(e,18) ^ ror(e,41)
//       Sigma0(a) = ror(a,28) ^ ror(a,34) ^ ror(a,39)
//   * the schedule update that produces the word for the NEXT round:
//       X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])
//       sigma0(x) = ror(x,1) ^ ror(x,8) ^ (x >> 7)
//       sigma1(x) = ror(x,19) ^ ror(x,61) ^ (x >> 6)
//     (indices as in the existing comments, relative to the 16-word window).
//
// The sixteen schedule words live in x3..x15, x0, x1, x2.  Each round borrows
// three of those registers as scratch, so each round opens with an ldr that
// brings back a word spilled earlier and a str that spills the next word whose
// register is about to be borrowed; the four slots [sp,#0]..[sp,#24] are
// reused cyclically.
//
// x19/x28 keep alternating between K[i] and the a^b value, and the working
// variables keep rotating through x20..x27, exactly as in rounds 0..15.
501Loop_16_xx:
502	ldr	x8,[sp,#8]			// reload a spilled schedule word
503	str	x11,[sp,#0]			// spill x11; it is scratch in this round
504	ror	x16,x24,#14
505	add	x27,x27,x19			// h+=K[i]
506	ror	x10,x5,#1
507	and	x17,x25,x24
508	ror	x9,x2,#19
509	bic	x19,x26,x24
510	ror	x11,x20,#28
511	add	x27,x27,x3			// h+=X[i]
512	eor	x16,x16,x24,ror#18
513	eor	x10,x10,x5,ror#8
514	orr	x17,x17,x19			// Ch(e,f,g)
515	eor	x19,x20,x21			// a^b, b^c in next round
516	eor	x16,x16,x24,ror#41	// Sigma1(e)
517	eor	x11,x11,x20,ror#34
518	add	x27,x27,x17			// h+=Ch(e,f,g)
519	and	x28,x28,x19			// (b^c)&=(a^b)
520	eor	x9,x9,x2,ror#61
521	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
522	add	x27,x27,x16			// h+=Sigma1(e)
523	eor	x28,x28,x21			// Maj(a,b,c)
524	eor	x17,x11,x20,ror#39	// Sigma0(a)
525	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
526	add	x4,x4,x13
527	add	x23,x23,x27			// d+=h
528	add	x27,x27,x28			// h+=Maj(a,b,c)
529	ldr	x28,[x30],#8		// *K++, x19 in next round
530	add	x4,x4,x10
531	add	x27,x27,x17			// h+=Sigma0(a)
532	add	x4,x4,x9
533	ldr	x9,[sp,#16]
534	str	x12,[sp,#8]
535	ror	x16,x23,#14
536	add	x26,x26,x28			// h+=K[i]
537	ror	x11,x6,#1
538	and	x17,x24,x23
539	ror	x10,x3,#19
540	bic	x28,x25,x23
541	ror	x12,x27,#28
542	add	x26,x26,x4			// h+=X[i]
543	eor	x16,x16,x23,ror#18
544	eor	x11,x11,x6,ror#8
545	orr	x17,x17,x28			// Ch(e,f,g)
546	eor	x28,x27,x20			// a^b, b^c in next round
547	eor	x16,x16,x23,ror#41	// Sigma1(e)
548	eor	x12,x12,x27,ror#34
549	add	x26,x26,x17			// h+=Ch(e,f,g)
550	and	x19,x19,x28			// (b^c)&=(a^b)
551	eor	x10,x10,x3,ror#61
552	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
553	add	x26,x26,x16			// h+=Sigma1(e)
554	eor	x19,x19,x20			// Maj(a,b,c)
555	eor	x17,x12,x27,ror#39	// Sigma0(a)
556	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
557	add	x5,x5,x14
558	add	x22,x22,x26			// d+=h
559	add	x26,x26,x19			// h+=Maj(a,b,c)
560	ldr	x19,[x30],#8		// *K++, x28 in next round
561	add	x5,x5,x11
562	add	x26,x26,x17			// h+=Sigma0(a)
563	add	x5,x5,x10
564	ldr	x10,[sp,#24]
565	str	x13,[sp,#16]
566	ror	x16,x22,#14
567	add	x25,x25,x19			// h+=K[i]
568	ror	x12,x7,#1
569	and	x17,x23,x22
570	ror	x11,x4,#19
571	bic	x19,x24,x22
572	ror	x13,x26,#28
573	add	x25,x25,x5			// h+=X[i]
574	eor	x16,x16,x22,ror#18
575	eor	x12,x12,x7,ror#8
576	orr	x17,x17,x19			// Ch(e,f,g)
577	eor	x19,x26,x27			// a^b, b^c in next round
578	eor	x16,x16,x22,ror#41	// Sigma1(e)
579	eor	x13,x13,x26,ror#34
580	add	x25,x25,x17			// h+=Ch(e,f,g)
581	and	x28,x28,x19			// (b^c)&=(a^b)
582	eor	x11,x11,x4,ror#61
583	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
584	add	x25,x25,x16			// h+=Sigma1(e)
585	eor	x28,x28,x27			// Maj(a,b,c)
586	eor	x17,x13,x26,ror#39	// Sigma0(a)
587	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
588	add	x6,x6,x15
589	add	x21,x21,x25			// d+=h
590	add	x25,x25,x28			// h+=Maj(a,b,c)
591	ldr	x28,[x30],#8		// *K++, x19 in next round
592	add	x6,x6,x12
593	add	x25,x25,x17			// h+=Sigma0(a)
594	add	x6,x6,x11
595	ldr	x11,[sp,#0]
596	str	x14,[sp,#24]
597	ror	x16,x21,#14
598	add	x24,x24,x28			// h+=K[i]
599	ror	x13,x8,#1
600	and	x17,x22,x21
601	ror	x12,x5,#19
602	bic	x28,x23,x21
603	ror	x14,x25,#28
604	add	x24,x24,x6			// h+=X[i]
605	eor	x16,x16,x21,ror#18
606	eor	x13,x13,x8,ror#8
607	orr	x17,x17,x28			// Ch(e,f,g)
608	eor	x28,x25,x26			// a^b, b^c in next round
609	eor	x16,x16,x21,ror#41	// Sigma1(e)
610	eor	x14,x14,x25,ror#34
611	add	x24,x24,x17			// h+=Ch(e,f,g)
612	and	x19,x19,x28			// (b^c)&=(a^b)
613	eor	x12,x12,x5,ror#61
614	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
615	add	x24,x24,x16			// h+=Sigma1(e)
616	eor	x19,x19,x26			// Maj(a,b,c)
617	eor	x17,x14,x25,ror#39	// Sigma0(a)
618	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
619	add	x7,x7,x0
620	add	x20,x20,x24			// d+=h
621	add	x24,x24,x19			// h+=Maj(a,b,c)
622	ldr	x19,[x30],#8		// *K++, x28 in next round
623	add	x7,x7,x13
624	add	x24,x24,x17			// h+=Sigma0(a)
625	add	x7,x7,x12
626	ldr	x12,[sp,#8]
627	str	x15,[sp,#0]
628	ror	x16,x20,#14
629	add	x23,x23,x19			// h+=K[i]
630	ror	x14,x9,#1
631	and	x17,x21,x20
632	ror	x13,x6,#19
633	bic	x19,x22,x20
634	ror	x15,x24,#28
635	add	x23,x23,x7			// h+=X[i]
636	eor	x16,x16,x20,ror#18
637	eor	x14,x14,x9,ror#8
638	orr	x17,x17,x19			// Ch(e,f,g)
639	eor	x19,x24,x25			// a^b, b^c in next round
640	eor	x16,x16,x20,ror#41	// Sigma1(e)
641	eor	x15,x15,x24,ror#34
642	add	x23,x23,x17			// h+=Ch(e,f,g)
643	and	x28,x28,x19			// (b^c)&=(a^b)
644	eor	x13,x13,x6,ror#61
645	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
646	add	x23,x23,x16			// h+=Sigma1(e)
647	eor	x28,x28,x25			// Maj(a,b,c)
648	eor	x17,x15,x24,ror#39	// Sigma0(a)
649	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
650	add	x8,x8,x1
651	add	x27,x27,x23			// d+=h
652	add	x23,x23,x28			// h+=Maj(a,b,c)
653	ldr	x28,[x30],#8		// *K++, x19 in next round
654	add	x8,x8,x14
655	add	x23,x23,x17			// h+=Sigma0(a)
656	add	x8,x8,x13
657	ldr	x13,[sp,#16]
658	str	x0,[sp,#8]
659	ror	x16,x27,#14
660	add	x22,x22,x28			// h+=K[i]
661	ror	x15,x10,#1
662	and	x17,x20,x27
663	ror	x14,x7,#19
664	bic	x28,x21,x27
665	ror	x0,x23,#28
666	add	x22,x22,x8			// h+=X[i]
667	eor	x16,x16,x27,ror#18
668	eor	x15,x15,x10,ror#8
669	orr	x17,x17,x28			// Ch(e,f,g)
670	eor	x28,x23,x24			// a^b, b^c in next round
671	eor	x16,x16,x27,ror#41	// Sigma1(e)
672	eor	x0,x0,x23,ror#34
673	add	x22,x22,x17			// h+=Ch(e,f,g)
674	and	x19,x19,x28			// (b^c)&=(a^b)
675	eor	x14,x14,x7,ror#61
676	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
677	add	x22,x22,x16			// h+=Sigma1(e)
678	eor	x19,x19,x24			// Maj(a,b,c)
679	eor	x17,x0,x23,ror#39	// Sigma0(a)
680	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
681	add	x9,x9,x2
682	add	x26,x26,x22			// d+=h
683	add	x22,x22,x19			// h+=Maj(a,b,c)
684	ldr	x19,[x30],#8		// *K++, x28 in next round
685	add	x9,x9,x15
686	add	x22,x22,x17			// h+=Sigma0(a)
687	add	x9,x9,x14
688	ldr	x14,[sp,#24]
689	str	x1,[sp,#16]
690	ror	x16,x26,#14
691	add	x21,x21,x19			// h+=K[i]
692	ror	x0,x11,#1
693	and	x17,x27,x26
694	ror	x15,x8,#19
695	bic	x19,x20,x26
696	ror	x1,x22,#28
697	add	x21,x21,x9			// h+=X[i]
698	eor	x16,x16,x26,ror#18
699	eor	x0,x0,x11,ror#8
700	orr	x17,x17,x19			// Ch(e,f,g)
701	eor	x19,x22,x23			// a^b, b^c in next round
702	eor	x16,x16,x26,ror#41	// Sigma1(e)
703	eor	x1,x1,x22,ror#34
704	add	x21,x21,x17			// h+=Ch(e,f,g)
705	and	x28,x28,x19			// (b^c)&=(a^b)
706	eor	x15,x15,x8,ror#61
707	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
708	add	x21,x21,x16			// h+=Sigma1(e)
709	eor	x28,x28,x23			// Maj(a,b,c)
710	eor	x17,x1,x22,ror#39	// Sigma0(a)
711	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
712	add	x10,x10,x3
713	add	x25,x25,x21			// d+=h
714	add	x21,x21,x28			// h+=Maj(a,b,c)
715	ldr	x28,[x30],#8		// *K++, x19 in next round
716	add	x10,x10,x0
717	add	x21,x21,x17			// h+=Sigma0(a)
718	add	x10,x10,x15
719	ldr	x15,[sp,#0]
720	str	x2,[sp,#24]
721	ror	x16,x25,#14
722	add	x20,x20,x28			// h+=K[i]
723	ror	x1,x12,#1
724	and	x17,x26,x25
725	ror	x0,x9,#19
726	bic	x28,x27,x25
727	ror	x2,x21,#28
728	add	x20,x20,x10			// h+=X[i]
729	eor	x16,x16,x25,ror#18
730	eor	x1,x1,x12,ror#8
731	orr	x17,x17,x28			// Ch(e,f,g)
732	eor	x28,x21,x22			// a^b, b^c in next round
733	eor	x16,x16,x25,ror#41	// Sigma1(e)
734	eor	x2,x2,x21,ror#34
735	add	x20,x20,x17			// h+=Ch(e,f,g)
736	and	x19,x19,x28			// (b^c)&=(a^b)
737	eor	x0,x0,x9,ror#61
738	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
739	add	x20,x20,x16			// h+=Sigma1(e)
740	eor	x19,x19,x22			// Maj(a,b,c)
741	eor	x17,x2,x21,ror#39	// Sigma0(a)
742	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
743	add	x11,x11,x4
744	add	x24,x24,x20			// d+=h
745	add	x20,x20,x19			// h+=Maj(a,b,c)
746	ldr	x19,[x30],#8		// *K++, x28 in next round
747	add	x11,x11,x1
748	add	x20,x20,x17			// h+=Sigma0(a)
749	add	x11,x11,x0
750	ldr	x0,[sp,#8]
751	str	x3,[sp,#0]
752	ror	x16,x24,#14
753	add	x27,x27,x19			// h+=K[i]
754	ror	x2,x13,#1
755	and	x17,x25,x24
756	ror	x1,x10,#19
757	bic	x19,x26,x24
758	ror	x3,x20,#28
759	add	x27,x27,x11			// h+=X[i]
760	eor	x16,x16,x24,ror#18
761	eor	x2,x2,x13,ror#8
762	orr	x17,x17,x19			// Ch(e,f,g)
763	eor	x19,x20,x21			// a^b, b^c in next round
764	eor	x16,x16,x24,ror#41	// Sigma1(e)
765	eor	x3,x3,x20,ror#34
766	add	x27,x27,x17			// h+=Ch(e,f,g)
767	and	x28,x28,x19			// (b^c)&=(a^b)
768	eor	x1,x1,x10,ror#61
769	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
770	add	x27,x27,x16			// h+=Sigma1(e)
771	eor	x28,x28,x21			// Maj(a,b,c)
772	eor	x17,x3,x20,ror#39	// Sigma0(a)
773	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
774	add	x12,x12,x5
775	add	x23,x23,x27			// d+=h
776	add	x27,x27,x28			// h+=Maj(a,b,c)
777	ldr	x28,[x30],#8		// *K++, x19 in next round
778	add	x12,x12,x2
779	add	x27,x27,x17			// h+=Sigma0(a)
780	add	x12,x12,x1
781	ldr	x1,[sp,#16]
782	str	x4,[sp,#8]
783	ror	x16,x23,#14
784	add	x26,x26,x28			// h+=K[i]
785	ror	x3,x14,#1
786	and	x17,x24,x23
787	ror	x2,x11,#19
788	bic	x28,x25,x23
789	ror	x4,x27,#28
790	add	x26,x26,x12			// h+=X[i]
791	eor	x16,x16,x23,ror#18
792	eor	x3,x3,x14,ror#8
793	orr	x17,x17,x28			// Ch(e,f,g)
794	eor	x28,x27,x20			// a^b, b^c in next round
795	eor	x16,x16,x23,ror#41	// Sigma1(e)
796	eor	x4,x4,x27,ror#34
797	add	x26,x26,x17			// h+=Ch(e,f,g)
798	and	x19,x19,x28			// (b^c)&=(a^b)
799	eor	x2,x2,x11,ror#61
800	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
801	add	x26,x26,x16			// h+=Sigma1(e)
802	eor	x19,x19,x20			// Maj(a,b,c)
803	eor	x17,x4,x27,ror#39	// Sigma0(a)
804	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
805	add	x13,x13,x6
806	add	x22,x22,x26			// d+=h
807	add	x26,x26,x19			// h+=Maj(a,b,c)
808	ldr	x19,[x30],#8		// *K++, x28 in next round
809	add	x13,x13,x3
810	add	x26,x26,x17			// h+=Sigma0(a)
811	add	x13,x13,x2
812	ldr	x2,[sp,#24]
813	str	x5,[sp,#16]
814	ror	x16,x22,#14
815	add	x25,x25,x19			// h+=K[i]
816	ror	x4,x15,#1
817	and	x17,x23,x22
818	ror	x3,x12,#19
819	bic	x19,x24,x22
820	ror	x5,x26,#28
821	add	x25,x25,x13			// h+=X[i]
822	eor	x16,x16,x22,ror#18
823	eor	x4,x4,x15,ror#8
824	orr	x17,x17,x19			// Ch(e,f,g)
825	eor	x19,x26,x27			// a^b, b^c in next round
826	eor	x16,x16,x22,ror#41	// Sigma1(e)
827	eor	x5,x5,x26,ror#34
828	add	x25,x25,x17			// h+=Ch(e,f,g)
829	and	x28,x28,x19			// (b^c)&=(a^b)
830	eor	x3,x3,x12,ror#61
831	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
832	add	x25,x25,x16			// h+=Sigma1(e)
833	eor	x28,x28,x27			// Maj(a,b,c)
834	eor	x17,x5,x26,ror#39	// Sigma0(a)
835	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
836	add	x14,x14,x7
837	add	x21,x21,x25			// d+=h
838	add	x25,x25,x28			// h+=Maj(a,b,c)
839	ldr	x28,[x30],#8		// *K++, x19 in next round
840	add	x14,x14,x4
841	add	x25,x25,x17			// h+=Sigma0(a)
842	add	x14,x14,x3
843	ldr	x3,[sp,#0]
844	str	x6,[sp,#24]
845	ror	x16,x21,#14
846	add	x24,x24,x28			// h+=K[i]
847	ror	x5,x0,#1
848	and	x17,x22,x21
849	ror	x4,x13,#19
850	bic	x28,x23,x21
851	ror	x6,x25,#28
852	add	x24,x24,x14			// h+=X[i]
853	eor	x16,x16,x21,ror#18
854	eor	x5,x5,x0,ror#8
855	orr	x17,x17,x28			// Ch(e,f,g)
856	eor	x28,x25,x26			// a^b, b^c in next round
857	eor	x16,x16,x21,ror#41	// Sigma1(e)
858	eor	x6,x6,x25,ror#34
859	add	x24,x24,x17			// h+=Ch(e,f,g)
860	and	x19,x19,x28			// (b^c)&=(a^b)
861	eor	x4,x4,x13,ror#61
862	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
863	add	x24,x24,x16			// h+=Sigma1(e)
864	eor	x19,x19,x26			// Maj(a,b,c)
865	eor	x17,x6,x25,ror#39	// Sigma0(a)
866	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
867	add	x15,x15,x8
868	add	x20,x20,x24			// d+=h
869	add	x24,x24,x19			// h+=Maj(a,b,c)
870	ldr	x19,[x30],#8		// *K++, x28 in next round
871	add	x15,x15,x5
872	add	x24,x24,x17			// h+=Sigma0(a)
873	add	x15,x15,x4
874	ldr	x4,[sp,#8]
875	str	x7,[sp,#0]
876	ror	x16,x20,#14
877	add	x23,x23,x19			// h+=K[i]
878	ror	x6,x1,#1
879	and	x17,x21,x20
880	ror	x5,x14,#19
881	bic	x19,x22,x20
882	ror	x7,x24,#28
883	add	x23,x23,x15			// h+=X[i]
884	eor	x16,x16,x20,ror#18
885	eor	x6,x6,x1,ror#8
886	orr	x17,x17,x19			// Ch(e,f,g)
887	eor	x19,x24,x25			// a^b, b^c in next round
888	eor	x16,x16,x20,ror#41	// Sigma1(e)
889	eor	x7,x7,x24,ror#34
890	add	x23,x23,x17			// h+=Ch(e,f,g)
891	and	x28,x28,x19			// (b^c)&=(a^b)
892	eor	x5,x5,x14,ror#61
893	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
894	add	x23,x23,x16			// h+=Sigma1(e)
895	eor	x28,x28,x25			// Maj(a,b,c)
896	eor	x17,x7,x24,ror#39	// Sigma0(a)
897	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
898	add	x0,x0,x9
899	add	x27,x27,x23			// d+=h
900	add	x23,x23,x28			// h+=Maj(a,b,c)
901	ldr	x28,[x30],#8		// *K++, x19 in next round
902	add	x0,x0,x6
903	add	x23,x23,x17			// h+=Sigma0(a)
904	add	x0,x0,x5
905	ldr	x5,[sp,#16]
906	str	x8,[sp,#8]
907	ror	x16,x27,#14
908	add	x22,x22,x28			// h+=K[i]
909	ror	x7,x2,#1
910	and	x17,x20,x27
911	ror	x6,x15,#19
912	bic	x28,x21,x27
913	ror	x8,x23,#28
914	add	x22,x22,x0			// h+=X[i]
915	eor	x16,x16,x27,ror#18
916	eor	x7,x7,x2,ror#8
917	orr	x17,x17,x28			// Ch(e,f,g)
918	eor	x28,x23,x24			// a^b, b^c in next round
919	eor	x16,x16,x27,ror#41	// Sigma1(e)
920	eor	x8,x8,x23,ror#34
921	add	x22,x22,x17			// h+=Ch(e,f,g)
922	and	x19,x19,x28			// (b^c)&=(a^b)
923	eor	x6,x6,x15,ror#61
924	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
925	add	x22,x22,x16			// h+=Sigma1(e)
926	eor	x19,x19,x24			// Maj(a,b,c)
927	eor	x17,x8,x23,ror#39	// Sigma0(a)
928	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
929	add	x1,x1,x10
930	add	x26,x26,x22			// d+=h
931	add	x22,x22,x19			// h+=Maj(a,b,c)
932	ldr	x19,[x30],#8		// *K++, x28 in next round
933	add	x1,x1,x7
934	add	x22,x22,x17			// h+=Sigma0(a)
935	add	x1,x1,x6
936	ldr	x6,[sp,#24]
937	str	x9,[sp,#16]
938	ror	x16,x26,#14
939	add	x21,x21,x19			// h+=K[i]
940	ror	x8,x3,#1
941	and	x17,x27,x26
942	ror	x7,x0,#19
943	bic	x19,x20,x26
944	ror	x9,x22,#28
945	add	x21,x21,x1			// h+=X[i]
946	eor	x16,x16,x26,ror#18
947	eor	x8,x8,x3,ror#8
948	orr	x17,x17,x19			// Ch(e,f,g)
949	eor	x19,x22,x23			// a^b, b^c in next round
950	eor	x16,x16,x26,ror#41	// Sigma1(e)
951	eor	x9,x9,x22,ror#34
952	add	x21,x21,x17			// h+=Ch(e,f,g)
953	and	x28,x28,x19			// (b^c)&=(a^b)
954	eor	x7,x7,x0,ror#61
955	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
956	add	x21,x21,x16			// h+=Sigma1(e)
957	eor	x28,x28,x23			// Maj(a,b,c)
958	eor	x17,x9,x22,ror#39	// Sigma0(a)
959	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
960	add	x2,x2,x11
961	add	x25,x25,x21			// d+=h
962	add	x21,x21,x28			// h+=Maj(a,b,c)
963	ldr	x28,[x30],#8		// *K++, x19 in next round
964	add	x2,x2,x8
965	add	x21,x21,x17			// h+=Sigma0(a)
966	add	x2,x2,x7
967	ldr	x7,[sp,#0]
968	str	x10,[sp,#24]
969	ror	x16,x25,#14
970	add	x20,x20,x28			// h+=K[i]
971	ror	x9,x4,#1
972	and	x17,x26,x25
973	ror	x8,x1,#19
974	bic	x28,x27,x25
975	ror	x10,x21,#28
976	add	x20,x20,x2			// h+=X[i]
977	eor	x16,x16,x25,ror#18
978	eor	x9,x9,x4,ror#8
979	orr	x17,x17,x28			// Ch(e,f,g)
980	eor	x28,x21,x22			// a^b, b^c in next round
981	eor	x16,x16,x25,ror#41	// Sigma1(e)
982	eor	x10,x10,x21,ror#34
983	add	x20,x20,x17			// h+=Ch(e,f,g)
984	and	x19,x19,x28			// (b^c)&=(a^b)
985	eor	x8,x8,x1,ror#61
986	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
987	add	x20,x20,x16			// h+=Sigma1(e)
988	eor	x19,x19,x22			// Maj(a,b,c)
989	eor	x17,x10,x21,ror#39	// Sigma0(a)
990	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
991	add	x3,x3,x12
992	add	x24,x24,x20			// d+=h
993	add	x20,x20,x19			// h+=Maj(a,b,c)
994	ldr	x19,[x30],#8		// *K++, x28 in next round
995	add	x3,x3,x9
996	add	x20,x20,x17			// h+=Sigma0(a)
997	add	x3,x3,x8
// x19 holds the table entry fetched for the next round.  LK512 ends with a
// zero quad, so a zero here means all constants have been consumed.
998	cbnz	x19,Loop_16_xx
999
// End of one 128-byte block: add the saved state to the working variables
// x20..x27, store the result back through x0, then either process the next
// block or restore registers and return.
//
// NOTE: the block loop is bottom-tested with b.ne against the end-of-input
// pointer, so one block is always processed before the first comparison.
// A block count of 0 is therefore not handled here; callers must pass at
// least one block.
1000	ldp	x0,x2,[x29,#96]		// reload state pointer and end-of-input pointer
1001	ldr	x1,[x29,#112]		// reload input pointer saved at the top of Loop
1002	sub	x30,x30,#648		// rewind K pointer: 81 post-indexed 8-byte loads per block (80 constants + terminator)
1003
1004	ldp	x3,x4,[x0]
1005	ldp	x5,x6,[x0,#2*8]
// The saved pointer had already been advanced by the first 16-byte load, so
// adding 14*8 moves it to the start of the next 128-byte block.
1006	add	x1,x1,#14*8			// advance input pointer
1007	ldp	x7,x8,[x0,#4*8]
1008	add	x20,x20,x3
1009	ldp	x9,x10,[x0,#6*8]
1010	add	x21,x21,x4
1011	add	x22,x22,x5
1012	add	x23,x23,x6
1013	stp	x20,x21,[x0]
1014	add	x24,x24,x7
1015	add	x25,x25,x8
1016	stp	x22,x23,[x0,#2*8]
1017	add	x26,x26,x9
1018	add	x27,x27,x10
1019	cmp	x1,x2				// reached end of input?
1020	stp	x24,x25,[x0,#4*8]
1021	stp	x26,x27,[x0,#6*8]
1022	b.ne	Loop				// stp does not alter flags; branch uses the cmp above
1023
// Epilogue: restore x19-x28 from the frame, release the 32-byte spill area,
// then pop the 128-byte frame together with x29/x30.
1024	ldp	x19,x20,[x29,#16]
1025	add	sp,sp,#4*8
1026	ldp	x21,x22,[x29,#32]
1027	ldp	x23,x24,[x29,#48]
1028	ldp	x25,x26,[x29,#64]
1029	ldp	x27,x28,[x29,#80]
1030	ldp	x29,x30,[sp],#128
1031	AARCH64_VALIDATE_LINK_REGISTER
1032	ret
1033
1034
// LK512 -- table of the 80 64-bit round constants read by the round code
// through x30 (one 8-byte post-indexed load per round), followed by a single
// zero quad.  The zero entry is what ends the Loop_16_xx loop (cbnz on the
// loaded value), so it must stay directly after the last constant and no
// constant may itself be zero.  Total size read per block: 81 * 8 = 648
// bytes, matching the "rewind" subtraction in the code.
1035.section	.rodata
1036.align	6
1037
1038LK512:
1039.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1040.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1041.quad	0x3956c25bf348b538,0x59f111f1b605d019
1042.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1043.quad	0xd807aa98a3030242,0x12835b0145706fbe
1044.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1045.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1046.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1047.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1048.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1049.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1050.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1051.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1052.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1053.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1054.quad	0x06ca6351e003826f,0x142929670a0e6e70
1055.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1056.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1057.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1058.quad	0x81c2c92e47edaee6,0x92722c851482353b
1059.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1060.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1061.quad	0xd192e819d6ef5218,0xd69906245565a910
1062.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1063.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1064.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1065.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1066.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1067.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1068.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1069.quad	0x90befffa23631e28,0xa4506cebde82bde9
1070.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1071.quad	0xca273eceea26619c,0xd186b8c721c0c207
1072.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1073.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1074.quad	0x113f9804bef90dae,0x1b710b35131c471b
1075.quad	0x28db77f523047d84,0x32caab7b40c72493
1076.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1077.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1078.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1079.quad	0	// terminator
1080
// NUL-terminated ASCII identification string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1081.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1082.align	2
1083.align	2
1084#endif
1085#endif  // !OPENSSL_NO_ASM
1086