• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
2//
3// Licensed under the OpenSSL license (the "License").  You may not use
4// this file except in compliance with the License.  You can obtain a copy
5// in the file LICENSE in the source distribution or at
6// https://www.openssl.org/source/license.html
7
8// ====================================================================
9// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
10// project. The module is, however, dual licensed under OpenSSL and
11// CRYPTOGAMS licenses depending on where you obtain it. For further
12// details see http://www.openssl.org/~appro/cryptogams/.
13//
14// Permission to use under GPLv2 terms is granted.
15// ====================================================================
16//
17// SHA256/512 for ARMv8.
18//
19// Performance in cycles per processed byte and improvement coefficient
20// over code generated with "default" compiler:
21//
22//		SHA256-hw	SHA256(*)	SHA512
23// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
24// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
25// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
26// Denver	2.01		10.5 (+26%)	6.70 (+8%)
27// X-Gene			20.0 (+100%)	12.8 (+300%(***))
28// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
29//
30// (*)	Software SHA256 results are of lesser relevance, presented
31//	mostly for informational purposes.
32// (**)	The result is a trade-off: it's possible to improve it by
33//	10% (or by 1 cycle per round), but at the cost of 20% loss
34//	on Cortex-A53 (or by 4 cycles per round).
35// (***)	Super-impressive coefficients over gcc-generated code are
36//	indication of some compiler "pathology", most notably code
37//	generated with -mgeneral-regs-only is significantly faster
38//	and the gap is only 40-90%.
39
40#ifndef	__KERNEL__
41# include <openssl/arm_arch.h>
42#endif
43
44.text
45
46
// _sha512_block_data_order: SHA-512 compression over consecutive 128-byte
// input blocks.
//
// In:	x0 = pointer to the eight 64-bit state words, updated in place
//	x1 = input pointer
//	x2 = number of 128-byte blocks (scaled with lsl#7 below)
//
// Working variables a..h live in x20..x27 for the whole function.
// Frame (128 bytes at x29): #0 x29/x30, #16..#95 x19..x28, #96 x0,
// #104 end-of-input pointer, #112 input pointer (written inside Loop).
// A further 4*8 bytes below the frame are reserved; the rounds use them
// as spill slots for message words.
//
// NOTE(review): the end-of-input test (cmp x1,x2 / b.ne Loop) only runs
// after a block has been processed, so x2 == 0 is not handled here --
// presumably callers always pass at least one block; confirm.
47.globl	_sha512_block_data_order
48.private_extern	_sha512_block_data_order
49
50.align	6
51_sha512_block_data_order:
52	stp	x29,x30,[sp,#-128]!
53	add	x29,sp,#0
54
55	stp	x19,x20,[sp,#16]
56	stp	x21,x22,[sp,#32]
57	stp	x23,x24,[sp,#48]
58	stp	x25,x26,[sp,#64]
59	stp	x27,x28,[sp,#80]
// spill area for message words, addressed as [sp,#0]..[sp,#24]
60	sub	sp,sp,#4*8
61
62	ldp	x20,x21,[x0]				// load context
63	ldp	x22,x23,[x0,#2*8]
64	ldp	x24,x25,[x0,#4*8]
65	add	x2,x1,x2,lsl#7	// end of input
66	ldp	x26,x27,[x0,#6*8]
// x30 (already saved in the frame above) becomes the round-constant pointer
67	adr	x30,LK512
// x0 and x2 are reused as message-word/scratch registers in the rounds,
// so keep the state pointer and end-of-input pointer in the frame
68	stp	x0,x2,[x29,#96]
69
// Loop: start of one 128-byte block, rounds 0..15.
//
// Each round adds one table constant (loaded through x30 with
// post-increment) and one input word to the current "h" register. Input
// words are loaded pairwise from x1 into x3..x15, x0, x1, x2 and
// byte-swapped with rev unless __ARMEB__ is defined.
// x16/x17 are scratch. x19 and x28 alternate between holding the round
// constant and the a^b value that the next round reuses as b^c.
// The "//add ... h+=Sigma0(a)" lines are commented-out instructions: the
// real add is issued a few instructions later, at the start of the next
// round.
70Loop:
71	ldp	x3,x4,[x1],#2*8
72	ldr	x19,[x30],#8			// *K++
73	eor	x28,x21,x22				// magic seed
// save the input pointer (already advanced by 2*8); x1 itself is
// overwritten with message data by the final ldp x1,x2,[x1] below
74	str	x1,[x29,#112]
75#ifndef	__ARMEB__
76	rev	x3,x3			// 0
77#endif
78	ror	x16,x24,#14
79	add	x27,x27,x19			// h+=K[i]
80	eor	x6,x24,x24,ror#23
81	and	x17,x25,x24
82	bic	x19,x26,x24
83	add	x27,x27,x3			// h+=X[i]
84	orr	x17,x17,x19			// Ch(e,f,g)
85	eor	x19,x20,x21			// a^b, b^c in next round
86	eor	x16,x16,x6,ror#18	// Sigma1(e)
87	ror	x6,x20,#28
88	add	x27,x27,x17			// h+=Ch(e,f,g)
89	eor	x17,x20,x20,ror#5
90	add	x27,x27,x16			// h+=Sigma1(e)
91	and	x28,x28,x19			// (b^c)&=(a^b)
92	add	x23,x23,x27			// d+=h
93	eor	x28,x28,x21			// Maj(a,b,c)
94	eor	x17,x6,x17,ror#34	// Sigma0(a)
95	add	x27,x27,x28			// h+=Maj(a,b,c)
96	ldr	x28,[x30],#8		// *K++, x19 in next round
97	//add	x27,x27,x17			// h+=Sigma0(a)
98#ifndef	__ARMEB__
99	rev	x4,x4			// 1
100#endif
101	ldp	x5,x6,[x1],#2*8
102	add	x27,x27,x17			// h+=Sigma0(a)
103	ror	x16,x23,#14
104	add	x26,x26,x28			// h+=K[i]
105	eor	x7,x23,x23,ror#23
106	and	x17,x24,x23
107	bic	x28,x25,x23
108	add	x26,x26,x4			// h+=X[i]
109	orr	x17,x17,x28			// Ch(e,f,g)
110	eor	x28,x27,x20			// a^b, b^c in next round
111	eor	x16,x16,x7,ror#18	// Sigma1(e)
112	ror	x7,x27,#28
113	add	x26,x26,x17			// h+=Ch(e,f,g)
114	eor	x17,x27,x27,ror#5
115	add	x26,x26,x16			// h+=Sigma1(e)
116	and	x19,x19,x28			// (b^c)&=(a^b)
117	add	x22,x22,x26			// d+=h
118	eor	x19,x19,x20			// Maj(a,b,c)
119	eor	x17,x7,x17,ror#34	// Sigma0(a)
120	add	x26,x26,x19			// h+=Maj(a,b,c)
121	ldr	x19,[x30],#8		// *K++, x28 in next round
122	//add	x26,x26,x17			// h+=Sigma0(a)
123#ifndef	__ARMEB__
124	rev	x5,x5			// 2
125#endif
126	add	x26,x26,x17			// h+=Sigma0(a)
127	ror	x16,x22,#14
128	add	x25,x25,x19			// h+=K[i]
129	eor	x8,x22,x22,ror#23
130	and	x17,x23,x22
131	bic	x19,x24,x22
132	add	x25,x25,x5			// h+=X[i]
133	orr	x17,x17,x19			// Ch(e,f,g)
134	eor	x19,x26,x27			// a^b, b^c in next round
135	eor	x16,x16,x8,ror#18	// Sigma1(e)
136	ror	x8,x26,#28
137	add	x25,x25,x17			// h+=Ch(e,f,g)
138	eor	x17,x26,x26,ror#5
139	add	x25,x25,x16			// h+=Sigma1(e)
140	and	x28,x28,x19			// (b^c)&=(a^b)
141	add	x21,x21,x25			// d+=h
142	eor	x28,x28,x27			// Maj(a,b,c)
143	eor	x17,x8,x17,ror#34	// Sigma0(a)
144	add	x25,x25,x28			// h+=Maj(a,b,c)
145	ldr	x28,[x30],#8		// *K++, x19 in next round
146	//add	x25,x25,x17			// h+=Sigma0(a)
147#ifndef	__ARMEB__
148	rev	x6,x6			// 3
149#endif
150	ldp	x7,x8,[x1],#2*8
151	add	x25,x25,x17			// h+=Sigma0(a)
152	ror	x16,x21,#14
153	add	x24,x24,x28			// h+=K[i]
154	eor	x9,x21,x21,ror#23
155	and	x17,x22,x21
156	bic	x28,x23,x21
157	add	x24,x24,x6			// h+=X[i]
158	orr	x17,x17,x28			// Ch(e,f,g)
159	eor	x28,x25,x26			// a^b, b^c in next round
160	eor	x16,x16,x9,ror#18	// Sigma1(e)
161	ror	x9,x25,#28
162	add	x24,x24,x17			// h+=Ch(e,f,g)
163	eor	x17,x25,x25,ror#5
164	add	x24,x24,x16			// h+=Sigma1(e)
165	and	x19,x19,x28			// (b^c)&=(a^b)
166	add	x20,x20,x24			// d+=h
167	eor	x19,x19,x26			// Maj(a,b,c)
168	eor	x17,x9,x17,ror#34	// Sigma0(a)
169	add	x24,x24,x19			// h+=Maj(a,b,c)
170	ldr	x19,[x30],#8		// *K++, x28 in next round
171	//add	x24,x24,x17			// h+=Sigma0(a)
172#ifndef	__ARMEB__
173	rev	x7,x7			// 4
174#endif
175	add	x24,x24,x17			// h+=Sigma0(a)
176	ror	x16,x20,#14
177	add	x23,x23,x19			// h+=K[i]
178	eor	x10,x20,x20,ror#23
179	and	x17,x21,x20
180	bic	x19,x22,x20
181	add	x23,x23,x7			// h+=X[i]
182	orr	x17,x17,x19			// Ch(e,f,g)
183	eor	x19,x24,x25			// a^b, b^c in next round
184	eor	x16,x16,x10,ror#18	// Sigma1(e)
185	ror	x10,x24,#28
186	add	x23,x23,x17			// h+=Ch(e,f,g)
187	eor	x17,x24,x24,ror#5
188	add	x23,x23,x16			// h+=Sigma1(e)
189	and	x28,x28,x19			// (b^c)&=(a^b)
190	add	x27,x27,x23			// d+=h
191	eor	x28,x28,x25			// Maj(a,b,c)
192	eor	x17,x10,x17,ror#34	// Sigma0(a)
193	add	x23,x23,x28			// h+=Maj(a,b,c)
194	ldr	x28,[x30],#8		// *K++, x19 in next round
195	//add	x23,x23,x17			// h+=Sigma0(a)
196#ifndef	__ARMEB__
197	rev	x8,x8			// 5
198#endif
199	ldp	x9,x10,[x1],#2*8
200	add	x23,x23,x17			// h+=Sigma0(a)
201	ror	x16,x27,#14
202	add	x22,x22,x28			// h+=K[i]
203	eor	x11,x27,x27,ror#23
204	and	x17,x20,x27
205	bic	x28,x21,x27
206	add	x22,x22,x8			// h+=X[i]
207	orr	x17,x17,x28			// Ch(e,f,g)
208	eor	x28,x23,x24			// a^b, b^c in next round
209	eor	x16,x16,x11,ror#18	// Sigma1(e)
210	ror	x11,x23,#28
211	add	x22,x22,x17			// h+=Ch(e,f,g)
212	eor	x17,x23,x23,ror#5
213	add	x22,x22,x16			// h+=Sigma1(e)
214	and	x19,x19,x28			// (b^c)&=(a^b)
215	add	x26,x26,x22			// d+=h
216	eor	x19,x19,x24			// Maj(a,b,c)
217	eor	x17,x11,x17,ror#34	// Sigma0(a)
218	add	x22,x22,x19			// h+=Maj(a,b,c)
219	ldr	x19,[x30],#8		// *K++, x28 in next round
220	//add	x22,x22,x17			// h+=Sigma0(a)
221#ifndef	__ARMEB__
222	rev	x9,x9			// 6
223#endif
224	add	x22,x22,x17			// h+=Sigma0(a)
225	ror	x16,x26,#14
226	add	x21,x21,x19			// h+=K[i]
227	eor	x12,x26,x26,ror#23
228	and	x17,x27,x26
229	bic	x19,x20,x26
230	add	x21,x21,x9			// h+=X[i]
231	orr	x17,x17,x19			// Ch(e,f,g)
232	eor	x19,x22,x23			// a^b, b^c in next round
233	eor	x16,x16,x12,ror#18	// Sigma1(e)
234	ror	x12,x22,#28
235	add	x21,x21,x17			// h+=Ch(e,f,g)
236	eor	x17,x22,x22,ror#5
237	add	x21,x21,x16			// h+=Sigma1(e)
238	and	x28,x28,x19			// (b^c)&=(a^b)
239	add	x25,x25,x21			// d+=h
240	eor	x28,x28,x23			// Maj(a,b,c)
241	eor	x17,x12,x17,ror#34	// Sigma0(a)
242	add	x21,x21,x28			// h+=Maj(a,b,c)
243	ldr	x28,[x30],#8		// *K++, x19 in next round
244	//add	x21,x21,x17			// h+=Sigma0(a)
245#ifndef	__ARMEB__
246	rev	x10,x10			// 7
247#endif
248	ldp	x11,x12,[x1],#2*8
249	add	x21,x21,x17			// h+=Sigma0(a)
250	ror	x16,x25,#14
251	add	x20,x20,x28			// h+=K[i]
252	eor	x13,x25,x25,ror#23
253	and	x17,x26,x25
254	bic	x28,x27,x25
255	add	x20,x20,x10			// h+=X[i]
256	orr	x17,x17,x28			// Ch(e,f,g)
257	eor	x28,x21,x22			// a^b, b^c in next round
258	eor	x16,x16,x13,ror#18	// Sigma1(e)
259	ror	x13,x21,#28
260	add	x20,x20,x17			// h+=Ch(e,f,g)
261	eor	x17,x21,x21,ror#5
262	add	x20,x20,x16			// h+=Sigma1(e)
263	and	x19,x19,x28			// (b^c)&=(a^b)
264	add	x24,x24,x20			// d+=h
265	eor	x19,x19,x22			// Maj(a,b,c)
266	eor	x17,x13,x17,ror#34	// Sigma0(a)
267	add	x20,x20,x19			// h+=Maj(a,b,c)
268	ldr	x19,[x30],#8		// *K++, x28 in next round
269	//add	x20,x20,x17			// h+=Sigma0(a)
270#ifndef	__ARMEB__
271	rev	x11,x11			// 8
272#endif
273	add	x20,x20,x17			// h+=Sigma0(a)
274	ror	x16,x24,#14
275	add	x27,x27,x19			// h+=K[i]
276	eor	x14,x24,x24,ror#23
277	and	x17,x25,x24
278	bic	x19,x26,x24
279	add	x27,x27,x11			// h+=X[i]
280	orr	x17,x17,x19			// Ch(e,f,g)
281	eor	x19,x20,x21			// a^b, b^c in next round
282	eor	x16,x16,x14,ror#18	// Sigma1(e)
283	ror	x14,x20,#28
284	add	x27,x27,x17			// h+=Ch(e,f,g)
285	eor	x17,x20,x20,ror#5
286	add	x27,x27,x16			// h+=Sigma1(e)
287	and	x28,x28,x19			// (b^c)&=(a^b)
288	add	x23,x23,x27			// d+=h
289	eor	x28,x28,x21			// Maj(a,b,c)
290	eor	x17,x14,x17,ror#34	// Sigma0(a)
291	add	x27,x27,x28			// h+=Maj(a,b,c)
292	ldr	x28,[x30],#8		// *K++, x19 in next round
293	//add	x27,x27,x17			// h+=Sigma0(a)
294#ifndef	__ARMEB__
295	rev	x12,x12			// 9
296#endif
297	ldp	x13,x14,[x1],#2*8
298	add	x27,x27,x17			// h+=Sigma0(a)
299	ror	x16,x23,#14
300	add	x26,x26,x28			// h+=K[i]
301	eor	x15,x23,x23,ror#23
302	and	x17,x24,x23
303	bic	x28,x25,x23
304	add	x26,x26,x12			// h+=X[i]
305	orr	x17,x17,x28			// Ch(e,f,g)
306	eor	x28,x27,x20			// a^b, b^c in next round
307	eor	x16,x16,x15,ror#18	// Sigma1(e)
308	ror	x15,x27,#28
309	add	x26,x26,x17			// h+=Ch(e,f,g)
310	eor	x17,x27,x27,ror#5
311	add	x26,x26,x16			// h+=Sigma1(e)
312	and	x19,x19,x28			// (b^c)&=(a^b)
313	add	x22,x22,x26			// d+=h
314	eor	x19,x19,x20			// Maj(a,b,c)
315	eor	x17,x15,x17,ror#34	// Sigma0(a)
316	add	x26,x26,x19			// h+=Maj(a,b,c)
317	ldr	x19,[x30],#8		// *K++, x28 in next round
318	//add	x26,x26,x17			// h+=Sigma0(a)
319#ifndef	__ARMEB__
320	rev	x13,x13			// 10
321#endif
322	add	x26,x26,x17			// h+=Sigma0(a)
323	ror	x16,x22,#14
324	add	x25,x25,x19			// h+=K[i]
325	eor	x0,x22,x22,ror#23
326	and	x17,x23,x22
327	bic	x19,x24,x22
328	add	x25,x25,x13			// h+=X[i]
329	orr	x17,x17,x19			// Ch(e,f,g)
330	eor	x19,x26,x27			// a^b, b^c in next round
331	eor	x16,x16,x0,ror#18	// Sigma1(e)
332	ror	x0,x26,#28
333	add	x25,x25,x17			// h+=Ch(e,f,g)
334	eor	x17,x26,x26,ror#5
335	add	x25,x25,x16			// h+=Sigma1(e)
336	and	x28,x28,x19			// (b^c)&=(a^b)
337	add	x21,x21,x25			// d+=h
338	eor	x28,x28,x27			// Maj(a,b,c)
339	eor	x17,x0,x17,ror#34	// Sigma0(a)
340	add	x25,x25,x28			// h+=Maj(a,b,c)
341	ldr	x28,[x30],#8		// *K++, x19 in next round
342	//add	x25,x25,x17			// h+=Sigma0(a)
343#ifndef	__ARMEB__
344	rev	x14,x14			// 11
345#endif
346	ldp	x15,x0,[x1],#2*8
347	add	x25,x25,x17			// h+=Sigma0(a)
// from here on the per-round scratch register is one that still holds a
// message word, so that word is parked in a stack slot first
348	str	x6,[sp,#24]
349	ror	x16,x21,#14
350	add	x24,x24,x28			// h+=K[i]
351	eor	x6,x21,x21,ror#23
352	and	x17,x22,x21
353	bic	x28,x23,x21
354	add	x24,x24,x14			// h+=X[i]
355	orr	x17,x17,x28			// Ch(e,f,g)
356	eor	x28,x25,x26			// a^b, b^c in next round
357	eor	x16,x16,x6,ror#18	// Sigma1(e)
358	ror	x6,x25,#28
359	add	x24,x24,x17			// h+=Ch(e,f,g)
360	eor	x17,x25,x25,ror#5
361	add	x24,x24,x16			// h+=Sigma1(e)
362	and	x19,x19,x28			// (b^c)&=(a^b)
363	add	x20,x20,x24			// d+=h
364	eor	x19,x19,x26			// Maj(a,b,c)
365	eor	x17,x6,x17,ror#34	// Sigma0(a)
366	add	x24,x24,x19			// h+=Maj(a,b,c)
367	ldr	x19,[x30],#8		// *K++, x28 in next round
368	//add	x24,x24,x17			// h+=Sigma0(a)
369#ifndef	__ARMEB__
370	rev	x15,x15			// 12
371#endif
372	add	x24,x24,x17			// h+=Sigma0(a)
373	str	x7,[sp,#0]
374	ror	x16,x20,#14
375	add	x23,x23,x19			// h+=K[i]
376	eor	x7,x20,x20,ror#23
377	and	x17,x21,x20
378	bic	x19,x22,x20
379	add	x23,x23,x15			// h+=X[i]
380	orr	x17,x17,x19			// Ch(e,f,g)
381	eor	x19,x24,x25			// a^b, b^c in next round
382	eor	x16,x16,x7,ror#18	// Sigma1(e)
383	ror	x7,x24,#28
384	add	x23,x23,x17			// h+=Ch(e,f,g)
385	eor	x17,x24,x24,ror#5
386	add	x23,x23,x16			// h+=Sigma1(e)
387	and	x28,x28,x19			// (b^c)&=(a^b)
388	add	x27,x27,x23			// d+=h
389	eor	x28,x28,x25			// Maj(a,b,c)
390	eor	x17,x7,x17,ror#34	// Sigma0(a)
391	add	x23,x23,x28			// h+=Maj(a,b,c)
392	ldr	x28,[x30],#8		// *K++, x19 in next round
393	//add	x23,x23,x17			// h+=Sigma0(a)
394#ifndef	__ARMEB__
395	rev	x0,x0			// 13
396#endif
// last two input words of the block; no post-increment, and x1 itself is
// overwritten (the input pointer was saved at [x29,#112] above)
397	ldp	x1,x2,[x1]
398	add	x23,x23,x17			// h+=Sigma0(a)
399	str	x8,[sp,#8]
400	ror	x16,x27,#14
401	add	x22,x22,x28			// h+=K[i]
402	eor	x8,x27,x27,ror#23
403	and	x17,x20,x27
404	bic	x28,x21,x27
405	add	x22,x22,x0			// h+=X[i]
406	orr	x17,x17,x28			// Ch(e,f,g)
407	eor	x28,x23,x24			// a^b, b^c in next round
408	eor	x16,x16,x8,ror#18	// Sigma1(e)
409	ror	x8,x23,#28
410	add	x22,x22,x17			// h+=Ch(e,f,g)
411	eor	x17,x23,x23,ror#5
412	add	x22,x22,x16			// h+=Sigma1(e)
413	and	x19,x19,x28			// (b^c)&=(a^b)
414	add	x26,x26,x22			// d+=h
415	eor	x19,x19,x24			// Maj(a,b,c)
416	eor	x17,x8,x17,ror#34	// Sigma0(a)
417	add	x22,x22,x19			// h+=Maj(a,b,c)
418	ldr	x19,[x30],#8		// *K++, x28 in next round
419	//add	x22,x22,x17			// h+=Sigma0(a)
420#ifndef	__ARMEB__
421	rev	x1,x1			// 14
422#endif
423	ldr	x6,[sp,#24]
424	add	x22,x22,x17			// h+=Sigma0(a)
425	str	x9,[sp,#16]
426	ror	x16,x26,#14
427	add	x21,x21,x19			// h+=K[i]
428	eor	x9,x26,x26,ror#23
429	and	x17,x27,x26
430	bic	x19,x20,x26
431	add	x21,x21,x1			// h+=X[i]
432	orr	x17,x17,x19			// Ch(e,f,g)
433	eor	x19,x22,x23			// a^b, b^c in next round
434	eor	x16,x16,x9,ror#18	// Sigma1(e)
435	ror	x9,x22,#28
436	add	x21,x21,x17			// h+=Ch(e,f,g)
437	eor	x17,x22,x22,ror#5
438	add	x21,x21,x16			// h+=Sigma1(e)
439	and	x28,x28,x19			// (b^c)&=(a^b)
440	add	x25,x25,x21			// d+=h
441	eor	x28,x28,x23			// Maj(a,b,c)
442	eor	x17,x9,x17,ror#34	// Sigma0(a)
443	add	x21,x21,x28			// h+=Maj(a,b,c)
444	ldr	x28,[x30],#8		// *K++, x19 in next round
445	//add	x21,x21,x17			// h+=Sigma0(a)
446#ifndef	__ARMEB__
447	rev	x2,x2			// 15
448#endif
// Round 15 is interleaved with the first message-schedule step: x3 is
// updated with x12, sigma0(x4) and sigma1(x1) and is the word added by
// the first round of Loop_16_xx.
449	ldr	x7,[sp,#0]
450	add	x21,x21,x17			// h+=Sigma0(a)
451	str	x10,[sp,#24]
452	ror	x16,x25,#14
453	add	x20,x20,x28			// h+=K[i]
454	ror	x9,x4,#1
455	and	x17,x26,x25
456	ror	x8,x1,#19
457	bic	x28,x27,x25
458	ror	x10,x21,#28
459	add	x20,x20,x2			// h+=X[i]
460	eor	x16,x16,x25,ror#18
461	eor	x9,x9,x4,ror#8
462	orr	x17,x17,x28			// Ch(e,f,g)
463	eor	x28,x21,x22			// a^b, b^c in next round
464	eor	x16,x16,x25,ror#41	// Sigma1(e)
465	eor	x10,x10,x21,ror#34
466	add	x20,x20,x17			// h+=Ch(e,f,g)
467	and	x19,x19,x28			// (b^c)&=(a^b)
468	eor	x8,x8,x1,ror#61
469	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
470	add	x20,x20,x16			// h+=Sigma1(e)
471	eor	x19,x19,x22			// Maj(a,b,c)
472	eor	x17,x10,x21,ror#39	// Sigma0(a)
473	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
474	add	x3,x3,x12
475	add	x24,x24,x20			// d+=h
476	add	x20,x20,x19			// h+=Maj(a,b,c)
477	ldr	x19,[x30],#8		// *K++, x28 in next round
478	add	x3,x3,x9
479	add	x20,x20,x17			// h+=Sigma0(a)
480	add	x3,x3,x8
// Loop_16_xx: sixteen rounds per iteration, used for every round after
// the first sixteen of a block.
//
// Each round adds the already-expanded message word to "h" and, interleaved
// with the round arithmetic, expands the next message word in place
// (word += earlier word + sigma0(...) + sigma1(...)).
// Every round starts by reloading one previously spilled message word from
// [sp,#0..#24] and spilling the register it is about to use as scratch.
// The loop repeats until the table entry just loaded into x19 is zero
// (see cbnz at the bottom).
481Loop_16_xx:
482	ldr	x8,[sp,#8]
483	str	x11,[sp,#0]
484	ror	x16,x24,#14
485	add	x27,x27,x19			// h+=K[i]
486	ror	x10,x5,#1
487	and	x17,x25,x24
488	ror	x9,x2,#19
489	bic	x19,x26,x24
490	ror	x11,x20,#28
491	add	x27,x27,x3			// h+=X[i]
492	eor	x16,x16,x24,ror#18
493	eor	x10,x10,x5,ror#8
494	orr	x17,x17,x19			// Ch(e,f,g)
495	eor	x19,x20,x21			// a^b, b^c in next round
496	eor	x16,x16,x24,ror#41	// Sigma1(e)
497	eor	x11,x11,x20,ror#34
498	add	x27,x27,x17			// h+=Ch(e,f,g)
499	and	x28,x28,x19			// (b^c)&=(a^b)
500	eor	x9,x9,x2,ror#61
501	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
502	add	x27,x27,x16			// h+=Sigma1(e)
503	eor	x28,x28,x21			// Maj(a,b,c)
504	eor	x17,x11,x20,ror#39	// Sigma0(a)
505	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
506	add	x4,x4,x13
507	add	x23,x23,x27			// d+=h
508	add	x27,x27,x28			// h+=Maj(a,b,c)
509	ldr	x28,[x30],#8		// *K++, x19 in next round
510	add	x4,x4,x10
511	add	x27,x27,x17			// h+=Sigma0(a)
512	add	x4,x4,x9
513	ldr	x9,[sp,#16]
514	str	x12,[sp,#8]
515	ror	x16,x23,#14
516	add	x26,x26,x28			// h+=K[i]
517	ror	x11,x6,#1
518	and	x17,x24,x23
519	ror	x10,x3,#19
520	bic	x28,x25,x23
521	ror	x12,x27,#28
522	add	x26,x26,x4			// h+=X[i]
523	eor	x16,x16,x23,ror#18
524	eor	x11,x11,x6,ror#8
525	orr	x17,x17,x28			// Ch(e,f,g)
526	eor	x28,x27,x20			// a^b, b^c in next round
527	eor	x16,x16,x23,ror#41	// Sigma1(e)
528	eor	x12,x12,x27,ror#34
529	add	x26,x26,x17			// h+=Ch(e,f,g)
530	and	x19,x19,x28			// (b^c)&=(a^b)
531	eor	x10,x10,x3,ror#61
532	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
533	add	x26,x26,x16			// h+=Sigma1(e)
534	eor	x19,x19,x20			// Maj(a,b,c)
535	eor	x17,x12,x27,ror#39	// Sigma0(a)
536	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
537	add	x5,x5,x14
538	add	x22,x22,x26			// d+=h
539	add	x26,x26,x19			// h+=Maj(a,b,c)
540	ldr	x19,[x30],#8		// *K++, x28 in next round
541	add	x5,x5,x11
542	add	x26,x26,x17			// h+=Sigma0(a)
543	add	x5,x5,x10
544	ldr	x10,[sp,#24]
545	str	x13,[sp,#16]
546	ror	x16,x22,#14
547	add	x25,x25,x19			// h+=K[i]
548	ror	x12,x7,#1
549	and	x17,x23,x22
550	ror	x11,x4,#19
551	bic	x19,x24,x22
552	ror	x13,x26,#28
553	add	x25,x25,x5			// h+=X[i]
554	eor	x16,x16,x22,ror#18
555	eor	x12,x12,x7,ror#8
556	orr	x17,x17,x19			// Ch(e,f,g)
557	eor	x19,x26,x27			// a^b, b^c in next round
558	eor	x16,x16,x22,ror#41	// Sigma1(e)
559	eor	x13,x13,x26,ror#34
560	add	x25,x25,x17			// h+=Ch(e,f,g)
561	and	x28,x28,x19			// (b^c)&=(a^b)
562	eor	x11,x11,x4,ror#61
563	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
564	add	x25,x25,x16			// h+=Sigma1(e)
565	eor	x28,x28,x27			// Maj(a,b,c)
566	eor	x17,x13,x26,ror#39	// Sigma0(a)
567	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
568	add	x6,x6,x15
569	add	x21,x21,x25			// d+=h
570	add	x25,x25,x28			// h+=Maj(a,b,c)
571	ldr	x28,[x30],#8		// *K++, x19 in next round
572	add	x6,x6,x12
573	add	x25,x25,x17			// h+=Sigma0(a)
574	add	x6,x6,x11
575	ldr	x11,[sp,#0]
576	str	x14,[sp,#24]
577	ror	x16,x21,#14
578	add	x24,x24,x28			// h+=K[i]
579	ror	x13,x8,#1
580	and	x17,x22,x21
581	ror	x12,x5,#19
582	bic	x28,x23,x21
583	ror	x14,x25,#28
584	add	x24,x24,x6			// h+=X[i]
585	eor	x16,x16,x21,ror#18
586	eor	x13,x13,x8,ror#8
587	orr	x17,x17,x28			// Ch(e,f,g)
588	eor	x28,x25,x26			// a^b, b^c in next round
589	eor	x16,x16,x21,ror#41	// Sigma1(e)
590	eor	x14,x14,x25,ror#34
591	add	x24,x24,x17			// h+=Ch(e,f,g)
592	and	x19,x19,x28			// (b^c)&=(a^b)
593	eor	x12,x12,x5,ror#61
594	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
595	add	x24,x24,x16			// h+=Sigma1(e)
596	eor	x19,x19,x26			// Maj(a,b,c)
597	eor	x17,x14,x25,ror#39	// Sigma0(a)
598	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
599	add	x7,x7,x0
600	add	x20,x20,x24			// d+=h
601	add	x24,x24,x19			// h+=Maj(a,b,c)
602	ldr	x19,[x30],#8		// *K++, x28 in next round
603	add	x7,x7,x13
604	add	x24,x24,x17			// h+=Sigma0(a)
605	add	x7,x7,x12
606	ldr	x12,[sp,#8]
607	str	x15,[sp,#0]
608	ror	x16,x20,#14
609	add	x23,x23,x19			// h+=K[i]
610	ror	x14,x9,#1
611	and	x17,x21,x20
612	ror	x13,x6,#19
613	bic	x19,x22,x20
614	ror	x15,x24,#28
615	add	x23,x23,x7			// h+=X[i]
616	eor	x16,x16,x20,ror#18
617	eor	x14,x14,x9,ror#8
618	orr	x17,x17,x19			// Ch(e,f,g)
619	eor	x19,x24,x25			// a^b, b^c in next round
620	eor	x16,x16,x20,ror#41	// Sigma1(e)
621	eor	x15,x15,x24,ror#34
622	add	x23,x23,x17			// h+=Ch(e,f,g)
623	and	x28,x28,x19			// (b^c)&=(a^b)
624	eor	x13,x13,x6,ror#61
625	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
626	add	x23,x23,x16			// h+=Sigma1(e)
627	eor	x28,x28,x25			// Maj(a,b,c)
628	eor	x17,x15,x24,ror#39	// Sigma0(a)
629	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
630	add	x8,x8,x1
631	add	x27,x27,x23			// d+=h
632	add	x23,x23,x28			// h+=Maj(a,b,c)
633	ldr	x28,[x30],#8		// *K++, x19 in next round
634	add	x8,x8,x14
635	add	x23,x23,x17			// h+=Sigma0(a)
636	add	x8,x8,x13
637	ldr	x13,[sp,#16]
638	str	x0,[sp,#8]
639	ror	x16,x27,#14
640	add	x22,x22,x28			// h+=K[i]
641	ror	x15,x10,#1
642	and	x17,x20,x27
643	ror	x14,x7,#19
644	bic	x28,x21,x27
645	ror	x0,x23,#28
646	add	x22,x22,x8			// h+=X[i]
647	eor	x16,x16,x27,ror#18
648	eor	x15,x15,x10,ror#8
649	orr	x17,x17,x28			// Ch(e,f,g)
650	eor	x28,x23,x24			// a^b, b^c in next round
651	eor	x16,x16,x27,ror#41	// Sigma1(e)
652	eor	x0,x0,x23,ror#34
653	add	x22,x22,x17			// h+=Ch(e,f,g)
654	and	x19,x19,x28			// (b^c)&=(a^b)
655	eor	x14,x14,x7,ror#61
656	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
657	add	x22,x22,x16			// h+=Sigma1(e)
658	eor	x19,x19,x24			// Maj(a,b,c)
659	eor	x17,x0,x23,ror#39	// Sigma0(a)
660	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
661	add	x9,x9,x2
662	add	x26,x26,x22			// d+=h
663	add	x22,x22,x19			// h+=Maj(a,b,c)
664	ldr	x19,[x30],#8		// *K++, x28 in next round
665	add	x9,x9,x15
666	add	x22,x22,x17			// h+=Sigma0(a)
667	add	x9,x9,x14
668	ldr	x14,[sp,#24]
669	str	x1,[sp,#16]
670	ror	x16,x26,#14
671	add	x21,x21,x19			// h+=K[i]
672	ror	x0,x11,#1
673	and	x17,x27,x26
674	ror	x15,x8,#19
675	bic	x19,x20,x26
676	ror	x1,x22,#28
677	add	x21,x21,x9			// h+=X[i]
678	eor	x16,x16,x26,ror#18
679	eor	x0,x0,x11,ror#8
680	orr	x17,x17,x19			// Ch(e,f,g)
681	eor	x19,x22,x23			// a^b, b^c in next round
682	eor	x16,x16,x26,ror#41	// Sigma1(e)
683	eor	x1,x1,x22,ror#34
684	add	x21,x21,x17			// h+=Ch(e,f,g)
685	and	x28,x28,x19			// (b^c)&=(a^b)
686	eor	x15,x15,x8,ror#61
687	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
688	add	x21,x21,x16			// h+=Sigma1(e)
689	eor	x28,x28,x23			// Maj(a,b,c)
690	eor	x17,x1,x22,ror#39	// Sigma0(a)
691	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
692	add	x10,x10,x3
693	add	x25,x25,x21			// d+=h
694	add	x21,x21,x28			// h+=Maj(a,b,c)
695	ldr	x28,[x30],#8		// *K++, x19 in next round
696	add	x10,x10,x0
697	add	x21,x21,x17			// h+=Sigma0(a)
698	add	x10,x10,x15
699	ldr	x15,[sp,#0]
700	str	x2,[sp,#24]
701	ror	x16,x25,#14
702	add	x20,x20,x28			// h+=K[i]
703	ror	x1,x12,#1
704	and	x17,x26,x25
705	ror	x0,x9,#19
706	bic	x28,x27,x25
707	ror	x2,x21,#28
708	add	x20,x20,x10			// h+=X[i]
709	eor	x16,x16,x25,ror#18
710	eor	x1,x1,x12,ror#8
711	orr	x17,x17,x28			// Ch(e,f,g)
712	eor	x28,x21,x22			// a^b, b^c in next round
713	eor	x16,x16,x25,ror#41	// Sigma1(e)
714	eor	x2,x2,x21,ror#34
715	add	x20,x20,x17			// h+=Ch(e,f,g)
716	and	x19,x19,x28			// (b^c)&=(a^b)
717	eor	x0,x0,x9,ror#61
718	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
719	add	x20,x20,x16			// h+=Sigma1(e)
720	eor	x19,x19,x22			// Maj(a,b,c)
721	eor	x17,x2,x21,ror#39	// Sigma0(a)
722	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
723	add	x11,x11,x4
724	add	x24,x24,x20			// d+=h
725	add	x20,x20,x19			// h+=Maj(a,b,c)
726	ldr	x19,[x30],#8		// *K++, x28 in next round
727	add	x11,x11,x1
728	add	x20,x20,x17			// h+=Sigma0(a)
729	add	x11,x11,x0
730	ldr	x0,[sp,#8]
731	str	x3,[sp,#0]
732	ror	x16,x24,#14
733	add	x27,x27,x19			// h+=K[i]
734	ror	x2,x13,#1
735	and	x17,x25,x24
736	ror	x1,x10,#19
737	bic	x19,x26,x24
738	ror	x3,x20,#28
739	add	x27,x27,x11			// h+=X[i]
740	eor	x16,x16,x24,ror#18
741	eor	x2,x2,x13,ror#8
742	orr	x17,x17,x19			// Ch(e,f,g)
743	eor	x19,x20,x21			// a^b, b^c in next round
744	eor	x16,x16,x24,ror#41	// Sigma1(e)
745	eor	x3,x3,x20,ror#34
746	add	x27,x27,x17			// h+=Ch(e,f,g)
747	and	x28,x28,x19			// (b^c)&=(a^b)
748	eor	x1,x1,x10,ror#61
749	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
750	add	x27,x27,x16			// h+=Sigma1(e)
751	eor	x28,x28,x21			// Maj(a,b,c)
752	eor	x17,x3,x20,ror#39	// Sigma0(a)
753	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
754	add	x12,x12,x5
755	add	x23,x23,x27			// d+=h
756	add	x27,x27,x28			// h+=Maj(a,b,c)
757	ldr	x28,[x30],#8		// *K++, x19 in next round
758	add	x12,x12,x2
759	add	x27,x27,x17			// h+=Sigma0(a)
760	add	x12,x12,x1
761	ldr	x1,[sp,#16]
762	str	x4,[sp,#8]
763	ror	x16,x23,#14
764	add	x26,x26,x28			// h+=K[i]
765	ror	x3,x14,#1
766	and	x17,x24,x23
767	ror	x2,x11,#19
768	bic	x28,x25,x23
769	ror	x4,x27,#28
770	add	x26,x26,x12			// h+=X[i]
771	eor	x16,x16,x23,ror#18
772	eor	x3,x3,x14,ror#8
773	orr	x17,x17,x28			// Ch(e,f,g)
774	eor	x28,x27,x20			// a^b, b^c in next round
775	eor	x16,x16,x23,ror#41	// Sigma1(e)
776	eor	x4,x4,x27,ror#34
777	add	x26,x26,x17			// h+=Ch(e,f,g)
778	and	x19,x19,x28			// (b^c)&=(a^b)
779	eor	x2,x2,x11,ror#61
780	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
781	add	x26,x26,x16			// h+=Sigma1(e)
782	eor	x19,x19,x20			// Maj(a,b,c)
783	eor	x17,x4,x27,ror#39	// Sigma0(a)
784	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
785	add	x13,x13,x6
786	add	x22,x22,x26			// d+=h
787	add	x26,x26,x19			// h+=Maj(a,b,c)
788	ldr	x19,[x30],#8		// *K++, x28 in next round
789	add	x13,x13,x3
790	add	x26,x26,x17			// h+=Sigma0(a)
791	add	x13,x13,x2
792	ldr	x2,[sp,#24]
793	str	x5,[sp,#16]
794	ror	x16,x22,#14
795	add	x25,x25,x19			// h+=K[i]
796	ror	x4,x15,#1
797	and	x17,x23,x22
798	ror	x3,x12,#19
799	bic	x19,x24,x22
800	ror	x5,x26,#28
801	add	x25,x25,x13			// h+=X[i]
802	eor	x16,x16,x22,ror#18
803	eor	x4,x4,x15,ror#8
804	orr	x17,x17,x19			// Ch(e,f,g)
805	eor	x19,x26,x27			// a^b, b^c in next round
806	eor	x16,x16,x22,ror#41	// Sigma1(e)
807	eor	x5,x5,x26,ror#34
808	add	x25,x25,x17			// h+=Ch(e,f,g)
809	and	x28,x28,x19			// (b^c)&=(a^b)
810	eor	x3,x3,x12,ror#61
811	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
812	add	x25,x25,x16			// h+=Sigma1(e)
813	eor	x28,x28,x27			// Maj(a,b,c)
814	eor	x17,x5,x26,ror#39	// Sigma0(a)
815	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
816	add	x14,x14,x7
817	add	x21,x21,x25			// d+=h
818	add	x25,x25,x28			// h+=Maj(a,b,c)
819	ldr	x28,[x30],#8		// *K++, x19 in next round
820	add	x14,x14,x4
821	add	x25,x25,x17			// h+=Sigma0(a)
822	add	x14,x14,x3
823	ldr	x3,[sp,#0]
824	str	x6,[sp,#24]
825	ror	x16,x21,#14
826	add	x24,x24,x28			// h+=K[i]
827	ror	x5,x0,#1
828	and	x17,x22,x21
829	ror	x4,x13,#19
830	bic	x28,x23,x21
831	ror	x6,x25,#28
832	add	x24,x24,x14			// h+=X[i]
833	eor	x16,x16,x21,ror#18
834	eor	x5,x5,x0,ror#8
835	orr	x17,x17,x28			// Ch(e,f,g)
836	eor	x28,x25,x26			// a^b, b^c in next round
837	eor	x16,x16,x21,ror#41	// Sigma1(e)
838	eor	x6,x6,x25,ror#34
839	add	x24,x24,x17			// h+=Ch(e,f,g)
840	and	x19,x19,x28			// (b^c)&=(a^b)
841	eor	x4,x4,x13,ror#61
842	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
843	add	x24,x24,x16			// h+=Sigma1(e)
844	eor	x19,x19,x26			// Maj(a,b,c)
845	eor	x17,x6,x25,ror#39	// Sigma0(a)
846	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
847	add	x15,x15,x8
848	add	x20,x20,x24			// d+=h
849	add	x24,x24,x19			// h+=Maj(a,b,c)
850	ldr	x19,[x30],#8		// *K++, x28 in next round
851	add	x15,x15,x5
852	add	x24,x24,x17			// h+=Sigma0(a)
853	add	x15,x15,x4
854	ldr	x4,[sp,#8]
855	str	x7,[sp,#0]
856	ror	x16,x20,#14
857	add	x23,x23,x19			// h+=K[i]
858	ror	x6,x1,#1
859	and	x17,x21,x20
860	ror	x5,x14,#19
861	bic	x19,x22,x20
862	ror	x7,x24,#28
863	add	x23,x23,x15			// h+=X[i]
864	eor	x16,x16,x20,ror#18
865	eor	x6,x6,x1,ror#8
866	orr	x17,x17,x19			// Ch(e,f,g)
867	eor	x19,x24,x25			// a^b, b^c in next round
868	eor	x16,x16,x20,ror#41	// Sigma1(e)
869	eor	x7,x7,x24,ror#34
870	add	x23,x23,x17			// h+=Ch(e,f,g)
871	and	x28,x28,x19			// (b^c)&=(a^b)
872	eor	x5,x5,x14,ror#61
873	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
874	add	x23,x23,x16			// h+=Sigma1(e)
875	eor	x28,x28,x25			// Maj(a,b,c)
876	eor	x17,x7,x24,ror#39	// Sigma0(a)
877	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
878	add	x0,x0,x9
879	add	x27,x27,x23			// d+=h
880	add	x23,x23,x28			// h+=Maj(a,b,c)
881	ldr	x28,[x30],#8		// *K++, x19 in next round
882	add	x0,x0,x6
883	add	x23,x23,x17			// h+=Sigma0(a)
884	add	x0,x0,x5
885	ldr	x5,[sp,#16]
886	str	x8,[sp,#8]
887	ror	x16,x27,#14
888	add	x22,x22,x28			// h+=K[i]
889	ror	x7,x2,#1
890	and	x17,x20,x27
891	ror	x6,x15,#19
892	bic	x28,x21,x27
893	ror	x8,x23,#28
894	add	x22,x22,x0			// h+=X[i]
895	eor	x16,x16,x27,ror#18
896	eor	x7,x7,x2,ror#8
897	orr	x17,x17,x28			// Ch(e,f,g)
898	eor	x28,x23,x24			// a^b, b^c in next round
899	eor	x16,x16,x27,ror#41	// Sigma1(e)
900	eor	x8,x8,x23,ror#34
901	add	x22,x22,x17			// h+=Ch(e,f,g)
902	and	x19,x19,x28			// (b^c)&=(a^b)
903	eor	x6,x6,x15,ror#61
904	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
905	add	x22,x22,x16			// h+=Sigma1(e)
906	eor	x19,x19,x24			// Maj(a,b,c)
907	eor	x17,x8,x23,ror#39	// Sigma0(a)
908	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
909	add	x1,x1,x10
910	add	x26,x26,x22			// d+=h
911	add	x22,x22,x19			// h+=Maj(a,b,c)
912	ldr	x19,[x30],#8		// *K++, x28 in next round
913	add	x1,x1,x7
914	add	x22,x22,x17			// h+=Sigma0(a)
915	add	x1,x1,x6
916	ldr	x6,[sp,#24]
917	str	x9,[sp,#16]
918	ror	x16,x26,#14
919	add	x21,x21,x19			// h+=K[i]
920	ror	x8,x3,#1
921	and	x17,x27,x26
922	ror	x7,x0,#19
923	bic	x19,x20,x26
924	ror	x9,x22,#28
925	add	x21,x21,x1			// h+=X[i]
926	eor	x16,x16,x26,ror#18
927	eor	x8,x8,x3,ror#8
928	orr	x17,x17,x19			// Ch(e,f,g)
929	eor	x19,x22,x23			// a^b, b^c in next round
930	eor	x16,x16,x26,ror#41	// Sigma1(e)
931	eor	x9,x9,x22,ror#34
932	add	x21,x21,x17			// h+=Ch(e,f,g)
933	and	x28,x28,x19			// (b^c)&=(a^b)
934	eor	x7,x7,x0,ror#61
935	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
936	add	x21,x21,x16			// h+=Sigma1(e)
937	eor	x28,x28,x23			// Maj(a,b,c)
938	eor	x17,x9,x22,ror#39	// Sigma0(a)
939	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
940	add	x2,x2,x11
941	add	x25,x25,x21			// d+=h
942	add	x21,x21,x28			// h+=Maj(a,b,c)
943	ldr	x28,[x30],#8		// *K++, x19 in next round
944	add	x2,x2,x8
945	add	x21,x21,x17			// h+=Sigma0(a)
946	add	x2,x2,x7
947	ldr	x7,[sp,#0]
948	str	x10,[sp,#24]
949	ror	x16,x25,#14
950	add	x20,x20,x28			// h+=K[i]
951	ror	x9,x4,#1
952	and	x17,x26,x25
953	ror	x8,x1,#19
954	bic	x28,x27,x25
955	ror	x10,x21,#28
956	add	x20,x20,x2			// h+=X[i]
957	eor	x16,x16,x25,ror#18
958	eor	x9,x9,x4,ror#8
959	orr	x17,x17,x28			// Ch(e,f,g)
960	eor	x28,x21,x22			// a^b, b^c in next round
961	eor	x16,x16,x25,ror#41	// Sigma1(e)
962	eor	x10,x10,x21,ror#34
963	add	x20,x20,x17			// h+=Ch(e,f,g)
964	and	x19,x19,x28			// (b^c)&=(a^b)
965	eor	x8,x8,x1,ror#61
966	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
967	add	x20,x20,x16			// h+=Sigma1(e)
968	eor	x19,x19,x22			// Maj(a,b,c)
969	eor	x17,x10,x21,ror#39	// Sigma0(a)
970	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
971	add	x3,x3,x12
972	add	x24,x24,x20			// d+=h
973	add	x20,x20,x19			// h+=Maj(a,b,c)
974	ldr	x19,[x30],#8		// *K++, x28 in next round
975	add	x3,x3,x9
976	add	x20,x20,x17			// h+=Sigma0(a)
977	add	x3,x3,x8
// x19 now holds the table entry after the one just used; the zero
// terminator at the end of LK512 makes this fall through
978	cbnz	x19,Loop_16_xx
979
// End of one block: restore the pointers parked in the frame, add the
// working variables x20..x27 into the stored state, and either start the
// next block or return.
980	ldp	x0,x2,[x29,#96]
981	ldr	x1,[x29,#112]
// 648 = 81*8: the 80 table entries plus the zero terminator, all consumed
// through post-incrementing loads
982	sub	x30,x30,#648		// rewind
983
984	ldp	x3,x4,[x0]
985	ldp	x5,x6,[x0,#2*8]
// the saved pointer had already been advanced by 2*8 when it was stored,
// so 14*8 more completes the 16*8-byte block
986	add	x1,x1,#14*8			// advance input pointer
987	ldp	x7,x8,[x0,#4*8]
988	add	x20,x20,x3
989	ldp	x9,x10,[x0,#6*8]
990	add	x21,x21,x4
991	add	x22,x22,x5
992	add	x23,x23,x6
993	stp	x20,x21,[x0]
994	add	x24,x24,x7
995	add	x25,x25,x8
996	stp	x22,x23,[x0,#2*8]
997	add	x26,x26,x9
998	add	x27,x27,x10
// x2 is the end-of-input pointer; the stp instructions below do not alter
// the flags tested by b.ne
999	cmp	x1,x2
1000	stp	x24,x25,[x0,#4*8]
1001	stp	x26,x27,[x0,#6*8]
1002	b.ne	Loop
1003
// restore callee-saved registers through x29 (sp still includes the 4*8
// spill area until the add below), then pop the 128-byte frame
1004	ldp	x19,x20,[x29,#16]
1005	add	sp,sp,#4*8
1006	ldp	x21,x22,[x29,#32]
1007	ldp	x23,x24,[x29,#48]
1008	ldp	x25,x26,[x29,#64]
1009	ldp	x27,x28,[x29,#80]
1010	ldp	x29,x30,[sp],#128
1011	ret
1012
1013
// LK512: table of 80 64-bit round constants read sequentially through x30
// by the round code ("*K++"), followed by one zero quad. The zero entry is
// what ends Loop_16_xx (cbnz on the loaded value), so it must stay
// directly after the last constant; the "#648" rewind in the function
// depends on the table being exactly 81 quads long.
1014.align	6
1015
1016LK512:
1017.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1018.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1019.quad	0x3956c25bf348b538,0x59f111f1b605d019
1020.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1021.quad	0xd807aa98a3030242,0x12835b0145706fbe
1022.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1023.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1024.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1025.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1026.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1027.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1028.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1029.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1030.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1031.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1032.quad	0x06ca6351e003826f,0x142929670a0e6e70
1033.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1034.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1035.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1036.quad	0x81c2c92e47edaee6,0x92722c851482353b
1037.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1038.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1039.quad	0xd192e819d6ef5218,0xd69906245565a910
1040.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1041.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1042.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1043.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1044.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1045.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1046.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1047.quad	0x90befffa23631e28,0xa4506cebde82bde9
1048.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1049.quad	0xca273eceea26619c,0xd186b8c721c0c207
1050.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1051.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1052.quad	0x113f9804bef90dae,0x1b710b35131c471b
1053.quad	0x28db77f523047d84,0x32caab7b40c72493
1054.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1055.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1056.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1057.quad	0	// terminator
1058
// LOPENSSL_armcap_P holds the offset from this location to
// _OPENSSL_armcap_P (32-bit under __ILP32__, 64-bit otherwise). Nothing in
// the visible code reads it.
// NOTE(review): presumably kept for a capability check that this variant
// of the file does not contain -- confirm before removing.
1059#ifndef	__KERNEL__
1060.align	3
1061LOPENSSL_armcap_P:
1062# ifdef	__ILP32__
1063.long	_OPENSSL_armcap_P-.
1064# else
1065.quad	_OPENSSL_armcap_P-.
1066# endif
1067#endif
// NUL-terminated ASCII identification string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1068.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1069.align	2
1070.align	2
// common-symbol declaration for the 4-byte _OPENSSL_armcap_P referenced above
1071#ifndef	__KERNEL__
1072.comm	_OPENSSL_armcap_P,4,4
1073#endif
1074