// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
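// MemorySanitizer cannot instrument assembly, so when MSan is enabled the
// assembly implementation is disabled and the C fallback is used instead.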

#if !defined(OPENSSL_NO_ASM)
#include <GFp/arm_arch.h>

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.byte	69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
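@ The .byte string above is the ASCII tag "ECP_NISTZ256 for ARMv4,
@ CRYPTOGAMS by <appro@openssl.org>".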
.align	2
.align	6
#ifdef __thumb2__
.thumb_func	__ecp_nistz256_mul_by_2
#endif
.align	4
__ecp_nistz256_mul_by_2:
	ldr	r4,[r1,#0]
	ldr	r5,[r1,#4]
	ldr	r6,[r1,#8]
	adds	r4,r4,r4		@ a[0:7]+=a[0:7], i.e. add the value to itself
	ldr	r7,[r1,#12]
	adcs	r5,r5,r5
	ldr	r8,[r1,#16]
	adcs	r6,r6,r6
	ldr	r9,[r1,#20]
	adcs	r7,r7,r7
	ldr	r10,[r1,#24]
	adcs	r8,r8,r8
	ldr	r11,[r1,#28]
	adcs	r9,r9,r9
	adcs	r10,r10,r10
	mov	r3,#0
	adcs	r11,r11,r11
	adc	r3,r3,#0

	b	Lreduce_by_sub


@ void	GFp_nistz256_add(BN_ULONG r0[8],const BN_ULONG r1[8],
@					const BN_ULONG r2[8]);
.globl	_GFp_nistz256_add
.private_extern	_GFp_nistz256_add
#ifdef __thumb2__
.thumb_func	_GFp_nistz256_add
#endif
.align	4
_GFp_nistz256_add:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	bl	__ecp_nistz256_add
#if __ARM_ARCH__>=5 || !defined(__thumb__)
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	bx	lr			@ interoperable with Thumb ISA:-)
#endif


#ifdef __thumb2__
.thumb_func	__ecp_nistz256_add
#endif
.align	4
__ecp_nistz256_add:
	str	lr,[sp,#-4]!		@ push lr

	ldr	r4,[r1,#0]
	ldr	r5,[r1,#4]
	ldr	r6,[r1,#8]
	ldr	r7,[r1,#12]
	ldr	r8,[r1,#16]
	ldr	r3,[r2,#0]
	ldr	r9,[r1,#20]
	ldr	r12,[r2,#4]
	ldr	r10,[r1,#24]
	ldr	r14,[r2,#8]
	ldr	r11,[r1,#28]
	ldr	r1,[r2,#12]
	adds	r4,r4,r3
	ldr	r3,[r2,#16]
	adcs	r5,r5,r12
	ldr	r12,[r2,#20]
	adcs	r6,r6,r14
	ldr	r14,[r2,#24]
	adcs	r7,r7,r1
	ldr	r1,[r2,#28]
	adcs	r8,r8,r3
	adcs	r9,r9,r12
	adcs	r10,r10,r14
	mov	r3,#0
	adcs	r11,r11,r1
	adc	r3,r3,#0
	ldr	lr,[sp],#4		@ pop lr

Lreduce_by_sub:

	@ if a+b >= modulus, subtract modulus.
	@
	@ But since comparison implies subtraction, we subtract
	@ modulus and then add it back if subtraction borrowed.
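	@
	@ The modulus p = 2^256-2^224+2^192+2^96-1 is, as little-endian
	@ 32-bit words, { -1,-1,-1,0,0,0,1,-1 }; these are exactly the
	@ immediates subtracted below, with r3 absorbing the final borrow.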

	subs	r4,r4,#-1
	sbcs	r5,r5,#-1
	sbcs	r6,r6,#-1
	sbcs	r7,r7,#0
	sbcs	r8,r8,#0
	sbcs	r9,r9,#0
	sbcs	r10,r10,#1
	sbcs	r11,r11,#-1
	sbc	r3,r3,#0

	@ Note that because mod has special form, i.e. consists of
	@ 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	@ using the borrow value as a whole or extracting a single bit.
	@ Follow the r3 register...

	adds	r4,r4,r3		@ add synthesized modulus
	adcs	r5,r5,r3
	str	r4,[r0,#0]
	adcs	r6,r6,r3
	str	r5,[r0,#4]
	adcs	r7,r7,#0
	str	r6,[r0,#8]
	adcs	r8,r8,#0
	str	r7,[r0,#12]
	adcs	r9,r9,#0
	str	r8,[r0,#16]
	adcs	r10,r10,r3,lsr#31
	str	r9,[r0,#20]
	adcs	r11,r11,r3
	str	r10,[r0,#24]
	str	r11,[r0,#28]

	mov	pc,lr


#ifdef __thumb2__
.thumb_func	__ecp_nistz256_mul_by_3
#endif
.align	4
__ecp_nistz256_mul_by_3:
	str	lr,[sp,#-4]!		@ push lr

	@ As multiplication by 3 is performed as 2*n+n, below are inline
	@ copies of __ecp_nistz256_mul_by_2 and __ecp_nistz256_add; see
	@ the corresponding subroutines for details.

	ldr	r4,[r1,#0]
	ldr	r5,[r1,#4]
	ldr	r6,[r1,#8]
	adds	r4,r4,r4		@ a[0:7]+=a[0:7]
	ldr	r7,[r1,#12]
	adcs	r5,r5,r5
	ldr	r8,[r1,#16]
	adcs	r6,r6,r6
	ldr	r9,[r1,#20]
	adcs	r7,r7,r7
	ldr	r10,[r1,#24]
	adcs	r8,r8,r8
	ldr	r11,[r1,#28]
	adcs	r9,r9,r9
	adcs	r10,r10,r10
	mov	r3,#0
	adcs	r11,r11,r11
	adc	r3,r3,#0

	subs	r4,r4,#-1		@ Lreduce_by_sub but without stores
	sbcs	r5,r5,#-1
	sbcs	r6,r6,#-1
	sbcs	r7,r7,#0
	sbcs	r8,r8,#0
	sbcs	r9,r9,#0
	sbcs	r10,r10,#1
	sbcs	r11,r11,#-1
	sbc	r3,r3,#0

	adds	r4,r4,r3		@ add synthesized modulus
	adcs	r5,r5,r3
	adcs	r6,r6,r3
	adcs	r7,r7,#0
	adcs	r8,r8,#0
	ldr	r2,[r1,#0]
	adcs	r9,r9,#0
	ldr	r12,[r1,#4]
	adcs	r10,r10,r3,lsr#31
	ldr	r14,[r1,#8]
	adc	r11,r11,r3

	ldr	r3,[r1,#12]
	adds	r4,r4,r2		@ 2*a[0:7]+=a[0:7]
	ldr	r2,[r1,#16]
	adcs	r5,r5,r12
	ldr	r12,[r1,#20]
	adcs	r6,r6,r14
	ldr	r14,[r1,#24]
	adcs	r7,r7,r3
	ldr	r1,[r1,#28]
	adcs	r8,r8,r2
	adcs	r9,r9,r12
	adcs	r10,r10,r14
	mov	r3,#0
	adcs	r11,r11,r1
	adc	r3,r3,#0
	ldr	lr,[sp],#4		@ pop lr

	b	Lreduce_by_sub


#ifdef __thumb2__
.thumb_func	__ecp_nistz256_div_by_2
#endif
.align	4
__ecp_nistz256_div_by_2:
	@ ret = (a is odd ? a+mod : a) >> 1

	ldr	r4,[r1,#0]
	ldr	r5,[r1,#4]
	ldr	r6,[r1,#8]
	mov	r3,r4,lsl#31		@ place the least significant bit in
					@ the most significant position; now an
					@ arithmetic right shift by 31 produces
					@ -1 or 0, while a logical right shift
					@ produces 1 or 0, which is how the
					@ modulus is conditionally synthesized
					@ in this case...
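					@ (e.g. for odd a: r3 = 1<<31, so
					@ r3,asr#31 = -1 and r3,lsr#31 = 1,
					@ and exactly p is added; for even a
					@ both shifts give 0 and 0 is added.)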
	ldr	r7,[r1,#12]
	adds	r4,r4,r3,asr#31
	ldr	r8,[r1,#16]
	adcs	r5,r5,r3,asr#31
	ldr	r9,[r1,#20]
	adcs	r6,r6,r3,asr#31
	ldr	r10,[r1,#24]
	adcs	r7,r7,#0
	ldr	r11,[r1,#28]
	adcs	r8,r8,#0
	mov	r4,r4,lsr#1		@ a[0:7]>>=1; we can start early
					@ because it doesn't affect flags
	adcs	r9,r9,#0
	orr	r4,r4,r5,lsl#31
	adcs	r10,r10,r3,lsr#31
	mov	r2,#0
	adcs	r11,r11,r3,asr#31
	mov	r5,r5,lsr#1
	adc	r2,r2,#0	@ top-most carry bit from addition

	orr	r5,r5,r6,lsl#31
	mov	r6,r6,lsr#1
	str	r4,[r0,#0]
	orr	r6,r6,r7,lsl#31
	mov	r7,r7,lsr#1
	str	r5,[r0,#4]
	orr	r7,r7,r8,lsl#31
	mov	r8,r8,lsr#1
	str	r6,[r0,#8]
	orr	r8,r8,r9,lsl#31
	mov	r9,r9,lsr#1
	str	r7,[r0,#12]
	orr	r9,r9,r10,lsl#31
	mov	r10,r10,lsr#1
	str	r8,[r0,#16]
	orr	r10,r10,r11,lsl#31
	mov	r11,r11,lsr#1
	str	r9,[r0,#20]
	orr	r11,r11,r2,lsl#31	@ don't forget the top-most carry bit
	str	r10,[r0,#24]
	str	r11,[r0,#28]

	mov	pc,lr


#ifdef __thumb2__
.thumb_func	__ecp_nistz256_sub
#endif
.align	4
__ecp_nistz256_sub:
	str	lr,[sp,#-4]!		@ push lr

	ldr	r4,[r1,#0]
	ldr	r5,[r1,#4]
	ldr	r6,[r1,#8]
	ldr	r7,[r1,#12]
	ldr	r8,[r1,#16]
	ldr	r3,[r2,#0]
	ldr	r9,[r1,#20]
	ldr	r12,[r2,#4]
	ldr	r10,[r1,#24]
	ldr	r14,[r2,#8]
	ldr	r11,[r1,#28]
	ldr	r1,[r2,#12]
	subs	r4,r4,r3
	ldr	r3,[r2,#16]
	sbcs	r5,r5,r12
	ldr	r12,[r2,#20]
	sbcs	r6,r6,r14
	ldr	r14,[r2,#24]
	sbcs	r7,r7,r1
	ldr	r1,[r2,#28]
	sbcs	r8,r8,r3
	sbcs	r9,r9,r12
	sbcs	r10,r10,r14
	sbcs	r11,r11,r1
	sbc	r3,r3,r3		@ broadcast borrow bit
	ldr	lr,[sp],#4		@ pop lr

Lreduce_by_add:

	@ if a-b borrows, add modulus.
	@
	@ Note that because mod has special form, i.e. consists of
	@ 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	@ broadcasting the borrow bit to a register, r3, and using it
	@ as a whole or extracting a single bit.
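	@ Here r3 is 0 (no borrow) or 0xffffffff (borrow), so the chain
	@ below adds { r3,r3,r3,0,0,0,r3>>31,r3 }, i.e. p or 0.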

	adds	r4,r4,r3		@ add synthesized modulus
	adcs	r5,r5,r3
	str	r4,[r0,#0]
	adcs	r6,r6,r3
	str	r5,[r0,#4]
	adcs	r7,r7,#0
	str	r6,[r0,#8]
	adcs	r8,r8,#0
	str	r7,[r0,#12]
	adcs	r9,r9,#0
	str	r8,[r0,#16]
	adcs	r10,r10,r3,lsr#31
	str	r9,[r0,#20]
	adcs	r11,r11,r3
	str	r10,[r0,#24]
	str	r11,[r0,#28]

	mov	pc,lr


@ void	GFp_nistz256_neg(BN_ULONG r0[8],const BN_ULONG r1[8]);
.globl	_GFp_nistz256_neg
.private_extern	_GFp_nistz256_neg
#ifdef __thumb2__
.thumb_func	_GFp_nistz256_neg
#endif
.align	4
_GFp_nistz256_neg:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	bl	__ecp_nistz256_neg
#if __ARM_ARCH__>=5 || !defined(__thumb__)
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	bx	lr			@ interoperable with Thumb ISA:-)
#endif


#ifdef __thumb2__
.thumb_func	__ecp_nistz256_neg
#endif
.align	4
__ecp_nistz256_neg:
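	@ out = -a mod p: 0 - a borrows for any a != 0, so Lreduce_by_add
	@ adds p back, giving p - a; zero stays zero.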
	ldr	r4,[r1,#0]
	eor	r3,r3,r3
	ldr	r5,[r1,#4]
	ldr	r6,[r1,#8]
	subs	r4,r3,r4
	ldr	r7,[r1,#12]
	sbcs	r5,r3,r5
	ldr	r8,[r1,#16]
	sbcs	r6,r3,r6
	ldr	r9,[r1,#20]
	sbcs	r7,r3,r7
	ldr	r10,[r1,#24]
	sbcs	r8,r3,r8
	ldr	r11,[r1,#28]
	sbcs	r9,r3,r9
	sbcs	r10,r3,r10
	sbcs	r11,r3,r11
	sbc	r3,r3,r3

	b	Lreduce_by_add

@ void	GFp_nistz256_mul_mont(BN_ULONG r0[8],const BN_ULONG r1[8],
@					     const BN_ULONG r2[8]);
.globl	_GFp_nistz256_mul_mont
.private_extern	_GFp_nistz256_mul_mont
#ifdef __thumb2__
.thumb_func	_GFp_nistz256_mul_mont
#endif
.align	4
_GFp_nistz256_mul_mont:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	bl	__ecp_nistz256_mul_mont
#if __ARM_ARCH__>=5 || !defined(__thumb__)
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	bx	lr			@ interoperable with Thumb ISA:-)
#endif


#ifdef __thumb2__
.thumb_func	__ecp_nistz256_mul_mont
#endif
.align	4
__ecp_nistz256_mul_mont:
	stmdb	sp!,{r0,r1,r2,lr}			@ make a copy of arguments too

	ldr	r2,[r2,#0]			@ b[0]
	ldmia	r1,{r4,r5,r6,r7,r8,r9,r10,r11}

	umull	r3,r14,r4,r2		@ r[0]=a[0]*b[0]
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}		@ copy a[0-7] to stack, so
						@ that it can be addressed
						@ without spending a register
						@ on the address
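	@ stack frame: the a[0-7] copy sits at sp+0..28, the caller's r0
	@ (r_ptr) at sp+32, r1 at sp+36 (reused below as overflow
	@ scratch), r2 (b_ptr) at sp+40 and lr at sp+44.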
	umull	r4,r0,r5,r2		@ r[1]=a[1]*b[0]
	umull	r5,r1,r6,r2
	adds	r4,r4,r14		@ accumulate high part of mult
	umull	r6,r12,r7,r2
	adcs	r5,r5,r0
	umull	r7,r14,r8,r2
	adcs	r6,r6,r1
	umull	r8,r0,r9,r2
	adcs	r7,r7,r12
	umull	r9,r1,r10,r2
	adcs	r8,r8,r14
	umull	r10,r12,r11,r2
	adcs	r9,r9,r0
	adcs	r10,r10,r1
	eor	r14,r14,r14			@ first overflow bit is zero
	adc	r11,r12,#0
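	@ Montgomery reduction needs no multiplication here: p = -1 mod 2^32,
	@ so -p^-1 mod 2^32 = 1 and each step's multiplier is the low word
	@ r[0] itself; r[0]*p is folded in as r[3]+=r[0], r[6]+=r[0],
	@ r[8]+=r[0], r[7]-=r[0], after which the low word cancels and is
	@ dropped.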
	@ multiplication-less reduction 1
	adds	r6,r6,r3		@ r[3]+=r[0]
	ldr	r2,[sp,#40]			@ restore b_ptr
	adcs	r7,r7,#0		@ r[4]+=0
	adcs	r8,r8,#0		@ r[5]+=0
	adcs	r9,r9,r3		@ r[6]+=r[0]
	ldr	r1,[sp,#0]			@ load a[0]
	adcs	r10,r10,#0		@ r[7]+=0
	ldr	r2,[r2,#4*1]			@ load b[i]
	adcs	r11,r11,r3		@ r[8]+=r[0]
	eor	r0,r0,r0
	adc	r14,r14,#0			@ overflow bit
	subs	r10,r10,r3		@ r[7]-=r[0]
	ldr	r12,[sp,#4]			@ a[1]
	sbcs	r11,r11,#0		@ r[8]-=0
	umlal	r4,r0,r1,r2		@ "r[0]"+=a[0]*b[i]
	eor	r1,r1,r1
	sbc	r3,r14,#0			@ overflow bit; keep in mind
						@ that the net result is an
						@ addition, which makes
						@ underflow impossible

	ldr	r14,[sp,#8]			@ a[2]
	umlal	r5,r1,r12,r2		@ "r[1]"+=a[1]*b[i]
	str	r3,[sp,#36]		@ temporarily offload overflow
	eor	r12,r12,r12
	ldr	r3,[sp,#12]			@ a[3], reusing the offloaded r3
	umlal	r6,r12,r14,r2		@ "r[2]"+=a[2]*b[i]
	eor	r14,r14,r14
	adds	r5,r5,r0		@ accumulate high part of mult
	ldr	r0,[sp,#16]			@ a[4]
	umlal	r7,r14,r3,r2		@ "r[3]"+=a[3]*b[i]
	eor	r3,r3,r3
	adcs	r6,r6,r1
	ldr	r1,[sp,#20]			@ a[5]
	umlal	r8,r3,r0,r2		@ "r[4]"+=a[4]*b[i]
	eor	r0,r0,r0
	adcs	r7,r7,r12
	ldr	r12,[sp,#24]			@ a[6]
	umlal	r9,r0,r1,r2		@ "r[5]"+=a[5]*b[i]
	eor	r1,r1,r1
	adcs	r8,r8,r14
	ldr	r14,[sp,#28]			@ a[7]
	umlal	r10,r1,r12,r2		@ "r[6]"+=a[6]*b[i]
	eor	r12,r12,r12
	adcs	r9,r9,r3
	ldr	r3,[sp,#36]		@ restore overflow bit
	umlal	r11,r12,r14,r2		@ "r[7]"+=a[7]*b[i]
	eor	r14,r14,r14
	adcs	r10,r10,r0
	adcs	r11,r11,r1
	adcs	r3,r3,r12
	adc	r14,r14,#0			@ new overflow bit
	@ multiplication-less reduction 2
	adds	r7,r7,r4		@ r[3]+=r[0]
	ldr	r2,[sp,#40]			@ restore b_ptr
	adcs	r8,r8,#0		@ r[4]+=0
	adcs	r9,r9,#0		@ r[5]+=0
	adcs	r10,r10,r4		@ r[6]+=r[0]
	ldr	r1,[sp,#0]			@ load a[0]
	adcs	r11,r11,#0		@ r[7]+=0
	ldr	r2,[r2,#4*2]			@ load b[i]
	adcs	r3,r3,r4		@ r[8]+=r[0]
	eor	r0,r0,r0
	adc	r14,r14,#0			@ overflow bit
	subs	r11,r11,r4		@ r[7]-=r[0]
	ldr	r12,[sp,#4]			@ a[1]
	sbcs	r3,r3,#0		@ r[8]-=0
	umlal	r5,r0,r1,r2		@ "r[0]"+=a[0]*b[i]
	eor	r1,r1,r1
	sbc	r4,r14,#0			@ overflow bit; keep in mind
						@ that the net result is an
						@ addition, which makes
						@ underflow impossible

	ldr	r14,[sp,#8]			@ a[2]
	umlal	r6,r1,r12,r2		@ "r[1]"+=a[1]*b[i]
	str	r4,[sp,#36]		@ temporarily offload overflow
	eor	r12,r12,r12
	ldr	r4,[sp,#12]			@ a[3], reusing the offloaded r4
	umlal	r7,r12,r14,r2		@ "r[2]"+=a[2]*b[i]
	eor	r14,r14,r14
	adds	r6,r6,r0		@ accumulate high part of mult
	ldr	r0,[sp,#16]			@ a[4]
	umlal	r8,r14,r4,r2		@ "r[3]"+=a[3]*b[i]
	eor	r4,r4,r4
	adcs	r7,r7,r1
	ldr	r1,[sp,#20]			@ a[5]
	umlal	r9,r4,r0,r2		@ "r[4]"+=a[4]*b[i]
	eor	r0,r0,r0
	adcs	r8,r8,r12
	ldr	r12,[sp,#24]			@ a[6]
	umlal	r10,r0,r1,r2		@ "r[5]"+=a[5]*b[i]
	eor	r1,r1,r1
	adcs	r9,r9,r14
	ldr	r14,[sp,#28]			@ a[7]
	umlal	r11,r1,r12,r2		@ "r[6]"+=a[6]*b[i]
	eor	r12,r12,r12
	adcs	r10,r10,r4
	ldr	r4,[sp,#36]		@ restore overflow bit
	umlal	r3,r12,r14,r2		@ "r[7]"+=a[7]*b[i]
	eor	r14,r14,r14
	adcs	r11,r11,r0
	adcs	r3,r3,r1
	adcs	r4,r4,r12
	adc	r14,r14,#0			@ new overflow bit
	@ multiplication-less reduction 3
	adds	r8,r8,r5		@ r[3]+=r[0]
	ldr	r2,[sp,#40]			@ restore b_ptr
	adcs	r9,r9,#0		@ r[4]+=0
	adcs	r10,r10,#0		@ r[5]+=0
	adcs	r11,r11,r5		@ r[6]+=r[0]
	ldr	r1,[sp,#0]			@ load a[0]
	adcs	r3,r3,#0		@ r[7]+=0
	ldr	r2,[r2,#4*3]			@ load b[i]
	adcs	r4,r4,r5		@ r[8]+=r[0]
	eor	r0,r0,r0
	adc	r14,r14,#0			@ overflow bit
	subs	r3,r3,r5		@ r[7]-=r[0]
	ldr	r12,[sp,#4]			@ a[1]
	sbcs	r4,r4,#0		@ r[8]-=0
	umlal	r6,r0,r1,r2		@ "r[0]"+=a[0]*b[i]
	eor	r1,r1,r1
	sbc	r5,r14,#0			@ overflow bit; keep in mind
						@ that the net result is an
						@ addition, which makes
						@ underflow impossible

	ldr	r14,[sp,#8]			@ a[2]
	umlal	r7,r1,r12,r2		@ "r[1]"+=a[1]*b[i]
	str	r5,[sp,#36]		@ temporarily offload overflow
	eor	r12,r12,r12
	ldr	r5,[sp,#12]			@ a[3], reusing the offloaded r5
	umlal	r8,r12,r14,r2		@ "r[2]"+=a[2]*b[i]
	eor	r14,r14,r14
	adds	r7,r7,r0		@ accumulate high part of mult
	ldr	r0,[sp,#16]			@ a[4]
	umlal	r9,r14,r5,r2		@ "r[3]"+=a[3]*b[i]
	eor	r5,r5,r5
	adcs	r8,r8,r1
	ldr	r1,[sp,#20]			@ a[5]
	umlal	r10,r5,r0,r2		@ "r[4]"+=a[4]*b[i]
	eor	r0,r0,r0
	adcs	r9,r9,r12
	ldr	r12,[sp,#24]			@ a[6]
	umlal	r11,r0,r1,r2		@ "r[5]"+=a[5]*b[i]
	eor	r1,r1,r1
	adcs	r10,r10,r14
	ldr	r14,[sp,#28]			@ a[7]
	umlal	r3,r1,r12,r2		@ "r[6]"+=a[6]*b[i]
	eor	r12,r12,r12
	adcs	r11,r11,r5
	ldr	r5,[sp,#36]		@ restore overflow bit
	umlal	r4,r12,r14,r2		@ "r[7]"+=a[7]*b[i]
	eor	r14,r14,r14
	adcs	r3,r3,r0
	adcs	r4,r4,r1
	adcs	r5,r5,r12
	adc	r14,r14,#0			@ new overflow bit
	@ multiplication-less reduction 4
	adds	r9,r9,r6		@ r[3]+=r[0]
	ldr	r2,[sp,#40]			@ restore b_ptr
	adcs	r10,r10,#0		@ r[4]+=0
	adcs	r11,r11,#0		@ r[5]+=0
	adcs	r3,r3,r6		@ r[6]+=r[0]
	ldr	r1,[sp,#0]			@ load a[0]
	adcs	r4,r4,#0		@ r[7]+=0
	ldr	r2,[r2,#4*4]			@ load b[i]
	adcs	r5,r5,r6		@ r[8]+=r[0]
	eor	r0,r0,r0
	adc	r14,r14,#0			@ overflow bit
	subs	r4,r4,r6		@ r[7]-=r[0]
	ldr	r12,[sp,#4]			@ a[1]
	sbcs	r5,r5,#0		@ r[8]-=0
	umlal	r7,r0,r1,r2		@ "r[0]"+=a[0]*b[i]
	eor	r1,r1,r1
	sbc	r6,r14,#0			@ overflow bit; keep in mind
						@ that the net result is an
						@ addition, which makes
						@ underflow impossible

	ldr	r14,[sp,#8]			@ a[2]
	umlal	r8,r1,r12,r2		@ "r[1]"+=a[1]*b[i]
	str	r6,[sp,#36]		@ temporarily offload overflow
	eor	r12,r12,r12
	ldr	r6,[sp,#12]			@ a[3], reusing the offloaded r6
	umlal	r9,r12,r14,r2		@ "r[2]"+=a[2]*b[i]
	eor	r14,r14,r14
	adds	r8,r8,r0		@ accumulate high part of mult
	ldr	r0,[sp,#16]			@ a[4]
	umlal	r10,r14,r6,r2		@ "r[3]"+=a[3]*b[i]
	eor	r6,r6,r6
	adcs	r9,r9,r1
	ldr	r1,[sp,#20]			@ a[5]
	umlal	r11,r6,r0,r2		@ "r[4]"+=a[4]*b[i]
	eor	r0,r0,r0
	adcs	r10,r10,r12
	ldr	r12,[sp,#24]			@ a[6]
	umlal	r3,r0,r1,r2		@ "r[5]"+=a[5]*b[i]
	eor	r1,r1,r1
	adcs	r11,r11,r14
	ldr	r14,[sp,#28]			@ a[7]
	umlal	r4,r1,r12,r2		@ "r[6]"+=a[6]*b[i]
	eor	r12,r12,r12
	adcs	r3,r3,r6
	ldr	r6,[sp,#36]		@ restore overflow bit
	umlal	r5,r12,r14,r2		@ "r[7]"+=a[7]*b[i]
	eor	r14,r14,r14
	adcs	r4,r4,r0
	adcs	r5,r5,r1
	adcs	r6,r6,r12
	adc	r14,r14,#0			@ new overflow bit
	@ multiplication-less reduction 5
	adds	r10,r10,r7		@ r[3]+=r[0]
	ldr	r2,[sp,#40]			@ restore b_ptr
	adcs	r11,r11,#0		@ r[4]+=0
	adcs	r3,r3,#0		@ r[5]+=0
	adcs	r4,r4,r7		@ r[6]+=r[0]
	ldr	r1,[sp,#0]			@ load a[0]
	adcs	r5,r5,#0		@ r[7]+=0
	ldr	r2,[r2,#4*5]			@ load b[i]
	adcs	r6,r6,r7		@ r[8]+=r[0]
	eor	r0,r0,r0
	adc	r14,r14,#0			@ overflow bit
	subs	r5,r5,r7		@ r[7]-=r[0]
	ldr	r12,[sp,#4]			@ a[1]
	sbcs	r6,r6,#0		@ r[8]-=0
	umlal	r8,r0,r1,r2		@ "r[0]"+=a[0]*b[i]
	eor	r1,r1,r1
	sbc	r7,r14,#0			@ overflow bit; keep in mind
						@ that the net result is an
						@ addition, which makes
						@ underflow impossible

	ldr	r14,[sp,#8]			@ a[2]
	umlal	r9,r1,r12,r2		@ "r[1]"+=a[1]*b[i]
	str	r7,[sp,#36]		@ temporarily offload overflow
	eor	r12,r12,r12
	ldr	r7,[sp,#12]			@ a[3], reusing the offloaded r7
	umlal	r10,r12,r14,r2		@ "r[2]"+=a[2]*b[i]
	eor	r14,r14,r14
	adds	r9,r9,r0		@ accumulate high part of mult
	ldr	r0,[sp,#16]			@ a[4]
	umlal	r11,r14,r7,r2		@ "r[3]"+=a[3]*b[i]
	eor	r7,r7,r7
	adcs	r10,r10,r1
	ldr	r1,[sp,#20]			@ a[5]
	umlal	r3,r7,r0,r2		@ "r[4]"+=a[4]*b[i]
	eor	r0,r0,r0
	adcs	r11,r11,r12
	ldr	r12,[sp,#24]			@ a[6]
	umlal	r4,r0,r1,r2		@ "r[5]"+=a[5]*b[i]
	eor	r1,r1,r1
	adcs	r3,r3,r14
	ldr	r14,[sp,#28]			@ a[7]
	umlal	r5,r1,r12,r2		@ "r[6]"+=a[6]*b[i]
	eor	r12,r12,r12
	adcs	r4,r4,r7
	ldr	r7,[sp,#36]		@ restore overflow bit
	umlal	r6,r12,r14,r2		@ "r[7]"+=a[7]*b[i]
	eor	r14,r14,r14
	adcs	r5,r5,r0
	adcs	r6,r6,r1
	adcs	r7,r7,r12
	adc	r14,r14,#0			@ new overflow bit
	@ multiplication-less reduction 6
	adds	r11,r11,r8		@ r[3]+=r[0]
	ldr	r2,[sp,#40]			@ restore b_ptr
	adcs	r3,r3,#0		@ r[4]+=0
	adcs	r4,r4,#0		@ r[5]+=0
	adcs	r5,r5,r8		@ r[6]+=r[0]
	ldr	r1,[sp,#0]			@ load a[0]
	adcs	r6,r6,#0		@ r[7]+=0
	ldr	r2,[r2,#4*6]			@ load b[i]
	adcs	r7,r7,r8		@ r[8]+=r[0]
	eor	r0,r0,r0
	adc	r14,r14,#0			@ overflow bit
	subs	r6,r6,r8		@ r[7]-=r[0]
	ldr	r12,[sp,#4]			@ a[1]
	sbcs	r7,r7,#0		@ r[8]-=0
	umlal	r9,r0,r1,r2		@ "r[0]"+=a[0]*b[i]
	eor	r1,r1,r1
	sbc	r8,r14,#0			@ overflow bit; keep in mind
						@ that the net result is an
						@ addition, which makes
						@ underflow impossible

	ldr	r14,[sp,#8]			@ a[2]
	umlal	r10,r1,r12,r2		@ "r[1]"+=a[1]*b[i]
	str	r8,[sp,#36]		@ temporarily offload overflow
	eor	r12,r12,r12
	ldr	r8,[sp,#12]			@ a[3], reusing the offloaded r8
	umlal	r11,r12,r14,r2		@ "r[2]"+=a[2]*b[i]
	eor	r14,r14,r14
	adds	r10,r10,r0		@ accumulate high part of mult
	ldr	r0,[sp,#16]			@ a[4]
	umlal	r3,r14,r8,r2		@ "r[3]"+=a[3]*b[i]
	eor	r8,r8,r8
	adcs	r11,r11,r1
	ldr	r1,[sp,#20]			@ a[5]
	umlal	r4,r8,r0,r2		@ "r[4]"+=a[4]*b[i]
	eor	r0,r0,r0
	adcs	r3,r3,r12
	ldr	r12,[sp,#24]			@ a[6]
	umlal	r5,r0,r1,r2		@ "r[5]"+=a[5]*b[i]
	eor	r1,r1,r1
	adcs	r4,r4,r14
	ldr	r14,[sp,#28]			@ a[7]
	umlal	r6,r1,r12,r2		@ "r[6]"+=a[6]*b[i]
	eor	r12,r12,r12
	adcs	r5,r5,r8
	ldr	r8,[sp,#36]		@ restore overflow bit
	umlal	r7,r12,r14,r2		@ "r[7]"+=a[7]*b[i]
	eor	r14,r14,r14
	adcs	r6,r6,r0
	adcs	r7,r7,r1
	adcs	r8,r8,r12
	adc	r14,r14,#0			@ new overflow bit
	@ multiplication-less reduction 7
	adds	r3,r3,r9		@ r[3]+=r[0]
	ldr	r2,[sp,#40]			@ restore b_ptr
	adcs	r4,r4,#0		@ r[4]+=0
	adcs	r5,r5,#0		@ r[5]+=0
	adcs	r6,r6,r9		@ r[6]+=r[0]
	ldr	r1,[sp,#0]			@ load a[0]
	adcs	r7,r7,#0		@ r[7]+=0
	ldr	r2,[r2,#4*7]			@ load b[i]
	adcs	r8,r8,r9		@ r[8]+=r[0]
	eor	r0,r0,r0
	adc	r14,r14,#0			@ overflow bit
	subs	r7,r7,r9		@ r[7]-=r[0]
	ldr	r12,[sp,#4]			@ a[1]
	sbcs	r8,r8,#0		@ r[8]-=0
	umlal	r10,r0,r1,r2		@ "r[0]"+=a[0]*b[i]
	eor	r1,r1,r1
	sbc	r9,r14,#0			@ overflow bit; keep in mind
						@ that the net result is an
						@ addition, which makes
						@ underflow impossible

	ldr	r14,[sp,#8]			@ a[2]
	umlal	r11,r1,r12,r2		@ "r[1]"+=a[1]*b[i]
	str	r9,[sp,#36]		@ temporarily offload overflow
	eor	r12,r12,r12
	ldr	r9,[sp,#12]			@ a[3], reusing the offloaded r9
	umlal	r3,r12,r14,r2		@ "r[2]"+=a[2]*b[i]
	eor	r14,r14,r14
	adds	r11,r11,r0		@ accumulate high part of mult
	ldr	r0,[sp,#16]			@ a[4]
	umlal	r4,r14,r9,r2		@ "r[3]"+=a[3]*b[i]
	eor	r9,r9,r9
	adcs	r3,r3,r1
	ldr	r1,[sp,#20]			@ a[5]
	umlal	r5,r9,r0,r2		@ "r[4]"+=a[4]*b[i]
	eor	r0,r0,r0
	adcs	r4,r4,r12
	ldr	r12,[sp,#24]			@ a[6]
	umlal	r6,r0,r1,r2		@ "r[5]"+=a[5]*b[i]
	eor	r1,r1,r1
	adcs	r5,r5,r14
	ldr	r14,[sp,#28]			@ a[7]
	umlal	r7,r1,r12,r2		@ "r[6]"+=a[6]*b[i]
	eor	r12,r12,r12
	adcs	r6,r6,r9
	ldr	r9,[sp,#36]		@ restore overflow bit
	umlal	r8,r12,r14,r2		@ "r[7]"+=a[7]*b[i]
	eor	r14,r14,r14
	adcs	r7,r7,r0
	adcs	r8,r8,r1
	adcs	r9,r9,r12
	adc	r14,r14,#0			@ new overflow bit
	@ last multiplication-less reduction
	adds	r4,r4,r10
	ldr	r0,[sp,#32]			@ restore r_ptr
	adcs	r5,r5,#0
	adcs	r6,r6,#0
	adcs	r7,r7,r10
	adcs	r8,r8,#0
	adcs	r9,r9,r10
	adc	r14,r14,#0
	subs	r8,r8,r10
	sbcs	r9,r9,#0
	sbc	r10,r14,#0			@ overflow bit

	@ Final step is "if result >= mod, subtract mod", but we do it
	@ the other way around, namely subtract modulus from result
	@ and if that borrowed, add modulus back.
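	@
	@ The adds/adcs forms below, with complemented immediates, are
	@ flag-for-flag equivalents of the commented subs/sbcs #-1 forms,
	@ which cannot always be encoded directly.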

	adds	r11,r11,#1		@ subs	r11,r11,#-1
	adcs	r3,r3,#0		@ sbcs	r3,r3,#-1
	adcs	r4,r4,#0		@ sbcs	r4,r4,#-1
	sbcs	r5,r5,#0
	sbcs	r6,r6,#0
	sbcs	r7,r7,#0
	sbcs	r8,r8,#1
	adcs	r9,r9,#0		@ sbcs	r9,r9,#-1
	ldr	lr,[sp,#44]			@ restore lr
	sbc	r10,r10,#0		@ broadcast borrow bit
	add	sp,sp,#48

	@ Note that because mod has special form, i.e. consists of
	@ 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	@ broadcasting the borrow bit to a register, r10, and using it
	@ as a whole or extracting a single bit.

	adds	r11,r11,r10		@ add modulus or zero
	adcs	r3,r3,r10
	str	r11,[r0,#0]
	adcs	r4,r4,r10
	str	r3,[r0,#4]
	adcs	r5,r5,#0
	str	r4,[r0,#8]
	adcs	r6,r6,#0
	str	r5,[r0,#12]
	adcs	r7,r7,#0
	str	r6,[r0,#16]
	adcs	r8,r8,r10,lsr#31
	str	r7,[r0,#20]
	adc	r9,r9,r10
	str	r8,[r0,#24]
	str	r9,[r0,#28]

	mov	pc,lr

#ifdef __thumb2__
.thumb_func	__ecp_nistz256_sub_from
#endif
.align	5
__ecp_nistz256_sub_from:
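	@ in: r11,r3-r9 hold the minuend (in the word order left in
	@ registers by a preceding __ecp_nistz256_mul_mont call), r2
	@ points at the subtrahend; computes (registers - [r2]) mod p
	@ into [r0].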
	str	lr,[sp,#-4]!		@ push lr

	ldr	r10,[r2,#0]
	ldr	r12,[r2,#4]
	ldr	r14,[r2,#8]
	ldr	r1,[r2,#12]
	subs	r11,r11,r10
	ldr	r10,[r2,#16]
	sbcs	r3,r3,r12
	ldr	r12,[r2,#20]
	sbcs	r4,r4,r14
	ldr	r14,[r2,#24]
	sbcs	r5,r5,r1
	ldr	r1,[r2,#28]
	sbcs	r6,r6,r10
	sbcs	r7,r7,r12
	sbcs	r8,r8,r14
	sbcs	r9,r9,r1
	sbc	r2,r2,r2		@ broadcast borrow bit
	ldr	lr,[sp],#4		@ pop lr

	adds	r11,r11,r2		@ add synthesized modulus
	adcs	r3,r3,r2
	str	r11,[r0,#0]
	adcs	r4,r4,r2
	str	r3,[r0,#4]
	adcs	r5,r5,#0
	str	r4,[r0,#8]
	adcs	r6,r6,#0
	str	r5,[r0,#12]
	adcs	r7,r7,#0
	str	r6,[r0,#16]
	adcs	r8,r8,r2,lsr#31
	str	r7,[r0,#20]
	adcs	r9,r9,r2
	str	r8,[r0,#24]
	str	r9,[r0,#28]

	mov	pc,lr


#ifdef __thumb2__
.thumb_func	__ecp_nistz256_sub_morf
#endif
.align	5
__ecp_nistz256_sub_morf:
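	@ like __ecp_nistz256_sub_from, but with the operands swapped
	@ ("morf" is "from" read backwards): computes ([r2] - registers)
	@ mod p into [r0].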
	str	lr,[sp,#-4]!		@ push lr

	ldr	r10,[r2,#0]
	ldr	r12,[r2,#4]
	ldr	r14,[r2,#8]
	ldr	r1,[r2,#12]
	subs	r11,r10,r11
	ldr	r10,[r2,#16]
	sbcs	r3,r12,r3
	ldr	r12,[r2,#20]
	sbcs	r4,r14,r4
	ldr	r14,[r2,#24]
	sbcs	r5,r1,r5
	ldr	r1,[r2,#28]
	sbcs	r6,r10,r6
	sbcs	r7,r12,r7
	sbcs	r8,r14,r8
	sbcs	r9,r1,r9
	sbc	r2,r2,r2		@ broadcast borrow bit
	ldr	lr,[sp],#4		@ pop lr

	adds	r11,r11,r2		@ add synthesized modulus
	adcs	r3,r3,r2
	str	r11,[r0,#0]
	adcs	r4,r4,r2
	str	r3,[r0,#4]
	adcs	r5,r5,#0
	str	r4,[r0,#8]
	adcs	r6,r6,#0
	str	r5,[r0,#12]
	adcs	r7,r7,#0
	str	r6,[r0,#16]
	adcs	r8,r8,r2,lsr#31
	str	r7,[r0,#20]
	adcs	r9,r9,r2
	str	r8,[r0,#24]
	str	r9,[r0,#28]

	mov	pc,lr


#ifdef __thumb2__
.thumb_func	__ecp_nistz256_add_self
#endif
.align	4
__ecp_nistz256_add_self:
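	@ in-register variant of __ecp_nistz256_mul_by_2: doubles the
	@ value already held in r11,r3-r9 and stores the reduced result
	@ at [r0].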
	adds	r11,r11,r11		@ a[0:7]+=a[0:7]
	adcs	r3,r3,r3
	adcs	r4,r4,r4
	adcs	r5,r5,r5
	adcs	r6,r6,r6
	adcs	r7,r7,r7
	adcs	r8,r8,r8
	mov	r2,#0
	adcs	r9,r9,r9
	adc	r2,r2,#0

	@ if a+b >= modulus, subtract modulus.
	@
	@ But since comparison implies subtraction, we subtract
	@ modulus and then add it back if subtraction borrowed.

	subs	r11,r11,#-1
	sbcs	r3,r3,#-1
	sbcs	r4,r4,#-1
	sbcs	r5,r5,#0
	sbcs	r6,r6,#0
	sbcs	r7,r7,#0
	sbcs	r8,r8,#1
	sbcs	r9,r9,#-1
	sbc	r2,r2,#0

	@ Note that because mod has special form, i.e. consists of
	@ 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	@ using the borrow value as a whole or extracting a single bit.
	@ Follow the r2 register...

	adds	r11,r11,r2		@ add synthesized modulus
	adcs	r3,r3,r2
	str	r11,[r0,#0]
	adcs	r4,r4,r2
	str	r3,[r0,#4]
	adcs	r5,r5,#0
	str	r4,[r0,#8]
	adcs	r6,r6,#0
	str	r5,[r0,#12]
	adcs	r7,r7,#0
	str	r6,[r0,#16]
	adcs	r8,r8,r2,lsr#31
	str	r7,[r0,#20]
	adcs	r9,r9,r2
	str	r8,[r0,#24]
	str	r9,[r0,#28]

	mov	pc,lr


.globl	_GFp_nistz256_point_double
.private_extern	_GFp_nistz256_point_double
#ifdef __thumb2__
.thumb_func	_GFp_nistz256_point_double
#endif
.align	5
_GFp_nistz256_point_double:
	stmdb	sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ push from r0, unusual, but intentional
	sub	sp,sp,#32*5
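
	@ stack frame: S at sp+0, M at sp+32, Zsqr at sp+64, a copy of
	@ in_x at sp+96, tmp0 at sp+128; the registers pushed above start
	@ at sp+160, so [sp,#32*5] is the saved r0 (r_ptr) and
	@ [sp,#32*5+4] the saved r1 (a_ptr).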

Lpoint_double_shortcut:
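	@ standard Jacobian doubling: with Zsqr = in_z^2, the calls below
	@ compute M = 3*(in_x+Zsqr)*(in_x-Zsqr), S = 4*in_x*in_y^2,
	@ res_x = M^2-2*S, res_z = 2*in_y*in_z and
	@ res_y = M*(S-res_x)-8*in_y^4.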
	add	r3,sp,#96
	ldmia	r1!,{r4,r5,r6,r7,r8,r9,r10,r11}	@ copy in_x
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}

	add	r0,sp,#0
	bl	__ecp_nistz256_mul_by_2	@ p256_mul_by_2(S, in_y);

	add	r2,r1,#32
	add	r1,r1,#32
	add	r0,sp,#64
	bl	__ecp_nistz256_mul_mont	@ p256_sqr_mont(Zsqr, in_z);

	add	r1,sp,#0
	add	r2,sp,#0
	add	r0,sp,#0
	bl	__ecp_nistz256_mul_mont	@ p256_sqr_mont(S, S);

	ldr	r2,[sp,#32*5+4]
	add	r1,r2,#32
	add	r2,r2,#64
	add	r0,sp,#128
	bl	__ecp_nistz256_mul_mont	@ p256_mul_mont(tmp0, in_z, in_y);

	ldr	r0,[sp,#32*5]
	add	r0,r0,#64
	bl	__ecp_nistz256_add_self	@ p256_mul_by_2(res_z, tmp0);

	add	r1,sp,#96
	add	r2,sp,#64
	add	r0,sp,#32
	bl	__ecp_nistz256_add	@ p256_add(M, in_x, Zsqr);

	add	r1,sp,#96
	add	r2,sp,#64
	add	r0,sp,#64
	bl	__ecp_nistz256_sub	@ p256_sub(Zsqr, in_x, Zsqr);

	add	r1,sp,#0
	add	r2,sp,#0
	add	r0,sp,#128
	bl	__ecp_nistz256_mul_mont	@ p256_sqr_mont(tmp0, S);

	add	r1,sp,#64
	add	r2,sp,#32
	add	r0,sp,#32
	bl	__ecp_nistz256_mul_mont	@ p256_mul_mont(M, M, Zsqr);

	ldr	r0,[sp,#32*5]
	add	r1,sp,#128
	add	r0,r0,#32
	bl	__ecp_nistz256_div_by_2	@ p256_div_by_2(res_y, tmp0);

	add	r1,sp,#32
	add	r0,sp,#32
	bl	__ecp_nistz256_mul_by_3	@ p256_mul_by_3(M, M);

	add	r1,sp,#96
	add	r2,sp,#0
	add	r0,sp,#0
	bl	__ecp_nistz256_mul_mont	@ p256_mul_mont(S, S, in_x);

	add	r0,sp,#128
	bl	__ecp_nistz256_add_self	@ p256_mul_by_2(tmp0, S);

	ldr	r0,[sp,#32*5]
	add	r1,sp,#32
	add	r2,sp,#32
	bl	__ecp_nistz256_mul_mont	@ p256_sqr_mont(res_x, M);

	add	r2,sp,#128
	bl	__ecp_nistz256_sub_from	@ p256_sub(res_x, res_x, tmp0);

	add	r2,sp,#0
	add	r0,sp,#0
	bl	__ecp_nistz256_sub_morf	@ p256_sub(S, S, res_x);

	add	r1,sp,#32
	add	r2,sp,#0
	bl	__ecp_nistz256_mul_mont	@ p256_mul_mont(S, S, M);

	ldr	r0,[sp,#32*5]
	add	r2,r0,#32
	add	r0,r0,#32
	bl	__ecp_nistz256_sub_from	@ p256_sub(res_y, S, res_y);

	add	sp,sp,#32*5+16		@ +16 means "also skip the saved r0-r3"
#if __ARM_ARCH__>=5 || !defined(__thumb__)
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	bx	lr			@ interoperable with Thumb ISA:-)
#endif

#endif  // !OPENSSL_NO_ASM