// NOTE(review): removed stray code-viewer navigation text ("Home", "Line#",
// "Scopes#", "Navigate", "Raw", "Download") left over from scraping this file
// out of a web-based source browser. It was not part of the original source.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
// MSan cannot see into hand-written assembly; fall back to the C code.
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section	__TEXT,__const

# p434 x 2 (little-endian 64-bit limbs, least significant first).
# NOTE: only six limbs are stored; 2*p434 is seven limbs, but limbs 1 and 2
# are both 0xFFFFFFFFFFFFFFFF, and the consumers below (fpadd/fpsub) reuse
# the register holding the all-ones limb twice instead of loading it again.
Lp434x2:
.quad	0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad	0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad	0xD9F8BFAD038A40AC, 0x0004683E4E2EE688

# p434 + 1. Its three least significant limbs are zero and are omitted;
# the reduction code (sike_fprdc) multiplies only by these four limbs.
Lp434p1:
.quad	0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad	0x6CFC5FD681C52056, 0x0002341F27177344
.text
.globl	_sike_mpmul
.private_extern	_sike_mpmul
.align	4

// void sike_mpmul(const uint64_t *a /* x0 */, const uint64_t *b /* x1 */,
//                 uint64_t *c /* x2 */)
//
// Multiprecision multiply: reads seven 64-bit limbs from each of a and b
// (offsets #0..#48) and stores the fourteen-limb product to c (#0..#96).
// Implemented as one level of Karatsuba: the (AH+AL)x(BH+BL), ALxBL and
// AHxBH partial products are computed and recombined, with negated-carry
// masks used to add the conditional terms branchlessly (constant time).
// c is also used as scratch for intermediate values before the final store.
// AAPCS64: saves/restores x19-x28 and the x29/x30 frame pair.
_sike_mpmul:
	stp	x29, x30, [sp,#-96]!
	add	x29, sp, #0
	stp	x19, x20, [sp,#16]
	stp	x21, x22, [sp,#32]
	stp	x23, x24, [sp,#48]
	stp	x25, x26, [sp,#64]
	stp	x27, x28, [sp,#80]

	ldp	x3,  x4, [x0]
	ldp	x5,  x6, [x0,#16]
	ldp	x7,  x8, [x0,#32]
	ldr	x9,      [x0,#48]
	ldp	x10, x11, [x1,#0]
	ldp	x12, x13, [x1,#16]
	ldp	x14, x15, [x1,#32]
	ldr	x16,      [x1,#48]

        // x3-x7 <- AH + AL, x7 <- carry
	adds	x3, x3, x7
	adcs	x4, x4, x8
	adcs	x5, x5, x9
	adcs	x6, x6, xzr
	adc	x7, xzr, xzr

        // x10-x13 <- BH + BL, x8 <- carry
	adds	x10, x10, x14
	adcs	x11, x11, x15
	adcs	x12, x12, x16
	adcs	x13, x13, xzr
	adc	x8, xzr, xzr

        // x9 <- combined carry
	and	x9, x7, x8
        // x7-x8 <- mask (0 or all-ones) from each carry bit
	sub	x7, xzr, x7
	sub	x8, xzr, x8

        // x14-x17 <- masked (BH + BL)
	and	x14, x10, x7
	and	x15, x11, x7
	and	x16, x12, x7
	and	x17, x13, x7

        // x20-x23 <- masked (AH + AL)
	and	x20, x3, x8
	and	x21, x4, x8
	and	x22, x5, x8
	and	x23, x6, x8

        // x14-x17, x7 <- masked (AH+AL) + masked (BH+BL), step 1
	adds	x14, x14, x20
	adcs	x15, x15, x21
	adcs	x16, x16, x22
	adcs	x17, x17, x23
	adc	x7, x9, xzr

        // x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
	stp	x3, x4, [x2,#0]         // spill low limbs of AH+AL to c
                // A0-A1 <- AH + AL, T0 <- mask
	adds	x3, x3, x5
	adcs	x4, x4, x6
	adc	x25, xzr, xzr

        // C6, T1 <- BH + BL, C7 <- mask
	adds	x23, x10, x12
	adcs	x26, x11, x13
	adc	x24, xzr, xzr

        // C0-C1 <- masked (BH + BL)
	sub	x19, xzr, x25
	sub	x20, xzr, x24
	and	x8, x23, x19
	and	x9, x26, x19

        // C4-C5 <- masked (AH + AL), T0 <- combined carry
	and	x21, x3, x20
	and	x22, x4, x20
	mul	x19, x3, x23
	mul	x20, x3, x26
	and	x25, x25, x24

        // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
	adds	x8, x21, x8
	umulh	x21, x3, x26
	adcs	x9, x22, x9
	umulh	x22, x3, x23
	adc	x25, x25, xzr

        // C2-C5 <- (AH+AL) x (BH+BL), low part
	mul	x3, x4, x23
	umulh	x23, x4, x23
	adds	x20, x20, x22
	adc	x21, x21, xzr

	mul	x24, x4, x26
	umulh	x26, x4, x26
	adds	x20, x20, x3
	adcs	x21, x21, x23
	adc	x22, xzr, xzr

	adds	x21, x21, x24
	adc	x22, x22, x26

	ldp	x3, x4, [x2,#0]         // reload spilled AH+AL limbs

        // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
	adds	x21, x8, x21
	umulh	x24, x3, x10
	umulh	x26, x3, x11
	adcs	x22, x9, x22
	mul	x8, x3, x10
	mul	x9, x3, x11
	adc	x25, x25, xzr

        // C0-C1, T1, C7 <- AL x BL
	mul	x3, x4, x10
	umulh	x10, x4, x10
	adds	x9, x9, x24
	adc	x26, x26, xzr

	mul	x23, x4, x11
	umulh	x11, x4, x11
	adds	x9, x9, x3
	adcs	x26, x26, x10
	adc	x24, xzr, xzr

	adds	x26, x26, x23
	adc	x24, x24, x11


        // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
	mul	x3, x5, x12
	umulh	x10, x5, x12
	subs	x19, x19, x8
	sbcs	x20, x20, x9
	sbcs	x21, x21, x26
	mul	x4, x5, x13
	umulh	x23, x5, x13
	sbcs	x22, x22, x24
	sbc	x25, x25, xzr

        // A0, A1, C6, B0 <- AH x BH
	mul	x5, x6, x12
	umulh	x12, x6, x12
	adds	x4, x4, x10
	adc	x23, x23, xzr

	mul	x11, x6, x13
	umulh	x13, x6, x13
	adds	x4, x4, x5
	adcs	x23, x23, x12
	adc	x10, xzr, xzr

	adds	x23, x23, x11
	adc	x10, x10, x13


        // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
	subs	x19, x19, x3
	sbcs	x20, x20, x4
	sbcs	x21, x21, x23
	sbcs	x22, x22, x10
	sbc	x25, x25, xzr

	adds	x19, x19, x26
	adcs	x20, x20, x24
	adcs	x21, x21, x3
	adcs	x22, x22, x4
	adcs	x23, x25, x23
	adc	x24, x10, xzr


        // x14-x17, x7 <- (AH+AL) x (BH+BL), final step
	adds	x14, x14, x21
	adcs	x15, x15, x22
	adcs	x16, x16, x23
	adcs	x17, x17, x24
	adc	x7, x7, xzr

        // Load AL
	ldp	x3, x4, [x0]
	ldp	x5, x6, [x0,#16]
        // Load BL
	ldp	x10, x11, [x1,#0]
	ldp	x12, x13, [x1,#16]

        // Temporarily store x8,x9 in c
	stp	x8, x9, [x2,#0]
        // x21-x28 <- AL x BL
                // A0-A1 <- AH + AL, T0 <- mask
	adds	x3, x3, x5
	adcs	x4, x4, x6
	adc	x8, xzr, xzr

        // C6, T1 <- BH + BL, C7 <- mask
	adds	x27, x10, x12
	adcs	x9, x11, x13
	adc	x28, xzr, xzr

        // C0-C1 <- masked (BH + BL)
	sub	x23, xzr, x8
	sub	x24, xzr, x28
	and	x21, x27, x23
	and	x22, x9, x23

        // C4-C5 <- masked (AH + AL), T0 <- combined carry
	and	x25, x3, x24
	and	x26, x4, x24
	mul	x23, x3, x27
	mul	x24, x3, x9
	and	x8, x8, x28

        // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
	adds	x21, x25, x21
	umulh	x25, x3, x9
	adcs	x22, x26, x22
	umulh	x26, x3, x27
	adc	x8, x8, xzr

        // C2-C5 <- (AH+AL) x (BH+BL), low part
	mul	x3, x4, x27
	umulh	x27, x4, x27
	adds	x24, x24, x26
	adc	x25, x25, xzr

	mul	x28, x4, x9
	umulh	x9, x4, x9
	adds	x24, x24, x3
	adcs	x25, x25, x27
	adc	x26, xzr, xzr

	adds	x25, x25, x28
	adc	x26, x26, x9

	ldp	x3, x4, [x0,#0]

        // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
	adds	x25, x21, x25
	umulh	x28, x3, x10
	umulh	x9, x3, x11
	adcs	x26, x22, x26
	mul	x21, x3, x10
	mul	x22, x3, x11
	adc	x8, x8, xzr

        // C0-C1, T1, C7 <- AL x BL
	mul	x3, x4, x10
	umulh	x10, x4, x10
	adds	x22, x22, x28
	adc	x9, x9, xzr

	mul	x27, x4, x11
	umulh	x11, x4, x11
	adds	x22, x22, x3
	adcs	x9, x9, x10
	adc	x28, xzr, xzr

	adds	x9, x9, x27
	adc	x28, x28, x11


        // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
	mul	x3, x5, x12
	umulh	x10, x5, x12
	subs	x23, x23, x21
	sbcs	x24, x24, x22
	sbcs	x25, x25, x9
	mul	x4, x5, x13
	umulh	x27, x5, x13
	sbcs	x26, x26, x28
	sbc	x8, x8, xzr

        // A0, A1, C6, B0 <- AH x BH
	mul	x5, x6, x12
	umulh	x12, x6, x12
	adds	x4, x4, x10
	adc	x27, x27, xzr

	mul	x11, x6, x13
	umulh	x13, x6, x13
	adds	x4, x4, x5
	adcs	x27, x27, x12
	adc	x10, xzr, xzr

	adds	x27, x27, x11
	adc	x10, x10, x13


        // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
	subs	x23, x23, x3
	sbcs	x24, x24, x4
	sbcs	x25, x25, x27
	sbcs	x26, x26, x10
	sbc	x8, x8, xzr

	adds	x23, x23, x9
	adcs	x24, x24, x28
	adcs	x25, x25, x3
	adcs	x26, x26, x4
	adcs	x27, x8, x27
	adc	x28, x10, xzr

        // Restore x8,x9
	ldp	x8, x9, [x2,#0]

        // x8-x9,x19,x20,x14-x17 <- masked (AH+AL) x (BH+BL) - ALxBL
	subs	x8, x8, x21
	sbcs	x9, x9, x22
	sbcs	x19, x19, x23
	sbcs	x20, x20, x24
	sbcs	x14, x14, x25
	sbcs	x15, x15, x26
	sbcs	x16, x16, x27
	sbcs	x17, x17, x28
	sbc	x7, x7, xzr

        // Store ALxBL, low
	stp	x21, x22, [x2]
	stp	x23, x24, [x2,#16]

        // Load AH
	ldp	x3, x4, [x0,#32]
	ldr	x5,     [x0,#48]
        // Load BH
	ldp	x10, x11, [x1,#32]
	ldr	x12,      [x1,#48]

	adds	x8,  x8, x25
	adcs	x9,  x9, x26
	adcs	x19, x19, x27
	adcs	x20, x20, x28
	adc	x1, xzr, xzr            // x1 <- carry (b pointer no longer needed)

	add	x0, x0, #32
        // Temporarily store x8,x9 in c
	stp	x8,x9, [x2,#32]
        // x21-x28 <- AH x BH (3x3-limb schoolbook product)

        // A0 * B0
	mul	x21, x3, x10  // C0
	umulh	x24, x3, x10

        // A0 * B1
	mul	x22, x3, x11
	umulh	x23, x3, x11

        // A1 * B0
	mul	x8, x4, x10
	umulh	x9, x4, x10
	adds	x22, x22, x24
	adc	x23, x23, xzr

        // A0 * B2
	mul	x27, x3, x12
	umulh	x28, x3, x12
	adds	x22, x22, x8  // C1
	adcs	x23, x23, x9
	adc	x24, xzr, xzr

        // A2 * B0
	mul	x8, x5, x10
	umulh	x25, x5, x10
	adds	x23, x23, x27
	adcs	x24, x24, x25
	adc	x25, xzr, xzr

        // A1 * B1
	mul	x27, x4, x11
	umulh	x9, x4, x11
	adds	x23, x23, x8
	adcs	x24, x24, x28
	adc	x25, x25, xzr

        // A1 * B2
	mul	x8, x4, x12
	umulh	x28, x4, x12
	adds	x23, x23, x27 // C2
	adcs	x24, x24, x9
	adc	x25, x25, xzr

        // A2 * B1
	mul	x27, x5, x11
	umulh	x9, x5, x11
	adds	x24, x24, x8
	adcs	x25, x25, x28
	adc	x26, xzr, xzr

        // A2 * B2
	mul	x8, x5, x12
	umulh	x28, x5, x12
	adds	x24, x24, x27 // C3
	adcs	x25, x25, x9
	adc	x26, x26, xzr

	adds	x25, x25, x8 // C4
	adc	x26, x26, x28 // C5

        // Restore x8,x9
	ldp	x8,x9, [x2,#32]

	neg	x1, x1

        // x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
	subs	x8, x8, x21
	sbcs	x9, x9, x22
	sbcs	x19, x19, x23
	sbcs	x20, x20, x24
	sbcs	x14, x14, x25
	sbcs	x15, x15, x26
	sbcs	x16, x16, xzr
	sbcs	x17, x17, xzr
	sbc	x7, x7, xzr

        // Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
	stp	x8,  x9, [x2,#32]
	stp	x19, x20, [x2,#48]

        // Recombine high half; x1 holds -carry, so x1+1 restores the flag.
	adds	x1,  x1, #1
	adcs	x14, x14, x21
	adcs	x15, x15, x22
	adcs	x16, x16, x23
	adcs	x17, x17, x24
	adcs	x25,  x7, x25
	adc	x26, x26, xzr

	stp	x14, x15, [x2,#64]
	stp	x16, x17, [x2,#80]
	stp	x25, x26, [x2,#96]

	ldp	x19, x20, [x29,#16]
	ldp	x21, x22, [x29,#32]
	ldp	x23, x24, [x29,#48]
	ldp	x25, x26, [x29,#64]
	ldp	x27, x28, [x29,#80]
	ldp	x29, x30, [sp],#96
	ret
.globl	_sike_fprdc
.private_extern	_sike_fprdc
.align	4

// void sike_fprdc(uint64_t *a /* x0 */, uint64_t *c /* x1 */)
// Montgomery-style reduction for p434: folds the 14-limb input at a
// (offsets #0..#0x68) down to a 7-limb result stored at c (#0..#0x30),
// by repeatedly multiplying low limbs of a by (p434+1) (constant Lp434p1,
// whose three low zero limbs are omitted) and accumulating into the high
// limbs. Branch-free / constant time.
// AAPCS64: saves/restores x19-x28 and the x29/x30 frame pair.
_sike_fprdc:
	stp	x29, x30, [sp, #-96]!
	add	x29, sp, xzr
	stp	x19, x20, [sp,#16]
	stp	x21, x22, [sp,#32]
	stp	x23, x24, [sp,#48]
	stp	x25, x26, [sp,#64]
	stp	x27, x28, [sp,#80]

	ldp	x2, x3, [x0,#0]       // a[0-1]

        // Load the prime constant (x23-x26 <- p434+1, four stored limbs)
	adrp	x26, Lp434p1@PAGE
	add	x26, x26, Lp434p1@PAGEOFF
	ldp	x23, x24, [x26, #0x0]
	ldp	x25, x26, [x26,#0x10]

        // a[0-1] * p434+1
	mul	x4, x2, x23  // C0
	umulh	x7, x2, x23

	mul	x5, x2, x24
	umulh	x6, x2, x24

	mul	x10, x3, x23
	umulh	x11, x3, x23
	adds	x5, x5, x7
	adc	x6, x6, xzr

	mul	x27, x2, x25
	umulh	x28, x2, x25
	adds	x5, x5, x10  // C1
	adcs	x6, x6, x11
	adc	x7, xzr, xzr

	mul	x10, x3, x24
	umulh	x11, x3, x24
	adds	x6, x6, x27
	adcs	x7, x7, x28
	adc	x8, xzr, xzr

	mul	x27, x2, x26
	umulh	x28, x2, x26
	adds	x6, x6, x10  // C2
	adcs	x7, x7, x11
	adc	x8, x8, xzr

	mul	x10, x3, x25
	umulh	x11, x3, x25
	adds	x7, x7, x27
	adcs	x8, x8, x28
	adc	x9, xzr, xzr

	mul	x27, x3, x26
	umulh	x28, x3, x26
	adds	x7, x7, x10  // C3
	adcs	x8, x8, x11
	adc	x9, x9, xzr
	adds	x8, x8, x27  // C4
	adc	x9, x9, x28  // C5

        // Accumulate into a[3..13] (shifted by the omitted zero limbs).
	ldp	x10, x11, [x0, #0x18]
	ldp	x12, x13, [x0, #0x28]
	ldp	x14, x15, [x0, #0x38]
	ldp	x16, x17, [x0, #0x48]
	ldp	x19, x20, [x0, #0x58]
	ldr	x21,      [x0, #0x68]

	adds	x10, x10, x4
	adcs	x11, x11, x5
	adcs	x12, x12, x6
	adcs	x13, x13, x7
	adcs	x14, x14, x8
	adcs	x15, x15, x9
	adcs	x22, x16, xzr
	adcs	x17, x17, xzr
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr

	ldr	x2,  [x0,#0x10]       // a[2]
        // a[2-3] * p434+1
	mul	x4, x2, x23  // C0
	umulh	x7, x2, x23

	mul	x5, x2, x24
	umulh	x6, x2, x24

	mul	x0, x10, x23          // a pointer no longer needed; x0 is scratch
	umulh	x3, x10, x23
	adds	x5, x5, x7
	adc	x6, x6, xzr

	mul	x27, x2, x25
	umulh	x28, x2, x25
	adds	x5, x5, x0  // C1
	adcs	x6, x6, x3
	adc	x7, xzr, xzr

	mul	x0, x10, x24
	umulh	x3, x10, x24
	adds	x6, x6, x27
	adcs	x7, x7, x28
	adc	x8, xzr, xzr

	mul	x27, x2, x26
	umulh	x28, x2, x26
	adds	x6, x6, x0  // C2
	adcs	x7, x7, x3
	adc	x8, x8, xzr

	mul	x0, x10, x25
	umulh	x3, x10, x25
	adds	x7, x7, x27
	adcs	x8, x8, x28
	adc	x9, xzr, xzr

	mul	x27, x10, x26
	umulh	x28, x10, x26
	adds	x7, x7, x0  // C3
	adcs	x8, x8, x3
	adc	x9, x9, xzr
	adds	x8, x8, x27  // C4
	adc	x9, x9, x28  // C5

        // Accumulate the second partial product.
	adds	x12, x12, x4
	adcs	x13, x13, x5
	adcs	x14, x14, x6
	adcs	x15, x15, x7
	adcs	x16, x22, x8
	adcs	x17, x17, x9
	adcs	x22, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr

        // a[4-5] * p434+1
	mul	x4, x11, x23  // C0
	umulh	x7, x11, x23

	mul	x5, x11, x24
	umulh	x6, x11, x24

	mul	x10, x12, x23
	umulh	x3, x12, x23
	adds	x5, x5, x7
	adc	x6, x6, xzr

	mul	x27, x11, x25
	umulh	x28, x11, x25
	adds	x5, x5, x10  // C1
	adcs	x6, x6, x3
	adc	x7, xzr, xzr

	mul	x10, x12, x24
	umulh	x3, x12, x24
	adds	x6, x6, x27
	adcs	x7, x7, x28
	adc	x8, xzr, xzr

	mul	x27, x11, x26
	umulh	x28, x11, x26
	adds	x6, x6, x10  // C2
	adcs	x7, x7, x3
	adc	x8, x8, xzr

	mul	x10, x12, x25
	umulh	x3, x12, x25
	adds	x7, x7, x27
	adcs	x8, x8, x28
	adc	x9, xzr, xzr

	mul	x27, x12, x26
	umulh	x28, x12, x26
	adds	x7, x7, x10  // C3
	adcs	x8, x8, x3
	adc	x9, x9, xzr
	adds	x8, x8, x27  // C4
	adc	x9, x9, x28  // C5

        // Accumulate the third partial product.
	adds	x14, x14, x4
	adcs	x15, x15, x5
	adcs	x16, x16, x6
	adcs	x17, x17, x7
	adcs	x19, x22, x8
	adcs	x20, x20, x9
	adc	x22, x21, xzr

	stp	x14, x15, [x1, #0x0]     // C0, C1

        // Final limb: a[6] * p434+1 (single-limb multiply).
	mul	x4, x13, x23    // C0
	umulh	x10, x13, x23

	mul	x5, x13, x24
	umulh	x27, x13, x24
	adds	x5, x5, x10    // C1
	adc	x10, xzr, xzr

	mul	x6, x13, x25
	umulh	x28, x13, x25
	adds	x27, x10, x27
	adcs	x6, x6, x27    // C2
	adc	x10, xzr, xzr

	mul	x7, x13, x26
	umulh	x8, x13, x26
	adds	x28, x10, x28
	adcs	x7, x7, x28    // C3
	adc	x8, x8, xzr    // C4

	adds	x16, x16, x4
	adcs	x17, x17, x5
	adcs	x19, x19, x6
	adcs	x20, x20, x7
	adc	x21, x22, x8

	str	x16,       [x1, #0x10]
	stp	x17, x19,  [x1, #0x18]
	stp	x20, x21,  [x1, #0x28]

	ldp	x19, x20, [x29,#16]
	ldp	x21, x22, [x29,#32]
	ldp	x23, x24, [x29,#48]
	ldp	x25, x26, [x29,#64]
	ldp	x27, x28, [x29,#80]
	ldp	x29, x30, [sp],#96
	ret
.globl	_sike_fpadd
.private_extern	_sike_fpadd
.align	4

// void sike_fpadd(const uint64_t *a /* x0 */, const uint64_t *b /* x1 */,
//                 uint64_t *c /* x2 */)
// Field addition: c = a + b, then conditionally re-add 2*p434 if the
// subtraction of 2*p434 borrowed, keeping the 7-limb result reduced
// mod 2*p434. Branch-free (mask from the borrow), constant time.
_sike_fpadd:
	stp	x29,x30, [sp,#-16]!
	add	x29, sp, #0

	ldp	x3, x4,   [x0,#0]
	ldp	x5, x6,   [x0,#16]
	ldp	x7, x8,   [x0,#32]
	ldr	x9,       [x0,#48]
	ldp	x11, x12, [x1,#0]
	ldp	x13, x14, [x1,#16]
	ldp	x15, x16, [x1,#32]
	ldr	x17,      [x1,#48]

        // Add a + b
	adds	x3, x3, x11
	adcs	x4, x4, x12
	adcs	x5, x5, x13
	adcs	x6, x6, x14
	adcs	x7, x7, x15
	adcs	x8, x8, x16
	adc	x9, x9, x17

        //  Subtract 2xp434. Lp434x2 stores six limbs: limbs 1 and 2 of
        //  2*p434 are both all-ones, so x12 is deliberately used twice.
	adrp	x17, Lp434x2@PAGE
	add	x17, x17, Lp434x2@PAGEOFF
	ldp	x11, x12, [x17, #0]
	ldp	x13, x14, [x17, #16]
	ldp	x15, x16, [x17, #32]
	subs	x3, x3, x11
	sbcs	x4, x4, x12
	sbcs	x5, x5, x12     // intentional x12 reuse (repeated limb)
	sbcs	x6, x6, x13
	sbcs	x7, x7, x14
	sbcs	x8, x8, x15
	sbcs	x9, x9, x16
	sbc	x0, xzr, xzr    // x0 <- borrow mask (0 or all-ones); x0 reusable now

        // Add 2xp434 anded with the mask in x0
	and	x11, x11, x0
	and	x12, x12, x0
	and	x13, x13, x0
	and	x14, x14, x0
	and	x15, x15, x0
	and	x16, x16, x0

	adds	x3, x3, x11
	adcs	x4, x4, x12
	adcs	x5, x5, x12     // intentional x12 reuse (repeated limb)
	adcs	x6, x6, x13
	adcs	x7, x7, x14
	adcs	x8, x8, x15
	adc	x9, x9, x16

	stp	x3, x4,  [x2,#0]
	stp	x5, x6,  [x2,#16]
	stp	x7, x8,  [x2,#32]
	str	x9,      [x2,#48]

	ldp	x29, x30, [sp],#16
	ret
.globl	_sike_fpsub
.private_extern	_sike_fpsub
.align	4

// void sike_fpsub(const uint64_t *a /* x0 */, const uint64_t *b /* x1 */,
//                 uint64_t *c /* x2 */)
// Field subtraction: c = a - b, then conditionally add 2*p434 if the
// subtraction borrowed. Branch-free (mask from the borrow), constant time.
_sike_fpsub:
	stp	x29, x30, [sp,#-16]!
	add	x29, sp, #0

	ldp	x3, x4,   [x0,#0]
	ldp	x5, x6,   [x0,#16]
	ldp	x7, x8,   [x0,#32]
	ldr	x9,       [x0,#48]
	ldp	x11, x12, [x1,#0]
	ldp	x13, x14, [x1,#16]
	ldp	x15, x16, [x1,#32]
	ldr	x17,      [x1,#48]

        // Subtract a - b
	subs	x3, x3, x11
	sbcs	x4, x4, x12
	sbcs	x5, x5, x13
	sbcs	x6, x6, x14
	sbcs	x7, x7, x15
	sbcs	x8, x8, x16
	sbcs	x9, x9, x17
	sbc	x0, xzr, xzr    // x0 <- borrow mask (0 or all-ones)

        // Load 2xp434 (six stored limbs; the all-ones limb is repeated)
	adrp	x17, Lp434x2@PAGE
	add	x17, x17, Lp434x2@PAGEOFF
	ldp	x11, x12, [x17, #0]
	ldp	x13, x14, [x17, #16]
	ldp	x15, x16, [x17, #32]

        // Add 2xp434 anded with the mask in x0
	and	x11, x11, x0
	and	x12, x12, x0
	and	x13, x13, x0
	and	x14, x14, x0
	and	x15, x15, x0
	and	x16, x16, x0

	adds	x3, x3, x11
	adcs	x4, x4, x12
	adcs	x5, x5, x12     // intentional x12 reuse (repeated limb)
	adcs	x6, x6, x13
	adcs	x7, x7, x14
	adcs	x8, x8, x15
	adc	x9, x9, x16

	stp	x3, x4,  [x2,#0]
	stp	x5, x6,  [x2,#16]
	stp	x7, x8,  [x2,#32]
	str	x9,      [x2,#48]

	ldp	x29, x30, [sp],#16
	ret
.globl	_sike_mpadd_asm
.private_extern	_sike_mpadd_asm
.align	4

// void sike_mpadd_asm(const uint64_t *a /* x0 */, const uint64_t *b /* x1 */,
//                     uint64_t *c /* x2 */)
// Plain 7-limb multiprecision add: c = a + b. Any final carry out of the
// top limb is dropped (no reduction).
_sike_mpadd_asm:
	stp	x29, x30, [sp,#-16]!
	add	x29, sp, #0

	ldp	x3, x4,   [x0,#0]
	ldp	x5, x6,   [x0,#16]
	ldp	x7, x8,   [x0,#32]
	ldr	x9,       [x0,#48]
	ldp	x11, x12, [x1,#0]
	ldp	x13, x14, [x1,#16]
	ldp	x15, x16, [x1,#32]
	ldr	x17,      [x1,#48]

	adds	x3, x3, x11
	adcs	x4, x4, x12
	adcs	x5, x5, x13
	adcs	x6, x6, x14
	adcs	x7, x7, x15
	adcs	x8, x8, x16
	adc	x9, x9, x17

	stp	x3, x4,   [x2,#0]
	stp	x5, x6,   [x2,#16]
	stp	x7, x8,   [x2,#32]
	str	x9,       [x2,#48]

	ldp	x29, x30, [sp],#16
	ret
.globl	_sike_mpsubx2_asm
.private_extern	_sike_mpsubx2_asm
.align	4

// sike_mpsubx2_asm(const uint64_t *a /* x0 */, const uint64_t *b /* x1 */,
//                  uint64_t *c /* x2 */)
// Double-width (14-limb) multiprecision subtract: c = a - b.
// Returns the final borrow mask in x0 (0 if a >= b, all-ones on borrow).
// The ldp/stp instructions between sbcs groups do not touch the flags,
// so the single borrow chain runs across all 14 limbs.
_sike_mpsubx2_asm:
	stp	x29, x30, [sp,#-16]!
	add	x29, sp, #0

	ldp	x3, x4,   [x0,#0]
	ldp	x5, x6,   [x0,#16]
	ldp	x11, x12, [x1,#0]
	ldp	x13, x14, [x1,#16]
	subs	x3, x3, x11
	sbcs	x4, x4, x12
	sbcs	x5, x5, x13
	sbcs	x6, x6, x14
	ldp	x7, x8,   [x0,#32]
	ldp	x9, x10,  [x0,#48]
	ldp	x11, x12, [x1,#32]
	ldp	x13, x14, [x1,#48]
	sbcs	x7, x7, x11
	sbcs	x8, x8, x12
	sbcs	x9, x9, x13
	sbcs	x10, x10, x14

	stp	x3, x4,   [x2,#0]
	stp	x5, x6,   [x2,#16]
	stp	x7, x8,   [x2,#32]
	stp	x9, x10,  [x2,#48]

	ldp	x3, x4,   [x0,#64]
	ldp	x5, x6,   [x0,#80]
	ldp	x11, x12, [x1,#64]
	ldp	x13, x14, [x1,#80]
	sbcs	x3, x3, x11
	sbcs	x4, x4, x12
	sbcs	x5, x5, x13
	sbcs	x6, x6, x14
	ldp	x7, x8,   [x0,#96]
	ldp	x11, x12, [x1,#96]
	sbcs	x7, x7, x11
	sbcs	x8, x8, x12
	sbc	x0, xzr, xzr    // x0 <- borrow mask

	stp	x3, x4,   [x2,#64]
	stp	x5, x6,   [x2,#80]
	stp	x7, x8,   [x2,#96]

	ldp	x29, x30, [sp],#16
	ret
.globl	_sike_mpdblsubx2_asm
.private_extern	_sike_mpdblsubx2_asm
.align	4

// void sike_mpdblsubx2_asm(const uint64_t *a /* x0 */,
//                          const uint64_t *b /* x1 */,
//                          uint64_t *c /* x2 */)
// Double-width (14-limb) double subtract, in place: c = c - a - b.
// Processed in three chunks (6 + 6 + 2 limbs); because loads between
// chunks clobber no flags but the two subtractions per chunk each set
// them, the borrow is carried across chunks manually in x9.
_sike_mpdblsubx2_asm:
	stp	x29, x30, [sp, #-16]!
	add	x29, sp, #0

	ldp	x3, x4,   [x2, #0]
	ldp	x5, x6,   [x2,#16]
	ldp	x7, x8,   [x2,#32]

	ldp	x11, x12, [x0, #0]
	ldp	x13, x14, [x0,#16]
	ldp	x15, x16, [x0,#32]

	subs	x3, x3, x11
	sbcs	x4, x4, x12
	sbcs	x5, x5, x13
	sbcs	x6, x6, x14
	sbcs	x7, x7, x15
	sbcs	x8, x8, x16

        // x9 stores carry
	adc	x9, xzr, xzr

	ldp	x11, x12, [x1, #0]
	ldp	x13, x14, [x1,#16]
	ldp	x15, x16, [x1,#32]
	subs	x3, x3, x11
	sbcs	x4, x4, x12
	sbcs	x5, x5, x13
	sbcs	x6, x6, x14
	sbcs	x7, x7, x15
	sbcs	x8, x8, x16
	adc	x9, x9, xzr

	stp	x3, x4,   [x2, #0]
	stp	x5, x6,   [x2,#16]
	stp	x7, x8,   [x2,#32]

	ldp	x3, x4,   [x2,#48]
	ldp	x5, x6,   [x2,#64]
	ldp	x7, x8,   [x2,#80]

	ldp	x11, x12, [x0,#48]
	ldp	x13, x14, [x0,#64]
	ldp	x15, x16, [x0,#80]

        // x9 = 2 - x9: convert the accumulated no-borrow count back into
        // the number of borrows to propagate into this chunk.
	neg	x9, x9
	add	x9, x9, #2

	subs	x3, x3, x9
	sbcs	x3, x3, x11
	sbcs	x4, x4, x12
	sbcs	x5, x5, x13
	sbcs	x6, x6, x14
	sbcs	x7, x7, x15
	sbcs	x8, x8, x16
	adc	x9, xzr, xzr

	ldp	x11, x12, [x1,#48]
	ldp	x13, x14, [x1,#64]
	ldp	x15, x16, [x1,#80]
	subs	x3, x3, x11
	sbcs	x4, x4, x12
	sbcs	x5, x5, x13
	sbcs	x6, x6, x14
	sbcs	x7, x7, x15
	sbcs	x8, x8, x16
	adc	x9, x9, xzr

	stp	x3, x4,   [x2,#48]
	stp	x5, x6,   [x2,#64]
	stp	x7, x8,   [x2,#80]

	ldp	x3,  x4, [x2,#96]
	ldp	x11, x12, [x0,#96]
	ldp	x13, x14, [x1,#96]

        // x9 = 2 - x9 (same conversion for the final two-limb chunk)
	neg	x9, x9
	add	x9, x9, #2

	subs	x3, x3, x9
	sbcs	x3, x3, x11
	sbcs	x4, x4, x12
	subs	x3, x3, x13
	sbc	x4, x4, x14
	stp	x3, x4,   [x2,#96]

	ldp	x29, x30, [sp],#16
	ret
#endif  // !OPENSSL_NO_ASM