# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

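# Constant pool (editorial notes). .Lpoly is the P-256 field prime
# p = 2^256 - 2^224 + 2^192 + 2^96 - 1; .LONE_mont is 1 in Montgomery form
# (2^256 mod p). .Lord is the group order n, and .LordK appears to be the
# Montgomery reduction constant -n^-1 mod 2^64. .LOne/.LTwo/.LThree are
# broadcast constants used by the constant-time table-select routines.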
.align	64
.Lpoly:
.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

.LOne:
.long	1,1,1,1,1,1,1,1
.LTwo:
.long	2,2,2,2,2,2,2,2
.LThree:
.long	3,3,3,3,3,3,3,3
.LONE_mont:
.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe


.Lord:
.quad	0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad	0xccd1c8aaee00bc4f


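# nistz256_neg(uint64_t res[4], const uint64_t a[4]);
# Computes res = -a mod p in constant time; limbs are 64-bit little-endian.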
.globl	nistz256_neg
.hidden nistz256_neg
.type	nistz256_neg,@function
.align	32
nistz256_neg:
.cfi_startproc
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-16
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-24
.Lneg_body:

	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r13,%r13

	subq	0(%rsi),%r8
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r8,%rax
	sbbq	24(%rsi),%r11
	leaq	.Lpoly(%rip),%rsi
	movq	%r9,%rdx
	sbbq	$0,%r13

	addq	0(%rsi),%r8
	movq	%r10,%rcx
	adcq	8(%rsi),%r9
	adcq	16(%rsi),%r10
	movq	%r11,%r12
	adcq	24(%rsi),%r11
	testq	%r13,%r13

	cmovzq	%rax,%r8
	cmovzq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovzq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovzq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	movq	0(%rsp),%r13
.cfi_restore	%r13
	movq	8(%rsp),%r12
.cfi_restore	%r12
	leaq	16(%rsp),%rsp
.cfi_adjust_cfa_offset	-16
.Lneg_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	nistz256_neg,.-nistz256_neg






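# p256_scalar_mul_mont(uint64_t res[4], const uint64_t a[4], const uint64_t b[4]);
# Montgomery product res = a*b*2^-256 mod n, where n is the P-256 group
# order. The 0x80100 mask below tests bits 8 (BMI2) and 19 (ADX) of
# OPENSSL_ia32cap_P word 2 and dispatches to the MULX/ADX path when set.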
.globl	p256_scalar_mul_mont
.hidden p256_scalar_mul_mont
.type	p256_scalar_mul_mont,@function
.align	32
p256_scalar_mul_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_mul_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mul_body:

	movq	0(%rdx),%rax
	movq	%rdx,%rbx
	leaq	.Lord(%rip),%r14
	movq	.LordK(%rip),%r15


	movq	%rax,%rcx
	mulq	0(%rsi)
	movq	%rax,%r8
	movq	%rcx,%rax
	movq	%rdx,%r9

	mulq	8(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	16(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r8,%r13
	imulq	%r15,%r8

	movq	%rdx,%r11
	mulq	24(%rsi)
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	0(%r14)
	movq	%r8,%rbp
	addq	%rax,%r13
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%r8

	mulq	8(%r14)
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	%rdx,%r10
	movq	%rbp,%rdx
	adcq	$0,%r8

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	8(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r8,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r9,%rcx
	imulq	%r15,%r9

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	xorq	%r8,%r8
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8


	mulq	0(%r14)
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	%r9,%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%r9

	mulq	8(%r14)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	%rdx,%r11
	movq	%rbp,%rdx
	adcq	$0,%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r12
	movq	16(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r9,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r10,%rcx
	imulq	%r15,%r10

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	xorq	%r9,%r9
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9


	mulq	0(%r14)
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	%r10,%rax
	adcq	%rdx,%rcx

	subq	%r10,%r12
	sbbq	$0,%r10

	mulq	8(%r14)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	%rdx,%r12
	movq	%rbp,%rdx
	adcq	$0,%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r13
	movq	24(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r10,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r11,%rcx
	imulq	%r15,%r11

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r8
	adcq	$0,%rdx
	xorq	%r10,%r10
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10


	mulq	0(%r14)
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	%r11,%rax
	adcq	%rdx,%rcx

	subq	%r11,%r13
	sbbq	$0,%r11

	mulq	8(%r14)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	%rdx,%r13
	movq	%rbp,%rdx
	adcq	$0,%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	sbbq	%rdx,%rbp

	addq	%r11,%r8
	adcq	%rbp,%r9
	adcq	$0,%r10


	movq	%r12,%rsi
	subq	0(%r14),%r12
	movq	%r13,%r11
	sbbq	8(%r14),%r13
	movq	%r8,%rcx
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rsi,%r12
	cmovcq	%r11,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mul_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_scalar_mul_mont,.-p256_scalar_mul_mont







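# p256_scalar_sqr_rep_mont(uint64_t res[4], const uint64_t a[4], uint64_t rep);
# Performs rep consecutive Montgomery squarings modulo the group order n,
# with the same BMI2+ADX dispatch as p256_scalar_mul_mont.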
.globl	p256_scalar_sqr_rep_mont
.hidden p256_scalar_sqr_rep_mont
.type	p256_scalar_sqr_rep_mont,@function
.align	32
p256_scalar_sqr_rep_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_sqr_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqr_body:

	movq	0(%rsi),%r8
	movq	8(%rsi),%rax
	movq	16(%rsi),%r14
	movq	24(%rsi),%r15
	leaq	.Lord(%rip),%rsi
	movq	%rdx,%rbx
	jmp	.Loop_ord_sqr

.align	32
.Loop_ord_sqr:

	movq	%rax,%rbp
	mulq	%r8
	movq	%rax,%r9
.byte	102,72,15,110,205
	movq	%r14,%rax
	movq	%rdx,%r10

	mulq	%r8
	addq	%rax,%r10
	movq	%r15,%rax
.byte	102,73,15,110,214
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r8
	addq	%rax,%r11
	movq	%r15,%rax
.byte	102,73,15,110,223
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	%r14
	movq	%rax,%r13
	movq	%r14,%rax
	movq	%rdx,%r14


	mulq	%rbp
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r15

	mulq	%rbp
	addq	%rax,%r12
	adcq	$0,%rdx

	addq	%r15,%r12
	adcq	%rdx,%r13
	adcq	$0,%r14


	xorq	%r15,%r15
	movq	%r8,%rax
	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15


	mulq	%rax
	movq	%rax,%r8
.byte	102,72,15,126,200
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r9
	adcq	%rax,%r10
.byte	102,72,15,126,208
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r11
	adcq	%rax,%r12
.byte	102,72,15,126,216
	adcq	$0,%rdx
	movq	%rdx,%rbp

	movq	%r8,%rcx
	imulq	32(%rsi),%r8

	mulq	%rax
	addq	%rbp,%r13
	adcq	%rax,%r14
	movq	0(%rsi),%rax
	adcq	%rdx,%r15


	mulq	%r8
	movq	%r8,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%rbp

	mulq	%r8
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%r8,%rax
	adcq	%rdx,%r10
	movq	%r8,%rdx
	adcq	$0,%rbp

	movq	%r9,%rcx
	imulq	32(%rsi),%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	0(%rsi),%rax
	sbbq	%rdx,%r8

	addq	%rbp,%r11
	adcq	$0,%r8


	mulq	%r9
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%rbp

	mulq	%r9
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%r9,%rax
	adcq	%rdx,%r11
	movq	%r9,%rdx
	adcq	$0,%rbp

	movq	%r10,%rcx
	imulq	32(%rsi),%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	movq	0(%rsi),%rax
	sbbq	%rdx,%r9

	addq	%rbp,%r8
	adcq	$0,%r9


	mulq	%r10
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r10,%r8
	sbbq	$0,%rbp

	mulq	%r10
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%r10,%rax
	adcq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%rbp

	movq	%r11,%rcx
	imulq	32(%rsi),%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r9
	movq	0(%rsi),%rax
	sbbq	%rdx,%r10

	addq	%rbp,%r9
	adcq	$0,%r10


	mulq	%r11
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r11,%r9
	sbbq	$0,%rbp

	mulq	%r11
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	movq	%r11,%rdx
	adcq	$0,%rbp

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r10
	sbbq	%rdx,%r11

	addq	%rbp,%r10
	adcq	$0,%r11


	xorq	%rdx,%rdx
	addq	%r12,%r8
	adcq	%r13,%r9
	movq	%r8,%r12
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%rax
	adcq	$0,%rdx


	subq	0(%rsi),%r8
	movq	%r10,%r14
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r15
	sbbq	24(%rsi),%r11
	sbbq	$0,%rdx

	cmovcq	%r12,%r8
	cmovncq	%r9,%rax
	cmovncq	%r10,%r14
	cmovncq	%r11,%r15

	decq	%rbx
	jnz	.Loop_ord_sqr

	movq	%r8,0(%rdi)
	movq	%rax,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r14,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r15,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqr_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_scalar_sqr_rep_mont,.-p256_scalar_sqr_rep_mont

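# MULX/ADX variant of the Montgomery multiplication modulo n, reached via
# the 0x80100 capability check above.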
.type	ecp_nistz256_ord_mul_montx,@function
.align	32
ecp_nistz256_ord_mul_montx:
.cfi_startproc
.Lecp_nistz256_ord_mul_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mulx_body:

	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi
	leaq	.Lord-128(%rip),%r14
	movq	.LordK(%rip),%r15


	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	mulxq	%r11,%rbp,%r11
	addq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	mulxq	%r15,%rdx,%rax
	adcq	%rbp,%r10
	adcq	%rcx,%r11
	adcq	$0,%r12


	xorq	%r13,%r13
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	24+128(%r14),%rcx,%rbp
	movq	8(%rbx),%rdx
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12
	adcxq	%r8,%r12
	adoxq	%r8,%r13
	adcq	$0,%r13


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%r14),%rcx,%rbp
	movq	16(%rbx),%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcxq	%r9,%r13
	adoxq	%r9,%r8
	adcq	$0,%r8


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%r14),%rcx,%rbp
	movq	24(%rbx),%rdx
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8
	adcxq	%r10,%r8
	adoxq	%r10,%r9
	adcq	$0,%r9


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%r14),%rcx,%rbp
	leaq	128(%r14),%r14
	movq	%r12,%rbx
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	movq	%r13,%rdx
	adcxq	%r11,%r9
	adoxq	%r11,%r10
	adcq	$0,%r10



	movq	%r8,%rcx
	subq	0(%r14),%r12
	sbbq	8(%r14),%r13
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mulx_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx

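# MULX/ADX variant of the repeated Montgomery squaring modulo n.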
.type	ecp_nistz256_ord_sqr_montx,@function
.align	32
ecp_nistz256_ord_sqr_montx:
.cfi_startproc
.Lecp_nistz256_ord_sqr_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqrx_body:

	movq	%rdx,%rbx
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	.Lord(%rip),%rsi
	jmp	.Loop_ord_sqrx

.align	32
.Loop_ord_sqrx:
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	movq	%rdx,%rax
.byte	102,73,15,110,206
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	addq	%rcx,%r10
.byte	102,73,15,110,215
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13

	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13

	mulxq	%r8,%rcx,%r14
	movq	%rax,%rdx
.byte	102,73,15,110,216
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14


	mulxq	%rdx,%r8,%rbp
.byte	102,72,15,126,202
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
.byte	102,72,15,126,210
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
	mulxq	%rdx,%rcx,%rbp
.byte	0x67
.byte	102,72,15,126,218
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	adoxq	%rbp,%r13
	mulxq	%rdx,%rcx,%rax
	adoxq	%rcx,%r14
	adoxq	%rax,%r15


	movq	%r8,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	xorq	%rax,%rax
	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	adcxq	%rax,%r8


	movq	%r9,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	adoxq	%rax,%r9


	movq	%r10,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	adcxq	%rax,%r10


	movq	%r11,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	adoxq	%rax,%r11


	addq	%r8,%r12
	adcq	%r13,%r9
	movq	%r12,%rdx
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%r14
	adcq	$0,%rax


	subq	0(%rsi),%r12
	movq	%r10,%r15
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r8
	sbbq	24(%rsi),%r11
	sbbq	$0,%rax

	cmovncq	%r12,%rdx
	cmovncq	%r9,%r14
	cmovncq	%r10,%r15
	cmovncq	%r11,%r8

	decq	%rbx
	jnz	.Loop_ord_sqrx

	movq	%rdx,0(%rdi)
	movq	%r14,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r15,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r8,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqrx_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx






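# p256_mul_mont(uint64_t res[4], const uint64_t a[4], const uint64_t b[4]);
# Montgomery product res = a*b*2^-256 mod p, dispatching between the mulq
# and MULX/ADX inner routines on the same BMI2+ADX capability check.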
.globl	p256_mul_mont
.hidden p256_mul_mont
.type	p256_mul_mont,@function
.align	32
p256_mul_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
.Lmul_mont:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lmul_body:
	cmpl	$0x80100,%ecx
	je	.Lmul_montx
	movq	%rdx,%rbx
	movq	0(%rdx),%rax
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12

	call	__ecp_nistz256_mul_montq
	jmp	.Lmul_mont_done

.align	32
.Lmul_montx:
	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_mul_montx
.Lmul_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lmul_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_mul_mont,.-p256_mul_mont

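# Inner mulq-based Montgomery multiplication modulo p. Callers preload
# b[0] in %rax, the pointer to b in %rbx, a[0..3] in %r9-%r12, and the
# result pointer in %rdi.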
.type	__ecp_nistz256_mul_montq,@function
.align	32
__ecp_nistz256_mul_montq:
.cfi_startproc


	movq	%rax,%rbp
	mulq	%r9
	movq	.Lpoly+8(%rip),%r14
	movq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%r9

	mulq	%r10
	movq	.Lpoly+24(%rip),%r15
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%r11
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r12
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	xorq	%r13,%r13
	movq	%rdx,%r12










	movq	%r8,%rbp
	shlq	$32,%r8
	mulq	%r15
	shrq	$32,%rbp
	addq	%r8,%r9
	adcq	%rbp,%r10
	adcq	%rax,%r11
	movq	8(%rbx),%rax
	adcq	%rdx,%r12
	adcq	$0,%r13
	xorq	%r8,%r8



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8



	movq	%r9,%rbp
	shlq	$32,%r9
	mulq	%r15
	shrq	$32,%rbp
	addq	%r9,%r10
	adcq	%rbp,%r11
	adcq	%rax,%r12
	movq	16(%rbx),%rax
	adcq	%rdx,%r13
	adcq	$0,%r8
	xorq	%r9,%r9



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9



	movq	%r10,%rbp
	shlq	$32,%r10
	mulq	%r15
	shrq	$32,%rbp
	addq	%r10,%r11
	adcq	%rbp,%r12
	adcq	%rax,%r13
	movq	24(%rbx),%rax
	adcq	%rdx,%r8
	adcq	$0,%r9
	xorq	%r10,%r10



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10



	movq	%r11,%rbp
	shlq	$32,%r11
	mulq	%r15
	shrq	$32,%rbp
	addq	%r11,%r12
	adcq	%rbp,%r13
	movq	%r12,%rcx
	adcq	%rax,%r8
	adcq	%rdx,%r9
	movq	%r13,%rbp
	adcq	$0,%r10



	subq	$-1,%r12
	movq	%r8,%rbx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rdx
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rcx,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rbx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rdx,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq








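# p256_sqr_mont(uint64_t res[4], const uint64_t a[4]);
# Montgomery squaring res = a*a*2^-256 mod p, with the same mulq/MULX
# dispatch as p256_mul_mont.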
.globl	p256_sqr_mont
.hidden p256_sqr_mont
.type	p256_sqr_mont,@function
.align	32
p256_sqr_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsqr_body:
	cmpl	$0x80100,%ecx
	je	.Lsqr_montx
	movq	0(%rsi),%rax
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8

	call	__ecp_nistz256_sqr_montq
	jmp	.Lsqr_mont_done

.align	32
.Lsqr_montx:
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_sqr_montx
.Lsqr_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lsqr_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_sqr_mont,.-p256_sqr_mont

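# Inner mulq-based Montgomery squaring modulo p; expects a[0] in %rax,
# a[1..3] in %r14, %r15, %r8, and %rsi pointing at a.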
.type	__ecp_nistz256_sqr_montq,@function
.align	32
__ecp_nistz256_sqr_montq:
.cfi_startproc
	movq	%rax,%r13
	mulq	%r14
	movq	%rax,%r9
	movq	%r15,%rax
	movq	%rdx,%r10

	mulq	%r13
	addq	%rax,%r10
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r13
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	%r14
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%r14
	addq	%rax,%r12
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbp,%r12
	movq	%rdx,%r13
	adcq	$0,%r13


	mulq	%r15
	xorq	%r15,%r15
	addq	%rax,%r13
	movq	0(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	mulq	%rax
	movq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r9
	adcq	%rax,%r10
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r11
	adcq	%rax,%r12
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r13
	adcq	%rax,%r14
	movq	%r8,%rax
	adcq	%rdx,%r15

	movq	.Lpoly+8(%rip),%rsi
	movq	.Lpoly+24(%rip),%rbp




	movq	%r8,%rcx
	shlq	$32,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r8,%r9
	adcq	%rcx,%r10
	adcq	%rax,%r11
	movq	%r9,%rax
	adcq	$0,%rdx



	movq	%r9,%rcx
	shlq	$32,%r9
	movq	%rdx,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r9,%r10
	adcq	%rcx,%r11
	adcq	%rax,%r8
	movq	%r10,%rax
	adcq	$0,%rdx



	movq	%r10,%rcx
	shlq	$32,%r10
	movq	%rdx,%r9
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r10,%r11
	adcq	%rcx,%r8
	adcq	%rax,%r9
	movq	%r11,%rax
	adcq	$0,%rdx



	movq	%r11,%rcx
	shlq	$32,%r11
	movq	%rdx,%r10
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r11,%r8
	adcq	%rcx,%r9
	adcq	%rax,%r10
	adcq	$0,%rdx
	xorq	%r11,%r11



	addq	%r8,%r12
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%rdx,%r15
	movq	%r13,%r9
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%rcx
	sbbq	%rbp,%r15
	sbbq	$0,%r11

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%rcx,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
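# MULX/ADX inner Montgomery multiplication modulo p; %rsi is biased by
# -128 by the caller before entry.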
.type	__ecp_nistz256_mul_montx,@function
.align	32
__ecp_nistz256_mul_montx:
.cfi_startproc


	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	movq	$32,%r14
	xorq	%r13,%r13
	mulxq	%r11,%rbp,%r11
	movq	.Lpoly+24(%rip),%r15
	adcq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	adcq	%rbp,%r10
	shlxq	%r14,%r8,%rbp
	adcq	%rcx,%r11
	shrxq	%r14,%r8,%rcx
	adcq	$0,%r12



	addq	%rbp,%r9
	adcq	%rcx,%r10

	mulxq	%r15,%rcx,%rbp
	movq	8(%rbx),%rdx
	adcq	%rcx,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13
	xorq	%r8,%r8



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	adcxq	%rcx,%r12
	shlxq	%r14,%r9,%rcx
	adoxq	%rbp,%r13
	shrxq	%r14,%r9,%rbp

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8



	addq	%rcx,%r10
	adcq	%rbp,%r11

	mulxq	%r15,%rcx,%rbp
	movq	16(%rbx),%rdx
	adcq	%rcx,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8
	xorq	%r9,%r9



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	adcxq	%rcx,%r13
	shlxq	%r14,%r10,%rcx
	adoxq	%rbp,%r8
	shrxq	%r14,%r10,%rbp

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9



	addq	%rcx,%r11
	adcq	%rbp,%r12

	mulxq	%r15,%rcx,%rbp
	movq	24(%rbx),%rdx
	adcq	%rcx,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9
	xorq	%r10,%r10



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	adcxq	%rcx,%r8
	shlxq	%r14,%r11,%rcx
	adoxq	%rbp,%r9
	shrxq	%r14,%r11,%rbp

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10



	addq	%rcx,%r12
	adcq	%rbp,%r13

	mulxq	%r15,%rcx,%rbp
	movq	%r12,%rbx
	movq	.Lpoly+8(%rip),%r14
	adcq	%rcx,%r8
	movq	%r13,%rdx
	adcq	%rbp,%r9
	adcq	$0,%r10



	xorl	%eax,%eax
	movq	%r8,%rcx
	sbbq	$-1,%r12
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rbp
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rbp,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx

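# MULX/ADX inner Montgomery squaring modulo p; %rsi is biased by -128 by
# the caller before entry.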
.type	__ecp_nistz256_sqr_montx,@function
.align	32
__ecp_nistz256_sqr_montx:
.cfi_startproc
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	xorl	%eax,%eax
	adcq	%rcx,%r10
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13


	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13


	mulxq	%r8,%rcx,%r14
	movq	0+128(%rsi),%rdx
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14

	mulxq	%rdx,%r8,%rbp
	movq	8+128(%rsi),%rdx
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
	movq	16+128(%rsi),%rdx
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
.byte	0x67
	mulxq	%rdx,%rcx,%rbp
	movq	24+128(%rsi),%rdx
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	movq	$32,%rsi
	adoxq	%rbp,%r13
.byte	0x67,0x67
	mulxq	%rdx,%rcx,%rax
	movq	.Lpoly+24(%rip),%rdx
	adoxq	%rcx,%r14
	shlxq	%rsi,%r8,%rcx
	adoxq	%rax,%r15
	shrxq	%rsi,%r8,%rax
	movq	%rdx,%rbp


	addq	%rcx,%r9
	adcq	%rax,%r10

	mulxq	%r8,%rcx,%r8
	adcq	%rcx,%r11
	shlxq	%rsi,%r9,%rcx
	adcq	$0,%r8
	shrxq	%rsi,%r9,%rax


	addq	%rcx,%r10
	adcq	%rax,%r11

	mulxq	%r9,%rcx,%r9
	adcq	%rcx,%r8
	shlxq	%rsi,%r10,%rcx
	adcq	$0,%r9
	shrxq	%rsi,%r10,%rax


	addq	%rcx,%r11
	adcq	%rax,%r8

	mulxq	%r10,%rcx,%r10
	adcq	%rcx,%r9
	shlxq	%rsi,%r11,%rcx
	adcq	$0,%r10
	shrxq	%rsi,%r11,%rax


	addq	%rcx,%r8
	adcq	%rax,%r9

	mulxq	%r11,%rcx,%r11
	adcq	%rcx,%r10
	adcq	$0,%r11

	xorq	%rdx,%rdx
	addq	%r8,%r12
	movq	.Lpoly+8(%rip),%rsi
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%r11,%r15
	movq	%r13,%r9
	adcq	$0,%rdx

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%r11
	sbbq	%rbp,%r15
	sbbq	$0,%rdx

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%r11,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx


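# nistz256_select_w5(val=%rdi, table=%rsi, index=%edx): constant-time
# lookup for a 5-bit window. All 16 table entries (96 bytes each: X, Y, Z)
# are read and masked, so the memory access pattern is independent of
# index. Jumps to the AVX2 version when OPENSSL_ia32cap_P reports AVX2.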
.globl	nistz256_select_w5
.hidden nistz256_select_w5
.type	nistz256_select_w5,@function
.align	32
nistz256_select_w5:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w5
	movdqa	.LOne(%rip),%xmm0
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7

	movdqa	%xmm0,%xmm8
	pshufd	$0,%xmm1,%xmm1

	movq	$16,%rax
.Lselect_loop_sse_w5:

	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	pcmpeqd	%xmm1,%xmm15

	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	movdqa	64(%rsi),%xmm13
	movdqa	80(%rsi),%xmm14
	leaq	96(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	pand	%xmm15,%xmm13
	por	%xmm12,%xmm5
	pand	%xmm15,%xmm14
	por	%xmm13,%xmm6
	por	%xmm14,%xmm7

	decq	%rax
	jnz	.Lselect_loop_sse_w5

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	movdqu	%xmm6,64(%rdi)
	movdqu	%xmm7,80(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_nistz256_select_w5:
.size	nistz256_select_w5,.-nistz256_select_w5



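# nistz256_select_w7(val=%rdi, table=%rsi, index=%edx): constant-time
# lookup for a 7-bit window over 64 affine entries (64 bytes each: X, Y),
# again reading and masking the whole table.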
.globl	nistz256_select_w7
.hidden nistz256_select_w7
.type	nistz256_select_w7,@function
.align	32
nistz256_select_w7:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w7
	movdqa	.LOne(%rip),%xmm8
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5

	movdqa	%xmm8,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	$64,%rax

.Lselect_loop_sse_w7:
	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	pcmpeqd	%xmm1,%xmm15
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	leaq	64(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	prefetcht0	255(%rsi)
	por	%xmm12,%xmm5

	decq	%rax
	jnz	.Lselect_loop_sse_w7

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_nistz256_select_w7:
.size	nistz256_select_w7,.-nistz256_select_w7


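# AVX2 version of the w5 select: 8 iterations compare two table indices at
# a time against the broadcast index and accumulate the masked entries.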
.type	ecp_nistz256_avx2_select_w5,@function
.align	32
ecp_nistz256_avx2_select_w5:
.cfi_startproc
.Lavx2_select_w5:
	vzeroupper
	vmovdqa	.LTwo(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3
	vpxor	%ymm4,%ymm4,%ymm4

	vmovdqa	.LOne(%rip),%ymm5
	vmovdqa	.LTwo(%rip),%ymm10

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$8,%rax
.Lselect_loop_avx2_w5:

	vmovdqa	0(%rsi),%ymm6
	vmovdqa	32(%rsi),%ymm7
	vmovdqa	64(%rsi),%ymm8

	vmovdqa	96(%rsi),%ymm11
	vmovdqa	128(%rsi),%ymm12
	vmovdqa	160(%rsi),%ymm13

	vpcmpeqd	%ymm1,%ymm5,%ymm9
	vpcmpeqd	%ymm1,%ymm10,%ymm14

	vpaddd	%ymm0,%ymm5,%ymm5
	vpaddd	%ymm0,%ymm10,%ymm10
	leaq	192(%rsi),%rsi

	vpand	%ymm9,%ymm6,%ymm6
	vpand	%ymm9,%ymm7,%ymm7
	vpand	%ymm9,%ymm8,%ymm8
	vpand	%ymm14,%ymm11,%ymm11
	vpand	%ymm14,%ymm12,%ymm12
	vpand	%ymm14,%ymm13,%ymm13

	vpxor	%ymm6,%ymm2,%ymm2
	vpxor	%ymm7,%ymm3,%ymm3
	vpxor	%ymm8,%ymm4,%ymm4
	vpxor	%ymm11,%ymm2,%ymm2
	vpxor	%ymm12,%ymm3,%ymm3
	vpxor	%ymm13,%ymm4,%ymm4

	decq	%rax
	jnz	.Lselect_loop_avx2_w5

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vmovdqu	%ymm4,64(%rdi)
	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w5:
.size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5



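# AVX2 version of the w7 select: 21 iterations of three entries each cover
# most of the table, and the final entry is handled after the loop.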
.type	ecp_nistz256_avx2_select_w7,@function
.align	32
ecp_nistz256_avx2_select_w7:
.cfi_startproc
.Lavx2_select_w7:
	vzeroupper
	vmovdqa	.LThree(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3

	vmovdqa	.LOne(%rip),%ymm4
	vmovdqa	.LTwo(%rip),%ymm8
	vmovdqa	.LThree(%rip),%ymm12

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1


	movq	$21,%rax
.Lselect_loop_avx2_w7:

	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vmovdqa	64(%rsi),%ymm9
	vmovdqa	96(%rsi),%ymm10

	vmovdqa	128(%rsi),%ymm13
	vmovdqa	160(%rsi),%ymm14

	vpcmpeqd	%ymm1,%ymm4,%ymm7
	vpcmpeqd	%ymm1,%ymm8,%ymm11
	vpcmpeqd	%ymm1,%ymm12,%ymm15

	vpaddd	%ymm0,%ymm4,%ymm4
	vpaddd	%ymm0,%ymm8,%ymm8
	vpaddd	%ymm0,%ymm12,%ymm12
	leaq	192(%rsi),%rsi

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm11,%ymm9,%ymm9
	vpand	%ymm11,%ymm10,%ymm10
	vpand	%ymm15,%ymm13,%ymm13
	vpand	%ymm15,%ymm14,%ymm14

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm13,%ymm2,%ymm2
	vpxor	%ymm14,%ymm3,%ymm3

	decq	%rax
	jnz	.Lselect_loop_avx2_w7


	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vpcmpeqd	%ymm1,%ymm4,%ymm7

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w7:
.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
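# __ecp_nistz256_add_toq: (%rdi) = [%r12,%r13,%r8,%r9] + (%rbx) mod p;
# %r14 and %r15 cache p[1] and p[3] (p[0] = 2^64-1, p[2] = 0).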
.type	__ecp_nistz256_add_toq,@function
.align	32
__ecp_nistz256_add_toq:
.cfi_startproc
	xorq	%r11,%r11
	addq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq

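# __ecp_nistz256_sub_fromq: (%rdi) = [%r12,%r13,%r8,%r9] - (%rbx) mod p.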
.type	__ecp_nistz256_sub_fromq,@function
.align	32
__ecp_nistz256_sub_fromq:
.cfi_startproc
	subq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	%r11,%r11

	addq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9
	testq	%r11,%r11

	cmovzq	%rax,%r12
	cmovzq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovzq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovzq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq

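# __ecp_nistz256_subq: register-only subtraction mod p; computes
# [%rax,%rbp,%rcx,%r10] - [%r12,%r13,%r8,%r9] and leaves the reduced
# result in %r12,%r13,%r8,%r9.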
.type	__ecp_nistz256_subq,@function
.align	32
__ecp_nistz256_subq:
.cfi_startproc
	subq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	%r11,%r11

	addq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10
	testq	%r11,%r11

	cmovnzq	%rax,%r12
	cmovnzq	%rbp,%r13
	cmovnzq	%rcx,%r8
	cmovnzq	%r10,%r9

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq

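# __ecp_nistz256_mul_by_2q: (%rdi) = 2*[%r12,%r13,%r8,%r9] mod p.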
.type	__ecp_nistz256_mul_by_2q,@function
.align	32
__ecp_nistz256_mul_by_2q:
.cfi_startproc
	xorq	%r11,%r11
	addq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
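# p256_point_double(P256_POINT *r, const P256_POINT *a);
# Point doubling in Jacobian coordinates, built from the modular helpers
# above; dispatches to a MULX/ADX body on the BMI2+ADX check.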
.globl	p256_point_double
.hidden p256_point_double
.type	p256_point_double,@function
.align	32
p256_point_double:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_doublex
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$160+8,%rsp
.cfi_adjust_cfa_offset	32*5+8
.Lpoint_doubleq_body:

.Lpoint_double_shortcutq:
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199
.byte	102,73,15,110,202
.byte	102,73,15,110,211

	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-0(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	32(%rbx),%rax
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-0(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215
	call	__ecp_nistz256_mul_montq
	call	__ecp_nistz256_mul_by_2q

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207
	call	__ecp_nistz256_sqr_montq
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	movq	64(%rsp),%rax
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	0+32(%rsp),%rax
	movq	8+32(%rsp),%r14
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199
	call	__ecp_nistz256_sqr_montq

	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subq

	movq	32(%rsp),%rax
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-0(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

.byte	102,72,15,126,203
.byte	102,72,15,126,207
	call	__ecp_nistz256_sub_fromq

	leaq	160+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_doubleq_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_point_double,.-p256_point_double
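# p256_point_add(P256_POINT *r, const P256_POINT *a, const P256_POINT *b);
# Full Jacobian point addition. Equal inputs are routed to the doubling
# path (.Ladd_doubleq), and inputs at infinity are selected away with
# masks rather than extra branches.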
2645.globl	p256_point_add
2646.hidden p256_point_add
2647.type	p256_point_add,@function
2648.align	32
2649p256_point_add:
2650.cfi_startproc
2651	leaq	OPENSSL_ia32cap_P(%rip),%rcx
2652	movq	8(%rcx),%rcx
2653	andl	$0x80100,%ecx
2654	cmpl	$0x80100,%ecx
2655	je	.Lpoint_addx
2656	pushq	%rbp
2657.cfi_adjust_cfa_offset	8
2658.cfi_offset	%rbp,-16
2659	pushq	%rbx
2660.cfi_adjust_cfa_offset	8
2661.cfi_offset	%rbx,-24
2662	pushq	%r12
2663.cfi_adjust_cfa_offset	8
2664.cfi_offset	%r12,-32
2665	pushq	%r13
2666.cfi_adjust_cfa_offset	8
2667.cfi_offset	%r13,-40
2668	pushq	%r14
2669.cfi_adjust_cfa_offset	8
2670.cfi_offset	%r14,-48
2671	pushq	%r15
2672.cfi_adjust_cfa_offset	8
2673.cfi_offset	%r15,-56
2674	subq	$576+8,%rsp
2675.cfi_adjust_cfa_offset	32*18+8
2676.Lpoint_addq_body:
2677
2678	movdqu	0(%rsi),%xmm0
2679	movdqu	16(%rsi),%xmm1
2680	movdqu	32(%rsi),%xmm2
2681	movdqu	48(%rsi),%xmm3
2682	movdqu	64(%rsi),%xmm4
2683	movdqu	80(%rsi),%xmm5
2684	movq	%rsi,%rbx
2685	movq	%rdx,%rsi
2686	movdqa	%xmm0,384(%rsp)
2687	movdqa	%xmm1,384+16(%rsp)
2688	movdqa	%xmm2,416(%rsp)
2689	movdqa	%xmm3,416+16(%rsp)
2690	movdqa	%xmm4,448(%rsp)
2691	movdqa	%xmm5,448+16(%rsp)
2692	por	%xmm4,%xmm5
2693
2694	movdqu	0(%rsi),%xmm0
2695	pshufd	$0xb1,%xmm5,%xmm3
2696	movdqu	16(%rsi),%xmm1
2697	movdqu	32(%rsi),%xmm2
2698	por	%xmm3,%xmm5
2699	movdqu	48(%rsi),%xmm3
2700	movq	64+0(%rsi),%rax
2701	movq	64+8(%rsi),%r14
2702	movq	64+16(%rsi),%r15
2703	movq	64+24(%rsi),%r8
2704	movdqa	%xmm0,480(%rsp)
2705	pshufd	$0x1e,%xmm5,%xmm4
2706	movdqa	%xmm1,480+16(%rsp)
2707	movdqu	64(%rsi),%xmm0
2708	movdqu	80(%rsi),%xmm1
2709	movdqa	%xmm2,512(%rsp)
2710	movdqa	%xmm3,512+16(%rsp)
2711	por	%xmm4,%xmm5
2712	pxor	%xmm4,%xmm4
2713	por	%xmm0,%xmm1
2714.byte	102,72,15,110,199
2715
2716	leaq	64-0(%rsi),%rsi
2717	movq	%rax,544+0(%rsp)
2718	movq	%r14,544+8(%rsp)
2719	movq	%r15,544+16(%rsp)
2720	movq	%r8,544+24(%rsp)
2721	leaq	96(%rsp),%rdi
2722	call	__ecp_nistz256_sqr_montq
2723
2724	pcmpeqd	%xmm4,%xmm5
2725	pshufd	$0xb1,%xmm1,%xmm4
2726	por	%xmm1,%xmm4
2727	pshufd	$0,%xmm5,%xmm5
2728	pshufd	$0x1e,%xmm4,%xmm3
2729	por	%xmm3,%xmm4
2730	pxor	%xmm3,%xmm3
2731	pcmpeqd	%xmm3,%xmm4
2732	pshufd	$0,%xmm4,%xmm4
2733	movq	64+0(%rbx),%rax
2734	movq	64+8(%rbx),%r14
2735	movq	64+16(%rbx),%r15
2736	movq	64+24(%rbx),%r8
2737.byte	102,72,15,110,203
2738
2739	leaq	64-0(%rbx),%rsi
2740	leaq	32(%rsp),%rdi
2741	call	__ecp_nistz256_sqr_montq
2742
2743	movq	544(%rsp),%rax
2744	leaq	544(%rsp),%rbx
2745	movq	0+96(%rsp),%r9
2746	movq	8+96(%rsp),%r10
2747	leaq	0+96(%rsp),%rsi
2748	movq	16+96(%rsp),%r11
2749	movq	24+96(%rsp),%r12
2750	leaq	224(%rsp),%rdi
2751	call	__ecp_nistz256_mul_montq
2752
2753	movq	448(%rsp),%rax
2754	leaq	448(%rsp),%rbx
2755	movq	0+32(%rsp),%r9
2756	movq	8+32(%rsp),%r10
2757	leaq	0+32(%rsp),%rsi
2758	movq	16+32(%rsp),%r11
2759	movq	24+32(%rsp),%r12
2760	leaq	256(%rsp),%rdi
2761	call	__ecp_nistz256_mul_montq
2762
2763	movq	416(%rsp),%rax
2764	leaq	416(%rsp),%rbx
2765	movq	0+224(%rsp),%r9
2766	movq	8+224(%rsp),%r10
2767	leaq	0+224(%rsp),%rsi
2768	movq	16+224(%rsp),%r11
2769	movq	24+224(%rsp),%r12
2770	leaq	224(%rsp),%rdi
2771	call	__ecp_nistz256_mul_montq
2772
2773	movq	512(%rsp),%rax
2774	leaq	512(%rsp),%rbx
2775	movq	0+256(%rsp),%r9
2776	movq	8+256(%rsp),%r10
2777	leaq	0+256(%rsp),%rsi
2778	movq	16+256(%rsp),%r11
2779	movq	24+256(%rsp),%r12
2780	leaq	256(%rsp),%rdi
2781	call	__ecp_nistz256_mul_montq
2782
2783	leaq	224(%rsp),%rbx
2784	leaq	64(%rsp),%rdi
2785	call	__ecp_nistz256_sub_fromq
2786
2787	orq	%r13,%r12
2788	movdqa	%xmm4,%xmm2
2789	orq	%r8,%r12
2790	orq	%r9,%r12
2791	por	%xmm5,%xmm2
2792.byte	102,73,15,110,220
2793
2794	movq	384(%rsp),%rax
2795	leaq	384(%rsp),%rbx
2796	movq	0+96(%rsp),%r9
2797	movq	8+96(%rsp),%r10
2798	leaq	0+96(%rsp),%rsi
2799	movq	16+96(%rsp),%r11
2800	movq	24+96(%rsp),%r12
2801	leaq	160(%rsp),%rdi
2802	call	__ecp_nistz256_mul_montq
2803
2804	movq	480(%rsp),%rax
2805	leaq	480(%rsp),%rbx
2806	movq	0+32(%rsp),%r9
2807	movq	8+32(%rsp),%r10
2808	leaq	0+32(%rsp),%rsi
2809	movq	16+32(%rsp),%r11
2810	movq	24+32(%rsp),%r12
2811	leaq	192(%rsp),%rdi
2812	call	__ecp_nistz256_mul_montq
2813
2814	leaq	160(%rsp),%rbx
2815	leaq	0(%rsp),%rdi
2816	call	__ecp_nistz256_sub_fromq
2817
2818	orq	%r13,%r12
2819	orq	%r8,%r12
2820	orq	%r9,%r12
2821
2822.byte	102,73,15,126,208
2823.byte	102,73,15,126,217
2824	orq	%r8,%r12
2825.byte	0x3e
2826	jnz	.Ladd_proceedq
2827
2828
2829
2830	testq	%r9,%r9
2831	jz	.Ladd_doubleq
2832
2833
2834
2835
2836
2837
2838.byte	102,72,15,126,199
2839	pxor	%xmm0,%xmm0
2840	movdqu	%xmm0,0(%rdi)
2841	movdqu	%xmm0,16(%rdi)
2842	movdqu	%xmm0,32(%rdi)
2843	movdqu	%xmm0,48(%rdi)
2844	movdqu	%xmm0,64(%rdi)
2845	movdqu	%xmm0,80(%rdi)
2846	jmp	.Ladd_doneq
2847
.align	32
.Ladd_doubleq:
.byte	102,72,15,126,206
.byte	102,72,15,126,199
	addq	$416,%rsp
.cfi_adjust_cfa_offset	-416
	jmp	.Lpoint_double_shortcutq
.cfi_adjust_cfa_offset	416

.align	32
.Ladd_proceedq:
	movq	0+64(%rsp),%rax
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	544(%rsp),%rax
	leaq	544(%rsp),%rbx
	movq	0+352(%rsp),%r9
	movq	8+352(%rsp),%r10
	leaq	0+352(%rsp),%rsi
	movq	16+352(%rsp),%r11
	movq	24+352(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	0(%rsp),%rax
	leaq	0(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	160(%rsp),%rax
	leaq	160(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montq




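# Inlined modular doubling (2*a mod p): double with carries, subtract
# p, and use cmovc to keep the unreduced value if the subtraction
# borrowed; feeds the usual x3 = R^2 - H^3 - 2*U1*H^2 computation.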
	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	96(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq

	leaq	128(%rsp),%rbx
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	192+0(%rsp),%rax
	movq	192+8(%rsp),%rbp
	movq	192+16(%rsp),%rcx
	movq	192+24(%rsp),%r10
	leaq	320(%rsp),%rdi

	call	__ecp_nistz256_subq

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	128(%rsp),%rax
	leaq	128(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	0+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	320(%rsp),%rax
	leaq	320(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	256(%rsp),%rbx
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

.byte	102,72,15,126,199

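# Constant-time result selection: %xmm5 is an all-ones mask when P1
# was the point at infinity and %xmm4 when P2 was, so the pandn/pand/
# por chains pick the computed sum, P2, or P1 without branching.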
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	352(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	352+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	544(%rsp),%xmm2
	pand	544+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	480(%rsp),%xmm2
	pand	480+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	320(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	320+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	512(%rsp),%xmm2
	pand	512+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

.Ladd_doneq:
	leaq	576+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_addq_epilogue:
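# The two-byte sequence 0xf3,0xc3 encodes "rep ret", used throughout
# this file to avoid the branch-prediction penalty of a bare ret on
# some older AMD cores.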
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_point_add,.-p256_point_add
.globl	p256_point_add_affine
.hidden p256_point_add_affine
.type	p256_point_add_affine,@function
.align	32
p256_point_add_affine:
.cfi_startproc
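# Dispatch on the third 32-bit word of OPENSSL_ia32cap_P: when both
# BMI2 (bit 8) and ADX (bit 19) are present, use the MULX/ADX code at
# .Lpoint_add_affinex instead.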
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_add_affinex
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$480+8,%rsp
.cfi_adjust_cfa_offset	32*15+8
.Ladd_affineq_body:

	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)
	movdqa	%xmm1,320+16(%rsp)
	movdqa	%xmm2,352(%rsp)
	movdqa	%xmm3,352+16(%rsp)
	movdqa	%xmm4,384(%rsp)
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm4,%xmm5

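# Infinity checks: the Z words of P1 were OR-ed into %xmm5 above, and
# the x/y words of the affine P2 are OR-ed into %xmm3 below; pcmpeqd
# against zero later converts each accumulator into an all-ones
# "is infinity" mask.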
	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199
	movdqa	%xmm2,448(%rsp)
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3

	leaq	64-0(%rsi),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rax

	movq	%r12,%r9
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4

	leaq	32-0(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+64(%rsp),%rax
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	0+96(%rsp),%rax
	movq	8+96(%rsp),%r14
	leaq	0+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	128(%rsp),%rax
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	320(%rsp),%rax
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	0+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq




	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq

	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subq

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	352(%rsp),%rax
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	0+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

.byte	102,72,15,126,199

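# Same constant-time selection as in p256_point_add, except that the
# affine addend has an implicit Z = 1, so .LONE_mont (1 in Montgomery
# form) stands in for its Z coordinate.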
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	.LONE_mont(%rip),%xmm2
	pand	.LONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

	leaq	480+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Ladd_affineq_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_point_add_affine,.-p256_point_add_affine
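# The __ecp_nistz256_*x helpers and p256_point_*x functions below
# mirror the *q versions above but use MULX/ADX arithmetic; the
# public entry points branch here when the CPU advertises BMI2 and
# ADX.  __ecp_nistz256_add_tox: a + b mod p, with the reduction
# folded in as a conditional subtraction of p.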
.type	__ecp_nistz256_add_tox,@function
.align	32
__ecp_nistz256_add_tox:
.cfi_startproc
	xorq	%r11,%r11
	adcq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	xorq	%r10,%r10
	sbbq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox

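# __ecp_nistz256_sub_fromx: a - b mod p, with a in
# %r12,%r13,%r8,%r9 and b at (%rbx).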
.type	__ecp_nistz256_sub_fromx,@function
.align	32
__ecp_nistz256_sub_fromx:
.cfi_startproc
	xorq	%r11,%r11
	sbbq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	$0,%r11

	xorq	%r10,%r10
	adcq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9

	btq	$0,%r11
	cmovncq	%rax,%r12
	cmovncq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovncq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovncq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx

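# __ecp_nistz256_subx: b - a mod p, subtracting the register operand
# from the one in %rax,%rbp,%rcx,%r10; the result stays in registers.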
.type	__ecp_nistz256_subx,@function
.align	32
__ecp_nistz256_subx:
.cfi_startproc
	xorq	%r11,%r11
	sbbq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	$0,%r11

	xorq	%r9,%r9
	adcq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10

	btq	$0,%r11
	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	cmovcq	%rcx,%r8
	cmovcq	%r10,%r9

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_subx,.-__ecp_nistz256_subx

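# __ecp_nistz256_mul_by_2x: 2*a mod p, same add-then-conditionally-
# subtract-p pattern as the q version.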
.type	__ecp_nistz256_mul_by_2x,@function
.align	32
__ecp_nistz256_mul_by_2x:
.cfi_startproc
	xorq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	xorq	%r10,%r10
	sbbq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
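# Point doubling, MULX/ADX path.  .Lpoint_double_shortcutx is entered
# directly by p256_point_addx when its two inputs turn out to be equal.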
.type	p256_point_doublex,@function
.align	32
p256_point_doublex:
.cfi_startproc
.Lpoint_doublex:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$160+8,%rsp
.cfi_adjust_cfa_offset	32*5+8
.Lpoint_doublex_body:

.Lpoint_double_shortcutx:
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199
.byte	102,73,15,110,202
.byte	102,73,15,110,211

	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-128(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	32(%rbx),%rdx
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-128(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215
	call	__ecp_nistz256_mul_montx
	call	__ecp_nistz256_mul_by_2x

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_tox

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207
	call	__ecp_nistz256_sqr_montx
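# Halve modulo p: unconditionally add p, keep the original value if
# it was already even (cmovz on the low bit), then shift the
# five-limb result right by one.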
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	movq	64(%rsp),%rdx
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_tox

	movq	96(%rsp),%rdx
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

	movq	0+32(%rsp),%rdx
	movq	8+32(%rsp),%r14
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199
	call	__ecp_nistz256_sqr_montx

	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromx

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subx

	movq	32(%rsp),%rdx
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-128(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

.byte	102,72,15,126,203
.byte	102,72,15,126,207
	call	__ecp_nistz256_sub_fromx

	leaq	160+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_doublex_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_point_doublex,.-p256_point_doublex
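# Full Jacobian point addition, MULX/ADX path; same flow and special
# cases (doubling shortcut, infinity handling) as p256_point_add.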
.type	p256_point_addx,@function
.align	32
p256_point_addx:
.cfi_startproc
.Lpoint_addx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$576+8,%rsp
.cfi_adjust_cfa_offset	32*18+8
.Lpoint_addx_body:

	movdqu	0(%rsi),%xmm0
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	%rsi,%rbx
	movq	%rdx,%rsi
	movdqa	%xmm0,384(%rsp)
	movdqa	%xmm1,384+16(%rsp)
	movdqa	%xmm2,416(%rsp)
	movdqa	%xmm3,416+16(%rsp)
	movdqa	%xmm4,448(%rsp)
	movdqa	%xmm5,448+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rsi),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rsi),%xmm3
	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,480(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,480+16(%rsp)
	movdqu	64(%rsi),%xmm0
	movdqu	80(%rsi),%xmm1
	movdqa	%xmm2,512(%rsp)
	movdqa	%xmm3,512+16(%rsp)
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm0,%xmm1
.byte	102,72,15,110,199

	leaq	64-128(%rsi),%rsi
	movq	%rdx,544+0(%rsp)
	movq	%r14,544+8(%rsp)
	movq	%r15,544+16(%rsp)
	movq	%r8,544+24(%rsp)
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm1,%xmm4
	por	%xmm1,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4
	movq	64+0(%rbx),%rdx
	movq	64+8(%rbx),%r14
	movq	64+16(%rbx),%r15
	movq	64+24(%rbx),%r8
.byte	102,72,15,110,203

	leaq	64-128(%rbx),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	544(%rsp),%rdx
	leaq	544(%rsp),%rbx
	movq	0+96(%rsp),%r9
	movq	8+96(%rsp),%r10
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r11
	movq	24+96(%rsp),%r12
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	416(%rsp),%rdx
	leaq	416(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	-128+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	512(%rsp),%rdx
	leaq	512(%rsp),%rbx
	movq	0+256(%rsp),%r9
	movq	8+256(%rsp),%r10
	leaq	-128+256(%rsp),%rsi
	movq	16+256(%rsp),%r11
	movq	24+256(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	224(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	orq	%r13,%r12
	movdqa	%xmm4,%xmm2
	orq	%r8,%r12
	orq	%r9,%r12
	por	%xmm5,%xmm2
.byte	102,73,15,110,220

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+96(%rsp),%r9
	movq	8+96(%rsp),%r10
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r11
	movq	24+96(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	480(%rsp),%rdx
	leaq	480(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	160(%rsp),%rbx
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	orq	%r13,%r12
	orq	%r8,%r12
	orq	%r9,%r12

.byte	102,73,15,126,208
.byte	102,73,15,126,217
	orq	%r8,%r12
.byte	0x3e
	jnz	.Ladd_proceedx



	testq	%r9,%r9
	jz	.Ladd_doublex





.byte	102,72,15,126,199
	pxor	%xmm0,%xmm0
	movdqu	%xmm0,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	movdqu	%xmm0,32(%rdi)
	movdqu	%xmm0,48(%rdi)
	movdqu	%xmm0,64(%rdi)
	movdqu	%xmm0,80(%rdi)
	jmp	.Ladd_donex

.align	32
.Ladd_doublex:
.byte	102,72,15,126,206
.byte	102,72,15,126,199
	addq	$416,%rsp
.cfi_adjust_cfa_offset	-416
	jmp	.Lpoint_double_shortcutx
.cfi_adjust_cfa_offset	416

.align	32
.Ladd_proceedx:
	movq	0+64(%rsp),%rdx
	movq	8+64(%rsp),%r14
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	544(%rsp),%rdx
	leaq	544(%rsp),%rbx
	movq	0+352(%rsp),%r9
	movq	8+352(%rsp),%r10
	leaq	-128+352(%rsp),%rsi
	movq	16+352(%rsp),%r11
	movq	24+352(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	0(%rsp),%rdx
	leaq	0(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	160(%rsp),%rdx
	leaq	160(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montx




	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	96(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subx

	leaq	128(%rsp),%rbx
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	192+0(%rsp),%rax
	movq	192+8(%rsp),%rbp
	movq	192+16(%rsp),%rcx
	movq	192+24(%rsp),%r10
	leaq	320(%rsp),%rdi

	call	__ecp_nistz256_subx

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	128(%rsp),%rdx
	leaq	128(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	-128+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	320(%rsp),%rdx
	leaq	320(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	256(%rsp),%rbx
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

.byte	102,72,15,126,199

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	352(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	352+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	544(%rsp),%xmm2
	pand	544+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	480(%rsp),%xmm2
	pand	480+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	320(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	320+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	512(%rsp),%xmm2
	pand	512+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

.Ladd_donex:
	leaq	576+56(%rssp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_addx_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_point_addx,.-p256_point_addx
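# Mixed Jacobian-plus-affine point addition, MULX/ADX path.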
.type	p256_point_add_affinex,@function
.align	32
p256_point_add_affinex:
.cfi_startproc
.Lpoint_add_affinex:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$480+8,%rsp
.cfi_adjust_cfa_offset	32*15+8
.Ladd_affinex_body:

	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)
	movdqa	%xmm1,320+16(%rsp)
	movdqa	%xmm2,352(%rsp)
	movdqa	%xmm3,352+16(%rsp)
	movdqa	%xmm4,384(%rsp)
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199
	movdqa	%xmm2,448(%rsp)
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3

	leaq	64-128(%rsi),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rdx

	movq	%r12,%r9
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4

	leaq	32-128(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+64(%rsp),%rdx
	movq	8+64(%rsp),%r14
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	0+96(%rsp),%rdx
	movq	8+96(%rsp),%r14
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	128(%rsp),%rdx
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	320(%rsp),%rdx
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	-128+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx




	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subx

	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subx

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	352(%rsp),%rdx
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	-128+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	96(%rsp),%rdx
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

.byte	102,72,15,126,199

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	.LONE_mont(%rip),%xmm2
	pand	.LONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

	leaq	480+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Ladd_affinex_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	p256_point_add_affinex,.-p256_point_add_affinex
#endif
.section	.note.GNU-stack,"",@progbits