# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P


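# Constant pool. .Lpoly is the P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1
# and .LONE_mont is 1 in Montgomery form (2^256 mod p), both stored with the
# least-significant quadword first. .LOne/.LTwo/.LThree are broadcast dword
# vectors used as index counters by the SSE2/AVX2 table-select routines
# further down.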
.align	64
.Lpoly:
.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

.LOne:
.long	1,1,1,1,1,1,1,1
.LTwo:
.long	2,2,2,2,2,2,2,2
.LThree:
.long	3,3,3,3,3,3,3,3
.LONE_mont:
.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe


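# .Lord is the P-256 group order n, and .LordK should be the Montgomery
# constant -n^-1 mod 2^64 consumed by the _ord_ routines below.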
.Lord:
.quad	0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad	0xccd1c8aaee00bc4f



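# ecp_nistz256_neg(res, a): res = -a mod p, in constant time. The code
# computes 0 - a, then adds p back and uses cmovz to keep the un-adjusted
# value when the subtraction did not borrow (i.e. a was zero). The
# .byte 0xf3,0xc3 epilogues throughout this file are explicitly encoded
# "repz ret" instructions.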
.globl	ecp_nistz256_neg
.hidden ecp_nistz256_neg
.type	ecp_nistz256_neg,@function
.align	32
ecp_nistz256_neg:
.cfi_startproc
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-16
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-24
.Lneg_body:

	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r13,%r13

	subq	0(%rsi),%r8
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r8,%rax
	sbbq	24(%rsi),%r11
	leaq	.Lpoly(%rip),%rsi
	movq	%r9,%rdx
	sbbq	$0,%r13

	addq	0(%rsi),%r8
	movq	%r10,%rcx
	adcq	8(%rsi),%r9
	adcq	16(%rsi),%r10
	movq	%r11,%r12
	adcq	24(%rsi),%r11
	testq	%r13,%r13

	cmovzq	%rax,%r8
	cmovzq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovzq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovzq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	movq	0(%rsp),%r13
.cfi_restore	%r13
	movq	8(%rsp),%r12
.cfi_restore	%r12
	leaq	16(%rsp),%rsp
.cfi_adjust_cfa_offset	-16
.Lneg_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_neg,.-ecp_nistz256_neg





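# ecp_nistz256_ord_mul_mont(res, a, b): Montgomery multiplication modulo
# the group order n, res = a*b*2^-256 mod n. The 0x80100 mask tests the
# BMI2 and ADX feature bits in OPENSSL_ia32cap_P; when both are set,
# control transfers to the MULX/ADCX/ADOX variant at
# .Lecp_nistz256_ord_mul_montx.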
.globl	ecp_nistz256_ord_mul_mont
.hidden ecp_nistz256_ord_mul_mont
.type	ecp_nistz256_ord_mul_mont,@function
.align	32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_mul_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mul_body:

	movq	0(%rdx),%rax
	movq	%rdx,%rbx
	leaq	.Lord(%rip),%r14
	movq	.LordK(%rip),%r15


	movq	%rax,%rcx
	mulq	0(%rsi)
	movq	%rax,%r8
	movq	%rcx,%rax
	movq	%rdx,%r9

	mulq	8(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	16(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r8,%r13
	imulq	%r15,%r8

	movq	%rdx,%r11
	mulq	24(%rsi)
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	0(%r14)
	movq	%r8,%rbp
	addq	%rax,%r13
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%r8

	mulq	8(%r14)
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	%rdx,%r10
	movq	%rbp,%rdx
	adcq	$0,%r8

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	8(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r8,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r9,%rcx
	imulq	%r15,%r9

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	xorq	%r8,%r8
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8


	mulq	0(%r14)
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	%r9,%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%r9

	mulq	8(%r14)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	%rdx,%r11
	movq	%rbp,%rdx
	adcq	$0,%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r12
	movq	16(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r9,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r10,%rcx
	imulq	%r15,%r10

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	xorq	%r9,%r9
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9


	mulq	0(%r14)
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	%r10,%rax
	adcq	%rdx,%rcx

	subq	%r10,%r12
	sbbq	$0,%r10

	mulq	8(%r14)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	%rdx,%r12
	movq	%rbp,%rdx
	adcq	$0,%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r13
	movq	24(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r10,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r11,%rcx
	imulq	%r15,%r11

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r8
	adcq	$0,%rdx
	xorq	%r10,%r10
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10


	mulq	0(%r14)
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	%r11,%rax
	adcq	%rdx,%rcx

	subq	%r11,%r13
	sbbq	$0,%r11

	mulq	8(%r14)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	%rdx,%r13
	movq	%rbp,%rdx
	adcq	$0,%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	sbbq	%rdx,%rbp

	addq	%r11,%r8
	adcq	%rbp,%r9
	adcq	$0,%r10


	movq	%r12,%rsi
	subq	0(%r14),%r12
	movq	%r13,%r11
	sbbq	8(%r14),%r13
	movq	%r8,%rcx
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rsi,%r12
	cmovcq	%r11,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mul_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont






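# ecp_nistz256_ord_sqr_mont(res, a, rep): performs rep consecutive
# Montgomery squarings modulo n (the repeat count arrives in %rdx and is
# kept in %rbx as the loop counter). The raw .byte 102,7x,15,110/126,...
# sequences in the loop encode movq moves between general-purpose and XMM
# registers, emitted as bytes for the benefit of older assemblers; they
# cache the input limbs in xmm1-xmm3 across iterations.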
.globl	ecp_nistz256_ord_sqr_mont
.hidden ecp_nistz256_ord_sqr_mont
.type	ecp_nistz256_ord_sqr_mont,@function
.align	32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_sqr_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqr_body:

	movq	0(%rsi),%r8
	movq	8(%rsi),%rax
	movq	16(%rsi),%r14
	movq	24(%rsi),%r15
	leaq	.Lord(%rip),%rsi
	movq	%rdx,%rbx
	jmp	.Loop_ord_sqr

.align	32
.Loop_ord_sqr:

	movq	%rax,%rbp
	mulq	%r8
	movq	%rax,%r9
.byte	102,72,15,110,205
	movq	%r14,%rax
	movq	%rdx,%r10

	mulq	%r8
	addq	%rax,%r10
	movq	%r15,%rax
.byte	102,73,15,110,214
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r8
	addq	%rax,%r11
	movq	%r15,%rax
.byte	102,73,15,110,223
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	%r14
	movq	%rax,%r13
	movq	%r14,%rax
	movq	%rdx,%r14


	mulq	%rbp
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r15

	mulq	%rbp
	addq	%rax,%r12
	adcq	$0,%rdx

	addq	%r15,%r12
	adcq	%rdx,%r13
	adcq	$0,%r14


	xorq	%r15,%r15
	movq	%r8,%rax
	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15


	mulq	%rax
	movq	%rax,%r8
.byte	102,72,15,126,200
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r9
	adcq	%rax,%r10
.byte	102,72,15,126,208
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r11
	adcq	%rax,%r12
.byte	102,72,15,126,216
	adcq	$0,%rdx
	movq	%rdx,%rbp

	movq	%r8,%rcx
	imulq	32(%rsi),%r8

	mulq	%rax
	addq	%rbp,%r13
	adcq	%rax,%r14
	movq	0(%rsi),%rax
	adcq	%rdx,%r15


	mulq	%r8
	movq	%r8,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%rbp

	mulq	%r8
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%r8,%rax
	adcq	%rdx,%r10
	movq	%r8,%rdx
	adcq	$0,%rbp

	movq	%r9,%rcx
	imulq	32(%rsi),%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	0(%rsi),%rax
	sbbq	%rdx,%r8

	addq	%rbp,%r11
	adcq	$0,%r8


	mulq	%r9
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%rbp

	mulq	%r9
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%r9,%rax
	adcq	%rdx,%r11
	movq	%r9,%rdx
	adcq	$0,%rbp

	movq	%r10,%rcx
	imulq	32(%rsi),%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	movq	0(%rsi),%rax
	sbbq	%rdx,%r9

	addq	%rbp,%r8
	adcq	$0,%r9


	mulq	%r10
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r10,%r8
	sbbq	$0,%rbp

	mulq	%r10
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%r10,%rax
	adcq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%rbp

	movq	%r11,%rcx
	imulq	32(%rsi),%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r9
	movq	0(%rsi),%rax
	sbbq	%rdx,%r10

	addq	%rbp,%r9
	adcq	$0,%r10


	mulq	%r11
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r11,%r9
	sbbq	$0,%rbp

	mulq	%r11
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	movq	%r11,%rdx
	adcq	$0,%rbp

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r10
	sbbq	%rdx,%r11

	addq	%rbp,%r10
	adcq	$0,%r11


	xorq	%rdx,%rdx
	addq	%r12,%r8
	adcq	%r13,%r9
	movq	%r8,%r12
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%rax
	adcq	$0,%rdx


	subq	0(%rsi),%r8
	movq	%r10,%r14
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r15
	sbbq	24(%rsi),%r11
	sbbq	$0,%rdx

	cmovcq	%r12,%r8
	cmovncq	%r9,%rax
	cmovncq	%r10,%r14
	cmovncq	%r11,%r15

	decq	%rbx
	jnz	.Loop_ord_sqr

	movq	%r8,0(%rdi)
	movq	%rax,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r14,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r15,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqr_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont

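# MULX/ADCX/ADOX (BMI2+ADX) variant of ecp_nistz256_ord_mul_mont, reached
# via the CPU-feature dispatch above. %rsi and the .Lord pointer in %r14
# are biased by -128, so the 0..24+128(...) operands address the limbs.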
.type	ecp_nistz256_ord_mul_montx,@function
.align	32
ecp_nistz256_ord_mul_montx:
.cfi_startproc
.Lecp_nistz256_ord_mul_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mulx_body:

	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi
	leaq	.Lord-128(%rip),%r14
	movq	.LordK(%rip),%r15


	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	mulxq	%r11,%rbp,%r11
	addq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	mulxq	%r15,%rdx,%rax
	adcq	%rbp,%r10
	adcq	%rcx,%r11
	adcq	$0,%r12


	xorq	%r13,%r13
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	24+128(%r14),%rcx,%rbp
	movq	8(%rbx),%rdx
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12
	adcxq	%r8,%r12
	adoxq	%r8,%r13
	adcq	$0,%r13


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%r14),%rcx,%rbp
	movq	16(%rbx),%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcxq	%r9,%r13
	adoxq	%r9,%r8
	adcq	$0,%r8


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%r14),%rcx,%rbp
	movq	24(%rbx),%rdx
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8
	adcxq	%r10,%r8
	adoxq	%r10,%r9
	adcq	$0,%r9


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%r14),%rcx,%rbp
	leaq	128(%r14),%r14
	movq	%r12,%rbx
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	movq	%r13,%rdx
	adcxq	%r11,%r9
	adoxq	%r11,%r10
	adcq	$0,%r10



	movq	%r8,%rcx
	subq	0(%r14),%r12
	sbbq	8(%r14),%r13
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mulx_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx

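# MULX/ADCX/ADOX variant of ecp_nistz256_ord_sqr_mont; same contract as
# the generic version above.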
.type	ecp_nistz256_ord_sqr_montx,@function
.align	32
ecp_nistz256_ord_sqr_montx:
.cfi_startproc
.Lecp_nistz256_ord_sqr_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqrx_body:

	movq	%rdx,%rbx
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	.Lord(%rip),%rsi
	jmp	.Loop_ord_sqrx

.align	32
.Loop_ord_sqrx:
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	movq	%rdx,%rax
.byte	102,73,15,110,206
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	addq	%rcx,%r10
.byte	102,73,15,110,215
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13

	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13

	mulxq	%r8,%rcx,%r14
	movq	%rax,%rdx
.byte	102,73,15,110,216
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14


	mulxq	%rdx,%r8,%rbp
.byte	102,72,15,126,202
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
.byte	102,72,15,126,210
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
	mulxq	%rdx,%rcx,%rbp
.byte	0x67
.byte	102,72,15,126,218
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	adoxq	%rbp,%r13
	mulxq	%rdx,%rcx,%rax
	adoxq	%rcx,%r14
	adoxq	%rax,%r15


	movq	%r8,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	xorq	%rax,%rax
	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	adcxq	%rax,%r8


	movq	%r9,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	adoxq	%rax,%r9


	movq	%r10,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	adcxq	%rax,%r10


	movq	%r11,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	adoxq	%rax,%r11


	addq	%r8,%r12
	adcq	%r13,%r9
	movq	%r12,%rdx
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%r14
	adcq	$0,%rax


	subq	0(%rsi),%r12
	movq	%r10,%r15
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r8
	sbbq	24(%rsi),%r11
	sbbq	$0,%rax

	cmovncq	%r12,%rdx
	cmovncq	%r9,%r14
	cmovncq	%r10,%r15
	cmovncq	%r11,%r8

	decq	%rbx
	jnz	.Loop_ord_sqrx

	movq	%rdx,0(%rdi)
	movq	%r14,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r15,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r8,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqrx_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx






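# ecp_nistz256_mul_mont(res, a, b): Montgomery multiplication modulo p,
# res = a*b*2^-256 mod p. Shares the BMI2+ADX feature test with the _ord_
# functions and dispatches to __ecp_nistz256_mul_montq or _montx.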
.globl	ecp_nistz256_mul_mont
.hidden ecp_nistz256_mul_mont
.type	ecp_nistz256_mul_mont,@function
.align	32
ecp_nistz256_mul_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
.Lmul_mont:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lmul_body:
	cmpl	$0x80100,%ecx
	je	.Lmul_montx
	movq	%rdx,%rbx
	movq	0(%rdx),%rax
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12

	call	__ecp_nistz256_mul_montq
	jmp	.Lmul_mont_done

.align	32
.Lmul_montx:
	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_mul_montx
.Lmul_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lmul_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont

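# Inner multiplication routine, also called by the point routines below.
# On entry: %rax = b[0], %rbx -> b, %rsi -> a, %r9-%r12 = a[0..3]; the
# reduced product is stored at (%rdi). Each reduction step exploits the
# sparse form of p, folding limb*2^32 (the shlq/shrq pairs) and limb*p[3]
# (mulq %r15) into the accumulator instead of a full 4-limb multiply.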
.type	__ecp_nistz256_mul_montq,@function
.align	32
__ecp_nistz256_mul_montq:
.cfi_startproc


	movq	%rax,%rbp
	mulq	%r9
	movq	.Lpoly+8(%rip),%r14
	movq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%r9

	mulq	%r10
	movq	.Lpoly+24(%rip),%r15
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%r11
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r12
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	xorq	%r13,%r13
	movq	%rdx,%r12










	movq	%r8,%rbp
	shlq	$32,%r8
	mulq	%r15
	shrq	$32,%rbp
	addq	%r8,%r9
	adcq	%rbp,%r10
	adcq	%rax,%r11
	movq	8(%rbx),%rax
	adcq	%rdx,%r12
	adcq	$0,%r13
	xorq	%r8,%r8



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8



	movq	%r9,%rbp
	shlq	$32,%r9
	mulq	%r15
	shrq	$32,%rbp
	addq	%r9,%r10
	adcq	%rbp,%r11
	adcq	%rax,%r12
	movq	16(%rbx),%rax
	adcq	%rdx,%r13
	adcq	$0,%r8
	xorq	%r9,%r9



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9



	movq	%r10,%rbp
	shlq	$32,%r10
	mulq	%r15
	shrq	$32,%rbp
	addq	%r10,%r11
	adcq	%rbp,%r12
	adcq	%rax,%r13
	movq	24(%rbx),%rax
	adcq	%rdx,%r8
	adcq	$0,%r9
	xorq	%r10,%r10



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10



	movq	%r11,%rbp
	shlq	$32,%r11
	mulq	%r15
	shrq	$32,%rbp
	addq	%r11,%r12
	adcq	%rbp,%r13
	movq	%r12,%rcx
	adcq	%rax,%r8
	adcq	%rdx,%r9
	movq	%r13,%rbp
	adcq	$0,%r10



	subq	$-1,%r12
	movq	%r8,%rbx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rdx
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rcx,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rbx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rdx,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq








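# ecp_nistz256_sqr_mont(res, a): Montgomery squaring modulo p, with the
# same BMI2+ADX dispatch as ecp_nistz256_mul_mont.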
.globl	ecp_nistz256_sqr_mont
.hidden ecp_nistz256_sqr_mont
.type	ecp_nistz256_sqr_mont,@function
.align	32
ecp_nistz256_sqr_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsqr_body:
	cmpl	$0x80100,%ecx
	je	.Lsqr_montx
	movq	0(%rsi),%rax
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8

	call	__ecp_nistz256_sqr_montq
	jmp	.Lsqr_mont_done

.align	32
.Lsqr_montx:
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_sqr_montx
.Lsqr_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lsqr_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont

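# Inner squaring routine. On entry: %rax = a[0], %r14/%r15/%r8 = a[1..3],
# %rsi -> a. Computes the cross products, doubles them, adds the squared
# limbs, then runs four rounds of reduction against .Lpoly before the
# final conditional subtraction of p.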
.type	__ecp_nistz256_sqr_montq,@function
.align	32
__ecp_nistz256_sqr_montq:
.cfi_startproc
	movq	%rax,%r13
	mulq	%r14
	movq	%rax,%r9
	movq	%r15,%rax
	movq	%rdx,%r10

	mulq	%r13
	addq	%rax,%r10
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r13
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	%r14
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%r14
	addq	%rax,%r12
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbp,%r12
	movq	%rdx,%r13
	adcq	$0,%r13


	mulq	%r15
	xorq	%r15,%r15
	addq	%rax,%r13
	movq	0(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	mulq	%rax
	movq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r9
	adcq	%rax,%r10
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r11
	adcq	%rax,%r12
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r13
	adcq	%rax,%r14
	movq	%r8,%rax
	adcq	%rdx,%r15

	movq	.Lpoly+8(%rip),%rsi
	movq	.Lpoly+24(%rip),%rbp




	movq	%r8,%rcx
	shlq	$32,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r8,%r9
	adcq	%rcx,%r10
	adcq	%rax,%r11
	movq	%r9,%rax
	adcq	$0,%rdx



	movq	%r9,%rcx
	shlq	$32,%r9
	movq	%rdx,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r9,%r10
	adcq	%rcx,%r11
	adcq	%rax,%r8
	movq	%r10,%rax
	adcq	$0,%rdx



	movq	%r10,%rcx
	shlq	$32,%r10
	movq	%rdx,%r9
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r10,%r11
	adcq	%rcx,%r8
	adcq	%rax,%r9
	movq	%r11,%rax
	adcq	$0,%rdx



	movq	%r11,%rcx
	shlq	$32,%r11
	movq	%rdx,%r10
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r11,%r8
	adcq	%rcx,%r9
	adcq	%rax,%r10
	adcq	$0,%rdx
	xorq	%r11,%r11



	addq	%r8,%r12
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%rdx,%r15
	movq	%r13,%r9
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%rcx
	sbbq	%rbp,%r15
	sbbq	$0,%r11

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%rcx,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
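# MULX/ADCX/ADOX inner multiplication routine. On entry: %rdx = b[0],
# %rbx -> b, %r9-%r12 = a[0..3], and %rsi biased by -128 so that
# 0..24+128(%rsi) address a[0..3].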
.type	__ecp_nistz256_mul_montx,@function
.align	32
__ecp_nistz256_mul_montx:
.cfi_startproc


	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	movq	$32,%r14
	xorq	%r13,%r13
	mulxq	%r11,%rbp,%r11
	movq	.Lpoly+24(%rip),%r15
	adcq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	adcq	%rbp,%r10
	shlxq	%r14,%r8,%rbp
	adcq	%rcx,%r11
	shrxq	%r14,%r8,%rcx
	adcq	$0,%r12



	addq	%rbp,%r9
	adcq	%rcx,%r10

	mulxq	%r15,%rcx,%rbp
	movq	8(%rbx),%rdx
	adcq	%rcx,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13
	xorq	%r8,%r8



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	adcxq	%rcx,%r12
	shlxq	%r14,%r9,%rcx
	adoxq	%rbp,%r13
	shrxq	%r14,%r9,%rbp

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8



	addq	%rcx,%r10
	adcq	%rbp,%r11

	mulxq	%r15,%rcx,%rbp
	movq	16(%rbx),%rdx
	adcq	%rcx,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8
	xorq	%r9,%r9



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	adcxq	%rcx,%r13
	shlxq	%r14,%r10,%rcx
	adoxq	%rbp,%r8
	shrxq	%r14,%r10,%rbp

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9



	addq	%rcx,%r11
	adcq	%rbp,%r12

	mulxq	%r15,%rcx,%rbp
	movq	24(%rbx),%rdx
	adcq	%rcx,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9
	xorq	%r10,%r10



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	adcxq	%rcx,%r8
	shlxq	%r14,%r11,%rcx
	adoxq	%rbp,%r9
	shrxq	%r14,%r11,%rbp

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10



	addq	%rcx,%r12
	adcq	%rbp,%r13

	mulxq	%r15,%rcx,%rbp
	movq	%r12,%rbx
	movq	.Lpoly+8(%rip),%r14
	adcq	%rcx,%r8
	movq	%r13,%rdx
	adcq	%rbp,%r9
	adcq	$0,%r10



	xorl	%eax,%eax
	movq	%r8,%rcx
	sbbq	$-1,%r12
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rbp
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rbp,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx

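# MULX/ADCX/ADOX inner squaring routine. On entry: %rdx = a[0],
# %r14/%r15/%r8 = a[1..3], and %rsi biased by -128 as above.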
.type	__ecp_nistz256_sqr_montx,@function
.align	32
__ecp_nistz256_sqr_montx:
.cfi_startproc
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	xorl	%eax,%eax
	adcq	%rcx,%r10
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13


	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13


	mulxq	%r8,%rcx,%r14
	movq	0+128(%rsi),%rdx
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14

	mulxq	%rdx,%r8,%rbp
	movq	8+128(%rsi),%rdx
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
	movq	16+128(%rsi),%rdx
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
.byte	0x67
	mulxq	%rdx,%rcx,%rbp
	movq	24+128(%rsi),%rdx
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	movq	$32,%rsi
	adoxq	%rbp,%r13
.byte	0x67,0x67
	mulxq	%rdx,%rcx,%rax
	movq	.Lpoly+24(%rip),%rdx
	adoxq	%rcx,%r14
	shlxq	%rsi,%r8,%rcx
	adoxq	%rax,%r15
	shrxq	%rsi,%r8,%rax
	movq	%rdx,%rbp


	addq	%rcx,%r9
	adcq	%rax,%r10

	mulxq	%r8,%rcx,%r8
	adcq	%rcx,%r11
	shlxq	%rsi,%r9,%rcx
	adcq	$0,%r8
	shrxq	%rsi,%r9,%rax


	addq	%rcx,%r10
	adcq	%rax,%r11

	mulxq	%r9,%rcx,%r9
	adcq	%rcx,%r8
	shlxq	%rsi,%r10,%rcx
	adcq	$0,%r9
	shrxq	%rsi,%r10,%rax


	addq	%rcx,%r11
	adcq	%rax,%r8

	mulxq	%r10,%rcx,%r10
	adcq	%rcx,%r9
	shlxq	%rsi,%r11,%rcx
	adcq	$0,%r10
	shrxq	%rsi,%r11,%rax


	addq	%rcx,%r8
	adcq	%rax,%r9

	mulxq	%r11,%rcx,%r11
	adcq	%rcx,%r10
	adcq	$0,%r11

	xorq	%rdx,%rdx
	addq	%r8,%r12
	movq	.Lpoly+8(%rip),%rsi
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%r11,%r15
	movq	%r13,%r9
	adcq	$0,%rdx

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%r11
	sbbq	%rbp,%r15
	sbbq	$0,%rdx

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%r11,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx


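# ecp_nistz256_select_w5(val, in_t, index): constant-time lookup for the
# 5-bit window. All 16 table entries (96 bytes each) are read and masked
# with a pcmpeqd-derived select, so the memory access pattern is
# independent of the secret index. testl $32 checks the AVX2 bit in
# OPENSSL_ia32cap_P and diverts to the AVX2 version when available.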
.globl	ecp_nistz256_select_w5
.hidden ecp_nistz256_select_w5
.type	ecp_nistz256_select_w5,@function
.align	32
ecp_nistz256_select_w5:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w5
	movdqa	.LOne(%rip),%xmm0
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7

	movdqa	%xmm0,%xmm8
	pshufd	$0,%xmm1,%xmm1

	movq	$16,%rax
.Lselect_loop_sse_w5:

	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	pcmpeqd	%xmm1,%xmm15

	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	movdqa	64(%rsi),%xmm13
	movdqa	80(%rsi),%xmm14
	leaq	96(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	pand	%xmm15,%xmm13
	por	%xmm12,%xmm5
	pand	%xmm15,%xmm14
	por	%xmm13,%xmm6
	por	%xmm14,%xmm7

	decq	%rax
	jnz	.Lselect_loop_sse_w5

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	movdqu	%xmm6,64(%rdi)
	movdqu	%xmm7,80(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w5:
.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5


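# ecp_nistz256_select_w7(val, in_t, index): constant-time lookup for the
# 7-bit window, scanning all 64 affine entries (64 bytes each) with the
# same masking scheme.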
.globl	ecp_nistz256_select_w7
.hidden ecp_nistz256_select_w7
.type	ecp_nistz256_select_w7,@function
.align	32
ecp_nistz256_select_w7:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w7
	movdqa	.LOne(%rip),%xmm8
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5

	movdqa	%xmm8,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	$64,%rax

.Lselect_loop_sse_w7:
	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	pcmpeqd	%xmm1,%xmm15
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	leaq	64(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	prefetcht0	255(%rsi)
	por	%xmm12,%xmm5

	decq	%rax
	jnz	.Lselect_loop_sse_w7

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w7:
.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7

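# AVX2 version of the w5 lookup: two 96-byte entries per iteration, eight
# iterations in total.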
.type	ecp_nistz256_avx2_select_w5,@function
.align	32
ecp_nistz256_avx2_select_w5:
.cfi_startproc
.Lavx2_select_w5:
	vzeroupper
	vmovdqa	.LTwo(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3
	vpxor	%ymm4,%ymm4,%ymm4

	vmovdqa	.LOne(%rip),%ymm5
	vmovdqa	.LTwo(%rip),%ymm10

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$8,%rax
.Lselect_loop_avx2_w5:

	vmovdqa	0(%rsi),%ymm6
	vmovdqa	32(%rsi),%ymm7
	vmovdqa	64(%rsi),%ymm8

	vmovdqa	96(%rsi),%ymm11
	vmovdqa	128(%rsi),%ymm12
	vmovdqa	160(%rsi),%ymm13

	vpcmpeqd	%ymm1,%ymm5,%ymm9
	vpcmpeqd	%ymm1,%ymm10,%ymm14

	vpaddd	%ymm0,%ymm5,%ymm5
	vpaddd	%ymm0,%ymm10,%ymm10
	leaq	192(%rsi),%rsi

	vpand	%ymm9,%ymm6,%ymm6
	vpand	%ymm9,%ymm7,%ymm7
	vpand	%ymm9,%ymm8,%ymm8
	vpand	%ymm14,%ymm11,%ymm11
	vpand	%ymm14,%ymm12,%ymm12
	vpand	%ymm14,%ymm13,%ymm13

	vpxor	%ymm6,%ymm2,%ymm2
	vpxor	%ymm7,%ymm3,%ymm3
	vpxor	%ymm8,%ymm4,%ymm4
	vpxor	%ymm11,%ymm2,%ymm2
	vpxor	%ymm12,%ymm3,%ymm3
	vpxor	%ymm13,%ymm4,%ymm4

	decq	%rax
	jnz	.Lselect_loop_avx2_w5

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vmovdqu	%ymm4,64(%rdi)
	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w5:
.size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5


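# AVX2 version of the w7 lookup: three 64-byte entries per iteration for
# 21 iterations, plus one tail entry after the loop, covering all 64
# slots.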
.globl	ecp_nistz256_avx2_select_w7
.hidden ecp_nistz256_avx2_select_w7
.type	ecp_nistz256_avx2_select_w7,@function
.align	32
ecp_nistz256_avx2_select_w7:
.cfi_startproc
.Lavx2_select_w7:
	vzeroupper
	vmovdqa	.LThree(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3

	vmovdqa	.LOne(%rip),%ymm4
	vmovdqa	.LTwo(%rip),%ymm8
	vmovdqa	.LThree(%rip),%ymm12

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1


	movq	$21,%rax
.Lselect_loop_avx2_w7:

	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vmovdqa	64(%rsi),%ymm9
	vmovdqa	96(%rsi),%ymm10

	vmovdqa	128(%rsi),%ymm13
	vmovdqa	160(%rsi),%ymm14

	vpcmpeqd	%ymm1,%ymm4,%ymm7
	vpcmpeqd	%ymm1,%ymm8,%ymm11
	vpcmpeqd	%ymm1,%ymm12,%ymm15

	vpaddd	%ymm0,%ymm4,%ymm4
	vpaddd	%ymm0,%ymm8,%ymm8
	vpaddd	%ymm0,%ymm12,%ymm12
	leaq	192(%rsi),%rsi

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm11,%ymm9,%ymm9
	vpand	%ymm11,%ymm10,%ymm10
	vpand	%ymm15,%ymm13,%ymm13
	vpand	%ymm15,%ymm14,%ymm14

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm13,%ymm2,%ymm2
	vpxor	%ymm14,%ymm3,%ymm3

	decq	%rax
	jnz	.Lselect_loop_avx2_w7


	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vpcmpeqd	%ymm1,%ymm4,%ymm7

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w7:
.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
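# Modular-arithmetic helpers for the point routines. Each operates on the
# four limbs in %r12,%r13,%r8,%r9 with %r14 = p[1] and %r15 = p[3] cached
# by the caller, reducing modulo p with conditional moves.
# __ecp_nistz256_add_toq: (%rdi) = (%r12..%r9) + (%rbx) mod p.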
.type	__ecp_nistz256_add_toq,@function
.align	32
__ecp_nistz256_add_toq:
.cfi_startproc
	xorq	%r11,%r11
	addq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq

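# __ecp_nistz256_sub_fromq: (%rdi) = (%r12..%r9) - (%rbx) mod p; p is
# added back (selected via cmovz on the borrow mask) when the subtraction
# underflows.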
.type	__ecp_nistz256_sub_fromq,@function
.align	32
__ecp_nistz256_sub_fromq:
.cfi_startproc
	subq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	%r11,%r11

	addq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9
	testq	%r11,%r11

	cmovzq	%rax,%r12
	cmovzq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovzq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovzq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq

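# __ecp_nistz256_subq: register-only subtraction mod p,
# (%r12,%r13,%r8,%r9) = (%rax,%rbp,%rcx,%r10) - (%r12,%r13,%r8,%r9);
# the result stays in registers for the caller.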
.type	__ecp_nistz256_subq,@function
.align	32
__ecp_nistz256_subq:
.cfi_startproc
	subq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	%r11,%r11

	addq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10
	testq	%r11,%r11

	cmovnzq	%rax,%r12
	cmovnzq	%rbp,%r13
	cmovnzq	%rcx,%r8
	cmovnzq	%r10,%r9

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq

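# __ecp_nistz256_mul_by_2q: (%rdi) = 2*(%r12..%r9) mod p.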
.type	__ecp_nistz256_mul_by_2q,@function
.align	32
__ecp_nistz256_mul_by_2q:
.cfi_startproc
	xorq	%r11,%r11
	addq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
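# ecp_nistz256_point_double(r, a): Jacobian point doubling over a
# 5x32-byte stack frame. .Lpoint_double_shortcutq is re-entered from
# ecp_nistz256_point_add when the two inputs turn out to be equal. The
# shrq/shlq $63 block implements a constant-time halving modulo p:
# conditionally add p when the value is odd, then shift the 256-bit
# quantity right by one across the limbs.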
.globl	ecp_nistz256_point_double
.hidden ecp_nistz256_point_double
.type	ecp_nistz256_point_double,@function
.align	32
ecp_nistz256_point_double:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_doublex
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$160+8,%rsp
.cfi_adjust_cfa_offset	32*5+8
.Lpoint_doubleq_body:

.Lpoint_double_shortcutq:
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199
.byte	102,73,15,110,202
.byte	102,73,15,110,211

	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-0(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	32(%rbx),%rax
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-0(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215
	call	__ecp_nistz256_mul_montq
	call	__ecp_nistz256_mul_by_2q

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207
	call	__ecp_nistz256_sqr_montq
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	movq	64(%rsp),%rax
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	0+32(%rsp),%rax
	movq	8+32(%rsp),%r14
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199
	call	__ecp_nistz256_sqr_montq

	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subq

	movq	32(%rsp),%rax
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-0(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

.byte	102,72,15,126,203
.byte	102,72,15,126,207
	call	__ecp_nistz256_sub_fromq

	leaq	160+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_doubleq_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
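# ecp_nistz256_point_add(r, a, b): full Jacobian point addition over an
# 18x32-byte frame. The SSE compare blocks derive all-ones/all-zero masks
# for "input is infinity"; equal inputs branch to .Ladd_doubleq and on to
# the doubling shortcut, and the final pand/pandn cascades blend the
# computed sum with the a and b inputs so the special cases are handled
# without secret-dependent stores.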
2649.globl	ecp_nistz256_point_add
2650.hidden ecp_nistz256_point_add
2651.type	ecp_nistz256_point_add,@function
2652.align	32
2653ecp_nistz256_point_add:
2654.cfi_startproc
2655	leaq	OPENSSL_ia32cap_P(%rip),%rcx
2656	movq	8(%rcx),%rcx
2657	andl	$0x80100,%ecx
2658	cmpl	$0x80100,%ecx
2659	je	.Lpoint_addx
2660	pushq	%rbp
2661.cfi_adjust_cfa_offset	8
2662.cfi_offset	%rbp,-16
2663	pushq	%rbx
2664.cfi_adjust_cfa_offset	8
2665.cfi_offset	%rbx,-24
2666	pushq	%r12
2667.cfi_adjust_cfa_offset	8
2668.cfi_offset	%r12,-32
2669	pushq	%r13
2670.cfi_adjust_cfa_offset	8
2671.cfi_offset	%r13,-40
2672	pushq	%r14
2673.cfi_adjust_cfa_offset	8
2674.cfi_offset	%r14,-48
2675	pushq	%r15
2676.cfi_adjust_cfa_offset	8
2677.cfi_offset	%r15,-56
2678	subq	$576+8,%rsp
2679.cfi_adjust_cfa_offset	32*18+8
2680.Lpoint_addq_body:
2681
2682	movdqu	0(%rsi),%xmm0
2683	movdqu	16(%rsi),%xmm1
2684	movdqu	32(%rsi),%xmm2
2685	movdqu	48(%rsi),%xmm3
2686	movdqu	64(%rsi),%xmm4
2687	movdqu	80(%rsi),%xmm5
2688	movq	%rsi,%rbx
2689	movq	%rdx,%rsi
2690	movdqa	%xmm0,384(%rsp)
2691	movdqa	%xmm1,384+16(%rsp)
2692	movdqa	%xmm2,416(%rsp)
2693	movdqa	%xmm3,416+16(%rsp)
2694	movdqa	%xmm4,448(%rsp)
2695	movdqa	%xmm5,448+16(%rsp)
2696	por	%xmm4,%xmm5
2697
2698	movdqu	0(%rsi),%xmm0
2699	pshufd	$0xb1,%xmm5,%xmm3
2700	movdqu	16(%rsi),%xmm1
2701	movdqu	32(%rsi),%xmm2
2702	por	%xmm3,%xmm5
2703	movdqu	48(%rsi),%xmm3
2704	movq	64+0(%rsi),%rax
2705	movq	64+8(%rsi),%r14
2706	movq	64+16(%rsi),%r15
2707	movq	64+24(%rsi),%r8
2708	movdqa	%xmm0,480(%rsp)
2709	pshufd	$0x1e,%xmm5,%xmm4
2710	movdqa	%xmm1,480+16(%rsp)
2711	movdqu	64(%rsi),%xmm0
2712	movdqu	80(%rsi),%xmm1
2713	movdqa	%xmm2,512(%rsp)
2714	movdqa	%xmm3,512+16(%rsp)
2715	por	%xmm4,%xmm5
2716	pxor	%xmm4,%xmm4
2717	por	%xmm0,%xmm1
2718.byte	102,72,15,110,199
2719
2720	leaq	64-0(%rsi),%rsi
2721	movq	%rax,544+0(%rsp)
2722	movq	%r14,544+8(%rsp)
2723	movq	%r15,544+16(%rsp)
2724	movq	%r8,544+24(%rsp)
2725	leaq	96(%rsp),%rdi
2726	call	__ecp_nistz256_sqr_montq
2727
2728	pcmpeqd	%xmm4,%xmm5
2729	pshufd	$0xb1,%xmm1,%xmm4
2730	por	%xmm1,%xmm4
2731	pshufd	$0,%xmm5,%xmm5
2732	pshufd	$0x1e,%xmm4,%xmm3
2733	por	%xmm3,%xmm4
2734	pxor	%xmm3,%xmm3
2735	pcmpeqd	%xmm3,%xmm4
2736	pshufd	$0,%xmm4,%xmm4
2737	movq	64+0(%rbx),%rax
2738	movq	64+8(%rbx),%r14
2739	movq	64+16(%rbx),%r15
2740	movq	64+24(%rbx),%r8
2741.byte	102,72,15,110,203
2742
2743	leaq	64-0(%rbx),%rsi
2744	leaq	32(%rsp),%rdi
2745	call	__ecp_nistz256_sqr_montq
2746
2747	movq	544(%rsp),%rax
2748	leaq	544(%rsp),%rbx
2749	movq	0+96(%rsp),%r9
2750	movq	8+96(%rsp),%r10
2751	leaq	0+96(%rsp),%rsi
2752	movq	16+96(%rsp),%r11
2753	movq	24+96(%rsp),%r12
2754	leaq	224(%rsp),%rdi
2755	call	__ecp_nistz256_mul_montq
2756
2757	movq	448(%rsp),%rax
2758	leaq	448(%rsp),%rbx
2759	movq	0+32(%rsp),%r9
2760	movq	8+32(%rsp),%r10
2761	leaq	0+32(%rsp),%rsi
2762	movq	16+32(%rsp),%r11
2763	movq	24+32(%rsp),%r12
2764	leaq	256(%rsp),%rdi
2765	call	__ecp_nistz256_mul_montq
2766
2767	movq	416(%rsp),%rax
2768	leaq	416(%rsp),%rbx
2769	movq	0+224(%rsp),%r9
2770	movq	8+224(%rsp),%r10
2771	leaq	0+224(%rsp),%rsi
2772	movq	16+224(%rsp),%r11
2773	movq	24+224(%rsp),%r12
2774	leaq	224(%rsp),%rdi
2775	call	__ecp_nistz256_mul_montq
2776
2777	movq	512(%rsp),%rax
2778	leaq	512(%rsp),%rbx
2779	movq	0+256(%rsp),%r9
2780	movq	8+256(%rsp),%r10
2781	leaq	0+256(%rsp),%rsi
2782	movq	16+256(%rsp),%r11
2783	movq	24+256(%rsp),%r12
2784	leaq	256(%rsp),%rdi
2785	call	__ecp_nistz256_mul_montq
2786
2787	leaq	224(%rsp),%rbx
2788	leaq	64(%rsp),%rdi
2789	call	__ecp_nistz256_sub_fromq
2790
2791	orq	%r13,%r12
2792	movdqa	%xmm4,%xmm2
2793	orq	%r8,%r12
2794	orq	%r9,%r12
2795	por	%xmm5,%xmm2
2796.byte	102,73,15,110,220
2797
2798	movq	384(%rsp),%rax
2799	leaq	384(%rsp),%rbx
2800	movq	0+96(%rsp),%r9
2801	movq	8+96(%rsp),%r10
2802	leaq	0+96(%rsp),%rsi
2803	movq	16+96(%rsp),%r11
2804	movq	24+96(%rsp),%r12
2805	leaq	160(%rsp),%rdi
2806	call	__ecp_nistz256_mul_montq
2807
2808	movq	480(%rsp),%rax
2809	leaq	480(%rsp),%rbx
2810	movq	0+32(%rsp),%r9
2811	movq	8+32(%rsp),%r10
2812	leaq	0+32(%rsp),%rsi
2813	movq	16+32(%rsp),%r11
2814	movq	24+32(%rsp),%r12
2815	leaq	192(%rsp),%rdi
2816	call	__ecp_nistz256_mul_montq
2817
2818	leaq	160(%rsp),%rbx
2819	leaq	0(%rsp),%rdi
2820	call	__ecp_nistz256_sub_fromq
2821
2822	orq	%r13,%r12
2823	orq	%r8,%r12
2824	orq	%r9,%r12
2825
2826.byte	102,73,15,126,208
2827.byte	102,73,15,126,217
2828	orq	%r8,%r12
2829.byte	0x3e
2830	jnz	.Ladd_proceedq
2831
2832
2833
2834	testq	%r9,%r9
2835	jz	.Ladd_doubleq
2836
2837
2838
2839
2840
2841
2842.byte	102,72,15,126,199
2843	pxor	%xmm0,%xmm0
2844	movdqu	%xmm0,0(%rdi)
2845	movdqu	%xmm0,16(%rdi)
2846	movdqu	%xmm0,32(%rdi)
2847	movdqu	%xmm0,48(%rdi)
2848	movdqu	%xmm0,64(%rdi)
2849	movdqu	%xmm0,80(%rdi)
2850	jmp	.Ladd_doneq
2851
.align	32
.Ladd_doubleq:
.byte	102,72,15,126,206
.byte	102,72,15,126,199
	addq	$416,%rsp
.cfi_adjust_cfa_offset	-416
	jmp	.Lpoint_double_shortcutq
.cfi_adjust_cfa_offset	416

.align	32
.Ladd_proceedq:
	movq	0+64(%rsp),%rax
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	544(%rsp),%rax
	leaq	544(%rsp),%rbx
	movq	0+352(%rsp),%r9
	movq	8+352(%rsp),%r10
	leaq	0+352(%rsp),%rsi
	movq	16+352(%rsp),%r11
	movq	24+352(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	0(%rsp),%rax
	leaq	0(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	160(%rsp),%rax
	leaq	160(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

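# Inline doubling of a field element: add the value to itself with the
# carry kept in %r11, subtract the prime p (its second and fourth words
# live in %r14 and %r15 here), and on borrow select the pre-subtraction
# value back via cmovc.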
	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	96(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq

	leaq	128(%rsp),%rbx
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	192+0(%rsp),%rax
	movq	192+8(%rsp),%rbp
	movq	192+16(%rsp),%rcx
	movq	192+24(%rsp),%r10
	leaq	320(%rsp),%rdi

	call	__ecp_nistz256_subq

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	128(%rsp),%rax
	leaq	128(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	0+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	320(%rsp),%rax
	leaq	320(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	256(%rsp),%rbx
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

.byte	102,72,15,126,199

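# Constant-time selection of the result. xmm5 and xmm4 are all-ones
# masks derived earlier from each input's Z coordinate (set when that
# input is the point at infinity); every output word is chosen from the
# computed sum or from the other input using pand/pandn/por rather than
# branches.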
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	352(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	352+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	544(%rsp),%xmm2
	pand	544+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	480(%rsp),%xmm2
	pand	480+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	320(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	320+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	512(%rsp),%xmm2
	pand	512+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

.Ladd_doneq:
	leaq	576+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_addq_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
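# void ecp_nistz256_point_add_affine(P256_POINT *r, const P256_POINT *a,
#                                    const P256_POINT_AFFINE *b);
# (signature as given in the upstream perlasm source)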
.globl	ecp_nistz256_point_add_affine
.hidden ecp_nistz256_point_add_affine
.type	ecp_nistz256_point_add_affine,@function
.align	32
ecp_nistz256_point_add_affine:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_add_affinex
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$480+8,%rsp
.cfi_adjust_cfa_offset	32*15+8
.Ladd_affineq_body:

	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)
	movdqa	%xmm1,320+16(%rsp)
	movdqa	%xmm2,352(%rsp)
	movdqa	%xmm3,352+16(%rsp)
	movdqa	%xmm4,384(%rsp)
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199
	movdqa	%xmm2,448(%rsp)
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3

	leaq	64-0(%rsi),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rax

	movq	%r12,%r9
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4

	leaq	32-0(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+64(%rsp),%rax
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	0+96(%rsp),%rax
	movq	8+96(%rsp),%r14
	leaq	0+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	128(%rsp),%rax
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	320(%rsp),%rax
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	0+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq

	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subq

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	352(%rsp),%rax
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	0+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

.byte	102,72,15,126,199

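# The affine input carries an implicit Z = 1, so when the projective
# input is infinity the result's Z is selected from .LONE_mont (one in
# Montgomery form) instead of a stored Z coordinate.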
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	.LONE_mont(%rip),%xmm2
	pand	.LONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

	leaq	480+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Ladd_affineq_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
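# The x-suffixed routines below form the alternative code path taken
# when OPENSSL_ia32cap_P advertises both BMI2 and ADX (the $0x80100
# test at each dispatching entry point); they rely on mulx/adcx/adox
# for the field arithmetic instead of mul/adc.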
.type	__ecp_nistz256_add_tox,@function
.align	32
__ecp_nistz256_add_tox:
.cfi_startproc
	xorq	%r11,%r11
	adcq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	xorq	%r10,%r10
	sbbq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox

.type	__ecp_nistz256_sub_fromx,@function
.align	32
__ecp_nistz256_sub_fromx:
.cfi_startproc
	xorq	%r11,%r11
	sbbq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	$0,%r11

	xorq	%r10,%r10
	adcq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9

	btq	$0,%r11
	cmovncq	%rax,%r12
	cmovncq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovncq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovncq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx

.type	__ecp_nistz256_subx,@function
.align	32
__ecp_nistz256_subx:
.cfi_startproc
	xorq	%r11,%r11
	sbbq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	$0,%r11

	xorq	%r9,%r9
	adcq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10

	btq	$0,%r11
	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	cmovcq	%rcx,%r8
	cmovcq	%r10,%r9

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_subx,.-__ecp_nistz256_subx

.type	__ecp_nistz256_mul_by_2x,@function
.align	32
__ecp_nistz256_mul_by_2x:
.cfi_startproc
	xorq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	xorq	%r10,%r10
	sbbq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
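# void ecp_nistz256_point_doublex(P256_POINT *r, const P256_POINT *a);
# ADX/BMI2 variant of ecp_nistz256_point_double.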
.type	ecp_nistz256_point_doublex,@function
.align	32
ecp_nistz256_point_doublex:
.cfi_startproc
.Lpoint_doublex:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$160+8,%rsp
.cfi_adjust_cfa_offset	32*5+8
.Lpoint_doublex_body:

.Lpoint_double_shortcutx:
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199
.byte	102,73,15,110,202
.byte	102,73,15,110,211

	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-128(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	32(%rbx),%rdx
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-128(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215
	call	__ecp_nistz256_mul_montx
	call	__ecp_nistz256_mul_by_2x

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_tox

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207
	call	__ecp_nistz256_sqr_montx
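# Halve a field element: unconditionally add p (whose nonzero words
# appear to be held in %rsi and %rbp at this point), keep the original
# via cmovz when the input was even, then shift the five-word result
# right by one bit.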
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	movq	64(%rsp),%rdx
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_tox

	movq	96(%rsp),%rdx
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

	movq	0+32(%rsp),%rdx
	movq	8+32(%rsp),%r14
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199
	call	__ecp_nistz256_sqr_montx

	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromx

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subx

	movq	32(%rsp),%rdx
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-128(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

.byte	102,72,15,126,203
.byte	102,72,15,126,207
	call	__ecp_nistz256_sub_fromx

	leaq	160+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_doublex_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
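# void ecp_nistz256_point_addx(P256_POINT *r, const P256_POINT *a,
#                              const P256_POINT *b);
# ADX/BMI2 variant of ecp_nistz256_point_add; the layout mirrors the
# q path above.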
.type	ecp_nistz256_point_addx,@function
.align	32
ecp_nistz256_point_addx:
.cfi_startproc
.Lpoint_addx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$576+8,%rsp
.cfi_adjust_cfa_offset	32*18+8
.Lpoint_addx_body:

	movdqu	0(%rsi),%xmm0
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	%rsi,%rbx
	movq	%rdx,%rsi
	movdqa	%xmm0,384(%rsp)
	movdqa	%xmm1,384+16(%rsp)
	movdqa	%xmm2,416(%rsp)
	movdqa	%xmm3,416+16(%rsp)
	movdqa	%xmm4,448(%rsp)
	movdqa	%xmm5,448+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rsi),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rsi),%xmm3
	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,480(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,480+16(%rsp)
	movdqu	64(%rsi),%xmm0
	movdqu	80(%rsi),%xmm1
	movdqa	%xmm2,512(%rsp)
	movdqa	%xmm3,512+16(%rsp)
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm0,%xmm1
.byte	102,72,15,110,199

	leaq	64-128(%rsi),%rsi
	movq	%rdx,544+0(%rsp)
	movq	%r14,544+8(%rsp)
	movq	%r15,544+16(%rsp)
	movq	%r8,544+24(%rsp)
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm1,%xmm4
	por	%xmm1,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4
	movq	64+0(%rbx),%rdx
	movq	64+8(%rbx),%r14
	movq	64+16(%rbx),%r15
	movq	64+24(%rbx),%r8
.byte	102,72,15,110,203

	leaq	64-128(%rbx),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	544(%rsp),%rdx
	leaq	544(%rsp),%rbx
	movq	0+96(%rsp),%r9
	movq	8+96(%rsp),%r10
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r11
	movq	24+96(%rsp),%r12
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	416(%rsp),%rdx
	leaq	416(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	-128+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	512(%rsp),%rdx
	leaq	512(%rsp),%rbx
	movq	0+256(%rsp),%r9
	movq	8+256(%rsp),%r10
	leaq	-128+256(%rsp),%rsi
	movq	16+256(%rsp),%r11
	movq	24+256(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	224(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	orq	%r13,%r12
	movdqa	%xmm4,%xmm2
	orq	%r8,%r12
	orq	%r9,%r12
	por	%xmm5,%xmm2
.byte	102,73,15,110,220

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+96(%rsp),%r9
	movq	8+96(%rsp),%r10
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r11
	movq	24+96(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	480(%rsp),%rdx
	leaq	480(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	160(%rsp),%rbx
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	orq	%r13,%r12
	orq	%r8,%r12
	orq	%r9,%r12

.byte	102,73,15,126,208
.byte	102,73,15,126,217
	orq	%r8,%r12
.byte	0x3e
	jnz	.Ladd_proceedx

	testq	%r9,%r9
	jz	.Ladd_doublex

.byte	102,72,15,126,199
	pxor	%xmm0,%xmm0
	movdqu	%xmm0,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	movdqu	%xmm0,32(%rdi)
	movdqu	%xmm0,48(%rdi)
	movdqu	%xmm0,64(%rdi)
	movdqu	%xmm0,80(%rdi)
	jmp	.Ladd_donex

.align	32
.Ladd_doublex:
.byte	102,72,15,126,206
.byte	102,72,15,126,199
	addq	$416,%rsp
.cfi_adjust_cfa_offset	-416
	jmp	.Lpoint_double_shortcutx
.cfi_adjust_cfa_offset	416

.align	32
.Ladd_proceedx:
	movq	0+64(%rsp),%rdx
	movq	8+64(%rsp),%r14
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	544(%rsp),%rdx
	leaq	544(%rsp),%rbx
	movq	0+352(%rsp),%r9
	movq	8+352(%rsp),%r10
	leaq	-128+352(%rsp),%rsi
	movq	16+352(%rsp),%r11
	movq	24+352(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	0(%rsp),%rdx
	leaq	0(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	160(%rsp),%rdx
	leaq	160(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	96(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subx

	leaq	128(%rsp),%rbx
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	192+0(%rsp),%rax
	movq	192+8(%rsp),%rbp
	movq	192+16(%rsp),%rcx
	movq	192+24(%rsp),%r10
	leaq	320(%rsp),%rdi

	call	__ecp_nistz256_subx

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	128(%rsp),%rdx
	leaq	128(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	-128+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	320(%rsp),%rdx
	leaq	320(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	256(%rsp),%rbx
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

.byte	102,72,15,126,199

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	352(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	352+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	544(%rsp),%xmm2
	pand	544+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	480(%rsp),%xmm2
	pand	480+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	320(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	320+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	512(%rsp),%xmm2
	pand	512+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

.Ladd_donex:
	leaq	576+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_addx_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
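# void ecp_nistz256_point_add_affinex(P256_POINT *r, const P256_POINT *a,
#                                     const P256_POINT_AFFINE *b);
# ADX/BMI2 variant of ecp_nistz256_point_add_affine.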
.type	ecp_nistz256_point_add_affinex,@function
.align	32
ecp_nistz256_point_add_affinex:
.cfi_startproc
.Lpoint_add_affinex:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$480+8,%rsp
.cfi_adjust_cfa_offset	32*15+8
.Ladd_affinex_body:

	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)
	movdqa	%xmm1,320+16(%rsp)
	movdqa	%xmm2,352(%rsp)
	movdqa	%xmm3,352+16(%rsp)
	movdqa	%xmm4,384(%rsp)
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199
	movdqa	%xmm2,448(%rsp)
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3

	leaq	64-128(%rsi),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rdx

	movq	%r12,%r9
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4

	leaq	32-128(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+64(%rsp),%rdx
	movq	8+64(%rsp),%r14
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	0+96(%rsp),%rdx
	movq	8+96(%rsp),%r14
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	128(%rsp),%rdx
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	320(%rsp),%rdx
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	-128+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subx

	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subx

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	352(%rsp),%rdx
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	-128+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	96(%rsp),%rdx
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

.byte	102,72,15,126,199

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	.LONE_mont(%rip),%xmm2
	pand	.LONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

	leaq	480+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Ladd_affinex_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
#endif
.section	.note.GNU-stack,"",@progbits