• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
11#if defined(BORINGSSL_PREFIX)
12#include <boringssl_prefix_symbols_asm.h>
13#endif
14.text
15.extern	OPENSSL_ia32cap_P
16.hidden OPENSSL_ia32cap_P
17
18
19.align	64
20.Lpoly:
21.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
22
23.LOne:
24.long	1,1,1,1,1,1,1,1
25.LTwo:
26.long	2,2,2,2,2,2,2,2
27.LThree:
28.long	3,3,3,3,3,3,3,3
29.LONE_mont:
30.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
31
32
33.Lord:
34.quad	0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
35.LordK:
36.quad	0xccd1c8aaee00bc4f
37
38
39
40.globl	ecp_nistz256_neg
41.hidden ecp_nistz256_neg
42.type	ecp_nistz256_neg,@function
43.align	32
44ecp_nistz256_neg:
45.cfi_startproc
46	pushq	%r12
47.cfi_adjust_cfa_offset	8
48.cfi_offset	%r12,-16
49	pushq	%r13
50.cfi_adjust_cfa_offset	8
51.cfi_offset	%r13,-24
52.Lneg_body:
53
54	xorq	%r8,%r8
55	xorq	%r9,%r9
56	xorq	%r10,%r10
57	xorq	%r11,%r11
58	xorq	%r13,%r13
59
60	subq	0(%rsi),%r8
61	sbbq	8(%rsi),%r9
62	sbbq	16(%rsi),%r10
63	movq	%r8,%rax
64	sbbq	24(%rsi),%r11
65	leaq	.Lpoly(%rip),%rsi
66	movq	%r9,%rdx
67	sbbq	$0,%r13
68
69	addq	0(%rsi),%r8
70	movq	%r10,%rcx
71	adcq	8(%rsi),%r9
72	adcq	16(%rsi),%r10
73	movq	%r11,%r12
74	adcq	24(%rsi),%r11
75	testq	%r13,%r13
76
77	cmovzq	%rax,%r8
78	cmovzq	%rdx,%r9
79	movq	%r8,0(%rdi)
80	cmovzq	%rcx,%r10
81	movq	%r9,8(%rdi)
82	cmovzq	%r12,%r11
83	movq	%r10,16(%rdi)
84	movq	%r11,24(%rdi)
85
86	movq	0(%rsp),%r13
87.cfi_restore	%r13
88	movq	8(%rsp),%r12
89.cfi_restore	%r12
90	leaq	16(%rsp),%rsp
91.cfi_adjust_cfa_offset	-16
92.Lneg_epilogue:
93	.byte	0xf3,0xc3
94.cfi_endproc
95.size	ecp_nistz256_neg,.-ecp_nistz256_neg
96
97
98
99
100
101
102.globl	ecp_nistz256_ord_mul_mont
103.hidden ecp_nistz256_ord_mul_mont
104.type	ecp_nistz256_ord_mul_mont,@function
105.align	32
106ecp_nistz256_ord_mul_mont:
107.cfi_startproc
108	leaq	OPENSSL_ia32cap_P(%rip),%rcx
109	movq	8(%rcx),%rcx
110	andl	$0x80100,%ecx
111	cmpl	$0x80100,%ecx
112	je	.Lecp_nistz256_ord_mul_montx
113	pushq	%rbp
114.cfi_adjust_cfa_offset	8
115.cfi_offset	%rbp,-16
116	pushq	%rbx
117.cfi_adjust_cfa_offset	8
118.cfi_offset	%rbx,-24
119	pushq	%r12
120.cfi_adjust_cfa_offset	8
121.cfi_offset	%r12,-32
122	pushq	%r13
123.cfi_adjust_cfa_offset	8
124.cfi_offset	%r13,-40
125	pushq	%r14
126.cfi_adjust_cfa_offset	8
127.cfi_offset	%r14,-48
128	pushq	%r15
129.cfi_adjust_cfa_offset	8
130.cfi_offset	%r15,-56
131.Lord_mul_body:
132
133	movq	0(%rdx),%rax
134	movq	%rdx,%rbx
135	leaq	.Lord(%rip),%r14
136	movq	.LordK(%rip),%r15
137
138
139	movq	%rax,%rcx
140	mulq	0(%rsi)
141	movq	%rax,%r8
142	movq	%rcx,%rax
143	movq	%rdx,%r9
144
145	mulq	8(%rsi)
146	addq	%rax,%r9
147	movq	%rcx,%rax
148	adcq	$0,%rdx
149	movq	%rdx,%r10
150
151	mulq	16(%rsi)
152	addq	%rax,%r10
153	movq	%rcx,%rax
154	adcq	$0,%rdx
155
156	movq	%r8,%r13
157	imulq	%r15,%r8
158
159	movq	%rdx,%r11
160	mulq	24(%rsi)
161	addq	%rax,%r11
162	movq	%r8,%rax
163	adcq	$0,%rdx
164	movq	%rdx,%r12
165
166
167	mulq	0(%r14)
168	movq	%r8,%rbp
169	addq	%rax,%r13
170	movq	%r8,%rax
171	adcq	$0,%rdx
172	movq	%rdx,%rcx
173
174	subq	%r8,%r10
175	sbbq	$0,%r8
176
177	mulq	8(%r14)
178	addq	%rcx,%r9
179	adcq	$0,%rdx
180	addq	%rax,%r9
181	movq	%rbp,%rax
182	adcq	%rdx,%r10
183	movq	%rbp,%rdx
184	adcq	$0,%r8
185
186	shlq	$32,%rax
187	shrq	$32,%rdx
188	subq	%rax,%r11
189	movq	8(%rbx),%rax
190	sbbq	%rdx,%rbp
191
192	addq	%r8,%r11
193	adcq	%rbp,%r12
194	adcq	$0,%r13
195
196
197	movq	%rax,%rcx
198	mulq	0(%rsi)
199	addq	%rax,%r9
200	movq	%rcx,%rax
201	adcq	$0,%rdx
202	movq	%rdx,%rbp
203
204	mulq	8(%rsi)
205	addq	%rbp,%r10
206	adcq	$0,%rdx
207	addq	%rax,%r10
208	movq	%rcx,%rax
209	adcq	$0,%rdx
210	movq	%rdx,%rbp
211
212	mulq	16(%rsi)
213	addq	%rbp,%r11
214	adcq	$0,%rdx
215	addq	%rax,%r11
216	movq	%rcx,%rax
217	adcq	$0,%rdx
218
219	movq	%r9,%rcx
220	imulq	%r15,%r9
221
222	movq	%rdx,%rbp
223	mulq	24(%rsi)
224	addq	%rbp,%r12
225	adcq	$0,%rdx
226	xorq	%r8,%r8
227	addq	%rax,%r12
228	movq	%r9,%rax
229	adcq	%rdx,%r13
230	adcq	$0,%r8
231
232
233	mulq	0(%r14)
234	movq	%r9,%rbp
235	addq	%rax,%rcx
236	movq	%r9,%rax
237	adcq	%rdx,%rcx
238
239	subq	%r9,%r11
240	sbbq	$0,%r9
241
242	mulq	8(%r14)
243	addq	%rcx,%r10
244	adcq	$0,%rdx
245	addq	%rax,%r10
246	movq	%rbp,%rax
247	adcq	%rdx,%r11
248	movq	%rbp,%rdx
249	adcq	$0,%r9
250
251	shlq	$32,%rax
252	shrq	$32,%rdx
253	subq	%rax,%r12
254	movq	16(%rbx),%rax
255	sbbq	%rdx,%rbp
256
257	addq	%r9,%r12
258	adcq	%rbp,%r13
259	adcq	$0,%r8
260
261
262	movq	%rax,%rcx
263	mulq	0(%rsi)
264	addq	%rax,%r10
265	movq	%rcx,%rax
266	adcq	$0,%rdx
267	movq	%rdx,%rbp
268
269	mulq	8(%rsi)
270	addq	%rbp,%r11
271	adcq	$0,%rdx
272	addq	%rax,%r11
273	movq	%rcx,%rax
274	adcq	$0,%rdx
275	movq	%rdx,%rbp
276
277	mulq	16(%rsi)
278	addq	%rbp,%r12
279	adcq	$0,%rdx
280	addq	%rax,%r12
281	movq	%rcx,%rax
282	adcq	$0,%rdx
283
284	movq	%r10,%rcx
285	imulq	%r15,%r10
286
287	movq	%rdx,%rbp
288	mulq	24(%rsi)
289	addq	%rbp,%r13
290	adcq	$0,%rdx
291	xorq	%r9,%r9
292	addq	%rax,%r13
293	movq	%r10,%rax
294	adcq	%rdx,%r8
295	adcq	$0,%r9
296
297
298	mulq	0(%r14)
299	movq	%r10,%rbp
300	addq	%rax,%rcx
301	movq	%r10,%rax
302	adcq	%rdx,%rcx
303
304	subq	%r10,%r12
305	sbbq	$0,%r10
306
307	mulq	8(%r14)
308	addq	%rcx,%r11
309	adcq	$0,%rdx
310	addq	%rax,%r11
311	movq	%rbp,%rax
312	adcq	%rdx,%r12
313	movq	%rbp,%rdx
314	adcq	$0,%r10
315
316	shlq	$32,%rax
317	shrq	$32,%rdx
318	subq	%rax,%r13
319	movq	24(%rbx),%rax
320	sbbq	%rdx,%rbp
321
322	addq	%r10,%r13
323	adcq	%rbp,%r8
324	adcq	$0,%r9
325
326
327	movq	%rax,%rcx
328	mulq	0(%rsi)
329	addq	%rax,%r11
330	movq	%rcx,%rax
331	adcq	$0,%rdx
332	movq	%rdx,%rbp
333
334	mulq	8(%rsi)
335	addq	%rbp,%r12
336	adcq	$0,%rdx
337	addq	%rax,%r12
338	movq	%rcx,%rax
339	adcq	$0,%rdx
340	movq	%rdx,%rbp
341
342	mulq	16(%rsi)
343	addq	%rbp,%r13
344	adcq	$0,%rdx
345	addq	%rax,%r13
346	movq	%rcx,%rax
347	adcq	$0,%rdx
348
349	movq	%r11,%rcx
350	imulq	%r15,%r11
351
352	movq	%rdx,%rbp
353	mulq	24(%rsi)
354	addq	%rbp,%r8
355	adcq	$0,%rdx
356	xorq	%r10,%r10
357	addq	%rax,%r8
358	movq	%r11,%rax
359	adcq	%rdx,%r9
360	adcq	$0,%r10
361
362
363	mulq	0(%r14)
364	movq	%r11,%rbp
365	addq	%rax,%rcx
366	movq	%r11,%rax
367	adcq	%rdx,%rcx
368
369	subq	%r11,%r13
370	sbbq	$0,%r11
371
372	mulq	8(%r14)
373	addq	%rcx,%r12
374	adcq	$0,%rdx
375	addq	%rax,%r12
376	movq	%rbp,%rax
377	adcq	%rdx,%r13
378	movq	%rbp,%rdx
379	adcq	$0,%r11
380
381	shlq	$32,%rax
382	shrq	$32,%rdx
383	subq	%rax,%r8
384	sbbq	%rdx,%rbp
385
386	addq	%r11,%r8
387	adcq	%rbp,%r9
388	adcq	$0,%r10
389
390
391	movq	%r12,%rsi
392	subq	0(%r14),%r12
393	movq	%r13,%r11
394	sbbq	8(%r14),%r13
395	movq	%r8,%rcx
396	sbbq	16(%r14),%r8
397	movq	%r9,%rbp
398	sbbq	24(%r14),%r9
399	sbbq	$0,%r10
400
401	cmovcq	%rsi,%r12
402	cmovcq	%r11,%r13
403	cmovcq	%rcx,%r8
404	cmovcq	%rbp,%r9
405
406	movq	%r12,0(%rdi)
407	movq	%r13,8(%rdi)
408	movq	%r8,16(%rdi)
409	movq	%r9,24(%rdi)
410
411	movq	0(%rsp),%r15
412.cfi_restore	%r15
413	movq	8(%rsp),%r14
414.cfi_restore	%r14
415	movq	16(%rsp),%r13
416.cfi_restore	%r13
417	movq	24(%rsp),%r12
418.cfi_restore	%r12
419	movq	32(%rsp),%rbx
420.cfi_restore	%rbx
421	movq	40(%rsp),%rbp
422.cfi_restore	%rbp
423	leaq	48(%rsp),%rsp
424.cfi_adjust_cfa_offset	-48
425.Lord_mul_epilogue:
426	.byte	0xf3,0xc3
427.cfi_endproc
428.size	ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont
429
430
431
432
433
434
435
436.globl	ecp_nistz256_ord_sqr_mont
437.hidden ecp_nistz256_ord_sqr_mont
438.type	ecp_nistz256_ord_sqr_mont,@function
439.align	32
440ecp_nistz256_ord_sqr_mont:
441.cfi_startproc
442	leaq	OPENSSL_ia32cap_P(%rip),%rcx
443	movq	8(%rcx),%rcx
444	andl	$0x80100,%ecx
445	cmpl	$0x80100,%ecx
446	je	.Lecp_nistz256_ord_sqr_montx
447	pushq	%rbp
448.cfi_adjust_cfa_offset	8
449.cfi_offset	%rbp,-16
450	pushq	%rbx
451.cfi_adjust_cfa_offset	8
452.cfi_offset	%rbx,-24
453	pushq	%r12
454.cfi_adjust_cfa_offset	8
455.cfi_offset	%r12,-32
456	pushq	%r13
457.cfi_adjust_cfa_offset	8
458.cfi_offset	%r13,-40
459	pushq	%r14
460.cfi_adjust_cfa_offset	8
461.cfi_offset	%r14,-48
462	pushq	%r15
463.cfi_adjust_cfa_offset	8
464.cfi_offset	%r15,-56
465.Lord_sqr_body:
466
467	movq	0(%rsi),%r8
468	movq	8(%rsi),%rax
469	movq	16(%rsi),%r14
470	movq	24(%rsi),%r15
471	leaq	.Lord(%rip),%rsi
472	movq	%rdx,%rbx
473	jmp	.Loop_ord_sqr
474
475.align	32
476.Loop_ord_sqr:
477
478	movq	%rax,%rbp
479	mulq	%r8
480	movq	%rax,%r9
481.byte	102,72,15,110,205
482	movq	%r14,%rax
483	movq	%rdx,%r10
484
485	mulq	%r8
486	addq	%rax,%r10
487	movq	%r15,%rax
488.byte	102,73,15,110,214
489	adcq	$0,%rdx
490	movq	%rdx,%r11
491
492	mulq	%r8
493	addq	%rax,%r11
494	movq	%r15,%rax
495.byte	102,73,15,110,223
496	adcq	$0,%rdx
497	movq	%rdx,%r12
498
499
500	mulq	%r14
501	movq	%rax,%r13
502	movq	%r14,%rax
503	movq	%rdx,%r14
504
505
506	mulq	%rbp
507	addq	%rax,%r11
508	movq	%r15,%rax
509	adcq	$0,%rdx
510	movq	%rdx,%r15
511
512	mulq	%rbp
513	addq	%rax,%r12
514	adcq	$0,%rdx
515
516	addq	%r15,%r12
517	adcq	%rdx,%r13
518	adcq	$0,%r14
519
520
521	xorq	%r15,%r15
522	movq	%r8,%rax
523	addq	%r9,%r9
524	adcq	%r10,%r10
525	adcq	%r11,%r11
526	adcq	%r12,%r12
527	adcq	%r13,%r13
528	adcq	%r14,%r14
529	adcq	$0,%r15
530
531
532	mulq	%rax
533	movq	%rax,%r8
534.byte	102,72,15,126,200
535	movq	%rdx,%rbp
536
537	mulq	%rax
538	addq	%rbp,%r9
539	adcq	%rax,%r10
540.byte	102,72,15,126,208
541	adcq	$0,%rdx
542	movq	%rdx,%rbp
543
544	mulq	%rax
545	addq	%rbp,%r11
546	adcq	%rax,%r12
547.byte	102,72,15,126,216
548	adcq	$0,%rdx
549	movq	%rdx,%rbp
550
551	movq	%r8,%rcx
552	imulq	32(%rsi),%r8
553
554	mulq	%rax
555	addq	%rbp,%r13
556	adcq	%rax,%r14
557	movq	0(%rsi),%rax
558	adcq	%rdx,%r15
559
560
561	mulq	%r8
562	movq	%r8,%rbp
563	addq	%rax,%rcx
564	movq	8(%rsi),%rax
565	adcq	%rdx,%rcx
566
567	subq	%r8,%r10
568	sbbq	$0,%rbp
569
570	mulq	%r8
571	addq	%rcx,%r9
572	adcq	$0,%rdx
573	addq	%rax,%r9
574	movq	%r8,%rax
575	adcq	%rdx,%r10
576	movq	%r8,%rdx
577	adcq	$0,%rbp
578
579	movq	%r9,%rcx
580	imulq	32(%rsi),%r9
581
582	shlq	$32,%rax
583	shrq	$32,%rdx
584	subq	%rax,%r11
585	movq	0(%rsi),%rax
586	sbbq	%rdx,%r8
587
588	addq	%rbp,%r11
589	adcq	$0,%r8
590
591
592	mulq	%r9
593	movq	%r9,%rbp
594	addq	%rax,%rcx
595	movq	8(%rsi),%rax
596	adcq	%rdx,%rcx
597
598	subq	%r9,%r11
599	sbbq	$0,%rbp
600
601	mulq	%r9
602	addq	%rcx,%r10
603	adcq	$0,%rdx
604	addq	%rax,%r10
605	movq	%r9,%rax
606	adcq	%rdx,%r11
607	movq	%r9,%rdx
608	adcq	$0,%rbp
609
610	movq	%r10,%rcx
611	imulq	32(%rsi),%r10
612
613	shlq	$32,%rax
614	shrq	$32,%rdx
615	subq	%rax,%r8
616	movq	0(%rsi),%rax
617	sbbq	%rdx,%r9
618
619	addq	%rbp,%r8
620	adcq	$0,%r9
621
622
623	mulq	%r10
624	movq	%r10,%rbp
625	addq	%rax,%rcx
626	movq	8(%rsi),%rax
627	adcq	%rdx,%rcx
628
629	subq	%r10,%r8
630	sbbq	$0,%rbp
631
632	mulq	%r10
633	addq	%rcx,%r11
634	adcq	$0,%rdx
635	addq	%rax,%r11
636	movq	%r10,%rax
637	adcq	%rdx,%r8
638	movq	%r10,%rdx
639	adcq	$0,%rbp
640
641	movq	%r11,%rcx
642	imulq	32(%rsi),%r11
643
644	shlq	$32,%rax
645	shrq	$32,%rdx
646	subq	%rax,%r9
647	movq	0(%rsi),%rax
648	sbbq	%rdx,%r10
649
650	addq	%rbp,%r9
651	adcq	$0,%r10
652
653
654	mulq	%r11
655	movq	%r11,%rbp
656	addq	%rax,%rcx
657	movq	8(%rsi),%rax
658	adcq	%rdx,%rcx
659
660	subq	%r11,%r9
661	sbbq	$0,%rbp
662
663	mulq	%r11
664	addq	%rcx,%r8
665	adcq	$0,%rdx
666	addq	%rax,%r8
667	movq	%r11,%rax
668	adcq	%rdx,%r9
669	movq	%r11,%rdx
670	adcq	$0,%rbp
671
672	shlq	$32,%rax
673	shrq	$32,%rdx
674	subq	%rax,%r10
675	sbbq	%rdx,%r11
676
677	addq	%rbp,%r10
678	adcq	$0,%r11
679
680
681	xorq	%rdx,%rdx
682	addq	%r12,%r8
683	adcq	%r13,%r9
684	movq	%r8,%r12
685	adcq	%r14,%r10
686	adcq	%r15,%r11
687	movq	%r9,%rax
688	adcq	$0,%rdx
689
690
691	subq	0(%rsi),%r8
692	movq	%r10,%r14
693	sbbq	8(%rsi),%r9
694	sbbq	16(%rsi),%r10
695	movq	%r11,%r15
696	sbbq	24(%rsi),%r11
697	sbbq	$0,%rdx
698
699	cmovcq	%r12,%r8
700	cmovncq	%r9,%rax
701	cmovncq	%r10,%r14
702	cmovncq	%r11,%r15
703
704	decq	%rbx
705	jnz	.Loop_ord_sqr
706
707	movq	%r8,0(%rdi)
708	movq	%rax,8(%rdi)
709	pxor	%xmm1,%xmm1
710	movq	%r14,16(%rdi)
711	pxor	%xmm2,%xmm2
712	movq	%r15,24(%rdi)
713	pxor	%xmm3,%xmm3
714
715	movq	0(%rsp),%r15
716.cfi_restore	%r15
717	movq	8(%rsp),%r14
718.cfi_restore	%r14
719	movq	16(%rsp),%r13
720.cfi_restore	%r13
721	movq	24(%rsp),%r12
722.cfi_restore	%r12
723	movq	32(%rsp),%rbx
724.cfi_restore	%rbx
725	movq	40(%rsp),%rbp
726.cfi_restore	%rbp
727	leaq	48(%rsp),%rsp
728.cfi_adjust_cfa_offset	-48
729.Lord_sqr_epilogue:
730	.byte	0xf3,0xc3
731.cfi_endproc
732.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
733
734.type	ecp_nistz256_ord_mul_montx,@function
735.align	32
736ecp_nistz256_ord_mul_montx:
737.cfi_startproc
738.Lecp_nistz256_ord_mul_montx:
739	pushq	%rbp
740.cfi_adjust_cfa_offset	8
741.cfi_offset	%rbp,-16
742	pushq	%rbx
743.cfi_adjust_cfa_offset	8
744.cfi_offset	%rbx,-24
745	pushq	%r12
746.cfi_adjust_cfa_offset	8
747.cfi_offset	%r12,-32
748	pushq	%r13
749.cfi_adjust_cfa_offset	8
750.cfi_offset	%r13,-40
751	pushq	%r14
752.cfi_adjust_cfa_offset	8
753.cfi_offset	%r14,-48
754	pushq	%r15
755.cfi_adjust_cfa_offset	8
756.cfi_offset	%r15,-56
757.Lord_mulx_body:
758
759	movq	%rdx,%rbx
760	movq	0(%rdx),%rdx
761	movq	0(%rsi),%r9
762	movq	8(%rsi),%r10
763	movq	16(%rsi),%r11
764	movq	24(%rsi),%r12
765	leaq	-128(%rsi),%rsi
766	leaq	.Lord-128(%rip),%r14
767	movq	.LordK(%rip),%r15
768
769
770	mulxq	%r9,%r8,%r9
771	mulxq	%r10,%rcx,%r10
772	mulxq	%r11,%rbp,%r11
773	addq	%rcx,%r9
774	mulxq	%r12,%rcx,%r12
775	movq	%r8,%rdx
776	mulxq	%r15,%rdx,%rax
777	adcq	%rbp,%r10
778	adcq	%rcx,%r11
779	adcq	$0,%r12
780
781
782	xorq	%r13,%r13
783	mulxq	0+128(%r14),%rcx,%rbp
784	adcxq	%rcx,%r8
785	adoxq	%rbp,%r9
786
787	mulxq	8+128(%r14),%rcx,%rbp
788	adcxq	%rcx,%r9
789	adoxq	%rbp,%r10
790
791	mulxq	16+128(%r14),%rcx,%rbp
792	adcxq	%rcx,%r10
793	adoxq	%rbp,%r11
794
795	mulxq	24+128(%r14),%rcx,%rbp
796	movq	8(%rbx),%rdx
797	adcxq	%rcx,%r11
798	adoxq	%rbp,%r12
799	adcxq	%r8,%r12
800	adoxq	%r8,%r13
801	adcq	$0,%r13
802
803
804	mulxq	0+128(%rsi),%rcx,%rbp
805	adcxq	%rcx,%r9
806	adoxq	%rbp,%r10
807
808	mulxq	8+128(%rsi),%rcx,%rbp
809	adcxq	%rcx,%r10
810	adoxq	%rbp,%r11
811
812	mulxq	16+128(%rsi),%rcx,%rbp
813	adcxq	%rcx,%r11
814	adoxq	%rbp,%r12
815
816	mulxq	24+128(%rsi),%rcx,%rbp
817	movq	%r9,%rdx
818	mulxq	%r15,%rdx,%rax
819	adcxq	%rcx,%r12
820	adoxq	%rbp,%r13
821
822	adcxq	%r8,%r13
823	adoxq	%r8,%r8
824	adcq	$0,%r8
825
826
827	mulxq	0+128(%r14),%rcx,%rbp
828	adcxq	%rcx,%r9
829	adoxq	%rbp,%r10
830
831	mulxq	8+128(%r14),%rcx,%rbp
832	adcxq	%rcx,%r10
833	adoxq	%rbp,%r11
834
835	mulxq	16+128(%r14),%rcx,%rbp
836	adcxq	%rcx,%r11
837	adoxq	%rbp,%r12
838
839	mulxq	24+128(%r14),%rcx,%rbp
840	movq	16(%rbx),%rdx
841	adcxq	%rcx,%r12
842	adoxq	%rbp,%r13
843	adcxq	%r9,%r13
844	adoxq	%r9,%r8
845	adcq	$0,%r8
846
847
848	mulxq	0+128(%rsi),%rcx,%rbp
849	adcxq	%rcx,%r10
850	adoxq	%rbp,%r11
851
852	mulxq	8+128(%rsi),%rcx,%rbp
853	adcxq	%rcx,%r11
854	adoxq	%rbp,%r12
855
856	mulxq	16+128(%rsi),%rcx,%rbp
857	adcxq	%rcx,%r12
858	adoxq	%rbp,%r13
859
860	mulxq	24+128(%rsi),%rcx,%rbp
861	movq	%r10,%rdx
862	mulxq	%r15,%rdx,%rax
863	adcxq	%rcx,%r13
864	adoxq	%rbp,%r8
865
866	adcxq	%r9,%r8
867	adoxq	%r9,%r9
868	adcq	$0,%r9
869
870
871	mulxq	0+128(%r14),%rcx,%rbp
872	adcxq	%rcx,%r10
873	adoxq	%rbp,%r11
874
875	mulxq	8+128(%r14),%rcx,%rbp
876	adcxq	%rcx,%r11
877	adoxq	%rbp,%r12
878
879	mulxq	16+128(%r14),%rcx,%rbp
880	adcxq	%rcx,%r12
881	adoxq	%rbp,%r13
882
883	mulxq	24+128(%r14),%rcx,%rbp
884	movq	24(%rbx),%rdx
885	adcxq	%rcx,%r13
886	adoxq	%rbp,%r8
887	adcxq	%r10,%r8
888	adoxq	%r10,%r9
889	adcq	$0,%r9
890
891
892	mulxq	0+128(%rsi),%rcx,%rbp
893	adcxq	%rcx,%r11
894	adoxq	%rbp,%r12
895
896	mulxq	8+128(%rsi),%rcx,%rbp
897	adcxq	%rcx,%r12
898	adoxq	%rbp,%r13
899
900	mulxq	16+128(%rsi),%rcx,%rbp
901	adcxq	%rcx,%r13
902	adoxq	%rbp,%r8
903
904	mulxq	24+128(%rsi),%rcx,%rbp
905	movq	%r11,%rdx
906	mulxq	%r15,%rdx,%rax
907	adcxq	%rcx,%r8
908	adoxq	%rbp,%r9
909
910	adcxq	%r10,%r9
911	adoxq	%r10,%r10
912	adcq	$0,%r10
913
914
915	mulxq	0+128(%r14),%rcx,%rbp
916	adcxq	%rcx,%r11
917	adoxq	%rbp,%r12
918
919	mulxq	8+128(%r14),%rcx,%rbp
920	adcxq	%rcx,%r12
921	adoxq	%rbp,%r13
922
923	mulxq	16+128(%r14),%rcx,%rbp
924	adcxq	%rcx,%r13
925	adoxq	%rbp,%r8
926
927	mulxq	24+128(%r14),%rcx,%rbp
928	leaq	128(%r14),%r14
929	movq	%r12,%rbx
930	adcxq	%rcx,%r8
931	adoxq	%rbp,%r9
932	movq	%r13,%rdx
933	adcxq	%r11,%r9
934	adoxq	%r11,%r10
935	adcq	$0,%r10
936
937
938
939	movq	%r8,%rcx
940	subq	0(%r14),%r12
941	sbbq	8(%r14),%r13
942	sbbq	16(%r14),%r8
943	movq	%r9,%rbp
944	sbbq	24(%r14),%r9
945	sbbq	$0,%r10
946
947	cmovcq	%rbx,%r12
948	cmovcq	%rdx,%r13
949	cmovcq	%rcx,%r8
950	cmovcq	%rbp,%r9
951
952	movq	%r12,0(%rdi)
953	movq	%r13,8(%rdi)
954	movq	%r8,16(%rdi)
955	movq	%r9,24(%rdi)
956
957	movq	0(%rsp),%r15
958.cfi_restore	%r15
959	movq	8(%rsp),%r14
960.cfi_restore	%r14
961	movq	16(%rsp),%r13
962.cfi_restore	%r13
963	movq	24(%rsp),%r12
964.cfi_restore	%r12
965	movq	32(%rsp),%rbx
966.cfi_restore	%rbx
967	movq	40(%rsp),%rbp
968.cfi_restore	%rbp
969	leaq	48(%rsp),%rsp
970.cfi_adjust_cfa_offset	-48
971.Lord_mulx_epilogue:
972	.byte	0xf3,0xc3
973.cfi_endproc
974.size	ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx
975
976.type	ecp_nistz256_ord_sqr_montx,@function
977.align	32
978ecp_nistz256_ord_sqr_montx:
979.cfi_startproc
980.Lecp_nistz256_ord_sqr_montx:
981	pushq	%rbp
982.cfi_adjust_cfa_offset	8
983.cfi_offset	%rbp,-16
984	pushq	%rbx
985.cfi_adjust_cfa_offset	8
986.cfi_offset	%rbx,-24
987	pushq	%r12
988.cfi_adjust_cfa_offset	8
989.cfi_offset	%r12,-32
990	pushq	%r13
991.cfi_adjust_cfa_offset	8
992.cfi_offset	%r13,-40
993	pushq	%r14
994.cfi_adjust_cfa_offset	8
995.cfi_offset	%r14,-48
996	pushq	%r15
997.cfi_adjust_cfa_offset	8
998.cfi_offset	%r15,-56
999.Lord_sqrx_body:
1000
1001	movq	%rdx,%rbx
1002	movq	0(%rsi),%rdx
1003	movq	8(%rsi),%r14
1004	movq	16(%rsi),%r15
1005	movq	24(%rsi),%r8
1006	leaq	.Lord(%rip),%rsi
1007	jmp	.Loop_ord_sqrx
1008
1009.align	32
1010.Loop_ord_sqrx:
1011	mulxq	%r14,%r9,%r10
1012	mulxq	%r15,%rcx,%r11
1013	movq	%rdx,%rax
1014.byte	102,73,15,110,206
1015	mulxq	%r8,%rbp,%r12
1016	movq	%r14,%rdx
1017	addq	%rcx,%r10
1018.byte	102,73,15,110,215
1019	adcq	%rbp,%r11
1020	adcq	$0,%r12
1021	xorq	%r13,%r13
1022
1023	mulxq	%r15,%rcx,%rbp
1024	adcxq	%rcx,%r11
1025	adoxq	%rbp,%r12
1026
1027	mulxq	%r8,%rcx,%rbp
1028	movq	%r15,%rdx
1029	adcxq	%rcx,%r12
1030	adoxq	%rbp,%r13
1031	adcq	$0,%r13
1032
1033	mulxq	%r8,%rcx,%r14
1034	movq	%rax,%rdx
1035.byte	102,73,15,110,216
1036	xorq	%r15,%r15
1037	adcxq	%r9,%r9
1038	adoxq	%rcx,%r13
1039	adcxq	%r10,%r10
1040	adoxq	%r15,%r14
1041
1042
1043	mulxq	%rdx,%r8,%rbp
1044.byte	102,72,15,126,202
1045	adcxq	%r11,%r11
1046	adoxq	%rbp,%r9
1047	adcxq	%r12,%r12
1048	mulxq	%rdx,%rcx,%rax
1049.byte	102,72,15,126,210
1050	adcxq	%r13,%r13
1051	adoxq	%rcx,%r10
1052	adcxq	%r14,%r14
1053	mulxq	%rdx,%rcx,%rbp
1054.byte	0x67
1055.byte	102,72,15,126,218
1056	adoxq	%rax,%r11
1057	adcxq	%r15,%r15
1058	adoxq	%rcx,%r12
1059	adoxq	%rbp,%r13
1060	mulxq	%rdx,%rcx,%rax
1061	adoxq	%rcx,%r14
1062	adoxq	%rax,%r15
1063
1064
1065	movq	%r8,%rdx
1066	mulxq	32(%rsi),%rdx,%rcx
1067
1068	xorq	%rax,%rax
1069	mulxq	0(%rsi),%rcx,%rbp
1070	adcxq	%rcx,%r8
1071	adoxq	%rbp,%r9
1072	mulxq	8(%rsi),%rcx,%rbp
1073	adcxq	%rcx,%r9
1074	adoxq	%rbp,%r10
1075	mulxq	16(%rsi),%rcx,%rbp
1076	adcxq	%rcx,%r10
1077	adoxq	%rbp,%r11
1078	mulxq	24(%rsi),%rcx,%rbp
1079	adcxq	%rcx,%r11
1080	adoxq	%rbp,%r8
1081	adcxq	%rax,%r8
1082
1083
1084	movq	%r9,%rdx
1085	mulxq	32(%rsi),%rdx,%rcx
1086
1087	mulxq	0(%rsi),%rcx,%rbp
1088	adoxq	%rcx,%r9
1089	adcxq	%rbp,%r10
1090	mulxq	8(%rsi),%rcx,%rbp
1091	adoxq	%rcx,%r10
1092	adcxq	%rbp,%r11
1093	mulxq	16(%rsi),%rcx,%rbp
1094	adoxq	%rcx,%r11
1095	adcxq	%rbp,%r8
1096	mulxq	24(%rsi),%rcx,%rbp
1097	adoxq	%rcx,%r8
1098	adcxq	%rbp,%r9
1099	adoxq	%rax,%r9
1100
1101
1102	movq	%r10,%rdx
1103	mulxq	32(%rsi),%rdx,%rcx
1104
1105	mulxq	0(%rsi),%rcx,%rbp
1106	adcxq	%rcx,%r10
1107	adoxq	%rbp,%r11
1108	mulxq	8(%rsi),%rcx,%rbp
1109	adcxq	%rcx,%r11
1110	adoxq	%rbp,%r8
1111	mulxq	16(%rsi),%rcx,%rbp
1112	adcxq	%rcx,%r8
1113	adoxq	%rbp,%r9
1114	mulxq	24(%rsi),%rcx,%rbp
1115	adcxq	%rcx,%r9
1116	adoxq	%rbp,%r10
1117	adcxq	%rax,%r10
1118
1119
1120	movq	%r11,%rdx
1121	mulxq	32(%rsi),%rdx,%rcx
1122
1123	mulxq	0(%rsi),%rcx,%rbp
1124	adoxq	%rcx,%r11
1125	adcxq	%rbp,%r8
1126	mulxq	8(%rsi),%rcx,%rbp
1127	adoxq	%rcx,%r8
1128	adcxq	%rbp,%r9
1129	mulxq	16(%rsi),%rcx,%rbp
1130	adoxq	%rcx,%r9
1131	adcxq	%rbp,%r10
1132	mulxq	24(%rsi),%rcx,%rbp
1133	adoxq	%rcx,%r10
1134	adcxq	%rbp,%r11
1135	adoxq	%rax,%r11
1136
1137
1138	addq	%r8,%r12
1139	adcq	%r13,%r9
1140	movq	%r12,%rdx
1141	adcq	%r14,%r10
1142	adcq	%r15,%r11
1143	movq	%r9,%r14
1144	adcq	$0,%rax
1145
1146
1147	subq	0(%rsi),%r12
1148	movq	%r10,%r15
1149	sbbq	8(%rsi),%r9
1150	sbbq	16(%rsi),%r10
1151	movq	%r11,%r8
1152	sbbq	24(%rsi),%r11
1153	sbbq	$0,%rax
1154
1155	cmovncq	%r12,%rdx
1156	cmovncq	%r9,%r14
1157	cmovncq	%r10,%r15
1158	cmovncq	%r11,%r8
1159
1160	decq	%rbx
1161	jnz	.Loop_ord_sqrx
1162
1163	movq	%rdx,0(%rdi)
1164	movq	%r14,8(%rdi)
1165	pxor	%xmm1,%xmm1
1166	movq	%r15,16(%rdi)
1167	pxor	%xmm2,%xmm2
1168	movq	%r8,24(%rdi)
1169	pxor	%xmm3,%xmm3
1170
1171	movq	0(%rsp),%r15
1172.cfi_restore	%r15
1173	movq	8(%rsp),%r14
1174.cfi_restore	%r14
1175	movq	16(%rsp),%r13
1176.cfi_restore	%r13
1177	movq	24(%rsp),%r12
1178.cfi_restore	%r12
1179	movq	32(%rsp),%rbx
1180.cfi_restore	%rbx
1181	movq	40(%rsp),%rbp
1182.cfi_restore	%rbp
1183	leaq	48(%rsp),%rsp
1184.cfi_adjust_cfa_offset	-48
1185.Lord_sqrx_epilogue:
1186	.byte	0xf3,0xc3
1187.cfi_endproc
1188.size	ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx
1189
1190
1191
1192
1193
1194
1195.globl	ecp_nistz256_mul_mont
1196.hidden ecp_nistz256_mul_mont
1197.type	ecp_nistz256_mul_mont,@function
1198.align	32
1199ecp_nistz256_mul_mont:
1200.cfi_startproc
1201	leaq	OPENSSL_ia32cap_P(%rip),%rcx
1202	movq	8(%rcx),%rcx
1203	andl	$0x80100,%ecx
1204.Lmul_mont:
1205	pushq	%rbp
1206.cfi_adjust_cfa_offset	8
1207.cfi_offset	%rbp,-16
1208	pushq	%rbx
1209.cfi_adjust_cfa_offset	8
1210.cfi_offset	%rbx,-24
1211	pushq	%r12
1212.cfi_adjust_cfa_offset	8
1213.cfi_offset	%r12,-32
1214	pushq	%r13
1215.cfi_adjust_cfa_offset	8
1216.cfi_offset	%r13,-40
1217	pushq	%r14
1218.cfi_adjust_cfa_offset	8
1219.cfi_offset	%r14,-48
1220	pushq	%r15
1221.cfi_adjust_cfa_offset	8
1222.cfi_offset	%r15,-56
1223.Lmul_body:
1224	cmpl	$0x80100,%ecx
1225	je	.Lmul_montx
1226	movq	%rdx,%rbx
1227	movq	0(%rdx),%rax
1228	movq	0(%rsi),%r9
1229	movq	8(%rsi),%r10
1230	movq	16(%rsi),%r11
1231	movq	24(%rsi),%r12
1232
1233	call	__ecp_nistz256_mul_montq
1234	jmp	.Lmul_mont_done
1235
1236.align	32
1237.Lmul_montx:
1238	movq	%rdx,%rbx
1239	movq	0(%rdx),%rdx
1240	movq	0(%rsi),%r9
1241	movq	8(%rsi),%r10
1242	movq	16(%rsi),%r11
1243	movq	24(%rsi),%r12
1244	leaq	-128(%rsi),%rsi
1245
1246	call	__ecp_nistz256_mul_montx
1247.Lmul_mont_done:
1248	movq	0(%rsp),%r15
1249.cfi_restore	%r15
1250	movq	8(%rsp),%r14
1251.cfi_restore	%r14
1252	movq	16(%rsp),%r13
1253.cfi_restore	%r13
1254	movq	24(%rsp),%r12
1255.cfi_restore	%r12
1256	movq	32(%rsp),%rbx
1257.cfi_restore	%rbx
1258	movq	40(%rsp),%rbp
1259.cfi_restore	%rbp
1260	leaq	48(%rsp),%rsp
1261.cfi_adjust_cfa_offset	-48
1262.Lmul_epilogue:
1263	.byte	0xf3,0xc3
1264.cfi_endproc
1265.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
1266
1267.type	__ecp_nistz256_mul_montq,@function
1268.align	32
1269__ecp_nistz256_mul_montq:
1270.cfi_startproc
1271
1272
1273	movq	%rax,%rbp
1274	mulq	%r9
1275	movq	.Lpoly+8(%rip),%r14
1276	movq	%rax,%r8
1277	movq	%rbp,%rax
1278	movq	%rdx,%r9
1279
1280	mulq	%r10
1281	movq	.Lpoly+24(%rip),%r15
1282	addq	%rax,%r9
1283	movq	%rbp,%rax
1284	adcq	$0,%rdx
1285	movq	%rdx,%r10
1286
1287	mulq	%r11
1288	addq	%rax,%r10
1289	movq	%rbp,%rax
1290	adcq	$0,%rdx
1291	movq	%rdx,%r11
1292
1293	mulq	%r12
1294	addq	%rax,%r11
1295	movq	%r8,%rax
1296	adcq	$0,%rdx
1297	xorq	%r13,%r13
1298	movq	%rdx,%r12
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309	movq	%r8,%rbp
1310	shlq	$32,%r8
1311	mulq	%r15
1312	shrq	$32,%rbp
1313	addq	%r8,%r9
1314	adcq	%rbp,%r10
1315	adcq	%rax,%r11
1316	movq	8(%rbx),%rax
1317	adcq	%rdx,%r12
1318	adcq	$0,%r13
1319	xorq	%r8,%r8
1320
1321
1322
1323	movq	%rax,%rbp
1324	mulq	0(%rsi)
1325	addq	%rax,%r9
1326	movq	%rbp,%rax
1327	adcq	$0,%rdx
1328	movq	%rdx,%rcx
1329
1330	mulq	8(%rsi)
1331	addq	%rcx,%r10
1332	adcq	$0,%rdx
1333	addq	%rax,%r10
1334	movq	%rbp,%rax
1335	adcq	$0,%rdx
1336	movq	%rdx,%rcx
1337
1338	mulq	16(%rsi)
1339	addq	%rcx,%r11
1340	adcq	$0,%rdx
1341	addq	%rax,%r11
1342	movq	%rbp,%rax
1343	adcq	$0,%rdx
1344	movq	%rdx,%rcx
1345
1346	mulq	24(%rsi)
1347	addq	%rcx,%r12
1348	adcq	$0,%rdx
1349	addq	%rax,%r12
1350	movq	%r9,%rax
1351	adcq	%rdx,%r13
1352	adcq	$0,%r8
1353
1354
1355
1356	movq	%r9,%rbp
1357	shlq	$32,%r9
1358	mulq	%r15
1359	shrq	$32,%rbp
1360	addq	%r9,%r10
1361	adcq	%rbp,%r11
1362	adcq	%rax,%r12
1363	movq	16(%rbx),%rax
1364	adcq	%rdx,%r13
1365	adcq	$0,%r8
1366	xorq	%r9,%r9
1367
1368
1369
1370	movq	%rax,%rbp
1371	mulq	0(%rsi)
1372	addq	%rax,%r10
1373	movq	%rbp,%rax
1374	adcq	$0,%rdx
1375	movq	%rdx,%rcx
1376
1377	mulq	8(%rsi)
1378	addq	%rcx,%r11
1379	adcq	$0,%rdx
1380	addq	%rax,%r11
1381	movq	%rbp,%rax
1382	adcq	$0,%rdx
1383	movq	%rdx,%rcx
1384
1385	mulq	16(%rsi)
1386	addq	%rcx,%r12
1387	adcq	$0,%rdx
1388	addq	%rax,%r12
1389	movq	%rbp,%rax
1390	adcq	$0,%rdx
1391	movq	%rdx,%rcx
1392
1393	mulq	24(%rsi)
1394	addq	%rcx,%r13
1395	adcq	$0,%rdx
1396	addq	%rax,%r13
1397	movq	%r10,%rax
1398	adcq	%rdx,%r8
1399	adcq	$0,%r9
1400
1401
1402
1403	movq	%r10,%rbp
1404	shlq	$32,%r10
1405	mulq	%r15
1406	shrq	$32,%rbp
1407	addq	%r10,%r11
1408	adcq	%rbp,%r12
1409	adcq	%rax,%r13
1410	movq	24(%rbx),%rax
1411	adcq	%rdx,%r8
1412	adcq	$0,%r9
1413	xorq	%r10,%r10
1414
1415
1416
1417	movq	%rax,%rbp
1418	mulq	0(%rsi)
1419	addq	%rax,%r11
1420	movq	%rbp,%rax
1421	adcq	$0,%rdx
1422	movq	%rdx,%rcx
1423
1424	mulq	8(%rsi)
1425	addq	%rcx,%r12
1426	adcq	$0,%rdx
1427	addq	%rax,%r12
1428	movq	%rbp,%rax
1429	adcq	$0,%rdx
1430	movq	%rdx,%rcx
1431
1432	mulq	16(%rsi)
1433	addq	%rcx,%r13
1434	adcq	$0,%rdx
1435	addq	%rax,%r13
1436	movq	%rbp,%rax
1437	adcq	$0,%rdx
1438	movq	%rdx,%rcx
1439
1440	mulq	24(%rsi)
1441	addq	%rcx,%r8
1442	adcq	$0,%rdx
1443	addq	%rax,%r8
1444	movq	%r11,%rax
1445	adcq	%rdx,%r9
1446	adcq	$0,%r10
1447
1448
1449
1450	movq	%r11,%rbp
1451	shlq	$32,%r11
1452	mulq	%r15
1453	shrq	$32,%rbp
1454	addq	%r11,%r12
1455	adcq	%rbp,%r13
1456	movq	%r12,%rcx
1457	adcq	%rax,%r8
1458	adcq	%rdx,%r9
1459	movq	%r13,%rbp
1460	adcq	$0,%r10
1461
1462
1463
1464	subq	$-1,%r12
1465	movq	%r8,%rbx
1466	sbbq	%r14,%r13
1467	sbbq	$0,%r8
1468	movq	%r9,%rdx
1469	sbbq	%r15,%r9
1470	sbbq	$0,%r10
1471
1472	cmovcq	%rcx,%r12
1473	cmovcq	%rbp,%r13
1474	movq	%r12,0(%rdi)
1475	cmovcq	%rbx,%r8
1476	movq	%r13,8(%rdi)
1477	cmovcq	%rdx,%r9
1478	movq	%r8,16(%rdi)
1479	movq	%r9,24(%rdi)
1480
1481	.byte	0xf3,0xc3
1482.cfi_endproc
1483.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
1484
1485
1486
1487
1488
1489
1490
1491
1492.globl	ecp_nistz256_sqr_mont
1493.hidden ecp_nistz256_sqr_mont
1494.type	ecp_nistz256_sqr_mont,@function
1495.align	32
1496ecp_nistz256_sqr_mont:
1497.cfi_startproc
1498	leaq	OPENSSL_ia32cap_P(%rip),%rcx
1499	movq	8(%rcx),%rcx
1500	andl	$0x80100,%ecx
1501	pushq	%rbp
1502.cfi_adjust_cfa_offset	8
1503.cfi_offset	%rbp,-16
1504	pushq	%rbx
1505.cfi_adjust_cfa_offset	8
1506.cfi_offset	%rbx,-24
1507	pushq	%r12
1508.cfi_adjust_cfa_offset	8
1509.cfi_offset	%r12,-32
1510	pushq	%r13
1511.cfi_adjust_cfa_offset	8
1512.cfi_offset	%r13,-40
1513	pushq	%r14
1514.cfi_adjust_cfa_offset	8
1515.cfi_offset	%r14,-48
1516	pushq	%r15
1517.cfi_adjust_cfa_offset	8
1518.cfi_offset	%r15,-56
1519.Lsqr_body:
1520	cmpl	$0x80100,%ecx
1521	je	.Lsqr_montx
1522	movq	0(%rsi),%rax
1523	movq	8(%rsi),%r14
1524	movq	16(%rsi),%r15
1525	movq	24(%rsi),%r8
1526
1527	call	__ecp_nistz256_sqr_montq
1528	jmp	.Lsqr_mont_done
1529
1530.align	32
1531.Lsqr_montx:
1532	movq	0(%rsi),%rdx
1533	movq	8(%rsi),%r14
1534	movq	16(%rsi),%r15
1535	movq	24(%rsi),%r8
1536	leaq	-128(%rsi),%rsi
1537
1538	call	__ecp_nistz256_sqr_montx
1539.Lsqr_mont_done:
1540	movq	0(%rsp),%r15
1541.cfi_restore	%r15
1542	movq	8(%rsp),%r14
1543.cfi_restore	%r14
1544	movq	16(%rsp),%r13
1545.cfi_restore	%r13
1546	movq	24(%rsp),%r12
1547.cfi_restore	%r12
1548	movq	32(%rsp),%rbx
1549.cfi_restore	%rbx
1550	movq	40(%rsp),%rbp
1551.cfi_restore	%rbp
1552	leaq	48(%rsp),%rsp
1553.cfi_adjust_cfa_offset	-48
1554.Lsqr_epilogue:
1555	.byte	0xf3,0xc3
1556.cfi_endproc
1557.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
1558
1559.type	__ecp_nistz256_sqr_montq,@function
1560.align	32
1561__ecp_nistz256_sqr_montq:
1562.cfi_startproc
1563	movq	%rax,%r13
1564	mulq	%r14
1565	movq	%rax,%r9
1566	movq	%r15,%rax
1567	movq	%rdx,%r10
1568
1569	mulq	%r13
1570	addq	%rax,%r10
1571	movq	%r8,%rax
1572	adcq	$0,%rdx
1573	movq	%rdx,%r11
1574
1575	mulq	%r13
1576	addq	%rax,%r11
1577	movq	%r15,%rax
1578	adcq	$0,%rdx
1579	movq	%rdx,%r12
1580
1581
1582	mulq	%r14
1583	addq	%rax,%r11
1584	movq	%r8,%rax
1585	adcq	$0,%rdx
1586	movq	%rdx,%rbp
1587
1588	mulq	%r14
1589	addq	%rax,%r12
1590	movq	%r8,%rax
1591	adcq	$0,%rdx
1592	addq	%rbp,%r12
1593	movq	%rdx,%r13
1594	adcq	$0,%r13
1595
1596
1597	mulq	%r15
1598	xorq	%r15,%r15
1599	addq	%rax,%r13
1600	movq	0(%rsi),%rax
1601	movq	%rdx,%r14
1602	adcq	$0,%r14
1603
1604	addq	%r9,%r9
1605	adcq	%r10,%r10
1606	adcq	%r11,%r11
1607	adcq	%r12,%r12
1608	adcq	%r13,%r13
1609	adcq	%r14,%r14
1610	adcq	$0,%r15
1611
1612	mulq	%rax
1613	movq	%rax,%r8
1614	movq	8(%rsi),%rax
1615	movq	%rdx,%rcx
1616
1617	mulq	%rax
1618	addq	%rcx,%r9
1619	adcq	%rax,%r10
1620	movq	16(%rsi),%rax
1621	adcq	$0,%rdx
1622	movq	%rdx,%rcx
1623
1624	mulq	%rax
1625	addq	%rcx,%r11
1626	adcq	%rax,%r12
1627	movq	24(%rsi),%rax
1628	adcq	$0,%rdx
1629	movq	%rdx,%rcx
1630
1631	mulq	%rax
1632	addq	%rcx,%r13
1633	adcq	%rax,%r14
1634	movq	%r8,%rax
1635	adcq	%rdx,%r15
1636
1637	movq	.Lpoly+8(%rip),%rsi
1638	movq	.Lpoly+24(%rip),%rbp
1639
1640
1641
1642
1643	movq	%r8,%rcx
1644	shlq	$32,%r8
1645	mulq	%rbp
1646	shrq	$32,%rcx
1647	addq	%r8,%r9
1648	adcq	%rcx,%r10
1649	adcq	%rax,%r11
1650	movq	%r9,%rax
1651	adcq	$0,%rdx
1652
1653
1654
1655	movq	%r9,%rcx
1656	shlq	$32,%r9
1657	movq	%rdx,%r8
1658	mulq	%rbp
1659	shrq	$32,%rcx
1660	addq	%r9,%r10
1661	adcq	%rcx,%r11
1662	adcq	%rax,%r8
1663	movq	%r10,%rax
1664	adcq	$0,%rdx
1665
1666
1667
1668	movq	%r10,%rcx
1669	shlq	$32,%r10
1670	movq	%rdx,%r9
1671	mulq	%rbp
1672	shrq	$32,%rcx
1673	addq	%r10,%r11
1674	adcq	%rcx,%r8
1675	adcq	%rax,%r9
1676	movq	%r11,%rax
1677	adcq	$0,%rdx
1678
1679
1680
1681	movq	%r11,%rcx
1682	shlq	$32,%r11
1683	movq	%rdx,%r10
1684	mulq	%rbp
1685	shrq	$32,%rcx
1686	addq	%r11,%r8
1687	adcq	%rcx,%r9
1688	adcq	%rax,%r10
1689	adcq	$0,%rdx
1690	xorq	%r11,%r11
1691
1692
1693
1694	addq	%r8,%r12
1695	adcq	%r9,%r13
1696	movq	%r12,%r8
1697	adcq	%r10,%r14
1698	adcq	%rdx,%r15
1699	movq	%r13,%r9
1700	adcq	$0,%r11
1701
1702	subq	$-1,%r12
1703	movq	%r14,%r10
1704	sbbq	%rsi,%r13
1705	sbbq	$0,%r14
1706	movq	%r15,%rcx
1707	sbbq	%rbp,%r15
1708	sbbq	$0,%r11
1709
1710	cmovcq	%r8,%r12
1711	cmovcq	%r9,%r13
1712	movq	%r12,0(%rdi)
1713	cmovcq	%r10,%r14
1714	movq	%r13,8(%rdi)
1715	cmovcq	%rcx,%r15
1716	movq	%r14,16(%rdi)
1717	movq	%r15,24(%rdi)
1718
1719	.byte	0xf3,0xc3
1720.cfi_endproc
1721.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
1722.type	__ecp_nistz256_mul_montx,@function
1723.align	32
1724__ecp_nistz256_mul_montx:
1725.cfi_startproc
1726
1727
1728	mulxq	%r9,%r8,%r9
1729	mulxq	%r10,%rcx,%r10
1730	movq	$32,%r14
1731	xorq	%r13,%r13
1732	mulxq	%r11,%rbp,%r11
1733	movq	.Lpoly+24(%rip),%r15
1734	adcq	%rcx,%r9
1735	mulxq	%r12,%rcx,%r12
1736	movq	%r8,%rdx
1737	adcq	%rbp,%r10
1738	shlxq	%r14,%r8,%rbp
1739	adcq	%rcx,%r11
1740	shrxq	%r14,%r8,%rcx
1741	adcq	$0,%r12
1742
1743
1744
1745	addq	%rbp,%r9
1746	adcq	%rcx,%r10
1747
1748	mulxq	%r15,%rcx,%rbp
1749	movq	8(%rbx),%rdx
1750	adcq	%rcx,%r11
1751	adcq	%rbp,%r12
1752	adcq	$0,%r13
1753	xorq	%r8,%r8
1754
1755
1756
1757	mulxq	0+128(%rsi),%rcx,%rbp
1758	adcxq	%rcx,%r9
1759	adoxq	%rbp,%r10
1760
1761	mulxq	8+128(%rsi),%rcx,%rbp
1762	adcxq	%rcx,%r10
1763	adoxq	%rbp,%r11
1764
1765	mulxq	16+128(%rsi),%rcx,%rbp
1766	adcxq	%rcx,%r11
1767	adoxq	%rbp,%r12
1768
1769	mulxq	24+128(%rsi),%rcx,%rbp
1770	movq	%r9,%rdx
1771	adcxq	%rcx,%r12
1772	shlxq	%r14,%r9,%rcx
1773	adoxq	%rbp,%r13
1774	shrxq	%r14,%r9,%rbp
1775
1776	adcxq	%r8,%r13
1777	adoxq	%r8,%r8
1778	adcq	$0,%r8
1779
1780
1781
1782	addq	%rcx,%r10
1783	adcq	%rbp,%r11
1784
1785	mulxq	%r15,%rcx,%rbp
1786	movq	16(%rbx),%rdx
1787	adcq	%rcx,%r12
1788	adcq	%rbp,%r13
1789	adcq	$0,%r8
1790	xorq	%r9,%r9
1791
1792
1793
1794	mulxq	0+128(%rsi),%rcx,%rbp
1795	adcxq	%rcx,%r10
1796	adoxq	%rbp,%r11
1797
1798	mulxq	8+128(%rsi),%rcx,%rbp
1799	adcxq	%rcx,%r11
1800	adoxq	%rbp,%r12
1801
1802	mulxq	16+128(%rsi),%rcx,%rbp
1803	adcxq	%rcx,%r12
1804	adoxq	%rbp,%r13
1805
1806	mulxq	24+128(%rsi),%rcx,%rbp
1807	movq	%r10,%rdx
1808	adcxq	%rcx,%r13
1809	shlxq	%r14,%r10,%rcx
1810	adoxq	%rbp,%r8
1811	shrxq	%r14,%r10,%rbp
1812
1813	adcxq	%r9,%r8
1814	adoxq	%r9,%r9
1815	adcq	$0,%r9
1816
1817
1818
1819	addq	%rcx,%r11
1820	adcq	%rbp,%r12
1821
1822	mulxq	%r15,%rcx,%rbp
1823	movq	24(%rbx),%rdx
1824	adcq	%rcx,%r13
1825	adcq	%rbp,%r8
1826	adcq	$0,%r9
1827	xorq	%r10,%r10
1828
1829
1830
1831	mulxq	0+128(%rsi),%rcx,%rbp
1832	adcxq	%rcx,%r11
1833	adoxq	%rbp,%r12
1834
1835	mulxq	8+128(%rsi),%rcx,%rbp
1836	adcxq	%rcx,%r12
1837	adoxq	%rbp,%r13
1838
1839	mulxq	16+128(%rsi),%rcx,%rbp
1840	adcxq	%rcx,%r13
1841	adoxq	%rbp,%r8
1842
1843	mulxq	24+128(%rsi),%rcx,%rbp
1844	movq	%r11,%rdx
1845	adcxq	%rcx,%r8
1846	shlxq	%r14,%r11,%rcx
1847	adoxq	%rbp,%r9
1848	shrxq	%r14,%r11,%rbp
1849
1850	adcxq	%r10,%r9
1851	adoxq	%r10,%r10
1852	adcq	$0,%r10
1853
1854
1855
1856	addq	%rcx,%r12
1857	adcq	%rbp,%r13
1858
1859	mulxq	%r15,%rcx,%rbp
1860	movq	%r12,%rbx
1861	movq	.Lpoly+8(%rip),%r14
1862	adcq	%rcx,%r8
1863	movq	%r13,%rdx
1864	adcq	%rbp,%r9
1865	adcq	$0,%r10
1866
1867
1868
1869	xorl	%eax,%eax
1870	movq	%r8,%rcx
1871	sbbq	$-1,%r12
1872	sbbq	%r14,%r13
1873	sbbq	$0,%r8
1874	movq	%r9,%rbp
1875	sbbq	%r15,%r9
1876	sbbq	$0,%r10
1877
1878	cmovcq	%rbx,%r12
1879	cmovcq	%rdx,%r13
1880	movq	%r12,0(%rdi)
1881	cmovcq	%rcx,%r8
1882	movq	%r13,8(%rdi)
1883	cmovcq	%rbp,%r9
1884	movq	%r8,16(%rdi)
1885	movq	%r9,24(%rdi)
1886
1887	.byte	0xf3,0xc3
1888.cfi_endproc
1889.size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
1890
1891.type	__ecp_nistz256_sqr_montx,@function
1892.align	32
1893__ecp_nistz256_sqr_montx:
1894.cfi_startproc
1895	mulxq	%r14,%r9,%r10
1896	mulxq	%r15,%rcx,%r11
1897	xorl	%eax,%eax
1898	adcq	%rcx,%r10
1899	mulxq	%r8,%rbp,%r12
1900	movq	%r14,%rdx
1901	adcq	%rbp,%r11
1902	adcq	$0,%r12
1903	xorq	%r13,%r13
1904
1905
1906	mulxq	%r15,%rcx,%rbp
1907	adcxq	%rcx,%r11
1908	adoxq	%rbp,%r12
1909
1910	mulxq	%r8,%rcx,%rbp
1911	movq	%r15,%rdx
1912	adcxq	%rcx,%r12
1913	adoxq	%rbp,%r13
1914	adcq	$0,%r13
1915
1916
1917	mulxq	%r8,%rcx,%r14
1918	movq	0+128(%rsi),%rdx
1919	xorq	%r15,%r15
1920	adcxq	%r9,%r9
1921	adoxq	%rcx,%r13
1922	adcxq	%r10,%r10
1923	adoxq	%r15,%r14
1924
1925	mulxq	%rdx,%r8,%rbp
1926	movq	8+128(%rsi),%rdx
1927	adcxq	%r11,%r11
1928	adoxq	%rbp,%r9
1929	adcxq	%r12,%r12
1930	mulxq	%rdx,%rcx,%rax
1931	movq	16+128(%rsi),%rdx
1932	adcxq	%r13,%r13
1933	adoxq	%rcx,%r10
1934	adcxq	%r14,%r14
1935.byte	0x67
1936	mulxq	%rdx,%rcx,%rbp
1937	movq	24+128(%rsi),%rdx
1938	adoxq	%rax,%r11
1939	adcxq	%r15,%r15
1940	adoxq	%rcx,%r12
1941	movq	$32,%rsi
1942	adoxq	%rbp,%r13
1943.byte	0x67,0x67
1944	mulxq	%rdx,%rcx,%rax
1945	movq	.Lpoly+24(%rip),%rdx
1946	adoxq	%rcx,%r14
1947	shlxq	%rsi,%r8,%rcx
1948	adoxq	%rax,%r15
1949	shrxq	%rsi,%r8,%rax
1950	movq	%rdx,%rbp
1951
1952
1953	addq	%rcx,%r9
1954	adcq	%rax,%r10
1955
1956	mulxq	%r8,%rcx,%r8
1957	adcq	%rcx,%r11
1958	shlxq	%rsi,%r9,%rcx
1959	adcq	$0,%r8
1960	shrxq	%rsi,%r9,%rax
1961
1962
1963	addq	%rcx,%r10
1964	adcq	%rax,%r11
1965
1966	mulxq	%r9,%rcx,%r9
1967	adcq	%rcx,%r8
1968	shlxq	%rsi,%r10,%rcx
1969	adcq	$0,%r9
1970	shrxq	%rsi,%r10,%rax
1971
1972
1973	addq	%rcx,%r11
1974	adcq	%rax,%r8
1975
1976	mulxq	%r10,%rcx,%r10
1977	adcq	%rcx,%r9
1978	shlxq	%rsi,%r11,%rcx
1979	adcq	$0,%r10
1980	shrxq	%rsi,%r11,%rax
1981
1982
1983	addq	%rcx,%r8
1984	adcq	%rax,%r9
1985
1986	mulxq	%r11,%rcx,%r11
1987	adcq	%rcx,%r10
1988	adcq	$0,%r11
1989
1990	xorq	%rdx,%rdx
1991	addq	%r8,%r12
1992	movq	.Lpoly+8(%rip),%rsi
1993	adcq	%r9,%r13
1994	movq	%r12,%r8
1995	adcq	%r10,%r14
1996	adcq	%r11,%r15
1997	movq	%r13,%r9
1998	adcq	$0,%rdx
1999
2000	subq	$-1,%r12
2001	movq	%r14,%r10
2002	sbbq	%rsi,%r13
2003	sbbq	$0,%r14
2004	movq	%r15,%r11
2005	sbbq	%rbp,%r15
2006	sbbq	$0,%rdx
2007
2008	cmovcq	%r8,%r12
2009	cmovcq	%r9,%r13
2010	movq	%r12,0(%rdi)
2011	cmovcq	%r10,%r14
2012	movq	%r13,8(%rdi)
2013	cmovcq	%r11,%r15
2014	movq	%r14,16(%rdi)
2015	movq	%r15,24(%rdi)
2016
2017	.byte	0xf3,0xc3
2018.cfi_endproc
2019.size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
2020
2021
2022.globl	ecp_nistz256_select_w5
2023.hidden ecp_nistz256_select_w5
2024.type	ecp_nistz256_select_w5,@function
2025.align	32
2026ecp_nistz256_select_w5:
2027.cfi_startproc
2028	leaq	OPENSSL_ia32cap_P(%rip),%rax
2029	movq	8(%rax),%rax
2030	testl	$32,%eax
2031	jnz	.Lavx2_select_w5
2032	movdqa	.LOne(%rip),%xmm0
2033	movd	%edx,%xmm1
2034
2035	pxor	%xmm2,%xmm2
2036	pxor	%xmm3,%xmm3
2037	pxor	%xmm4,%xmm4
2038	pxor	%xmm5,%xmm5
2039	pxor	%xmm6,%xmm6
2040	pxor	%xmm7,%xmm7
2041
2042	movdqa	%xmm0,%xmm8
2043	pshufd	$0,%xmm1,%xmm1
2044
2045	movq	$16,%rax
2046.Lselect_loop_sse_w5:
2047
2048	movdqa	%xmm8,%xmm15
2049	paddd	%xmm0,%xmm8
2050	pcmpeqd	%xmm1,%xmm15
2051
2052	movdqa	0(%rsi),%xmm9
2053	movdqa	16(%rsi),%xmm10
2054	movdqa	32(%rsi),%xmm11
2055	movdqa	48(%rsi),%xmm12
2056	movdqa	64(%rsi),%xmm13
2057	movdqa	80(%rsi),%xmm14
2058	leaq	96(%rsi),%rsi
2059
2060	pand	%xmm15,%xmm9
2061	pand	%xmm15,%xmm10
2062	por	%xmm9,%xmm2
2063	pand	%xmm15,%xmm11
2064	por	%xmm10,%xmm3
2065	pand	%xmm15,%xmm12
2066	por	%xmm11,%xmm4
2067	pand	%xmm15,%xmm13
2068	por	%xmm12,%xmm5
2069	pand	%xmm15,%xmm14
2070	por	%xmm13,%xmm6
2071	por	%xmm14,%xmm7
2072
2073	decq	%rax
2074	jnz	.Lselect_loop_sse_w5
2075
2076	movdqu	%xmm2,0(%rdi)
2077	movdqu	%xmm3,16(%rdi)
2078	movdqu	%xmm4,32(%rdi)
2079	movdqu	%xmm5,48(%rdi)
2080	movdqu	%xmm6,64(%rdi)
2081	movdqu	%xmm7,80(%rdi)
2082	.byte	0xf3,0xc3
2083.cfi_endproc
2084.LSEH_end_ecp_nistz256_select_w5:
2085.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
2086
2087
2088
2089.globl	ecp_nistz256_select_w7
2090.hidden ecp_nistz256_select_w7
2091.type	ecp_nistz256_select_w7,@function
2092.align	32
2093ecp_nistz256_select_w7:
2094.cfi_startproc
2095	leaq	OPENSSL_ia32cap_P(%rip),%rax
2096	movq	8(%rax),%rax
2097	testl	$32,%eax
2098	jnz	.Lavx2_select_w7
2099	movdqa	.LOne(%rip),%xmm8
2100	movd	%edx,%xmm1
2101
2102	pxor	%xmm2,%xmm2
2103	pxor	%xmm3,%xmm3
2104	pxor	%xmm4,%xmm4
2105	pxor	%xmm5,%xmm5
2106
2107	movdqa	%xmm8,%xmm0
2108	pshufd	$0,%xmm1,%xmm1
2109	movq	$64,%rax
2110
2111.Lselect_loop_sse_w7:
2112	movdqa	%xmm8,%xmm15
2113	paddd	%xmm0,%xmm8
2114	movdqa	0(%rsi),%xmm9
2115	movdqa	16(%rsi),%xmm10
2116	pcmpeqd	%xmm1,%xmm15
2117	movdqa	32(%rsi),%xmm11
2118	movdqa	48(%rsi),%xmm12
2119	leaq	64(%rsi),%rsi
2120
2121	pand	%xmm15,%xmm9
2122	pand	%xmm15,%xmm10
2123	por	%xmm9,%xmm2
2124	pand	%xmm15,%xmm11
2125	por	%xmm10,%xmm3
2126	pand	%xmm15,%xmm12
2127	por	%xmm11,%xmm4
2128	prefetcht0	255(%rsi)
2129	por	%xmm12,%xmm5
2130
2131	decq	%rax
2132	jnz	.Lselect_loop_sse_w7
2133
2134	movdqu	%xmm2,0(%rdi)
2135	movdqu	%xmm3,16(%rdi)
2136	movdqu	%xmm4,32(%rdi)
2137	movdqu	%xmm5,48(%rdi)
2138	.byte	0xf3,0xc3
2139.cfi_endproc
2140.LSEH_end_ecp_nistz256_select_w7:
2141.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
2142
2143
2144.type	ecp_nistz256_avx2_select_w5,@function
2145.align	32
2146ecp_nistz256_avx2_select_w5:
2147.cfi_startproc
2148.Lavx2_select_w5:
2149	vzeroupper
2150	vmovdqa	.LTwo(%rip),%ymm0
2151
2152	vpxor	%ymm2,%ymm2,%ymm2
2153	vpxor	%ymm3,%ymm3,%ymm3
2154	vpxor	%ymm4,%ymm4,%ymm4
2155
2156	vmovdqa	.LOne(%rip),%ymm5
2157	vmovdqa	.LTwo(%rip),%ymm10
2158
2159	vmovd	%edx,%xmm1
2160	vpermd	%ymm1,%ymm2,%ymm1
2161
2162	movq	$8,%rax
2163.Lselect_loop_avx2_w5:
2164
2165	vmovdqa	0(%rsi),%ymm6
2166	vmovdqa	32(%rsi),%ymm7
2167	vmovdqa	64(%rsi),%ymm8
2168
2169	vmovdqa	96(%rsi),%ymm11
2170	vmovdqa	128(%rsi),%ymm12
2171	vmovdqa	160(%rsi),%ymm13
2172
2173	vpcmpeqd	%ymm1,%ymm5,%ymm9
2174	vpcmpeqd	%ymm1,%ymm10,%ymm14
2175
2176	vpaddd	%ymm0,%ymm5,%ymm5
2177	vpaddd	%ymm0,%ymm10,%ymm10
2178	leaq	192(%rsi),%rsi
2179
2180	vpand	%ymm9,%ymm6,%ymm6
2181	vpand	%ymm9,%ymm7,%ymm7
2182	vpand	%ymm9,%ymm8,%ymm8
2183	vpand	%ymm14,%ymm11,%ymm11
2184	vpand	%ymm14,%ymm12,%ymm12
2185	vpand	%ymm14,%ymm13,%ymm13
2186
2187	vpxor	%ymm6,%ymm2,%ymm2
2188	vpxor	%ymm7,%ymm3,%ymm3
2189	vpxor	%ymm8,%ymm4,%ymm4
2190	vpxor	%ymm11,%ymm2,%ymm2
2191	vpxor	%ymm12,%ymm3,%ymm3
2192	vpxor	%ymm13,%ymm4,%ymm4
2193
2194	decq	%rax
2195	jnz	.Lselect_loop_avx2_w5
2196
2197	vmovdqu	%ymm2,0(%rdi)
2198	vmovdqu	%ymm3,32(%rdi)
2199	vmovdqu	%ymm4,64(%rdi)
2200	vzeroupper
2201	.byte	0xf3,0xc3
2202.cfi_endproc
2203.LSEH_end_ecp_nistz256_avx2_select_w5:
2204.size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
2205
2206
2207
2208.globl	ecp_nistz256_avx2_select_w7
2209.hidden ecp_nistz256_avx2_select_w7
2210.type	ecp_nistz256_avx2_select_w7,@function
2211.align	32
2212ecp_nistz256_avx2_select_w7:
2213.cfi_startproc
2214.Lavx2_select_w7:
2215	vzeroupper
2216	vmovdqa	.LThree(%rip),%ymm0
2217
2218	vpxor	%ymm2,%ymm2,%ymm2
2219	vpxor	%ymm3,%ymm3,%ymm3
2220
2221	vmovdqa	.LOne(%rip),%ymm4
2222	vmovdqa	.LTwo(%rip),%ymm8
2223	vmovdqa	.LThree(%rip),%ymm12
2224
2225	vmovd	%edx,%xmm1
2226	vpermd	%ymm1,%ymm2,%ymm1
2227
2228
2229	movq	$21,%rax
2230.Lselect_loop_avx2_w7:
2231
2232	vmovdqa	0(%rsi),%ymm5
2233	vmovdqa	32(%rsi),%ymm6
2234
2235	vmovdqa	64(%rsi),%ymm9
2236	vmovdqa	96(%rsi),%ymm10
2237
2238	vmovdqa	128(%rsi),%ymm13
2239	vmovdqa	160(%rsi),%ymm14
2240
2241	vpcmpeqd	%ymm1,%ymm4,%ymm7
2242	vpcmpeqd	%ymm1,%ymm8,%ymm11
2243	vpcmpeqd	%ymm1,%ymm12,%ymm15
2244
2245	vpaddd	%ymm0,%ymm4,%ymm4
2246	vpaddd	%ymm0,%ymm8,%ymm8
2247	vpaddd	%ymm0,%ymm12,%ymm12
2248	leaq	192(%rsi),%rsi
2249
2250	vpand	%ymm7,%ymm5,%ymm5
2251	vpand	%ymm7,%ymm6,%ymm6
2252	vpand	%ymm11,%ymm9,%ymm9
2253	vpand	%ymm11,%ymm10,%ymm10
2254	vpand	%ymm15,%ymm13,%ymm13
2255	vpand	%ymm15,%ymm14,%ymm14
2256
2257	vpxor	%ymm5,%ymm2,%ymm2
2258	vpxor	%ymm6,%ymm3,%ymm3
2259	vpxor	%ymm9,%ymm2,%ymm2
2260	vpxor	%ymm10,%ymm3,%ymm3
2261	vpxor	%ymm13,%ymm2,%ymm2
2262	vpxor	%ymm14,%ymm3,%ymm3
2263
2264	decq	%rax
2265	jnz	.Lselect_loop_avx2_w7
2266
2267
2268	vmovdqa	0(%rsi),%ymm5
2269	vmovdqa	32(%rsi),%ymm6
2270
2271	vpcmpeqd	%ymm1,%ymm4,%ymm7
2272
2273	vpand	%ymm7,%ymm5,%ymm5
2274	vpand	%ymm7,%ymm6,%ymm6
2275
2276	vpxor	%ymm5,%ymm2,%ymm2
2277	vpxor	%ymm6,%ymm3,%ymm3
2278
2279	vmovdqu	%ymm2,0(%rdi)
2280	vmovdqu	%ymm3,32(%rdi)
2281	vzeroupper
2282	.byte	0xf3,0xc3
2283.cfi_endproc
2284.LSEH_end_ecp_nistz256_avx2_select_w7:
2285.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
2286.type	__ecp_nistz256_add_toq,@function
2287.align	32
2288__ecp_nistz256_add_toq:
2289.cfi_startproc
2290	xorq	%r11,%r11
2291	addq	0(%rbx),%r12
2292	adcq	8(%rbx),%r13
2293	movq	%r12,%rax
2294	adcq	16(%rbx),%r8
2295	adcq	24(%rbx),%r9
2296	movq	%r13,%rbp
2297	adcq	$0,%r11
2298
2299	subq	$-1,%r12
2300	movq	%r8,%rcx
2301	sbbq	%r14,%r13
2302	sbbq	$0,%r8
2303	movq	%r9,%r10
2304	sbbq	%r15,%r9
2305	sbbq	$0,%r11
2306
2307	cmovcq	%rax,%r12
2308	cmovcq	%rbp,%r13
2309	movq	%r12,0(%rdi)
2310	cmovcq	%rcx,%r8
2311	movq	%r13,8(%rdi)
2312	cmovcq	%r10,%r9
2313	movq	%r8,16(%rdi)
2314	movq	%r9,24(%rdi)
2315
2316	.byte	0xf3,0xc3
2317.cfi_endproc
2318.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
2319
2320.type	__ecp_nistz256_sub_fromq,@function
2321.align	32
2322__ecp_nistz256_sub_fromq:
2323.cfi_startproc
2324	subq	0(%rbx),%r12
2325	sbbq	8(%rbx),%r13
2326	movq	%r12,%rax
2327	sbbq	16(%rbx),%r8
2328	sbbq	24(%rbx),%r9
2329	movq	%r13,%rbp
2330	sbbq	%r11,%r11
2331
2332	addq	$-1,%r12
2333	movq	%r8,%rcx
2334	adcq	%r14,%r13
2335	adcq	$0,%r8
2336	movq	%r9,%r10
2337	adcq	%r15,%r9
2338	testq	%r11,%r11
2339
2340	cmovzq	%rax,%r12
2341	cmovzq	%rbp,%r13
2342	movq	%r12,0(%rdi)
2343	cmovzq	%rcx,%r8
2344	movq	%r13,8(%rdi)
2345	cmovzq	%r10,%r9
2346	movq	%r8,16(%rdi)
2347	movq	%r9,24(%rdi)
2348
2349	.byte	0xf3,0xc3
2350.cfi_endproc
2351.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
2352
2353.type	__ecp_nistz256_subq,@function
2354.align	32
2355__ecp_nistz256_subq:
2356.cfi_startproc
2357	subq	%r12,%rax
2358	sbbq	%r13,%rbp
2359	movq	%rax,%r12
2360	sbbq	%r8,%rcx
2361	sbbq	%r9,%r10
2362	movq	%rbp,%r13
2363	sbbq	%r11,%r11
2364
2365	addq	$-1,%rax
2366	movq	%rcx,%r8
2367	adcq	%r14,%rbp
2368	adcq	$0,%rcx
2369	movq	%r10,%r9
2370	adcq	%r15,%r10
2371	testq	%r11,%r11
2372
2373	cmovnzq	%rax,%r12
2374	cmovnzq	%rbp,%r13
2375	cmovnzq	%rcx,%r8
2376	cmovnzq	%r10,%r9
2377
2378	.byte	0xf3,0xc3
2379.cfi_endproc
2380.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq
2381
2382.type	__ecp_nistz256_mul_by_2q,@function
2383.align	32
2384__ecp_nistz256_mul_by_2q:
2385.cfi_startproc
2386	xorq	%r11,%r11
2387	addq	%r12,%r12
2388	adcq	%r13,%r13
2389	movq	%r12,%rax
2390	adcq	%r8,%r8
2391	adcq	%r9,%r9
2392	movq	%r13,%rbp
2393	adcq	$0,%r11
2394
2395	subq	$-1,%r12
2396	movq	%r8,%rcx
2397	sbbq	%r14,%r13
2398	sbbq	$0,%r8
2399	movq	%r9,%r10
2400	sbbq	%r15,%r9
2401	sbbq	$0,%r11
2402
2403	cmovcq	%rax,%r12
2404	cmovcq	%rbp,%r13
2405	movq	%r12,0(%rdi)
2406	cmovcq	%rcx,%r8
2407	movq	%r13,8(%rdi)
2408	cmovcq	%r10,%r9
2409	movq	%r8,16(%rdi)
2410	movq	%r9,24(%rdi)
2411
2412	.byte	0xf3,0xc3
2413.cfi_endproc
2414.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
2415.globl	ecp_nistz256_point_double
2416.hidden ecp_nistz256_point_double
2417.type	ecp_nistz256_point_double,@function
2418.align	32
2419ecp_nistz256_point_double:
2420.cfi_startproc
2421	leaq	OPENSSL_ia32cap_P(%rip),%rcx
2422	movq	8(%rcx),%rcx
2423	andl	$0x80100,%ecx
2424	cmpl	$0x80100,%ecx
2425	je	.Lpoint_doublex
2426	pushq	%rbp
2427.cfi_adjust_cfa_offset	8
2428.cfi_offset	%rbp,-16
2429	pushq	%rbx
2430.cfi_adjust_cfa_offset	8
2431.cfi_offset	%rbx,-24
2432	pushq	%r12
2433.cfi_adjust_cfa_offset	8
2434.cfi_offset	%r12,-32
2435	pushq	%r13
2436.cfi_adjust_cfa_offset	8
2437.cfi_offset	%r13,-40
2438	pushq	%r14
2439.cfi_adjust_cfa_offset	8
2440.cfi_offset	%r14,-48
2441	pushq	%r15
2442.cfi_adjust_cfa_offset	8
2443.cfi_offset	%r15,-56
2444	subq	$160+8,%rsp
2445.cfi_adjust_cfa_offset	32*5+8
2446.Lpoint_doubleq_body:
2447
2448.Lpoint_double_shortcutq:
2449	movdqu	0(%rsi),%xmm0
2450	movq	%rsi,%rbx
2451	movdqu	16(%rsi),%xmm1
2452	movq	32+0(%rsi),%r12
2453	movq	32+8(%rsi),%r13
2454	movq	32+16(%rsi),%r8
2455	movq	32+24(%rsi),%r9
2456	movq	.Lpoly+8(%rip),%r14
2457	movq	.Lpoly+24(%rip),%r15
2458	movdqa	%xmm0,96(%rsp)
2459	movdqa	%xmm1,96+16(%rsp)
2460	leaq	32(%rdi),%r10
2461	leaq	64(%rdi),%r11
2462.byte	102,72,15,110,199
2463.byte	102,73,15,110,202
2464.byte	102,73,15,110,211
2465
2466	leaq	0(%rsp),%rdi
2467	call	__ecp_nistz256_mul_by_2q
2468
2469	movq	64+0(%rsi),%rax
2470	movq	64+8(%rsi),%r14
2471	movq	64+16(%rsi),%r15
2472	movq	64+24(%rsi),%r8
2473	leaq	64-0(%rsi),%rsi
2474	leaq	64(%rsp),%rdi
2475	call	__ecp_nistz256_sqr_montq
2476
2477	movq	0+0(%rsp),%rax
2478	movq	8+0(%rsp),%r14
2479	leaq	0+0(%rsp),%rsi
2480	movq	16+0(%rsp),%r15
2481	movq	24+0(%rsp),%r8
2482	leaq	0(%rsp),%rdi
2483	call	__ecp_nistz256_sqr_montq
2484
2485	movq	32(%rbx),%rax
2486	movq	64+0(%rbx),%r9
2487	movq	64+8(%rbx),%r10
2488	movq	64+16(%rbx),%r11
2489	movq	64+24(%rbx),%r12
2490	leaq	64-0(%rbx),%rsi
2491	leaq	32(%rbx),%rbx
2492.byte	102,72,15,126,215
2493	call	__ecp_nistz256_mul_montq
2494	call	__ecp_nistz256_mul_by_2q
2495
2496	movq	96+0(%rsp),%r12
2497	movq	96+8(%rsp),%r13
2498	leaq	64(%rsp),%rbx
2499	movq	96+16(%rsp),%r8
2500	movq	96+24(%rsp),%r9
2501	leaq	32(%rsp),%rdi
2502	call	__ecp_nistz256_add_toq
2503
2504	movq	96+0(%rsp),%r12
2505	movq	96+8(%rsp),%r13
2506	leaq	64(%rsp),%rbx
2507	movq	96+16(%rsp),%r8
2508	movq	96+24(%rsp),%r9
2509	leaq	64(%rsp),%rdi
2510	call	__ecp_nistz256_sub_fromq
2511
2512	movq	0+0(%rsp),%rax
2513	movq	8+0(%rsp),%r14
2514	leaq	0+0(%rsp),%rsi
2515	movq	16+0(%rsp),%r15
2516	movq	24+0(%rsp),%r8
2517.byte	102,72,15,126,207
2518	call	__ecp_nistz256_sqr_montq
2519	xorq	%r9,%r9
2520	movq	%r12,%rax
2521	addq	$-1,%r12
2522	movq	%r13,%r10
2523	adcq	%rsi,%r13
2524	movq	%r14,%rcx
2525	adcq	$0,%r14
2526	movq	%r15,%r8
2527	adcq	%rbp,%r15
2528	adcq	$0,%r9
2529	xorq	%rsi,%rsi
2530	testq	$1,%rax
2531
2532	cmovzq	%rax,%r12
2533	cmovzq	%r10,%r13
2534	cmovzq	%rcx,%r14
2535	cmovzq	%r8,%r15
2536	cmovzq	%rsi,%r9
2537
2538	movq	%r13,%rax
2539	shrq	$1,%r12
2540	shlq	$63,%rax
2541	movq	%r14,%r10
2542	shrq	$1,%r13
2543	orq	%rax,%r12
2544	shlq	$63,%r10
2545	movq	%r15,%rcx
2546	shrq	$1,%r14
2547	orq	%r10,%r13
2548	shlq	$63,%rcx
2549	movq	%r12,0(%rdi)
2550	shrq	$1,%r15
2551	movq	%r13,8(%rdi)
2552	shlq	$63,%r9
2553	orq	%rcx,%r14
2554	orq	%r9,%r15
2555	movq	%r14,16(%rdi)
2556	movq	%r15,24(%rdi)
2557	movq	64(%rsp),%rax
2558	leaq	64(%rsp),%rbx
2559	movq	0+32(%rsp),%r9
2560	movq	8+32(%rsp),%r10
2561	leaq	0+32(%rsp),%rsi
2562	movq	16+32(%rsp),%r11
2563	movq	24+32(%rsp),%r12
2564	leaq	32(%rsp),%rdi
2565	call	__ecp_nistz256_mul_montq
2566
2567	leaq	128(%rsp),%rdi
2568	call	__ecp_nistz256_mul_by_2q
2569
2570	leaq	32(%rsp),%rbx
2571	leaq	32(%rsp),%rdi
2572	call	__ecp_nistz256_add_toq
2573
2574	movq	96(%rsp),%rax
2575	leaq	96(%rsp),%rbx
2576	movq	0+0(%rsp),%r9
2577	movq	8+0(%rsp),%r10
2578	leaq	0+0(%rsp),%rsi
2579	movq	16+0(%rsp),%r11
2580	movq	24+0(%rsp),%r12
2581	leaq	0(%rsp),%rdi
2582	call	__ecp_nistz256_mul_montq
2583
2584	leaq	128(%rsp),%rdi
2585	call	__ecp_nistz256_mul_by_2q
2586
2587	movq	0+32(%rsp),%rax
2588	movq	8+32(%rsp),%r14
2589	leaq	0+32(%rsp),%rsi
2590	movq	16+32(%rsp),%r15
2591	movq	24+32(%rsp),%r8
2592.byte	102,72,15,126,199
2593	call	__ecp_nistz256_sqr_montq
2594
2595	leaq	128(%rsp),%rbx
2596	movq	%r14,%r8
2597	movq	%r15,%r9
2598	movq	%rsi,%r14
2599	movq	%rbp,%r15
2600	call	__ecp_nistz256_sub_fromq
2601
2602	movq	0+0(%rsp),%rax
2603	movq	0+8(%rsp),%rbp
2604	movq	0+16(%rsp),%rcx
2605	movq	0+24(%rsp),%r10
2606	leaq	0(%rsp),%rdi
2607	call	__ecp_nistz256_subq
2608
2609	movq	32(%rsp),%rax
2610	leaq	32(%rsp),%rbx
2611	movq	%r12,%r14
2612	xorl	%ecx,%ecx
2613	movq	%r12,0+0(%rsp)
2614	movq	%r13,%r10
2615	movq	%r13,0+8(%rsp)
2616	cmovzq	%r8,%r11
2617	movq	%r8,0+16(%rsp)
2618	leaq	0-0(%rsp),%rsi
2619	cmovzq	%r9,%r12
2620	movq	%r9,0+24(%rsp)
2621	movq	%r14,%r9
2622	leaq	0(%rsp),%rdi
2623	call	__ecp_nistz256_mul_montq
2624
2625.byte	102,72,15,126,203
2626.byte	102,72,15,126,207
2627	call	__ecp_nistz256_sub_fromq
2628
2629	leaq	160+56(%rsp),%rsi
2630.cfi_def_cfa	%rsi,8
2631	movq	-48(%rsi),%r15
2632.cfi_restore	%r15
2633	movq	-40(%rsi),%r14
2634.cfi_restore	%r14
2635	movq	-32(%rsi),%r13
2636.cfi_restore	%r13
2637	movq	-24(%rsi),%r12
2638.cfi_restore	%r12
2639	movq	-16(%rsi),%rbx
2640.cfi_restore	%rbx
2641	movq	-8(%rsi),%rbp
2642.cfi_restore	%rbp
2643	leaq	(%rsi),%rsp
2644.cfi_def_cfa_register	%rsp
2645.Lpoint_doubleq_epilogue:
2646	.byte	0xf3,0xc3
2647.cfi_endproc
2648.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
2649.globl	ecp_nistz256_point_add
2650.hidden ecp_nistz256_point_add
2651.type	ecp_nistz256_point_add,@function
2652.align	32
2653ecp_nistz256_point_add:
2654.cfi_startproc
2655	leaq	OPENSSL_ia32cap_P(%rip),%rcx
2656	movq	8(%rcx),%rcx
2657	andl	$0x80100,%ecx
2658	cmpl	$0x80100,%ecx
2659	je	.Lpoint_addx
2660	pushq	%rbp
2661.cfi_adjust_cfa_offset	8
2662.cfi_offset	%rbp,-16
2663	pushq	%rbx
2664.cfi_adjust_cfa_offset	8
2665.cfi_offset	%rbx,-24
2666	pushq	%r12
2667.cfi_adjust_cfa_offset	8
2668.cfi_offset	%r12,-32
2669	pushq	%r13
2670.cfi_adjust_cfa_offset	8
2671.cfi_offset	%r13,-40
2672	pushq	%r14
2673.cfi_adjust_cfa_offset	8
2674.cfi_offset	%r14,-48
2675	pushq	%r15
2676.cfi_adjust_cfa_offset	8
2677.cfi_offset	%r15,-56
2678	subq	$576+8,%rsp
2679.cfi_adjust_cfa_offset	32*18+8
2680.Lpoint_addq_body:
2681
2682	movdqu	0(%rsi),%xmm0
2683	movdqu	16(%rsi),%xmm1
2684	movdqu	32(%rsi),%xmm2
2685	movdqu	48(%rsi),%xmm3
2686	movdqu	64(%rsi),%xmm4
2687	movdqu	80(%rsi),%xmm5
2688	movq	%rsi,%rbx
2689	movq	%rdx,%rsi
2690	movdqa	%xmm0,384(%rsp)
2691	movdqa	%xmm1,384+16(%rsp)
2692	movdqa	%xmm2,416(%rsp)
2693	movdqa	%xmm3,416+16(%rsp)
2694	movdqa	%xmm4,448(%rsp)
2695	movdqa	%xmm5,448+16(%rsp)
2696	por	%xmm4,%xmm5
2697
2698	movdqu	0(%rsi),%xmm0
2699	pshufd	$0xb1,%xmm5,%xmm3
2700	movdqu	16(%rsi),%xmm1
2701	movdqu	32(%rsi),%xmm2
2702	por	%xmm3,%xmm5
2703	movdqu	48(%rsi),%xmm3
2704	movq	64+0(%rsi),%rax
2705	movq	64+8(%rsi),%r14
2706	movq	64+16(%rsi),%r15
2707	movq	64+24(%rsi),%r8
2708	movdqa	%xmm0,480(%rsp)
2709	pshufd	$0x1e,%xmm5,%xmm4
2710	movdqa	%xmm1,480+16(%rsp)
2711	movdqu	64(%rsi),%xmm0
2712	movdqu	80(%rsi),%xmm1
2713	movdqa	%xmm2,512(%rsp)
2714	movdqa	%xmm3,512+16(%rsp)
2715	por	%xmm4,%xmm5
2716	pxor	%xmm4,%xmm4
2717	por	%xmm0,%xmm1
2718.byte	102,72,15,110,199
2719
2720	leaq	64-0(%rsi),%rsi
2721	movq	%rax,544+0(%rsp)
2722	movq	%r14,544+8(%rsp)
2723	movq	%r15,544+16(%rsp)
2724	movq	%r8,544+24(%rsp)
2725	leaq	96(%rsp),%rdi
2726	call	__ecp_nistz256_sqr_montq
2727
2728	pcmpeqd	%xmm4,%xmm5
2729	pshufd	$0xb1,%xmm1,%xmm4
2730	por	%xmm1,%xmm4
2731	pshufd	$0,%xmm5,%xmm5
2732	pshufd	$0x1e,%xmm4,%xmm3
2733	por	%xmm3,%xmm4
2734	pxor	%xmm3,%xmm3
2735	pcmpeqd	%xmm3,%xmm4
2736	pshufd	$0,%xmm4,%xmm4
2737	movq	64+0(%rbx),%rax
2738	movq	64+8(%rbx),%r14
2739	movq	64+16(%rbx),%r15
2740	movq	64+24(%rbx),%r8
2741.byte	102,72,15,110,203
2742
2743	leaq	64-0(%rbx),%rsi
2744	leaq	32(%rsp),%rdi
2745	call	__ecp_nistz256_sqr_montq
2746
2747	movq	544(%rsp),%rax
2748	leaq	544(%rsp),%rbx
2749	movq	0+96(%rsp),%r9
2750	movq	8+96(%rsp),%r10
2751	leaq	0+96(%rsp),%rsi
2752	movq	16+96(%rsp),%r11
2753	movq	24+96(%rsp),%r12
2754	leaq	224(%rsp),%rdi
2755	call	__ecp_nistz256_mul_montq
2756
2757	movq	448(%rsp),%rax
2758	leaq	448(%rsp),%rbx
2759	movq	0+32(%rsp),%r9
2760	movq	8+32(%rsp),%r10
2761	leaq	0+32(%rsp),%rsi
2762	movq	16+32(%rsp),%r11
2763	movq	24+32(%rsp),%r12
2764	leaq	256(%rsp),%rdi
2765	call	__ecp_nistz256_mul_montq
2766
2767	movq	416(%rsp),%rax
2768	leaq	416(%rsp),%rbx
2769	movq	0+224(%rsp),%r9
2770	movq	8+224(%rsp),%r10
2771	leaq	0+224(%rsp),%rsi
2772	movq	16+224(%rsp),%r11
2773	movq	24+224(%rsp),%r12
2774	leaq	224(%rsp),%rdi
2775	call	__ecp_nistz256_mul_montq
2776
2777	movq	512(%rsp),%rax
2778	leaq	512(%rsp),%rbx
2779	movq	0+256(%rsp),%r9
2780	movq	8+256(%rsp),%r10
2781	leaq	0+256(%rsp),%rsi
2782	movq	16+256(%rsp),%r11
2783	movq	24+256(%rsp),%r12
2784	leaq	256(%rsp),%rdi
2785	call	__ecp_nistz256_mul_montq
2786
2787	leaq	224(%rsp),%rbx
2788	leaq	64(%rsp),%rdi
2789	call	__ecp_nistz256_sub_fromq
2790
2791	orq	%r13,%r12
2792	movdqa	%xmm4,%xmm2
2793	orq	%r8,%r12
2794	orq	%r9,%r12
2795	por	%xmm5,%xmm2
2796.byte	102,73,15,110,220
2797
2798	movq	384(%rsp),%rax
2799	leaq	384(%rsp),%rbx
2800	movq	0+96(%rsp),%r9
2801	movq	8+96(%rsp),%r10
2802	leaq	0+96(%rsp),%rsi
2803	movq	16+96(%rsp),%r11
2804	movq	24+96(%rsp),%r12
2805	leaq	160(%rsp),%rdi
2806	call	__ecp_nistz256_mul_montq
2807
2808	movq	480(%rsp),%rax
2809	leaq	480(%rsp),%rbx
2810	movq	0+32(%rsp),%r9
2811	movq	8+32(%rsp),%r10
2812	leaq	0+32(%rsp),%rsi
2813	movq	16+32(%rsp),%r11
2814	movq	24+32(%rsp),%r12
2815	leaq	192(%rsp),%rdi
2816	call	__ecp_nistz256_mul_montq
2817
2818	leaq	160(%rsp),%rbx
2819	leaq	0(%rsp),%rdi
2820	call	__ecp_nistz256_sub_fromq
2821
2822	orq	%r13,%r12
2823	orq	%r8,%r12
2824	orq	%r9,%r12
2825
2826.byte	0x3e
2827	jnz	.Ladd_proceedq
2828.byte	102,73,15,126,208
2829.byte	102,73,15,126,217
2830	testq	%r8,%r8
2831	jnz	.Ladd_proceedq
2832	testq	%r9,%r9
2833	jz	.Ladd_doubleq
2834
2835.byte	102,72,15,126,199
2836	pxor	%xmm0,%xmm0
2837	movdqu	%xmm0,0(%rdi)
2838	movdqu	%xmm0,16(%rdi)
2839	movdqu	%xmm0,32(%rdi)
2840	movdqu	%xmm0,48(%rdi)
2841	movdqu	%xmm0,64(%rdi)
2842	movdqu	%xmm0,80(%rdi)
2843	jmp	.Ladd_doneq
2844
2845.align	32
2846.Ladd_doubleq:
2847.byte	102,72,15,126,206
2848.byte	102,72,15,126,199
2849	addq	$416,%rsp
2850.cfi_adjust_cfa_offset	-416
2851	jmp	.Lpoint_double_shortcutq
2852.cfi_adjust_cfa_offset	416
2853
2854.align	32
2855.Ladd_proceedq:
	movq	0+64(%rsp),%rax
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	544(%rsp),%rax
	leaq	544(%rsp),%rbx
	movq	0+352(%rsp),%r9
	movq	8+352(%rsp),%r10
	leaq	0+352(%rsp),%rsi
	movq	16+352(%rsp),%r11
	movq	24+352(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	0(%rsp),%rax
	leaq	0(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	160(%rsp),%rax
	leaq	160(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montq



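# res_x = R^2 - H^3 - 2*U1*H^2 -> 288(%rsp).  The inline sequence doubles
# U1*H^2 modulo p (carry into %r11, conditional subtraction of .Lpoly);
# __ecp_nistz256_subq then takes the doubled value off R^2 (loaded via
# %rsi), and __ecp_nistz256_sub_fromq removes H^3.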
	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	96(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq

	leaq	128(%rsp),%rbx
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

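# res_y = R*(U1*H^2 - res_x) - S1*H^3 -> 320(%rsp).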
	movq	192+0(%rsp),%rax
	movq	192+8(%rsp),%rbp
	movq	192+16(%rsp),%rcx
	movq	192+24(%rsp),%r10
	leaq	320(%rsp),%rdi

	call	__ecp_nistz256_subq

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	128(%rsp),%rax
	leaq	128(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	0+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	320(%rsp),%rax
	leaq	320(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	256(%rsp),%rbx
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

.byte	102,72,15,126,199	# movq %xmm0,%rdi (output pointer)

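# Constant-time result selection: each output coordinate is the computed
# value, or the corresponding in2 coordinate if in1 was infinity (mask in
# xmm5), or the in1 coordinate if in2 was infinity (mask in xmm4).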
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	352(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	352+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	544(%rsp),%xmm2
	pand	544+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	480(%rsp),%xmm2
	pand	480+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	320(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	320+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	512(%rsp),%xmm2
	pand	512+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

.Ladd_doneq:
	leaq	576+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_addq_epilogue:
	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
.globl	ecp_nistz256_point_add_affine
.hidden ecp_nistz256_point_add_affine
.type	ecp_nistz256_point_add_affine,@function
.align	32
ecp_nistz256_point_add_affine:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_add_affinex
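# The test above checks the cached CPUID.7:EBX word in OPENSSL_ia32cap_P
# for BMI2 (bit 8) and ADX (bit 19); with both present the mulx/adcx
# variant is used instead.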
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$480+8,%rsp
.cfi_adjust_cfa_offset	32*15+8
.Ladd_affineq_body:

	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)
	movdqa	%xmm1,320+16(%rsp)
	movdqa	%xmm2,352(%rsp)
	movdqa	%xmm3,352+16(%rsp)
	movdqa	%xmm4,384(%rsp)
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm4,%xmm5

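# in1 is parked at 320/352/384(%rsp) (x/y/z) and the affine x and y of
# in2 at 416/448(%rsp).  Interleaved SSE ops fold xmm5 into a mask for
# in1_z == 0 and xmm4 into a mask for in2 == 0 (an all-zero affine point
# encodes infinity); the output pointer is stashed in xmm0 on the way.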
	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199	# movq %rdi,%xmm0 (stash output pointer)
	movdqa	%xmm2,448(%rsp)
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3

	leaq	64-0(%rsi),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rax

	movq	%r12,%r9
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4

	leaq	32-0(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

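# With in2 affine (Z2 == 1): U1 = in1_x and S1 = in1_y, so only
# U2 = in2_x*Z1^2 was needed; the subtraction above left
# H = U2 - in1_x at 64(%rsp).  Next: Z1^3, res_z = in1_z*H -> 288(%rsp),
# S2 = in2_y*Z1^3, and R = S2 - in1_y -> 96(%rsp).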
	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

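# Hsqr = H^2 -> 128(%rsp), Rsqr = R^2 -> 192(%rsp),
# Hcub = H^3 -> 160(%rsp), and in1_x*H^2 -> 0(%rsp).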
	movq	0+64(%rsp),%rax
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	0+96(%rsp),%rax
	movq	8+96(%rsp),%r14
	leaq	0+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	128(%rsp),%rax
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	320(%rsp),%rax
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	0+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq



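# res_x = R^2 - H^3 - 2*in1_x*H^2 -> 224(%rsp), then
# res_y = R*(in1_x*H^2 - res_x) - in1_y*H^3 -> 256(%rsp), by the same
# double/subtract pattern as in point_add.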
	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq

	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subq

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	352(%rsp),%rax
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	0+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

.byte	102,72,15,126,199	# movq %xmm0,%rdi (output pointer)

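# Constant-time selection as in point_add; since in2 is affine, its z
# coordinate is the implicit Montgomery one (.LONE_mont).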
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	.LONE_mont(%rip),%xmm2
	pand	.LONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

	leaq	480+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Ladd_affineq_epilogue:
	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
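# mulx/adcx flavours of the field helpers; operand conventions match the
# *q versions: one operand in %r12,%r13,%r8,%r9, the other at (%rbx),
# result stored at (%rdi), with %r14/%r15 caching .Lpoly+8 and .Lpoly+24.
# The leading xorq also clears CF, so the adcq/sbbq chains start clean.
# __ecp_nistz256_add_tox: a + b mod p -- add, subtract .Lpoly, and keep
# the pre-subtraction value if that borrowed.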
.type	__ecp_nistz256_add_tox,@function
.align	32
__ecp_nistz256_add_tox:
.cfi_startproc
	xorq	%r11,%r11
	adcq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	xorq	%r10,%r10
	sbbq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	__ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox

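# __ecp_nistz256_sub_fromx: a - b mod p -- subtract, then add .Lpoly back;
# btq $0,%r11 recovers the borrow bit to decide which result to keep.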
.type	__ecp_nistz256_sub_fromx,@function
.align	32
__ecp_nistz256_sub_fromx:
.cfi_startproc
	xorq	%r11,%r11
	sbbq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	$0,%r11

	xorq	%r10,%r10
	adcq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9

	btq	$0,%r11
	cmovncq	%rax,%r12
	cmovncq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovncq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovncq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	__ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx

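# __ecp_nistz256_subx: (%rax,%rbp,%rcx,%r10) minus (%r12,%r13,%r8,%r9)
# mod p; the result is left in %r12,%r13,%r8,%r9 and not stored.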
.type	__ecp_nistz256_subx,@function
.align	32
__ecp_nistz256_subx:
.cfi_startproc
	xorq	%r11,%r11
	sbbq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	$0,%r11

	xorq	%r9,%r9
	adcq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10

	btq	$0,%r11
	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	cmovcq	%rcx,%r8
	cmovcq	%r10,%r9

	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	__ecp_nistz256_subx,.-__ecp_nistz256_subx

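# __ecp_nistz256_mul_by_2x: 2*a mod p, same reduction pattern as add_tox.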
.type	__ecp_nistz256_mul_by_2x,@function
.align	32
__ecp_nistz256_mul_by_2x:
.cfi_startproc
	xorq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	xorq	%r10,%r10
	sbbq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	__ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
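# mulx/adcx versions of the point routines follow.  They mirror the *q
# code above; note that source pointers handed to __ecp_nistz256_mul_montx
# and __ecp_nistz256_sqr_montx carry a -128 bias (64-128(%rsi) and
# friends), which those helpers compensate for in their displacements.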
.type	ecp_nistz256_point_doublex,@function
.align	32
ecp_nistz256_point_doublex:
.cfi_startproc
.Lpoint_doublex:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$160+8,%rsp
.cfi_adjust_cfa_offset	32*5+8
.Lpoint_doublex_body:

.Lpoint_double_shortcutx:
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199	# movq %rdi,%xmm0 (res_x pointer)
.byte	102,73,15,110,202	# movq %r10,%xmm1 (res_y pointer)
.byte	102,73,15,110,211	# movq %r11,%xmm2 (res_z pointer)

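# in1_x is cached at 96(%rsp) and the three output pointers (res_x,
# res_y, res_z) sit in xmm0-2.  First step: 0(%rsp) = 2*in1_y.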
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

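# Zsqr = in1_z^2 -> 64(%rsp); then 0(%rsp) = (2*in1_y)^2, and
# res_z = 2*in1_y*in1_z is built directly in the output buffer through
# the stashed res_z pointer.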
	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-128(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	32(%rbx),%rdx
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-128(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215	# movq %xmm2,%rdi (res_z pointer)
	call	__ecp_nistz256_mul_montx
	call	__ecp_nistz256_mul_by_2x

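# The two factors of M: 32(%rsp) = in1_x + Zsqr, 64(%rsp) = in1_x - Zsqr.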
	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_tox

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207	# movq %xmm1,%rdi (res_y pointer)
	call	__ecp_nistz256_sqr_montx
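# res_y = (2*in1_y)^4 / 2 = 8*in1_y^4 mod p: add .Lpoly (limbs 1 and 3
# are in %rsi and %rbp here), keep the original value if it was even
# (testq $1,%rax), then shift the 257-bit result right one bit, folding
# the carry captured in %r9 into the top limb.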
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
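# M = 3*(in1_x - Zsqr)*(in1_x + Zsqr) -> 32(%rsp): one multiply, doubled
# through 128(%rsp), plus the single copy added back in.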
	movq	64(%rsp),%rdx
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_tox

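# S = in1_x*(2*in1_y)^2 -> 0(%rsp) and 2*S -> 128(%rsp); then
# res_x = M^2 - 2*S, squared straight into the output buffer.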
	movq	96(%rsp),%rdx
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2x

	movq	0+32(%rsp),%rdx
	movq	8+32(%rsp),%r14
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199	# movq %xmm0,%rdi (res_x pointer)
	call	__ecp_nistz256_sqr_montx

	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14	# %rsi,%rbp hold .Lpoly[1],.Lpoly[3] here
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromx

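# res_y = M*(S - res_x) - res_y.  Below, xorl %ecx,%ecx forces ZF, so the
# two cmovzq are unconditional register moves: the block only shuffles
# S - res_x into the limb registers __ecp_nistz256_mul_montx expects
# while spilling it to 0(%rsp).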
	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subx

	movq	32(%rsp),%rdx
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-128(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

.byte	102,72,15,126,203	# movq %xmm1,%rbx (res_y pointer)
.byte	102,72,15,126,207	# movq %xmm1,%rdi (res_y pointer)
	call	__ecp_nistz256_sub_fromx

	leaq	160+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_doublex_epilogue:
	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
.type	ecp_nistz256_point_addx,@function
.align	32
ecp_nistz256_point_addx:
.cfi_startproc
.Lpoint_addx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$576+8,%rsp
.cfi_adjust_cfa_offset	32*18+8
.Lpoint_addx_body:

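# Same flow and stack layout as ecp_nistz256_point_add above, now with
# the mulx/adcx helpers and -128-biased source pointers.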
	movdqu	0(%rsi),%xmm0
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	%rsi,%rbx
	movq	%rdx,%rsi
	movdqa	%xmm0,384(%rsp)
	movdqa	%xmm1,384+16(%rsp)
	movdqa	%xmm2,416(%rsp)
	movdqa	%xmm3,416+16(%rsp)
	movdqa	%xmm4,448(%rsp)
	movdqa	%xmm5,448+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rsi),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rsi),%xmm3
	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,480(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,480+16(%rsp)
	movdqu	64(%rsi),%xmm0
	movdqu	80(%rsi),%xmm1
	movdqa	%xmm2,512(%rsp)
	movdqa	%xmm3,512+16(%rsp)
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm0,%xmm1
.byte	102,72,15,110,199	# movq %rdi,%xmm0 (stash output pointer)

	leaq	64-128(%rsi),%rsi
	movq	%rdx,544+0(%rsp)
	movq	%r14,544+8(%rsp)
	movq	%r15,544+16(%rsp)
	movq	%r8,544+24(%rsp)
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm1,%xmm4
	por	%xmm1,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4
	movq	64+0(%rbx),%rdx
	movq	64+8(%rbx),%r14
	movq	64+16(%rbx),%r15
	movq	64+24(%rbx),%r8
.byte	102,72,15,110,203	# movq %rbx,%xmm1 (stash in1 pointer)

	leaq	64-128(%rbx),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	544(%rsp),%rdx
	leaq	544(%rsp),%rbx
	movq	0+96(%rsp),%r9
	movq	8+96(%rsp),%r10
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r11
	movq	24+96(%rsp),%r12
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	416(%rsp),%rdx
	leaq	416(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	-128+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	512(%rsp),%rdx
	leaq	512(%rsp),%rbx
	movq	0+256(%rsp),%r9
	movq	8+256(%rsp),%r10
	leaq	-128+256(%rsp),%rsi
	movq	16+256(%rsp),%r11
	movq	24+256(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	224(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	orq	%r13,%r12
	movdqa	%xmm4,%xmm2
	orq	%r8,%r12
	orq	%r9,%r12
	por	%xmm5,%xmm2
.byte	102,73,15,110,220	# movq %r12,%xmm3

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+96(%rsp),%r9
	movq	8+96(%rsp),%r10
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r11
	movq	24+96(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	480(%rsp),%rdx
	leaq	480(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	160(%rsp),%rbx
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	orq	%r13,%r12
	orq	%r8,%r12
	orq	%r9,%r12

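# Special-case dispatch, exactly as in the q path above.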
.byte	0x3e	# DS prefix: legacy branch-taken hint
	jnz	.Ladd_proceedx
.byte	102,73,15,126,208	# movq %xmm2,%r8 (in1infty | in2infty)
.byte	102,73,15,126,217	# movq %xmm3,%r9 (R != 0 flag)
	testq	%r8,%r8
	jnz	.Ladd_proceedx
	testq	%r9,%r9
	jz	.Ladd_doublex

.byte	102,72,15,126,199	# movq %xmm0,%rdi (output pointer)
	pxor	%xmm0,%xmm0
	movdqu	%xmm0,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	movdqu	%xmm0,32(%rdi)
	movdqu	%xmm0,48(%rdi)
	movdqu	%xmm0,64(%rdi)
	movdqu	%xmm0,80(%rdi)
	jmp	.Ladd_donex

.align	32
.Ladd_doublex:
.byte	102,72,15,126,206	# movq %xmm1,%rsi
.byte	102,72,15,126,199	# movq %xmm0,%rdi
	addq	$416,%rsp
.cfi_adjust_cfa_offset	-416
	jmp	.Lpoint_double_shortcutx
.cfi_adjust_cfa_offset	416

.align	32
.Ladd_proceedx:
	movq	0+64(%rsp),%rdx
	movq	8+64(%rsp),%r14
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	0+0(%rsp),%rdx
	movq	8+0(%rsp),%r14
	leaq	-128+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	544(%rsp),%rdx
	leaq	544(%rsp),%rbx
	movq	0+352(%rsp),%r9
	movq	8+352(%rsp),%r10
	leaq	-128+352(%rsp),%rsi
	movq	16+352(%rsp),%r11
	movq	24+352(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	0(%rsp),%rdx
	leaq	0(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	160(%rsp),%rdx
	leaq	160(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montx



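# res_x = R^2 - H^3 - 2*U1*H^2 -> 288(%rsp), then
# res_y = R*(U1*H^2 - res_x) - S1*H^3 -> 320(%rsp), as in the q path.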
	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	96(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subx

	leaq	128(%rsp),%rbx
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	192+0(%rsp),%rax
	movq	192+8(%rsp),%rbp
	movq	192+16(%rsp),%rcx
	movq	192+24(%rsp),%r10
	leaq	320(%rsp),%rdi

	call	__ecp_nistz256_subx

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	128(%rsp),%rdx
	leaq	128(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	-128+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	320(%rsp),%rdx
	leaq	320(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	256(%rsp),%rbx
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

.byte	102,72,15,126,199	# movq %xmm0,%rdi (output pointer)

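# Constant-time result selection, as in the q path.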
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	352(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	352+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	544(%rsp),%xmm2
	pand	544+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	480(%rsp),%xmm2
	pand	480+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	320(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	320+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	512(%rsp),%xmm2
	pand	512+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

.Ladd_donex:
	leaq	576+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_addx_epilogue:
	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
.type	ecp_nistz256_point_add_affinex,@function
.align	32
ecp_nistz256_point_add_affinex:
.cfi_startproc
.Lpoint_add_affinex:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$480+8,%rsp
.cfi_adjust_cfa_offset	32*15+8
.Ladd_affinex_body:

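# mulx/adcx version of ecp_nistz256_point_add_affine: same stack layout
# and flow as the q version above.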
	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)
	movdqa	%xmm1,320+16(%rsp)
	movdqa	%xmm2,352(%rsp)
	movdqa	%xmm3,352+16(%rsp)
	movdqa	%xmm4,384(%rsp)
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199	# movq %rdi,%xmm0 (stash output pointer)
	movdqa	%xmm2,448(%rsp)
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3

	leaq	64-128(%rsi),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rdx

	movq	%r12,%r9
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4

	leaq	32-128(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+64(%rsp),%rdx
	movq	8+64(%rsp),%r14
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	0+96(%rsp),%rdx
	movq	8+96(%rsp),%r14
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	movq	128(%rsp),%rdx
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	320(%rsp),%rdx
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	-128+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx



	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subx

	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subx

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	352(%rsp),%rdx
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	-128+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	movq	96(%rsp),%rdx
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

.byte	102,72,15,126,199	# movq %xmm0,%rdi (output pointer)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	.LONE_mont(%rip),%xmm2
	pand	.LONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

	leaq	480+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Ladd_affinex_epilogue:
	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
#endif