• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
9%include "ring_core_generated/prefix_symbols_nasm.inc"
10section	.text code align=64
11
12EXTERN	OPENSSL_ia32cap_P
13
14
15ALIGN	64
; Constant pool. Multi-limb values are stored least-significant 64-bit limb
; first (the routines below run their add/adc and sub/sbb chains from offset 0
; upward).
;
; $L$poly: modulus used by nistz256_neg and the p256_{mul,sqr}_mont helpers:
;   0xffffffff00000001_0000000000000000_00000000ffffffff_ffffffffffffffff
;   (= 2^256 - 2^224 + 2^192 + 2^96 - 1, the NIST P-256 field prime).
16$L$poly:
17	DQ	0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
18
; $L$One / $L$Two / $L$Three: eight dwords each, holding 1, 2 and 3.
; $L$One is loaded by nistz256_select_w5 as its per-iteration counter
; increment; $L$Two and $L$Three are not referenced in the part of the file
; reviewed here.
19$L$One:
20	DD	1,1,1,1,1,1,1,1
21$L$Two:
22	DD	2,2,2,2,2,2,2,2
23$L$Three:
24	DD	3,3,3,3,3,3,3,3
; $L$ONE_mont: equals 2^256 - $L$poly (i.e. 2^256 mod $L$poly); presumably the
; value 1 in Montgomery form, as the name suggests.
25$L$ONE_mont:
26	DQ	0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
27
28
; $L$ord: modulus used by the p256_scalar_* / ecp_nistz256_ord_* routines
; (matches the NIST P-256 group order).
; $L$ordK: 64-bit constant the reduction steps multiply the low accumulator
; limb by (presumably -1/$L$ord mod 2^64 -- confirm against the generator).
; LAYOUT INVARIANT: the squaring loops read $L$ordK as QWORD[32+rsi] with
; rsi = $L$ord, so $L$ordK must stay immediately after the 32 bytes of $L$ord.
29$L$ord:
30	DQ	0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
31$L$ordK:
32	DQ	0xccd1c8aaee00bc4f
33
34
35
;-----------------------------------------------------------------------
; nistz256_neg(res, a)
; ABI:   Microsoft x64. Arguments arrive in rcx (res) and rdx (a) and are
;        moved to rdi/rsi; the caller's rdi/rsi are parked in the home
;        space at [rsp+8]/[rsp+16] and restored in the epilogue.
; In:    a   -> four 64-bit limbs, least significant first
; Out:   res <- 0 - a, with $L$poly added back when the subtraction
;        borrows (so a == 0 yields 0, otherwise $L$poly - a).
;        NOTE(review): a is presumably expected to be < $L$poly.
; Saves: r12, r13 (pushed/reloaded). Clobbers rax, rcx, rdx, r8-r11, flags.
; Branch-free: the selection is done with cmovz.
;-----------------------------------------------------------------------
36global	nistz256_neg
37
38ALIGN	32
39nistz256_neg:
40	mov	QWORD[8+rsp],rdi	;WIN64 prologue
41	mov	QWORD[16+rsp],rsi
42	mov	rax,rsp
43$L$SEH_begin_nistz256_neg:
44	mov	rdi,rcx
45	mov	rsi,rdx
46
47
48
49	push	r12
50
51	push	r13
52
53$L$neg_body:
54
	; r11:r10:r9:r8 = 0 - a; r13 ends up 0 (no borrow) or all-ones (borrow).
55	xor	r8,r8
56	xor	r9,r9
57	xor	r10,r10
58	xor	r11,r11
59	xor	r13,r13
60
	; Copies of the raw difference are kept in rax, rdx, rcx, r12 while rsi
	; is re-pointed at $L$poly.
61	sub	r8,QWORD[rsi]
62	sbb	r9,QWORD[8+rsi]
63	sbb	r10,QWORD[16+rsi]
64	mov	rax,r8
65	sbb	r11,QWORD[24+rsi]
66	lea	rsi,[$L$poly]
67	mov	rdx,r9
68	sbb	r13,0
69
	; r11:r10:r9:r8 += $L$poly
70	add	r8,QWORD[rsi]
71	mov	rcx,r10
72	adc	r9,QWORD[8+rsi]
73	adc	r10,QWORD[16+rsi]
74	mov	r12,r11
75	adc	r11,QWORD[24+rsi]
76	test	r13,r13
77
	; No borrow (r13 == 0): fall back to the raw difference, which is 0.
78	cmovz	r8,rax
79	cmovz	r9,rdx
80	mov	QWORD[rdi],r8
81	cmovz	r10,rcx
82	mov	QWORD[8+rdi],r9
83	cmovz	r11,r12
84	mov	QWORD[16+rdi],r10
85	mov	QWORD[24+rdi],r11
86
	; Reload the two saved registers and drop their 16 bytes of stack.
87	mov	r13,QWORD[rsp]
88
89	mov	r12,QWORD[8+rsp]
90
91	lea	rsp,[16+rsp]
92
93$L$neg_epilogue:
94	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
95	mov	rsi,QWORD[16+rsp]
96	DB	0F3h,0C3h		;repret
97
98$L$SEH_end_nistz256_neg:
99
100
101
102
103
104
;-----------------------------------------------------------------------
; p256_scalar_mul_mont(res, a, b)
; ABI:   Microsoft x64. rcx/rdx/r8 are moved to rdi (res), rsi (a),
;        rdx (b); caller's rdi/rsi are saved in the home space and
;        restored in the epilogue.
; In:    a, b -> four 64-bit limbs each, least significant first
; Out:   res  <- 4-limb result of a word-by-word multiply of a by b in
;        which every multiply-by-b[i] pass is followed by a reduction
;        step against $L$ord using $L$ordK, ending with one conditional
;        subtraction of $L$ord. (Presumably Montgomery multiplication
;        modulo the P-256 group order -- confirm against the generator.)
; Dispatch: when both bits of mask 0x80100 are set in the dword at
;        OPENSSL_ia32cap_P+8, control jumps to
;        $L$ecp_nistz256_ord_mul_montx (the MULX/ADCX/ADOX variant;
;        the mask is presumably BMI2|ADX).
; Saves: rbp, rbx, r12-r15. Clobbers rax, rcx, rdx, r8-r11, flags.
;-----------------------------------------------------------------------
105global	p256_scalar_mul_mont
106
107ALIGN	32
108p256_scalar_mul_mont:
109	mov	QWORD[8+rsp],rdi	;WIN64 prologue
110	mov	QWORD[16+rsp],rsi
111	mov	rax,rsp
112$L$SEH_begin_p256_scalar_mul_mont:
113	mov	rdi,rcx
114	mov	rsi,rdx
115	mov	rdx,r8
116
117
118
	; CPU capability check; taken branch lands after the other routine's
	; own Win64 prologue, so the argument moves above are reused there.
119	lea	rcx,[OPENSSL_ia32cap_P]
120	mov	rcx,QWORD[8+rcx]
121	and	ecx,0x80100
122	cmp	ecx,0x80100
123	je	NEAR $L$ecp_nistz256_ord_mul_montx
124	push	rbp
125
126	push	rbx
127
128	push	r12
129
130	push	r13
131
132	push	r14
133
134	push	r15
135
136$L$ord_mul_body:
137
	; rbx = b, r14 = &$L$ord, r15 = $L$ordK, rax = b[0]
138	mov	rax,QWORD[rdx]
139	mov	rbx,rdx
140	lea	r14,[$L$ord]
141	mov	r15,QWORD[$L$ordK]
142
143
	; r12:r11:r10:r9:r8 = a[0..3] * b[0]   (rcx keeps b[0])
144	mov	rcx,rax
145	mul	QWORD[rsi]
146	mov	r8,rax
147	mov	rax,rcx
148	mov	r9,rdx
149
150	mul	QWORD[8+rsi]
151	add	r9,rax
152	mov	rax,rcx
153	adc	rdx,0
154	mov	r10,rdx
155
156	mul	QWORD[16+rsi]
157	add	r10,rax
158	mov	rax,rcx
159	adc	rdx,0
160
	; r13 = low limb, r8 = low limb * $L$ordK (reduction multiplier)
161	mov	r13,r8
162	imul	r8,r15
163
164	mov	r11,rdx
165	mul	QWORD[24+rsi]
166	add	r11,rax
167	mov	rax,r8
168	adc	rdx,0
169	mov	r12,rdx
170
171
	; Reduction step 1: add multiplier * $L$ord. Only ord[0] and ord[1] go
	; through mul; ord[2] (all ones) and ord[3] (0xffffffff00000000) are
	; applied with the sub/sbb and shl/shr-by-32 sequences below.
172	mul	QWORD[r14]
173	mov	rbp,r8
174	add	r13,rax
175	mov	rax,r8
176	adc	rdx,0
177	mov	rcx,rdx
178
179	sub	r10,r8
180	sbb	r8,0
181
182	mul	QWORD[8+r14]
183	add	r9,rcx
184	adc	rdx,0
185	add	r9,rax
186	mov	rax,rbp
187	adc	r10,rdx
188	mov	rdx,rbp
189	adc	r8,0
190
191	shl	rax,32
192	shr	rdx,32
193	sub	r11,rax
194	mov	rax,QWORD[8+rbx]
195	sbb	rbp,rdx
196
197	add	r11,r8
198	adc	r12,rbp
199	adc	r13,0
200
201
	; Accumulate a[0..3] * b[1] into r13:r12:r11:r10:r9 (r8 = new top carry)
202	mov	rcx,rax
203	mul	QWORD[rsi]
204	add	r9,rax
205	mov	rax,rcx
206	adc	rdx,0
207	mov	rbp,rdx
208
209	mul	QWORD[8+rsi]
210	add	r10,rbp
211	adc	rdx,0
212	add	r10,rax
213	mov	rax,rcx
214	adc	rdx,0
215	mov	rbp,rdx
216
217	mul	QWORD[16+rsi]
218	add	r11,rbp
219	adc	rdx,0
220	add	r11,rax
221	mov	rax,rcx
222	adc	rdx,0
223
224	mov	rcx,r9
225	imul	r9,r15
226
227	mov	rbp,rdx
228	mul	QWORD[24+rsi]
229	add	r12,rbp
230	adc	rdx,0
231	xor	r8,r8
232	add	r12,rax
233	mov	rax,r9
234	adc	r13,rdx
235	adc	r8,0
236
237
	; Reduction step 2 (multiplier in r9, copy in rbp)
238	mul	QWORD[r14]
239	mov	rbp,r9
240	add	rcx,rax
241	mov	rax,r9
242	adc	rcx,rdx
243
244	sub	r11,r9
245	sbb	r9,0
246
247	mul	QWORD[8+r14]
248	add	r10,rcx
249	adc	rdx,0
250	add	r10,rax
251	mov	rax,rbp
252	adc	r11,rdx
253	mov	rdx,rbp
254	adc	r9,0
255
256	shl	rax,32
257	shr	rdx,32
258	sub	r12,rax
259	mov	rax,QWORD[16+rbx]
260	sbb	rbp,rdx
261
262	add	r12,r9
263	adc	r13,rbp
264	adc	r8,0
265
266
	; Accumulate a[0..3] * b[2] into r8:r13:r12:r11:r10 (r9 = new top carry)
267	mov	rcx,rax
268	mul	QWORD[rsi]
269	add	r10,rax
270	mov	rax,rcx
271	adc	rdx,0
272	mov	rbp,rdx
273
274	mul	QWORD[8+rsi]
275	add	r11,rbp
276	adc	rdx,0
277	add	r11,rax
278	mov	rax,rcx
279	adc	rdx,0
280	mov	rbp,rdx
281
282	mul	QWORD[16+rsi]
283	add	r12,rbp
284	adc	rdx,0
285	add	r12,rax
286	mov	rax,rcx
287	adc	rdx,0
288
289	mov	rcx,r10
290	imul	r10,r15
291
292	mov	rbp,rdx
293	mul	QWORD[24+rsi]
294	add	r13,rbp
295	adc	rdx,0
296	xor	r9,r9
297	add	r13,rax
298	mov	rax,r10
299	adc	r8,rdx
300	adc	r9,0
301
302
	; Reduction step 3 (multiplier in r10, copy in rbp)
303	mul	QWORD[r14]
304	mov	rbp,r10
305	add	rcx,rax
306	mov	rax,r10
307	adc	rcx,rdx
308
309	sub	r12,r10
310	sbb	r10,0
311
312	mul	QWORD[8+r14]
313	add	r11,rcx
314	adc	rdx,0
315	add	r11,rax
316	mov	rax,rbp
317	adc	r12,rdx
318	mov	rdx,rbp
319	adc	r10,0
320
321	shl	rax,32
322	shr	rdx,32
323	sub	r13,rax
324	mov	rax,QWORD[24+rbx]
325	sbb	rbp,rdx
326
327	add	r13,r10
328	adc	r8,rbp
329	adc	r9,0
330
331
	; Accumulate a[0..3] * b[3] into r9:r8:r13:r12:r11 (r10 = new top carry)
332	mov	rcx,rax
333	mul	QWORD[rsi]
334	add	r11,rax
335	mov	rax,rcx
336	adc	rdx,0
337	mov	rbp,rdx
338
339	mul	QWORD[8+rsi]
340	add	r12,rbp
341	adc	rdx,0
342	add	r12,rax
343	mov	rax,rcx
344	adc	rdx,0
345	mov	rbp,rdx
346
347	mul	QWORD[16+rsi]
348	add	r13,rbp
349	adc	rdx,0
350	add	r13,rax
351	mov	rax,rcx
352	adc	rdx,0
353
354	mov	rcx,r11
355	imul	r11,r15
356
357	mov	rbp,rdx
358	mul	QWORD[24+rsi]
359	add	r8,rbp
360	adc	rdx,0
361	xor	r10,r10
362	add	r8,rax
363	mov	rax,r11
364	adc	r9,rdx
365	adc	r10,0
366
367
	; Reduction step 4 (multiplier in r11, copy in rbp)
368	mul	QWORD[r14]
369	mov	rbp,r11
370	add	rcx,rax
371	mov	rax,r11
372	adc	rcx,rdx
373
374	sub	r13,r11
375	sbb	r11,0
376
377	mul	QWORD[8+r14]
378	add	r12,rcx
379	adc	rdx,0
380	add	r12,rax
381	mov	rax,rbp
382	adc	r13,rdx
383	mov	rdx,rbp
384	adc	r11,0
385
386	shl	rax,32
387	shr	rdx,32
388	sub	r8,rax
389	sbb	rbp,rdx
390
391	add	r8,r11
392	adc	r9,rbp
393	adc	r10,0
394
395
	; Result candidate is r10:r9:r8:r13:r12. Subtract $L$ord; if that
	; borrows, cmovc restores the saved unsubtracted limbs (branch-free).
396	mov	rsi,r12
397	sub	r12,QWORD[r14]
398	mov	r11,r13
399	sbb	r13,QWORD[8+r14]
400	mov	rcx,r8
401	sbb	r8,QWORD[16+r14]
402	mov	rbp,r9
403	sbb	r9,QWORD[24+r14]
404	sbb	r10,0
405
406	cmovc	r12,rsi
407	cmovc	r13,r11
408	cmovc	r8,rcx
409	cmovc	r9,rbp
410
411	mov	QWORD[rdi],r12
412	mov	QWORD[8+rdi],r13
413	mov	QWORD[16+rdi],r8
414	mov	QWORD[24+rdi],r9
415
	; Reload the six saved registers (reverse push order) and pop the frame.
416	mov	r15,QWORD[rsp]
417
418	mov	r14,QWORD[8+rsp]
419
420	mov	r13,QWORD[16+rsp]
421
422	mov	r12,QWORD[24+rsp]
423
424	mov	rbx,QWORD[32+rsp]
425
426	mov	rbp,QWORD[40+rsp]
427
428	lea	rsp,[48+rsp]
429
430$L$ord_mul_epilogue:
431	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
432	mov	rsi,QWORD[16+rsp]
433	DB	0F3h,0C3h		;repret
434
435$L$SEH_end_p256_scalar_mul_mont:
436
437
438
439
440
441
442
;-----------------------------------------------------------------------
; p256_scalar_sqr_rep_mont(res, a, rep)
; ABI:   Microsoft x64. rcx/rdx/r8 are moved to rdi (res), rsi (a),
;        rdx (rep); caller's rdi/rsi are saved in the home space and
;        restored in the epilogue.
; In:    a   -> four 64-bit limbs, least significant first
;        rep =  iteration count. The loop is do-while shaped
;               (dec rbx / jnz), so the body always runs at least once;
;               rep == 0 would wrap around rather than skip the loop.
; Out:   res <- result of squaring-with-reduction against $L$ord applied
;        rep times (presumably repeated Montgomery squaring modulo the
;        P-256 group order -- confirm against the generator).
; Dispatch: same 0x80100 capability test as p256_scalar_mul_mont, jumping
;        to $L$ecp_nistz256_ord_sqr_montx when both bits are set.
; Saves: rbp, rbx, r12-r15. Clobbers rax, rcx, rdx, r8-r11, xmm1-xmm3
;        (zeroed before return), flags.
;-----------------------------------------------------------------------
443global	p256_scalar_sqr_rep_mont
444
445ALIGN	32
446p256_scalar_sqr_rep_mont:
447	mov	QWORD[8+rsp],rdi	;WIN64 prologue
448	mov	QWORD[16+rsp],rsi
449	mov	rax,rsp
450$L$SEH_begin_p256_scalar_sqr_rep_mont:
451	mov	rdi,rcx
452	mov	rsi,rdx
453	mov	rdx,r8
454
455
456
457	lea	rcx,[OPENSSL_ia32cap_P]
458	mov	rcx,QWORD[8+rcx]
459	and	ecx,0x80100
460	cmp	ecx,0x80100
461	je	NEAR $L$ecp_nistz256_ord_sqr_montx
462	push	rbp
463
464	push	rbx
465
466	push	r12
467
468	push	r13
469
470	push	r14
471
472	push	r15
473
474$L$ord_sqr_body:
475
	; Loop-carried state: r8 = a[0], rax = a[1], r14 = a[2], r15 = a[3].
	; rsi is re-pointed at $L$ord ($L$ordK is read as [32+rsi]); rbx = rep.
476	mov	r8,QWORD[rsi]
477	mov	rax,QWORD[8+rsi]
478	mov	r14,QWORD[16+rsi]
479	mov	r15,QWORD[24+rsi]
480	lea	rsi,[$L$ord]
481	mov	rbx,rdx
482	jmp	NEAR $L$oop_ord_sqr
483
484ALIGN	32
485$L$oop_ord_sqr:
486
	; Cross products a[1..3] * a[0]; a[1..3] are parked in xmm1..xmm3.
487	mov	rbp,rax
488	mul	r8
489	mov	r9,rax
490DB	102,72,15,110,205	; movq xmm1,rbp
491	mov	rax,r14
492	mov	r10,rdx
493
494	mul	r8
495	add	r10,rax
496	mov	rax,r15
497DB	102,73,15,110,214	; movq xmm2,r14
498	adc	rdx,0
499	mov	r11,rdx
500
501	mul	r8
502	add	r11,rax
503	mov	rax,r15
504DB	102,73,15,110,223	; movq xmm3,r15
505	adc	rdx,0
506	mov	r12,rdx
507
508
	; a[3] * a[2]
509	mul	r14
510	mov	r13,rax
511	mov	rax,r14
512	mov	r14,rdx
513
514
	; a[2] * a[1] and a[3] * a[1]
515	mul	rbp
516	add	r11,rax
517	mov	rax,r15
518	adc	rdx,0
519	mov	r15,rdx
520
521	mul	rbp
522	add	r12,rax
523	adc	rdx,0
524
525	add	r12,r15
526	adc	r13,rdx
527	adc	r14,0
528
529
	; Double the cross-product sum: r15:r14:...:r9 = 2 * (r14:...:r9)
530	xor	r15,r15
531	mov	rax,r8
532	add	r9,r9
533	adc	r10,r10
534	adc	r11,r11
535	adc	r12,r12
536	adc	r13,r13
537	adc	r14,r14
538	adc	r15,0
539
540
	; Add the squares a[i]*a[i]; the 512-bit square ends up in r15:...:r8.
541	mul	rax
542	mov	r8,rax
543DB	102,72,15,126,200	; movq rax,xmm1
544	mov	rbp,rdx
545
546	mul	rax
547	add	r9,rbp
548	adc	r10,rax
549DB	102,72,15,126,208	; movq rax,xmm2
550	adc	rdx,0
551	mov	rbp,rdx
552
553	mul	rax
554	add	r11,rbp
555	adc	r12,rax
556DB	102,72,15,126,216	; movq rax,xmm3
557	adc	rdx,0
558	mov	rbp,rdx
559
	; rcx = low limb, r8 = low limb * $L$ordK (first reduction multiplier)
560	mov	rcx,r8
561	imul	r8,QWORD[32+rsi]
562
563	mul	rax
564	add	r13,rbp
565	adc	r14,rax
566	mov	rax,QWORD[rsi]
567	adc	r15,rdx
568
569
	; Reduction step 1 on the low half (r11:r10:r9:r8). As in the multiply
	; routine, only ord[0]/ord[1] use mul; ord[2]/ord[3] are applied with
	; sub/sbb and 32-bit shifts.
570	mul	r8
571	mov	rbp,r8
572	add	rcx,rax
573	mov	rax,QWORD[8+rsi]
574	adc	rcx,rdx
575
576	sub	r10,r8
577	sbb	rbp,0
578
579	mul	r8
580	add	r9,rcx
581	adc	rdx,0
582	add	r9,rax
583	mov	rax,r8
584	adc	r10,rdx
585	mov	rdx,r8
586	adc	rbp,0
587
588	mov	rcx,r9
589	imul	r9,QWORD[32+rsi]
590
591	shl	rax,32
592	shr	rdx,32
593	sub	r11,rax
594	mov	rax,QWORD[rsi]
595	sbb	r8,rdx
596
597	add	r11,rbp
598	adc	r8,0
599
600
	; Reduction step 2
601	mul	r9
602	mov	rbp,r9
603	add	rcx,rax
604	mov	rax,QWORD[8+rsi]
605	adc	rcx,rdx
606
607	sub	r11,r9
608	sbb	rbp,0
609
610	mul	r9
611	add	r10,rcx
612	adc	rdx,0
613	add	r10,rax
614	mov	rax,r9
615	adc	r11,rdx
616	mov	rdx,r9
617	adc	rbp,0
618
619	mov	rcx,r10
620	imul	r10,QWORD[32+rsi]
621
622	shl	rax,32
623	shr	rdx,32
624	sub	r8,rax
625	mov	rax,QWORD[rsi]
626	sbb	r9,rdx
627
628	add	r8,rbp
629	adc	r9,0
630
631
	; Reduction step 3
632	mul	r10
633	mov	rbp,r10
634	add	rcx,rax
635	mov	rax,QWORD[8+rsi]
636	adc	rcx,rdx
637
638	sub	r8,r10
639	sbb	rbp,0
640
641	mul	r10
642	add	r11,rcx
643	adc	rdx,0
644	add	r11,rax
645	mov	rax,r10
646	adc	r8,rdx
647	mov	rdx,r10
648	adc	rbp,0
649
650	mov	rcx,r11
651	imul	r11,QWORD[32+rsi]
652
653	shl	rax,32
654	shr	rdx,32
655	sub	r9,rax
656	mov	rax,QWORD[rsi]
657	sbb	r10,rdx
658
659	add	r9,rbp
660	adc	r10,0
661
662
	; Reduction step 4
663	mul	r11
664	mov	rbp,r11
665	add	rcx,rax
666	mov	rax,QWORD[8+rsi]
667	adc	rcx,rdx
668
669	sub	r9,r11
670	sbb	rbp,0
671
672	mul	r11
673	add	r8,rcx
674	adc	rdx,0
675	add	r8,rax
676	mov	rax,r11
677	adc	r9,rdx
678	mov	rdx,r11
679	adc	rbp,0
680
681	shl	rax,32
682	shr	rdx,32
683	sub	r10,rax
684	sbb	r11,rdx
685
686	add	r10,rbp
687	adc	r11,0
688
689
	; Add the upper half of the square (r15:r14:r13:r12); rdx = carry out.
	; Unsubtracted copies go to r12, rax, r14, r15.
690	xor	rdx,rdx
691	add	r8,r12
692	adc	r9,r13
693	mov	r12,r8
694	adc	r10,r14
695	adc	r11,r15
696	mov	rax,r9
697	adc	rdx,0
698
699
	; Conditional subtraction of $L$ord. Afterwards the next iteration's
	; operand (and the final result) is in r8, rax, r14, r15.
700	sub	r8,QWORD[rsi]
701	mov	r14,r10
702	sbb	r9,QWORD[8+rsi]
703	sbb	r10,QWORD[16+rsi]
704	mov	r15,r11
705	sbb	r11,QWORD[24+rsi]
706	sbb	rdx,0
707
708	cmovc	r8,r12
709	cmovnc	rax,r9
710	cmovnc	r14,r10
711	cmovnc	r15,r11
712
713	dec	rbx
714	jnz	NEAR $L$oop_ord_sqr
715
	; Store the result and zero xmm1..xmm3, which held copies of operand
	; limbs.
716	mov	QWORD[rdi],r8
717	mov	QWORD[8+rdi],rax
718	pxor	xmm1,xmm1
719	mov	QWORD[16+rdi],r14
720	pxor	xmm2,xmm2
721	mov	QWORD[24+rdi],r15
722	pxor	xmm3,xmm3
723
724	mov	r15,QWORD[rsp]
725
726	mov	r14,QWORD[8+rsp]
727
728	mov	r13,QWORD[16+rsp]
729
730	mov	r12,QWORD[24+rsp]
731
732	mov	rbx,QWORD[32+rsp]
733
734	mov	rbp,QWORD[40+rsp]
735
736	lea	rsp,[48+rsp]
737
738$L$ord_sqr_epilogue:
739	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
740	mov	rsi,QWORD[16+rsp]
741	DB	0F3h,0C3h		;repret
742
743$L$SEH_end_p256_scalar_sqr_rep_mont:
744
745
;-----------------------------------------------------------------------
; ecp_nistz256_ord_mul_montx -- MULX/ADCX/ADOX variant of
; p256_scalar_mul_mont (not declared global in the visible source).
; Entry: normally reached through the inner label
;        $L$ecp_nistz256_ord_mul_montx, jumped to by p256_scalar_mul_mont
;        after that routine has already run its own Win64 prologue; the
;        prologue at the top label here is then bypassed.
; In (at the inner label): rdi = res, rsi = a, rdx = b.
; Out:   res <- same contract as p256_scalar_mul_mont: four
;        multiply-by-b[i] passes, each followed by a reduction step
;        against $L$ord using $L$ordK, then a conditional subtraction
;        of $L$ord.
; Note:  rsi and r14 are biased by -128 and every access adds +128 back;
;        the addresses are unchanged (presumably done only to influence
;        instruction encoding).
; Saves: rbp, rbx, r12-r15. Clobbers rax, rcx, rdx, r8-r11, flags.
;-----------------------------------------------------------------------
746ALIGN	32
747ecp_nistz256_ord_mul_montx:
748	mov	QWORD[8+rsp],rdi	;WIN64 prologue
749	mov	QWORD[16+rsp],rsi
750	mov	rax,rsp
751$L$SEH_begin_ecp_nistz256_ord_mul_montx:
752	mov	rdi,rcx
753	mov	rsi,rdx
754	mov	rdx,r8
755
756
757
758$L$ecp_nistz256_ord_mul_montx:
759	push	rbp
760
761	push	rbx
762
763	push	r12
764
765	push	r13
766
767	push	r14
768
769	push	r15
770
771$L$ord_mulx_body:
772
	; rbx = b, rdx = b[0], r9..r12 = a[0..3], r14 = &$L$ord - 128,
	; r15 = $L$ordK
773	mov	rbx,rdx
774	mov	rdx,QWORD[rdx]
775	mov	r9,QWORD[rsi]
776	mov	r10,QWORD[8+rsi]
777	mov	r11,QWORD[16+rsi]
778	mov	r12,QWORD[24+rsi]
779	lea	rsi,[((-128))+rsi]
780	lea	r14,[(($L$ord-128))]
781	mov	r15,QWORD[$L$ordK]
782
783
	; r12:r11:r10:r9:r8 = a[0..3] * b[0]; then rdx = low limb * $L$ordK
784	mulx	r9,r8,r9
785	mulx	r10,rcx,r10
786	mulx	r11,rbp,r11
787	add	r9,rcx
788	mulx	r12,rcx,r12
789	mov	rdx,r8
790	mulx	rax,rdx,r15
791	adc	r10,rbp
792	adc	r11,rcx
793	adc	r12,0
794
795
	; Reduction step 1: add rdx * $L$ord using two carry chains
	; (adcx -> CF, adox -> OF). The xor clears r13 and both flags.
796	xor	r13,r13
797	mulx	rbp,rcx,QWORD[((0+128))+r14]
798	adcx	r8,rcx
799	adox	r9,rbp
800
801	mulx	rbp,rcx,QWORD[((8+128))+r14]
802	adcx	r9,rcx
803	adox	r10,rbp
804
805	mulx	rbp,rcx,QWORD[((16+128))+r14]
806	adcx	r10,rcx
807	adox	r11,rbp
808
809	mulx	rbp,rcx,QWORD[((24+128))+r14]
810	mov	rdx,QWORD[8+rbx]
811	adcx	r11,rcx
812	adox	r12,rbp
813	adcx	r12,r8
814	adox	r13,r8
815	adc	r13,0
816
817
	; Accumulate a[0..3] * b[1]
818	mulx	rbp,rcx,QWORD[((0+128))+rsi]
819	adcx	r9,rcx
820	adox	r10,rbp
821
822	mulx	rbp,rcx,QWORD[((8+128))+rsi]
823	adcx	r10,rcx
824	adox	r11,rbp
825
826	mulx	rbp,rcx,QWORD[((16+128))+rsi]
827	adcx	r11,rcx
828	adox	r12,rbp
829
830	mulx	rbp,rcx,QWORD[((24+128))+rsi]
831	mov	rdx,r9
832	mulx	rax,rdx,r15
833	adcx	r12,rcx
834	adox	r13,rbp
835
836	adcx	r13,r8
837	adox	r8,r8
838	adc	r8,0
839
840
	; Reduction step 2
841	mulx	rbp,rcx,QWORD[((0+128))+r14]
842	adcx	r9,rcx
843	adox	r10,rbp
844
845	mulx	rbp,rcx,QWORD[((8+128))+r14]
846	adcx	r10,rcx
847	adox	r11,rbp
848
849	mulx	rbp,rcx,QWORD[((16+128))+r14]
850	adcx	r11,rcx
851	adox	r12,rbp
852
853	mulx	rbp,rcx,QWORD[((24+128))+r14]
854	mov	rdx,QWORD[16+rbx]
855	adcx	r12,rcx
856	adox	r13,rbp
857	adcx	r13,r9
858	adox	r8,r9
859	adc	r8,0
860
861
	; Accumulate a[0..3] * b[2]
862	mulx	rbp,rcx,QWORD[((0+128))+rsi]
863	adcx	r10,rcx
864	adox	r11,rbp
865
866	mulx	rbp,rcx,QWORD[((8+128))+rsi]
867	adcx	r11,rcx
868	adox	r12,rbp
869
870	mulx	rbp,rcx,QWORD[((16+128))+rsi]
871	adcx	r12,rcx
872	adox	r13,rbp
873
874	mulx	rbp,rcx,QWORD[((24+128))+rsi]
875	mov	rdx,r10
876	mulx	rax,rdx,r15
877	adcx	r13,rcx
878	adox	r8,rbp
879
880	adcx	r8,r9
881	adox	r9,r9
882	adc	r9,0
883
884
	; Reduction step 3
885	mulx	rbp,rcx,QWORD[((0+128))+r14]
886	adcx	r10,rcx
887	adox	r11,rbp
888
889	mulx	rbp,rcx,QWORD[((8+128))+r14]
890	adcx	r11,rcx
891	adox	r12,rbp
892
893	mulx	rbp,rcx,QWORD[((16+128))+r14]
894	adcx	r12,rcx
895	adox	r13,rbp
896
897	mulx	rbp,rcx,QWORD[((24+128))+r14]
898	mov	rdx,QWORD[24+rbx]
899	adcx	r13,rcx
900	adox	r8,rbp
901	adcx	r8,r10
902	adox	r9,r10
903	adc	r9,0
904
905
	; Accumulate a[0..3] * b[3]
906	mulx	rbp,rcx,QWORD[((0+128))+rsi]
907	adcx	r11,rcx
908	adox	r12,rbp
909
910	mulx	rbp,rcx,QWORD[((8+128))+rsi]
911	adcx	r12,rcx
912	adox	r13,rbp
913
914	mulx	rbp,rcx,QWORD[((16+128))+rsi]
915	adcx	r13,rcx
916	adox	r8,rbp
917
918	mulx	rbp,rcx,QWORD[((24+128))+rsi]
919	mov	rdx,r11
920	mulx	rax,rdx,r15
921	adcx	r8,rcx
922	adox	r9,rbp
923
924	adcx	r9,r10
925	adox	r10,r10
926	adc	r10,0
927
928
	; Reduction step 4; r14 is un-biased back to &$L$ord on the way, and
	; rbx/rdx start capturing the unsubtracted result limbs.
929	mulx	rbp,rcx,QWORD[((0+128))+r14]
930	adcx	r11,rcx
931	adox	r12,rbp
932
933	mulx	rbp,rcx,QWORD[((8+128))+r14]
934	adcx	r12,rcx
935	adox	r13,rbp
936
937	mulx	rbp,rcx,QWORD[((16+128))+r14]
938	adcx	r13,rcx
939	adox	r8,rbp
940
941	mulx	rbp,rcx,QWORD[((24+128))+r14]
942	lea	r14,[128+r14]
943	mov	rbx,r12
944	adcx	r8,rcx
945	adox	r9,rbp
946	mov	rdx,r13
947	adcx	r9,r11
948	adox	r10,r11
949	adc	r10,0
950
951
952
	; Result candidate is r10:r9:r8:r13:r12. Subtract $L$ord; on borrow
	; cmovc restores the saved limbs (rbx, rdx, rcx, rbp).
953	mov	rcx,r8
954	sub	r12,QWORD[r14]
955	sbb	r13,QWORD[8+r14]
956	sbb	r8,QWORD[16+r14]
957	mov	rbp,r9
958	sbb	r9,QWORD[24+r14]
959	sbb	r10,0
960
961	cmovc	r12,rbx
962	cmovc	r13,rdx
963	cmovc	r8,rcx
964	cmovc	r9,rbp
965
966	mov	QWORD[rdi],r12
967	mov	QWORD[8+rdi],r13
968	mov	QWORD[16+rdi],r8
969	mov	QWORD[24+rdi],r9
970
971	mov	r15,QWORD[rsp]
972
973	mov	r14,QWORD[8+rsp]
974
975	mov	r13,QWORD[16+rsp]
976
977	mov	r12,QWORD[24+rsp]
978
979	mov	rbx,QWORD[32+rsp]
980
981	mov	rbp,QWORD[40+rsp]
982
983	lea	rsp,[48+rsp]
984
985$L$ord_mulx_epilogue:
986	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
987	mov	rsi,QWORD[16+rsp]
988	DB	0F3h,0C3h		;repret
989
990$L$SEH_end_ecp_nistz256_ord_mul_montx:
991
992
;-----------------------------------------------------------------------
; ecp_nistz256_ord_sqr_montx -- MULX/ADCX/ADOX variant of
; p256_scalar_sqr_rep_mont (not declared global in the visible source).
; Entry: normally reached through the inner label
;        $L$ecp_nistz256_ord_sqr_montx, jumped to by
;        p256_scalar_sqr_rep_mont after its own Win64 prologue; the
;        prologue at the top label here is then bypassed.
; In (at the inner label): rdi = res, rsi = a, rdx = rep.
;        The loop is do-while shaped (dec rbx / jnz): the body always
;        runs at least once.
; Out:   res <- operand after rep squaring-with-reduction rounds against
;        $L$ord.
; Saves: rbp, rbx, r12-r15. Clobbers rax, rcx, rdx, r8-r11, xmm1-xmm3
;        (zeroed before return), flags.
;-----------------------------------------------------------------------
993ALIGN	32
994ecp_nistz256_ord_sqr_montx:
995	mov	QWORD[8+rsp],rdi	;WIN64 prologue
996	mov	QWORD[16+rsp],rsi
997	mov	rax,rsp
998$L$SEH_begin_ecp_nistz256_ord_sqr_montx:
999	mov	rdi,rcx
1000	mov	rsi,rdx
1001	mov	rdx,r8
1002
1003
1004
1005$L$ecp_nistz256_ord_sqr_montx:
1006	push	rbp
1007
1008	push	rbx
1009
1010	push	r12
1011
1012	push	r13
1013
1014	push	r14
1015
1016	push	r15
1017
1018$L$ord_sqrx_body:
1019
	; Loop-carried state: rdx = a[0], r14 = a[1], r15 = a[2], r8 = a[3].
	; rbx = rep; rsi is re-pointed at $L$ord ($L$ordK is read as [32+rsi]).
1020	mov	rbx,rdx
1021	mov	rdx,QWORD[rsi]
1022	mov	r14,QWORD[8+rsi]
1023	mov	r15,QWORD[16+rsi]
1024	mov	r8,QWORD[24+rsi]
1025	lea	rsi,[$L$ord]
1026	jmp	NEAR $L$oop_ord_sqrx
1027
1028ALIGN	32
1029$L$oop_ord_sqrx:
	; Cross products a[0]*a[1..3]; a[0] is kept in rax, a[1..3] in xmm1..3.
1030	mulx	r10,r9,r14
1031	mulx	r11,rcx,r15
1032	mov	rax,rdx
1033DB	102,73,15,110,206	; movq xmm1,r14
1034	mulx	r12,rbp,r8
1035	mov	rdx,r14
1036	add	r10,rcx
1037DB	102,73,15,110,215	; movq xmm2,r15
1038	adc	r11,rbp
1039	adc	r12,0
1040	xor	r13,r13
1041
	; a[1]*a[2], a[1]*a[3]
1042	mulx	rbp,rcx,r15
1043	adcx	r11,rcx
1044	adox	r12,rbp
1045
1046	mulx	rbp,rcx,r8
1047	mov	rdx,r15
1048	adcx	r12,rcx
1049	adox	r13,rbp
1050	adc	r13,0
1051
	; a[2]*a[3]; then the adcx chain doubles the cross products while the
	; adox chain adds in the squares a[i]*a[i].
1052	mulx	r14,rcx,r8
1053	mov	rdx,rax
1054DB	102,73,15,110,216	; movq xmm3,r8
1055	xor	r15,r15
1056	adcx	r9,r9
1057	adox	r13,rcx
1058	adcx	r10,r10
1059	adox	r14,r15
1060
1061
1062	mulx	rbp,r8,rdx
1063DB	102,72,15,126,202	; movq rdx,xmm1
1064	adcx	r11,r11
1065	adox	r9,rbp
1066	adcx	r12,r12
1067	mulx	rax,rcx,rdx
1068DB	102,72,15,126,210	; movq rdx,xmm2
1069	adcx	r13,r13
1070	adox	r10,rcx
1071	adcx	r14,r14
1072	mulx	rbp,rcx,rdx
1073DB	0x67	; prefix byte applied to the following movq (padding)
1074DB	102,72,15,126,218	; movq rdx,xmm3
1075	adox	r11,rax
1076	adcx	r15,r15
1077	adox	r12,rcx
1078	adox	r13,rbp
1079	mulx	rax,rcx,rdx
1080	adox	r14,rcx
1081	adox	r15,rax
1082
1083
	; Reduction step 1: rdx = r8 * $L$ordK, then add rdx * $L$ord.
	; rax is zeroed here and used as the zero addend that closes each
	; carry chain in the four reduction steps.
1084	mov	rdx,r8
1085	mulx	rcx,rdx,QWORD[32+rsi]
1086
1087	xor	rax,rax
1088	mulx	rbp,rcx,QWORD[rsi]
1089	adcx	r8,rcx
1090	adox	r9,rbp
1091	mulx	rbp,rcx,QWORD[8+rsi]
1092	adcx	r9,rcx
1093	adox	r10,rbp
1094	mulx	rbp,rcx,QWORD[16+rsi]
1095	adcx	r10,rcx
1096	adox	r11,rbp
1097	mulx	rbp,rcx,QWORD[24+rsi]
1098	adcx	r11,rcx
1099	adox	r8,rbp
1100	adcx	r8,rax
1101
1102
	; Reduction step 2 (the roles of the CF and OF chains alternate per step)
1103	mov	rdx,r9
1104	mulx	rcx,rdx,QWORD[32+rsi]
1105
1106	mulx	rbp,rcx,QWORD[rsi]
1107	adox	r9,rcx
1108	adcx	r10,rbp
1109	mulx	rbp,rcx,QWORD[8+rsi]
1110	adox	r10,rcx
1111	adcx	r11,rbp
1112	mulx	rbp,rcx,QWORD[16+rsi]
1113	adox	r11,rcx
1114	adcx	r8,rbp
1115	mulx	rbp,rcx,QWORD[24+rsi]
1116	adox	r8,rcx
1117	adcx	r9,rbp
1118	adox	r9,rax
1119
1120
	; Reduction step 3
1121	mov	rdx,r10
1122	mulx	rcx,rdx,QWORD[32+rsi]
1123
1124	mulx	rbp,rcx,QWORD[rsi]
1125	adcx	r10,rcx
1126	adox	r11,rbp
1127	mulx	rbp,rcx,QWORD[8+rsi]
1128	adcx	r11,rcx
1129	adox	r8,rbp
1130	mulx	rbp,rcx,QWORD[16+rsi]
1131	adcx	r8,rcx
1132	adox	r9,rbp
1133	mulx	rbp,rcx,QWORD[24+rsi]
1134	adcx	r9,rcx
1135	adox	r10,rbp
1136	adcx	r10,rax
1137
1138
	; Reduction step 4
1139	mov	rdx,r11
1140	mulx	rcx,rdx,QWORD[32+rsi]
1141
1142	mulx	rbp,rcx,QWORD[rsi]
1143	adox	r11,rcx
1144	adcx	r8,rbp
1145	mulx	rbp,rcx,QWORD[8+rsi]
1146	adox	r8,rcx
1147	adcx	r9,rbp
1148	mulx	rbp,rcx,QWORD[16+rsi]
1149	adox	r9,rcx
1150	adcx	r10,rbp
1151	mulx	rbp,rcx,QWORD[24+rsi]
1152	adox	r10,rcx
1153	adcx	r11,rbp
1154	adox	r11,rax
1155
1156
	; Add the upper half of the square; the sum lives in r11:r10:r9:r12
	; (note the low limb accumulates into r12, not r8). rax = carry out.
1157	add	r12,r8
1158	adc	r9,r13
1159	mov	rdx,r12
1160	adc	r10,r14
1161	adc	r11,r15
1162	mov	r14,r9
1163	adc	rax,0
1164
1165
	; Conditional subtraction of $L$ord. The next iteration's operand (and
	; the final result) ends up in rdx, r14, r15, r8.
1166	sub	r12,QWORD[rsi]
1167	mov	r15,r10
1168	sbb	r9,QWORD[8+rsi]
1169	sbb	r10,QWORD[16+rsi]
1170	mov	r8,r11
1171	sbb	r11,QWORD[24+rsi]
1172	sbb	rax,0
1173
1174	cmovnc	rdx,r12
1175	cmovnc	r14,r9
1176	cmovnc	r15,r10
1177	cmovnc	r8,r11
1178
1179	dec	rbx
1180	jnz	NEAR $L$oop_ord_sqrx
1181
	; Store the result and zero xmm1..xmm3, which held copies of operand
	; limbs.
1182	mov	QWORD[rdi],rdx
1183	mov	QWORD[8+rdi],r14
1184	pxor	xmm1,xmm1
1185	mov	QWORD[16+rdi],r15
1186	pxor	xmm2,xmm2
1187	mov	QWORD[24+rdi],r8
1188	pxor	xmm3,xmm3
1189
1190	mov	r15,QWORD[rsp]
1191
1192	mov	r14,QWORD[8+rsp]
1193
1194	mov	r13,QWORD[16+rsp]
1195
1196	mov	r12,QWORD[24+rsp]
1197
1198	mov	rbx,QWORD[32+rsp]
1199
1200	mov	rbp,QWORD[40+rsp]
1201
1202	lea	rsp,[48+rsp]
1203
1204$L$ord_sqrx_epilogue:
1205	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1206	mov	rsi,QWORD[16+rsp]
1207	DB	0F3h,0C3h		;repret
1208
1209$L$SEH_end_ecp_nistz256_ord_sqr_montx:
1210
1211
1212
1213
1214
1215
;-----------------------------------------------------------------------
; p256_mul_mont(res, a, b)
; ABI:   Microsoft x64. rcx/rdx/r8 are moved to rdi (res), rsi (a),
;        rdx (b); caller's rdi/rsi are saved in the home space and
;        restored in the epilogue.
; In:    a, b -> four 64-bit limbs each, least significant first
; Out:   res  <- written by the helper that is called:
;        __ecp_nistz256_mul_montx when both bits of mask 0x80100 are set
;        in the dword at OPENSSL_ia32cap_P+8, otherwise
;        __ecp_nistz256_mul_montq. (Presumably Montgomery multiplication
;        modulo $L$poly -- confirm against the generator.)
; Saves: rbp, rbx, r12-r15 around the helper call, which clobbers them.
; Note:  $L$mul_mont is a secondary label ahead of the pushes; nothing
;        in the part of the file reviewed here references it.
;-----------------------------------------------------------------------
1216global	p256_mul_mont
1217
1218ALIGN	32
1219p256_mul_mont:
1220	mov	QWORD[8+rsp],rdi	;WIN64 prologue
1221	mov	QWORD[16+rsp],rsi
1222	mov	rax,rsp
1223$L$SEH_begin_p256_mul_mont:
1224	mov	rdi,rcx
1225	mov	rsi,rdx
1226	mov	rdx,r8
1227
1228
1229
	; ecx = capability bits; push does not modify ecx, so it is still valid
	; for the cmp at $L$mul_body.
1230	lea	rcx,[OPENSSL_ia32cap_P]
1231	mov	rcx,QWORD[8+rcx]
1232	and	ecx,0x80100
1233$L$mul_mont:
1234	push	rbp
1235
1236	push	rbx
1237
1238	push	r12
1239
1240	push	r13
1241
1242	push	r14
1243
1244	push	r15
1245
1246$L$mul_body:
1247	cmp	ecx,0x80100
1248	je	NEAR $L$mul_montx
	; MUL path: rbx = b, rax = b[0], r9..r12 = a[0..3], rsi = a
1249	mov	rbx,rdx
1250	mov	rax,QWORD[rdx]
1251	mov	r9,QWORD[rsi]
1252	mov	r10,QWORD[8+rsi]
1253	mov	r11,QWORD[16+rsi]
1254	mov	r12,QWORD[24+rsi]
1255
1256	call	__ecp_nistz256_mul_montq
1257	jmp	NEAR $L$mul_mont_done
1258
1259ALIGN	32
1260$L$mul_montx:
	; MULX path: rbx = b, rdx = b[0], r9..r12 = a[0..3], rsi = a - 128
	; (the helper adds 128 back in every displacement)
1261	mov	rbx,rdx
1262	mov	rdx,QWORD[rdx]
1263	mov	r9,QWORD[rsi]
1264	mov	r10,QWORD[8+rsi]
1265	mov	r11,QWORD[16+rsi]
1266	mov	r12,QWORD[24+rsi]
1267	lea	rsi,[((-128))+rsi]
1268
1269	call	__ecp_nistz256_mul_montx
1270$L$mul_mont_done:
1271	mov	r15,QWORD[rsp]
1272
1273	mov	r14,QWORD[8+rsp]
1274
1275	mov	r13,QWORD[16+rsp]
1276
1277	mov	r12,QWORD[24+rsp]
1278
1279	mov	rbx,QWORD[32+rsp]
1280
1281	mov	rbp,QWORD[40+rsp]
1282
1283	lea	rsp,[48+rsp]
1284
1285$L$mul_epilogue:
1286	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1287	mov	rsi,QWORD[16+rsp]
1288	DB	0F3h,0C3h		;repret
1289
1290$L$SEH_end_p256_mul_mont:
1291
1292
;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montq -- internal helper with a private register
; convention (see the call site in p256_mul_mont); not a C-callable
; entry point.
; In:    rax = b[0], rbx = b, rsi = a, r9..r12 = a[0..3], rdi = res
; Out:   [rdi..rdi+31] <- result after four multiply-by-b[i] passes,
;        each followed by a reduction step against $L$poly, and a final
;        conditional subtraction of $L$poly.
; Clobb: rax, rbx, rcx, rdx, rbp, r8-r15, flags (nothing is saved here).
; Reduction trick: $L$poly[0] is all ones and $L$poly[2] is zero, so each
;        step needs only the low limb shifted by 32 (shl/shr) plus a
;        single mul by $L$poly[3] (held in r15); r14 holds $L$poly[1].
;-----------------------------------------------------------------------
1293ALIGN	32
1294__ecp_nistz256_mul_montq:
1295
1296
1297
	; r12:r11:r10:r9:r8 = a[0..3] * b[0]   (rbp keeps b[0])
1298	mov	rbp,rax
1299	mul	r9
1300	mov	r14,QWORD[(($L$poly+8))]
1301	mov	r8,rax
1302	mov	rax,rbp
1303	mov	r9,rdx
1304
1305	mul	r10
1306	mov	r15,QWORD[(($L$poly+24))]
1307	add	r9,rax
1308	mov	rax,rbp
1309	adc	rdx,0
1310	mov	r10,rdx
1311
1312	mul	r11
1313	add	r10,rax
1314	mov	rax,rbp
1315	adc	rdx,0
1316	mov	r11,rdx
1317
1318	mul	r12
1319	add	r11,rax
1320	mov	rax,r8
1321	adc	rdx,0
1322	xor	r13,r13
1323	mov	r12,rdx
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
	; Reduction step 1: fold the low limb r8 into r13:r12:r11:r10:r9 using
	; r8 << 32, r8 >> 32 and r8 * $L$poly[3]; r8 is then zeroed and becomes
	; the next top limb.
1334	mov	rbp,r8
1335	shl	r8,32
1336	mul	r15
1337	shr	rbp,32
1338	add	r9,r8
1339	adc	r10,rbp
1340	adc	r11,rax
1341	mov	rax,QWORD[8+rbx]
1342	adc	r12,rdx
1343	adc	r13,0
1344	xor	r8,r8
1345
1346
1347
	; Accumulate a[0..3] * b[1]
1348	mov	rbp,rax
1349	mul	QWORD[rsi]
1350	add	r9,rax
1351	mov	rax,rbp
1352	adc	rdx,0
1353	mov	rcx,rdx
1354
1355	mul	QWORD[8+rsi]
1356	add	r10,rcx
1357	adc	rdx,0
1358	add	r10,rax
1359	mov	rax,rbp
1360	adc	rdx,0
1361	mov	rcx,rdx
1362
1363	mul	QWORD[16+rsi]
1364	add	r11,rcx
1365	adc	rdx,0
1366	add	r11,rax
1367	mov	rax,rbp
1368	adc	rdx,0
1369	mov	rcx,rdx
1370
1371	mul	QWORD[24+rsi]
1372	add	r12,rcx
1373	adc	rdx,0
1374	add	r12,rax
1375	mov	rax,r9
1376	adc	r13,rdx
1377	adc	r8,0
1378
1379
1380
	; Reduction step 2 (folds r9)
1381	mov	rbp,r9
1382	shl	r9,32
1383	mul	r15
1384	shr	rbp,32
1385	add	r10,r9
1386	adc	r11,rbp
1387	adc	r12,rax
1388	mov	rax,QWORD[16+rbx]
1389	adc	r13,rdx
1390	adc	r8,0
1391	xor	r9,r9
1392
1393
1394
	; Accumulate a[0..3] * b[2]
1395	mov	rbp,rax
1396	mul	QWORD[rsi]
1397	add	r10,rax
1398	mov	rax,rbp
1399	adc	rdx,0
1400	mov	rcx,rdx
1401
1402	mul	QWORD[8+rsi]
1403	add	r11,rcx
1404	adc	rdx,0
1405	add	r11,rax
1406	mov	rax,rbp
1407	adc	rdx,0
1408	mov	rcx,rdx
1409
1410	mul	QWORD[16+rsi]
1411	add	r12,rcx
1412	adc	rdx,0
1413	add	r12,rax
1414	mov	rax,rbp
1415	adc	rdx,0
1416	mov	rcx,rdx
1417
1418	mul	QWORD[24+rsi]
1419	add	r13,rcx
1420	adc	rdx,0
1421	add	r13,rax
1422	mov	rax,r10
1423	adc	r8,rdx
1424	adc	r9,0
1425
1426
1427
	; Reduction step 3 (folds r10)
1428	mov	rbp,r10
1429	shl	r10,32
1430	mul	r15
1431	shr	rbp,32
1432	add	r11,r10
1433	adc	r12,rbp
1434	adc	r13,rax
1435	mov	rax,QWORD[24+rbx]
1436	adc	r8,rdx
1437	adc	r9,0
1438	xor	r10,r10
1439
1440
1441
	; Accumulate a[0..3] * b[3]
1442	mov	rbp,rax
1443	mul	QWORD[rsi]
1444	add	r11,rax
1445	mov	rax,rbp
1446	adc	rdx,0
1447	mov	rcx,rdx
1448
1449	mul	QWORD[8+rsi]
1450	add	r12,rcx
1451	adc	rdx,0
1452	add	r12,rax
1453	mov	rax,rbp
1454	adc	rdx,0
1455	mov	rcx,rdx
1456
1457	mul	QWORD[16+rsi]
1458	add	r13,rcx
1459	adc	rdx,0
1460	add	r13,rax
1461	mov	rax,rbp
1462	adc	rdx,0
1463	mov	rcx,rdx
1464
1465	mul	QWORD[24+rsi]
1466	add	r8,rcx
1467	adc	rdx,0
1468	add	r8,rax
1469	mov	rax,r11
1470	adc	r9,rdx
1471	adc	r10,0
1472
1473
1474
	; Reduction step 4 (folds r11); rcx/rbp start saving the unsubtracted
	; result limbs.
1475	mov	rbp,r11
1476	shl	r11,32
1477	mul	r15
1478	shr	rbp,32
1479	add	r12,r11
1480	adc	r13,rbp
1481	mov	rcx,r12
1482	adc	r8,rax
1483	adc	r9,rdx
1484	mov	rbp,r13
1485	adc	r10,0
1486
1487
1488
	; Result candidate is r10:r9:r8:r13:r12. Subtract $L$poly
	; (limbs: -1, r14, 0, r15); on borrow cmovc restores the saved limbs.
1489	sub	r12,-1
1490	mov	rbx,r8
1491	sbb	r13,r14
1492	sbb	r8,0
1493	mov	rdx,r9
1494	sbb	r9,r15
1495	sbb	r10,0
1496
1497	cmovc	r12,rcx
1498	cmovc	r13,rbp
1499	mov	QWORD[rdi],r12
1500	cmovc	r8,rbx
1501	mov	QWORD[8+rdi],r13
1502	cmovc	r9,rdx
1503	mov	QWORD[16+rdi],r8
1504	mov	QWORD[24+rdi],r9
1505
1506	DB	0F3h,0C3h		;repret
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
;-----------------------------------------------------------------------
; p256_sqr_mont(res, a)
; ABI:   Microsoft x64. rcx/rdx are moved to rdi (res), rsi (a);
;        caller's rdi/rsi are saved in the home space and restored in
;        the epilogue.
; In:    a   -> four 64-bit limbs, least significant first
; Out:   res <- written by the helper that is called:
;        __ecp_nistz256_sqr_montx when both bits of mask 0x80100 are set
;        in the dword at OPENSSL_ia32cap_P+8, otherwise
;        __ecp_nistz256_sqr_montq. (Presumably Montgomery squaring
;        modulo $L$poly -- confirm against the generator.)
; Saves: rbp, rbx, r12-r15 around the helper call, which clobbers most
;        of them.
;-----------------------------------------------------------------------
1517global	p256_sqr_mont
1518
1519ALIGN	32
1520p256_sqr_mont:
1521	mov	QWORD[8+rsp],rdi	;WIN64 prologue
1522	mov	QWORD[16+rsp],rsi
1523	mov	rax,rsp
1524$L$SEH_begin_p256_sqr_mont:
1525	mov	rdi,rcx
1526	mov	rsi,rdx
1527
1528
1529
	; ecx = capability bits; push does not modify ecx, so it is still valid
	; for the cmp at $L$sqr_body.
1530	lea	rcx,[OPENSSL_ia32cap_P]
1531	mov	rcx,QWORD[8+rcx]
1532	and	ecx,0x80100
1533	push	rbp
1534
1535	push	rbx
1536
1537	push	r12
1538
1539	push	r13
1540
1541	push	r14
1542
1543	push	r15
1544
1545$L$sqr_body:
1546	cmp	ecx,0x80100
1547	je	NEAR $L$sqr_montx
	; MUL path: rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3], rsi = a
1548	mov	rax,QWORD[rsi]
1549	mov	r14,QWORD[8+rsi]
1550	mov	r15,QWORD[16+rsi]
1551	mov	r8,QWORD[24+rsi]
1552
1553	call	__ecp_nistz256_sqr_montq
1554	jmp	NEAR $L$sqr_mont_done
1555
1556ALIGN	32
1557$L$sqr_montx:
	; MULX path: rdx = a[0], r14 = a[1], r15 = a[2], r8 = a[3],
	; rsi = a - 128 (the helper adds 128 back in every displacement)
1558	mov	rdx,QWORD[rsi]
1559	mov	r14,QWORD[8+rsi]
1560	mov	r15,QWORD[16+rsi]
1561	mov	r8,QWORD[24+rsi]
1562	lea	rsi,[((-128))+rsi]
1563
1564	call	__ecp_nistz256_sqr_montx
1565$L$sqr_mont_done:
1566	mov	r15,QWORD[rsp]
1567
1568	mov	r14,QWORD[8+rsp]
1569
1570	mov	r13,QWORD[16+rsp]
1571
1572	mov	r12,QWORD[24+rsp]
1573
1574	mov	rbx,QWORD[32+rsp]
1575
1576	mov	rbp,QWORD[40+rsp]
1577
1578	lea	rsp,[48+rsp]
1579
1580$L$sqr_epilogue:
1581	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1582	mov	rsi,QWORD[16+rsp]
1583	DB	0F3h,0C3h		;repret
1584
1585$L$SEH_end_p256_sqr_mont:
1586
1587
;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montq -- internal helper with a private register
; convention (see the call site in p256_sqr_mont); not a C-callable
; entry point.
; In:    rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3], rsi = a,
;        rdi = res
; Out:   [rdi..rdi+31] <- the 512-bit square of a, reduced by four
;        folding steps against $L$poly, plus a final conditional
;        subtraction of $L$poly.
; Clobb: rax, rcx, rdx, rbp, rsi, r8-r15, flags (nothing is saved here).
;-----------------------------------------------------------------------
1588ALIGN	32
1589__ecp_nistz256_sqr_montq:
1590
	; Cross products a[0] * a[1..3]   (r13 keeps a[0])
1591	mov	r13,rax
1592	mul	r14
1593	mov	r9,rax
1594	mov	rax,r15
1595	mov	r10,rdx
1596
1597	mul	r13
1598	add	r10,rax
1599	mov	rax,r8
1600	adc	rdx,0
1601	mov	r11,rdx
1602
1603	mul	r13
1604	add	r11,rax
1605	mov	rax,r15
1606	adc	rdx,0
1607	mov	r12,rdx
1608
1609
	; a[1] * a[2] and a[1] * a[3]
1610	mul	r14
1611	add	r11,rax
1612	mov	rax,r8
1613	adc	rdx,0
1614	mov	rbp,rdx
1615
1616	mul	r14
1617	add	r12,rax
1618	mov	rax,r8
1619	adc	rdx,0
1620	add	r12,rbp
1621	mov	r13,rdx
1622	adc	r13,0
1623
1624
	; a[2] * a[3]
1625	mul	r15
1626	xor	r15,r15
1627	add	r13,rax
1628	mov	rax,QWORD[rsi]
1629	mov	r14,rdx
1630	adc	r14,0
1631
	; Double the cross-product sum: r15:r14:...:r9 = 2 * (r14:...:r9)
1632	add	r9,r9
1633	adc	r10,r10
1634	adc	r11,r11
1635	adc	r12,r12
1636	adc	r13,r13
1637	adc	r14,r14
1638	adc	r15,0
1639
	; Add the squares a[i]*a[i] (operands reloaded from [rsi]); the
	; 512-bit square ends up in r15:...:r8.
1640	mul	rax
1641	mov	r8,rax
1642	mov	rax,QWORD[8+rsi]
1643	mov	rcx,rdx
1644
1645	mul	rax
1646	add	r9,rcx
1647	adc	r10,rax
1648	mov	rax,QWORD[16+rsi]
1649	adc	rdx,0
1650	mov	rcx,rdx
1651
1652	mul	rax
1653	add	r11,rcx
1654	adc	r12,rax
1655	mov	rax,QWORD[24+rsi]
1656	adc	rdx,0
1657	mov	rcx,rdx
1658
1659	mul	rax
1660	add	r13,rcx
1661	adc	r14,rax
1662	mov	rax,r8
1663	adc	r15,rdx
1664
	; rsi is no longer needed as a pointer: rsi = $L$poly[1],
	; rbp = $L$poly[3].
1665	mov	rsi,QWORD[(($L$poly+8))]
1666	mov	rbp,QWORD[(($L$poly+24))]
1667
1668
1669
1670
	; Reduction step 1: fold r8 using r8 << 32, r8 >> 32 and
	; r8 * $L$poly[3]
1671	mov	rcx,r8
1672	shl	r8,32
1673	mul	rbp
1674	shr	rcx,32
1675	add	r9,r8
1676	adc	r10,rcx
1677	adc	r11,rax
1678	mov	rax,r9
1679	adc	rdx,0
1680
1681
1682
	; Reduction step 2 (folds r9; the previous high word moves into r8)
1683	mov	rcx,r9
1684	shl	r9,32
1685	mov	r8,rdx
1686	mul	rbp
1687	shr	rcx,32
1688	add	r10,r9
1689	adc	r11,rcx
1690	adc	r8,rax
1691	mov	rax,r10
1692	adc	rdx,0
1693
1694
1695
	; Reduction step 3 (folds r10)
1696	mov	rcx,r10
1697	shl	r10,32
1698	mov	r9,rdx
1699	mul	rbp
1700	shr	rcx,32
1701	add	r11,r10
1702	adc	r8,rcx
1703	adc	r9,rax
1704	mov	rax,r11
1705	adc	rdx,0
1706
1707
1708
	; Reduction step 4 (folds r11); reduced low half is rdx:r10:r9:r8
1709	mov	rcx,r11
1710	shl	r11,32
1711	mov	r10,rdx
1712	mul	rbp
1713	shr	rcx,32
1714	add	r8,r11
1715	adc	r9,rcx
1716	adc	r10,rax
1717	adc	rdx,0
1718	xor	r11,r11
1719
1720
1721
	; Add the reduced low half into the upper half r15:r14:r13:r12;
	; r11 = carry out. r8/r9/r10/rcx save the unsubtracted limbs.
1722	add	r12,r8
1723	adc	r13,r9
1724	mov	r8,r12
1725	adc	r14,r10
1726	adc	r15,rdx
1727	mov	r9,r13
1728	adc	r11,0
1729
	; Subtract $L$poly (limbs: -1, rsi, 0, rbp); on borrow cmovc restores
	; the saved limbs.
1730	sub	r12,-1
1731	mov	r10,r14
1732	sbb	r13,rsi
1733	sbb	r14,0
1734	mov	rcx,r15
1735	sbb	r15,rbp
1736	sbb	r11,0
1737
1738	cmovc	r12,r8
1739	cmovc	r13,r9
1740	mov	QWORD[rdi],r12
1741	cmovc	r14,r10
1742	mov	QWORD[8+rdi],r13
1743	cmovc	r15,rcx
1744	mov	QWORD[16+rdi],r14
1745	mov	QWORD[24+rdi],r15
1746
1747	DB	0F3h,0C3h		;repret
1748
1749
1750
;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montx -- MULX/ADCX/ADOX counterpart of
; __ecp_nistz256_mul_montq; internal helper with a private register
; convention (see the call site in p256_mul_mont).
; In:    rdx = b[0], rbx = b, rsi = a - 128 (all loads add 128 back),
;        r9..r12 = a[0..3], rdi = res
; Out:   [rdi..rdi+31] <- result after four multiply-by-b[i] passes,
;        each followed by a reduction step against $L$poly, and a final
;        conditional subtraction of $L$poly.
; Clobb: rax, rbx, rcx, rdx, rbp, r8-r15, flags (nothing is saved here).
; r14 holds the shift count 32 for shlx/shrx until the last reduction
; step, then is reloaded with $L$poly[1]; r15 = $L$poly[3].
;-----------------------------------------------------------------------
1751ALIGN	32
1752__ecp_nistz256_mul_montx:
1753
1754
1755
	; r12:r11:r10:r9:r8 = a[0..3] * b[0]. The xor clears r13 and CF ahead
	; of the adc chain; rbp/rcx = r8 << 32 / r8 >> 32 for the reduction.
1756	mulx	r9,r8,r9
1757	mulx	r10,rcx,r10
1758	mov	r14,32
1759	xor	r13,r13
1760	mulx	r11,rbp,r11
1761	mov	r15,QWORD[(($L$poly+24))]
1762	adc	r9,rcx
1763	mulx	r12,rcx,r12
1764	mov	rdx,r8
1765	adc	r10,rbp
1766	shlx	rbp,r8,r14
1767	adc	r11,rcx
1768	shrx	rcx,r8,r14
1769	adc	r12,0
1770
1771
1772
	; Reduction step 1: fold r8 (shifted halves plus r8 * $L$poly[3])
1773	add	r9,rbp
1774	adc	r10,rcx
1775
1776	mulx	rbp,rcx,r15
1777	mov	rdx,QWORD[8+rbx]
1778	adc	r11,rcx
1779	adc	r12,rbp
1780	adc	r13,0
1781	xor	r8,r8
1782
1783
1784
	; Accumulate a[0..3] * b[1] (adcx/adox dual carry chains)
1785	mulx	rbp,rcx,QWORD[((0+128))+rsi]
1786	adcx	r9,rcx
1787	adox	r10,rbp
1788
1789	mulx	rbp,rcx,QWORD[((8+128))+rsi]
1790	adcx	r10,rcx
1791	adox	r11,rbp
1792
1793	mulx	rbp,rcx,QWORD[((16+128))+rsi]
1794	adcx	r11,rcx
1795	adox	r12,rbp
1796
1797	mulx	rbp,rcx,QWORD[((24+128))+rsi]
1798	mov	rdx,r9
1799	adcx	r12,rcx
1800	shlx	rcx,r9,r14
1801	adox	r13,rbp
1802	shrx	rbp,r9,r14
1803
1804	adcx	r13,r8
1805	adox	r8,r8
1806	adc	r8,0
1807
1808
1809
	; Reduction step 2 (folds r9)
1810	add	r10,rcx
1811	adc	r11,rbp
1812
1813	mulx	rbp,rcx,r15
1814	mov	rdx,QWORD[16+rbx]
1815	adc	r12,rcx
1816	adc	r13,rbp
1817	adc	r8,0
1818	xor	r9,r9
1819
1820
1821
	; Accumulate a[0..3] * b[2]
1822	mulx	rbp,rcx,QWORD[((0+128))+rsi]
1823	adcx	r10,rcx
1824	adox	r11,rbp
1825
1826	mulx	rbp,rcx,QWORD[((8+128))+rsi]
1827	adcx	r11,rcx
1828	adox	r12,rbp
1829
1830	mulx	rbp,rcx,QWORD[((16+128))+rsi]
1831	adcx	r12,rcx
1832	adox	r13,rbp
1833
1834	mulx	rbp,rcx,QWORD[((24+128))+rsi]
1835	mov	rdx,r10
1836	adcx	r13,rcx
1837	shlx	rcx,r10,r14
1838	adox	r8,rbp
1839	shrx	rbp,r10,r14
1840
1841	adcx	r8,r9
1842	adox	r9,r9
1843	adc	r9,0
1844
1845
1846
	; Reduction step 3 (folds r10)
1847	add	r11,rcx
1848	adc	r12,rbp
1849
1850	mulx	rbp,rcx,r15
1851	mov	rdx,QWORD[24+rbx]
1852	adc	r13,rcx
1853	adc	r8,rbp
1854	adc	r9,0
1855	xor	r10,r10
1856
1857
1858
	; Accumulate a[0..3] * b[3]
1859	mulx	rbp,rcx,QWORD[((0+128))+rsi]
1860	adcx	r11,rcx
1861	adox	r12,rbp
1862
1863	mulx	rbp,rcx,QWORD[((8+128))+rsi]
1864	adcx	r12,rcx
1865	adox	r13,rbp
1866
1867	mulx	rbp,rcx,QWORD[((16+128))+rsi]
1868	adcx	r13,rcx
1869	adox	r8,rbp
1870
1871	mulx	rbp,rcx,QWORD[((24+128))+rsi]
1872	mov	rdx,r11
1873	adcx	r8,rcx
1874	shlx	rcx,r11,r14
1875	adox	r9,rbp
1876	shrx	rbp,r11,r14
1877
1878	adcx	r9,r10
1879	adox	r10,r10
1880	adc	r10,0
1881
1882
1883
	; Reduction step 4 (folds r11). rbx/rdx save the unsubtracted low
	; limbs and r14 is reloaded with $L$poly[1]; mov does not affect the
	; carry chain.
1884	add	r12,rcx
1885	adc	r13,rbp
1886
1887	mulx	rbp,rcx,r15
1888	mov	rbx,r12
1889	mov	r14,QWORD[(($L$poly+8))]
1890	adc	r8,rcx
1891	mov	rdx,r13
1892	adc	r9,rbp
1893	adc	r10,0
1894
1895
1896
	; Conditional subtraction of $L$poly. xor clears CF, so the first sbb
	; with -1 is r12 - 0xffffffffffffffff with no borrow-in. On borrow
	; cmovc restores the saved limbs.
1897	xor	eax,eax
1898	mov	rcx,r8
1899	sbb	r12,-1
1900	sbb	r13,r14
1901	sbb	r8,0
1902	mov	rbp,r9
1903	sbb	r9,r15
1904	sbb	r10,0
1905
1906	cmovc	r12,rbx
1907	cmovc	r13,rdx
1908	mov	QWORD[rdi],r12
1909	cmovc	r8,rcx
1910	mov	QWORD[8+rdi],r13
1911	cmovc	r9,rbp
1912	mov	QWORD[16+rdi],r8
1913	mov	QWORD[24+rdi],r9
1914
1915	DB	0F3h,0C3h		;repret
1916
1917
1918
1919
;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montx -- MULX/ADCX/ADOX counterpart of
; __ecp_nistz256_sqr_montq; internal helper with a private register
; convention (see the call site in p256_sqr_mont).
; In:    rdx = a[0], r14 = a[1], r15 = a[2], r8 = a[3],
;        rsi = a - 128 (all loads add 128 back), rdi = res
; Out:   [rdi..rdi+31] <- the 512-bit square of a, reduced by four
;        folding steps against $L$poly, plus a final conditional
;        subtraction of $L$poly.
; Clobb: rax, rcx, rdx, rbp, rsi, r8-r15, flags (nothing is saved here).
;-----------------------------------------------------------------------
1920ALIGN	32
1921__ecp_nistz256_sqr_montx:
1922
	; Cross products a[0] * a[1..3]; xor eax,eax clears CF for the adc
	; chain.
1923	mulx	r10,r9,r14
1924	mulx	r11,rcx,r15
1925	xor	eax,eax
1926	adc	r10,rcx
1927	mulx	r12,rbp,r8
1928	mov	rdx,r14
1929	adc	r11,rbp
1930	adc	r12,0
1931	xor	r13,r13
1932
1933
	; a[1]*a[2], a[1]*a[3]
1934	mulx	rbp,rcx,r15
1935	adcx	r11,rcx
1936	adox	r12,rbp
1937
1938	mulx	rbp,rcx,r8
1939	mov	rdx,r15
1940	adcx	r12,rcx
1941	adox	r13,rbp
1942	adc	r13,0
1943
1944
	; a[2]*a[3]; then the adcx chain doubles the cross products while the
	; adox chain adds the squares a[i]*a[i] (operands reloaded from memory).
1945	mulx	r14,rcx,r8
1946	mov	rdx,QWORD[((0+128))+rsi]
1947	xor	r15,r15
1948	adcx	r9,r9
1949	adox	r13,rcx
1950	adcx	r10,r10
1951	adox	r14,r15
1952
1953	mulx	rbp,r8,rdx
1954	mov	rdx,QWORD[((8+128))+rsi]
1955	adcx	r11,r11
1956	adox	r9,rbp
1957	adcx	r12,r12
1958	mulx	rax,rcx,rdx
1959	mov	rdx,QWORD[((16+128))+rsi]
1960	adcx	r13,r13
1961	adox	r10,rcx
1962	adcx	r14,r14
1963DB	0x67	; prefix byte applied to the following mulx (padding)
1964	mulx	rbp,rcx,rdx
1965	mov	rdx,QWORD[((24+128))+rsi]
1966	adox	r11,rax
1967	adcx	r15,r15
1968	adox	r12,rcx
	; rsi is no longer needed as a pointer; it becomes the shift count 32.
1969	mov	rsi,32
1970	adox	r13,rbp
1971DB	0x67,0x67	; prefix bytes applied to the following mulx (padding)
1972	mulx	rax,rcx,rdx
1973	mov	rdx,QWORD[(($L$poly+24))]
1974	adox	r14,rcx
1975	shlx	rcx,r8,rsi
1976	adox	r15,rax
1977	shrx	rax,r8,rsi
1978	mov	rbp,rdx
1979
1980
	; Reduction step 1: fold r8 (rdx = rbp = $L$poly[3])
1981	add	r9,rcx
1982	adc	r10,rax
1983
1984	mulx	r8,rcx,r8
1985	adc	r11,rcx
1986	shlx	rcx,r9,rsi
1987	adc	r8,0
1988	shrx	rax,r9,rsi
1989
1990
	; Reduction step 2 (folds r9)
1991	add	r10,rcx
1992	adc	r11,rax
1993
1994	mulx	r9,rcx,r9
1995	adc	r8,rcx
1996	shlx	rcx,r10,rsi
1997	adc	r9,0
1998	shrx	rax,r10,rsi
1999
2000
	; Reduction step 3 (folds r10)
2001	add	r11,rcx
2002	adc	r8,rax
2003
2004	mulx	r10,rcx,r10
2005	adc	r9,rcx
2006	shlx	rcx,r11,rsi
2007	adc	r10,0
2008	shrx	rax,r11,rsi
2009
2010
	; Reduction step 4 (folds r11); reduced low half is r11:r10:r9:r8
2011	add	r8,rcx
2012	adc	r9,rax
2013
2014	mulx	r11,rcx,r11
2015	adc	r10,rcx
2016	adc	r11,0
2017
	; Add the reduced low half into the upper half r15:r14:r13:r12;
	; rdx = carry out. rsi is reloaded with $L$poly[1]; r8..r11 save the
	; unsubtracted limbs.
2018	xor	rdx,rdx
2019	add	r12,r8
2020	mov	rsi,QWORD[(($L$poly+8))]
2021	adc	r13,r9
2022	mov	r8,r12
2023	adc	r14,r10
2024	adc	r15,r11
2025	mov	r9,r13
2026	adc	rdx,0
2027
	; Subtract $L$poly (limbs: -1, rsi, 0, rbp); on borrow cmovc restores
	; the saved limbs.
2028	sub	r12,-1
2029	mov	r10,r14
2030	sbb	r13,rsi
2031	sbb	r14,0
2032	mov	r11,r15
2033	sbb	r15,rbp
2034	sbb	rdx,0
2035
2036	cmovc	r12,r8
2037	cmovc	r13,r9
2038	mov	QWORD[rdi],r12
2039	cmovc	r14,r10
2040	mov	QWORD[8+rdi],r13
2041	cmovc	r15,r11
2042	mov	QWORD[16+rdi],r14
2043	mov	QWORD[24+rdi],r15
2044
2045	DB	0F3h,0C3h		;repret
2046
2047
2048
2049
; -----------------------------------------------------------------------
; nistz256_select_w5 -- copy one 96-byte table entry, scanning all entries.
;
; Win64 argument registers as used below:
;   rcx = destination, 96 bytes (written with unaligned movdqu)
;   rdx = table of 16 entries x 96 bytes (read with movdqa, so it must be
;         16-byte aligned)
;   r8d = index; entry k (k = 1..16) is kept when r8d == k. No counter value
;         matches any other index (e.g. 0), in which case 96 zero bytes are
;         written.
;
; Every entry is loaded and masked regardless of r8d; no branch or address
; depends on the index. If bit 5 (value 32) of the dword at
; OPENSSL_ia32cap_P+8 is set, control transfers to $L$avx2_select_w5.
; xmm6-xmm15 are saved below the stack pointer and restored on exit.
; -----------------------------------------------------------------------
2050global	nistz256_select_w5
2051
2052ALIGN	32
2053nistz256_select_w5:
2054
2055	lea	rax,[OPENSSL_ia32cap_P]
2056	mov	rax,QWORD[8+rax]
2057	test	eax,32	; bit 5 of the capability dword at offset 8
2058	jnz	NEAR $L$avx2_select_w5
2059	lea	rax,[((-136))+rsp]	; rax = frame anchor for the raw-encoded saves below
2060$L$SEH_begin_nistz256_select_w5:
2061DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
2062DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
2063DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
2064DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
2065DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
2066DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
2067DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
2068DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
2069DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
2070DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
2071DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
2072	movdqa	xmm0,XMMWORD[$L$One]	; xmm0 = per-iteration increment (1 in each dword)
2073	movd	xmm1,r8d
2074
; xmm2..xmm7 accumulate the 96-byte result.
2075	pxor	xmm2,xmm2
2076	pxor	xmm3,xmm3
2077	pxor	xmm4,xmm4
2078	pxor	xmm5,xmm5
2079	pxor	xmm6,xmm6
2080	pxor	xmm7,xmm7
2081
2082	movdqa	xmm8,xmm0	; xmm8 = running entry counter, starts at 1
2083	pshufd	xmm1,xmm1,0	; broadcast the index to all four dwords
2084
2085	mov	rax,16	; number of table entries
2086$L$select_loop_sse_w5:
2087
2088	movdqa	xmm15,xmm8
2089	paddd	xmm8,xmm0	; counter++
2090	pcmpeqd	xmm15,xmm1	; xmm15 = all-ones if this entry is the requested one
2091
2092	movdqa	xmm9,XMMWORD[rdx]
2093	movdqa	xmm10,XMMWORD[16+rdx]
2094	movdqa	xmm11,XMMWORD[32+rdx]
2095	movdqa	xmm12,XMMWORD[48+rdx]
2096	movdqa	xmm13,XMMWORD[64+rdx]
2097	movdqa	xmm14,XMMWORD[80+rdx]
2098	lea	rdx,[96+rdx]	; advance to the next entry
2099
; Mask the entry and OR it into the accumulators.
2100	pand	xmm9,xmm15
2101	pand	xmm10,xmm15
2102	por	xmm2,xmm9
2103	pand	xmm11,xmm15
2104	por	xmm3,xmm10
2105	pand	xmm12,xmm15
2106	por	xmm4,xmm11
2107	pand	xmm13,xmm15
2108	por	xmm5,xmm12
2109	pand	xmm14,xmm15
2110	por	xmm6,xmm13
2111	por	xmm7,xmm14
2112
2113	dec	rax
2114	jnz	NEAR $L$select_loop_sse_w5
2115
2116	movdqu	XMMWORD[rcx],xmm2
2117	movdqu	XMMWORD[16+rcx],xmm3
2118	movdqu	XMMWORD[32+rcx],xmm4
2119	movdqu	XMMWORD[48+rcx],xmm5
2120	movdqu	XMMWORD[64+rcx],xmm6
2121	movdqu	XMMWORD[80+rcx],xmm7
; Restore xmm6-xmm15 from the save area and release it.
2122	movaps	xmm6,XMMWORD[rsp]
2123	movaps	xmm7,XMMWORD[16+rsp]
2124	movaps	xmm8,XMMWORD[32+rsp]
2125	movaps	xmm9,XMMWORD[48+rsp]
2126	movaps	xmm10,XMMWORD[64+rsp]
2127	movaps	xmm11,XMMWORD[80+rsp]
2128	movaps	xmm12,XMMWORD[96+rsp]
2129	movaps	xmm13,XMMWORD[112+rsp]
2130	movaps	xmm14,XMMWORD[128+rsp]
2131	movaps	xmm15,XMMWORD[144+rsp]
2132	lea	rsp,[168+rsp]
2133	DB	0F3h,0C3h		;repret
2134
2135$L$SEH_end_nistz256_select_w5:
2136
2137
2138
2139
; -----------------------------------------------------------------------
; nistz256_select_w7 -- copy one 64-byte table entry, scanning all entries.
;
; Win64 argument registers as used below:
;   rcx = destination, 64 bytes (written with unaligned movdqu)
;   rdx = table of 64 entries x 64 bytes (read with movdqa, so it must be
;         16-byte aligned)
;   r8d = index; entry k (k = 1..64) is kept when r8d == k. For any other
;         index (e.g. 0) no counter value matches and 64 zero bytes are
;         written.
;
; Every entry is loaded and masked regardless of r8d. If bit 5 (value 32)
; of the dword at OPENSSL_ia32cap_P+8 is set, control transfers to
; $L$avx2_select_w7. xmm6-xmm15 are saved and restored.
; -----------------------------------------------------------------------
2140global	nistz256_select_w7
2141
2142ALIGN	32
2143nistz256_select_w7:
2144
2145	lea	rax,[OPENSSL_ia32cap_P]
2146	mov	rax,QWORD[8+rax]
2147	test	eax,32	; bit 5 of the capability dword at offset 8
2148	jnz	NEAR $L$avx2_select_w7
2149	lea	rax,[((-136))+rsp]	; rax = frame anchor for the raw-encoded saves below
2150$L$SEH_begin_nistz256_select_w7:
2151DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
2152DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
2153DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
2154DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
2155DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
2156DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
2157DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
2158DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
2159DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
2160DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
2161DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
2162	movdqa	xmm8,XMMWORD[$L$One]	; xmm8 = running entry counter, starts at 1
2163	movd	xmm1,r8d
2164
; xmm2..xmm5 accumulate the 64-byte result.
2165	pxor	xmm2,xmm2
2166	pxor	xmm3,xmm3
2167	pxor	xmm4,xmm4
2168	pxor	xmm5,xmm5
2169
2170	movdqa	xmm0,xmm8	; xmm0 = per-iteration increment (1 in each dword)
2171	pshufd	xmm1,xmm1,0	; broadcast the index to all four dwords
2172	mov	rax,64	; number of table entries
2173
2174$L$select_loop_sse_w7:
2175	movdqa	xmm15,xmm8
2176	paddd	xmm8,xmm0	; counter++
2177	movdqa	xmm9,XMMWORD[rdx]
2178	movdqa	xmm10,XMMWORD[16+rdx]
2179	pcmpeqd	xmm15,xmm1	; xmm15 = all-ones if this entry is the requested one
2180	movdqa	xmm11,XMMWORD[32+rdx]
2181	movdqa	xmm12,XMMWORD[48+rdx]
2182	lea	rdx,[64+rdx]	; advance to the next entry
2183
; Mask the entry and OR it into the accumulators.
2184	pand	xmm9,xmm15
2185	pand	xmm10,xmm15
2186	por	xmm2,xmm9
2187	pand	xmm11,xmm15
2188	por	xmm3,xmm10
2189	pand	xmm12,xmm15
2190	por	xmm4,xmm11
2191	prefetcht0	[255+rdx]	; prefetch hint for data further ahead in the table
2192	por	xmm5,xmm12
2193
2194	dec	rax
2195	jnz	NEAR $L$select_loop_sse_w7
2196
2197	movdqu	XMMWORD[rcx],xmm2
2198	movdqu	XMMWORD[16+rcx],xmm3
2199	movdqu	XMMWORD[32+rcx],xmm4
2200	movdqu	XMMWORD[48+rcx],xmm5
; Restore xmm6-xmm15 from the save area and release it.
2201	movaps	xmm6,XMMWORD[rsp]
2202	movaps	xmm7,XMMWORD[16+rsp]
2203	movaps	xmm8,XMMWORD[32+rsp]
2204	movaps	xmm9,XMMWORD[48+rsp]
2205	movaps	xmm10,XMMWORD[64+rsp]
2206	movaps	xmm11,XMMWORD[80+rsp]
2207	movaps	xmm12,XMMWORD[96+rsp]
2208	movaps	xmm13,XMMWORD[112+rsp]
2209	movaps	xmm14,XMMWORD[128+rsp]
2210	movaps	xmm15,XMMWORD[144+rsp]
2211	lea	rsp,[168+rsp]
2212	DB	0F3h,0C3h		;repret
2213
2214$L$SEH_end_nistz256_select_w7:
2215
2216
2217
2218
; -----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w5 -- AVX2 body of the 96-byte table select.
;
; Reached through $L$avx2_select_w5 (the jnz in nistz256_select_w5), so it
; uses the same registers:
;   rcx = destination, 96 bytes (unaligned stores)
;   rdx = table of 16 entries x 96 bytes (read with vmovdqa on ymm, so it
;         must be 32-byte aligned)
;   r8d = index; entry k (k = 1..16) is kept when r8d == k, otherwise the
;         output is all zero.
;
; Two entries are processed per iteration (8 iterations), using two
; counters that start at 1 and 2 and advance by 2. Every entry is read
; regardless of the index. r11 keeps the entry rsp; xmm6-xmm15 are saved
; and restored.
; -----------------------------------------------------------------------
2219ALIGN	32
2220ecp_nistz256_avx2_select_w5:
2221
2222$L$avx2_select_w5:
2223	vzeroupper
2224	lea	rax,[((-136))+rsp]	; rax = frame anchor for the raw-encoded saves below
2225	mov	r11,rsp	; remember entry rsp for the epilogue
2226$L$SEH_begin_ecp_nistz256_avx2_select_w5:
2227DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]
2228DB	0xc5,0xf8,0x29,0x70,0xe0	; vmovaps [rax-0x20],xmm6
2229DB	0xc5,0xf8,0x29,0x78,0xf0	; vmovaps [rax-0x10],xmm7
2230DB	0xc5,0x78,0x29,0x40,0x00	; vmovaps [rax+0x00],xmm8
2231DB	0xc5,0x78,0x29,0x48,0x10	; vmovaps [rax+0x10],xmm9
2232DB	0xc5,0x78,0x29,0x50,0x20	; vmovaps [rax+0x20],xmm10
2233DB	0xc5,0x78,0x29,0x58,0x30	; vmovaps [rax+0x30],xmm11
2234DB	0xc5,0x78,0x29,0x60,0x40	; vmovaps [rax+0x40],xmm12
2235DB	0xc5,0x78,0x29,0x68,0x50	; vmovaps [rax+0x50],xmm13
2236DB	0xc5,0x78,0x29,0x70,0x60	; vmovaps [rax+0x60],xmm14
2237DB	0xc5,0x78,0x29,0x78,0x70	; vmovaps [rax+0x70],xmm15
2238	vmovdqa	ymm0,YMMWORD[$L$Two]	; ymm0 = counter step (2 in each dword)
2239
; ymm2..ymm4 accumulate the 96-byte result.
2240	vpxor	ymm2,ymm2,ymm2
2241	vpxor	ymm3,ymm3,ymm3
2242	vpxor	ymm4,ymm4,ymm4
2243
2244	vmovdqa	ymm5,YMMWORD[$L$One]	; counter for the first entry of each pair (1,3,5,...)
2245	vmovdqa	ymm10,YMMWORD[$L$Two]	; counter for the second entry of each pair (2,4,6,...)
2246
2247	vmovd	xmm1,r8d
2248	vpermd	ymm1,ymm2,ymm1	; ymm2 is all zero, so dword 0 (the index) is broadcast to all lanes
2249
2250	mov	rax,8	; 8 iterations x 2 entries = 16 entries
2251$L$select_loop_avx2_w5:
2252
; First entry of the pair.
2253	vmovdqa	ymm6,YMMWORD[rdx]
2254	vmovdqa	ymm7,YMMWORD[32+rdx]
2255	vmovdqa	ymm8,YMMWORD[64+rdx]
2256
; Second entry of the pair.
2257	vmovdqa	ymm11,YMMWORD[96+rdx]
2258	vmovdqa	ymm12,YMMWORD[128+rdx]
2259	vmovdqa	ymm13,YMMWORD[160+rdx]
2260
2261	vpcmpeqd	ymm9,ymm5,ymm1	; mask for the first entry
2262	vpcmpeqd	ymm14,ymm10,ymm1	; mask for the second entry
2263
2264	vpaddd	ymm5,ymm5,ymm0
2265	vpaddd	ymm10,ymm10,ymm0
2266	lea	rdx,[192+rdx]	; advance by two entries
2267
2268	vpand	ymm6,ymm6,ymm9
2269	vpand	ymm7,ymm7,ymm9
2270	vpand	ymm8,ymm8,ymm9
2271	vpand	ymm11,ymm11,ymm14
2272	vpand	ymm12,ymm12,ymm14
2273	vpand	ymm13,ymm13,ymm14
2274
; Fold the masked entries into the accumulators (XOR; at most one mask is
; non-zero over the whole scan because the counters are all distinct).
2275	vpxor	ymm2,ymm2,ymm6
2276	vpxor	ymm3,ymm3,ymm7
2277	vpxor	ymm4,ymm4,ymm8
2278	vpxor	ymm2,ymm2,ymm11
2279	vpxor	ymm3,ymm3,ymm12
2280	vpxor	ymm4,ymm4,ymm13
2281
2282	dec	rax
2283	jnz	NEAR $L$select_loop_avx2_w5
2284
2285	vmovdqu	YMMWORD[rcx],ymm2
2286	vmovdqu	YMMWORD[32+rcx],ymm3
2287	vmovdqu	YMMWORD[64+rcx],ymm4
2288	vzeroupper
; Restore xmm6-xmm15 and the entry stack pointer.
2289	movaps	xmm6,XMMWORD[rsp]
2290	movaps	xmm7,XMMWORD[16+rsp]
2291	movaps	xmm8,XMMWORD[32+rsp]
2292	movaps	xmm9,XMMWORD[48+rsp]
2293	movaps	xmm10,XMMWORD[64+rsp]
2294	movaps	xmm11,XMMWORD[80+rsp]
2295	movaps	xmm12,XMMWORD[96+rsp]
2296	movaps	xmm13,XMMWORD[112+rsp]
2297	movaps	xmm14,XMMWORD[128+rsp]
2298	movaps	xmm15,XMMWORD[144+rsp]
2299	lea	rsp,[r11]
2300	DB	0F3h,0C3h		;repret
2301
2302$L$SEH_end_ecp_nistz256_avx2_select_w5:
2303
2304
2305
2306
2307
; -----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w7 -- AVX2 body of the 64-byte table select.
;
; Reached through $L$avx2_select_w7 (the jnz in nistz256_select_w7), so it
; uses the same registers:
;   rcx = destination, 64 bytes (unaligned stores)
;   rdx = table of 64 entries x 64 bytes (read with vmovdqa on ymm, so it
;         must be 32-byte aligned)
;   r8d = index; entry k (k = 1..64) is kept when r8d == k, otherwise the
;         output is all zero.
;
; Three entries are processed per iteration (21 iterations = 63 entries,
; counters start at 1, 2, 3 and advance by 3); the 64th entry is handled
; after the loop. Every entry is read regardless of the index. r11 keeps
; the entry rsp; xmm6-xmm15 are saved and restored.
; -----------------------------------------------------------------------
2308ALIGN	32
2309ecp_nistz256_avx2_select_w7:
2310
2311$L$avx2_select_w7:
2312	vzeroupper
2313	mov	r11,rsp	; remember entry rsp for the epilogue
2314	lea	rax,[((-136))+rsp]	; rax = frame anchor for the raw-encoded saves below
2315$L$SEH_begin_ecp_nistz256_avx2_select_w7:
2316DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]
2317DB	0xc5,0xf8,0x29,0x70,0xe0	; vmovaps [rax-0x20],xmm6
2318DB	0xc5,0xf8,0x29,0x78,0xf0	; vmovaps [rax-0x10],xmm7
2319DB	0xc5,0x78,0x29,0x40,0x00	; vmovaps [rax+0x00],xmm8
2320DB	0xc5,0x78,0x29,0x48,0x10	; vmovaps [rax+0x10],xmm9
2321DB	0xc5,0x78,0x29,0x50,0x20	; vmovaps [rax+0x20],xmm10
2322DB	0xc5,0x78,0x29,0x58,0x30	; vmovaps [rax+0x30],xmm11
2323DB	0xc5,0x78,0x29,0x60,0x40	; vmovaps [rax+0x40],xmm12
2324DB	0xc5,0x78,0x29,0x68,0x50	; vmovaps [rax+0x50],xmm13
2325DB	0xc5,0x78,0x29,0x70,0x60	; vmovaps [rax+0x60],xmm14
2326DB	0xc5,0x78,0x29,0x78,0x70	; vmovaps [rax+0x70],xmm15
2327	vmovdqa	ymm0,YMMWORD[$L$Three]	; ymm0 = counter step (3 in each dword)
2328
; ymm2, ymm3 accumulate the 64-byte result.
2329	vpxor	ymm2,ymm2,ymm2
2330	vpxor	ymm3,ymm3,ymm3
2331
2332	vmovdqa	ymm4,YMMWORD[$L$One]	; counter for entries 1,4,7,...
2333	vmovdqa	ymm8,YMMWORD[$L$Two]	; counter for entries 2,5,8,...
2334	vmovdqa	ymm12,YMMWORD[$L$Three]	; counter for entries 3,6,9,...
2335
2336	vmovd	xmm1,r8d
2337	vpermd	ymm1,ymm2,ymm1	; ymm2 is all zero, so dword 0 (the index) is broadcast to all lanes
2338
2339
2340	mov	rax,21	; 21 iterations x 3 entries = 63 entries
2341$L$select_loop_avx2_w7:
2342
2343	vmovdqa	ymm5,YMMWORD[rdx]
2344	vmovdqa	ymm6,YMMWORD[32+rdx]
2345
2346	vmovdqa	ymm9,YMMWORD[64+rdx]
2347	vmovdqa	ymm10,YMMWORD[96+rdx]
2348
2349	vmovdqa	ymm13,YMMWORD[128+rdx]
2350	vmovdqa	ymm14,YMMWORD[160+rdx]
2351
; One equality mask per entry of the triple.
2352	vpcmpeqd	ymm7,ymm4,ymm1
2353	vpcmpeqd	ymm11,ymm8,ymm1
2354	vpcmpeqd	ymm15,ymm12,ymm1
2355
2356	vpaddd	ymm4,ymm4,ymm0
2357	vpaddd	ymm8,ymm8,ymm0
2358	vpaddd	ymm12,ymm12,ymm0
2359	lea	rdx,[192+rdx]	; advance by three entries
2360
2361	vpand	ymm5,ymm5,ymm7
2362	vpand	ymm6,ymm6,ymm7
2363	vpand	ymm9,ymm9,ymm11
2364	vpand	ymm10,ymm10,ymm11
2365	vpand	ymm13,ymm13,ymm15
2366	vpand	ymm14,ymm14,ymm15
2367
2368	vpxor	ymm2,ymm2,ymm5
2369	vpxor	ymm3,ymm3,ymm6
2370	vpxor	ymm2,ymm2,ymm9
2371	vpxor	ymm3,ymm3,ymm10
2372	vpxor	ymm2,ymm2,ymm13
2373	vpxor	ymm3,ymm3,ymm14
2374
2375	dec	rax
2376	jnz	NEAR $L$select_loop_avx2_w7
2377
2378
; Tail: the 64th entry. ymm4 has been advanced 21 times by 3, so it holds 64.
2379	vmovdqa	ymm5,YMMWORD[rdx]
2380	vmovdqa	ymm6,YMMWORD[32+rdx]
2381
2382	vpcmpeqd	ymm7,ymm4,ymm1
2383
2384	vpand	ymm5,ymm5,ymm7
2385	vpand	ymm6,ymm6,ymm7
2386
2387	vpxor	ymm2,ymm2,ymm5
2388	vpxor	ymm3,ymm3,ymm6
2389
2390	vmovdqu	YMMWORD[rcx],ymm2
2391	vmovdqu	YMMWORD[32+rcx],ymm3
2392	vzeroupper
; Restore xmm6-xmm15 and the entry stack pointer.
2393	movaps	xmm6,XMMWORD[rsp]
2394	movaps	xmm7,XMMWORD[16+rsp]
2395	movaps	xmm8,XMMWORD[32+rsp]
2396	movaps	xmm9,XMMWORD[48+rsp]
2397	movaps	xmm10,XMMWORD[64+rsp]
2398	movaps	xmm11,XMMWORD[80+rsp]
2399	movaps	xmm12,XMMWORD[96+rsp]
2400	movaps	xmm13,XMMWORD[112+rsp]
2401	movaps	xmm14,XMMWORD[128+rsp]
2402	movaps	xmm15,XMMWORD[144+rsp]
2403	lea	rsp,[r11]
2404	DB	0F3h,0C3h		;repret
2405
2406$L$SEH_end_ecp_nistz256_avx2_select_w7:
2407
2408
; -----------------------------------------------------------------------
; __ecp_nistz256_add_toq -- internal: res = (a + b), reduced once.
;
;   In:   r12,r13,r8,r9 = a (limbs, least significant first)
;         rbx           = pointer to b (4 qwords)
;         rdi           = destination (4 qwords)
;         r14, r15      = second and fourth limb of the modulus being
;                         subtracted (the first is -1, the third is 0);
;                         callers are expected to hold $L$poly[1] and
;                         $L$poly[3] here
;   Out:  [rdi] and r12,r13,r8,r9 = result
;   Clobbers: rax, rbp, rcx, r10, r11, flags
;
; The sum is computed with its carry in r11, the modulus is subtracted, and
; if that borrows the pre-subtraction sum is restored with cmovc (no branch).
; -----------------------------------------------------------------------
2409ALIGN	32
2410__ecp_nistz256_add_toq:
2411
2412	xor	r11,r11	; r11 will receive the carry out of the addition
2413	add	r12,QWORD[rbx]
2414	adc	r13,QWORD[8+rbx]
2415	mov	rax,r12	; rax,rbp,rcx,r10 keep a copy of the unreduced sum
2416	adc	r8,QWORD[16+rbx]
2417	adc	r9,QWORD[24+rbx]
2418	mov	rbp,r13
2419	adc	r11,0
2420
; Trial subtraction of the modulus (-1, r14, 0, r15).
2421	sub	r12,-1
2422	mov	rcx,r8
2423	sbb	r13,r14
2424	sbb	r8,0
2425	mov	r10,r9
2426	sbb	r9,r15
2427	sbb	r11,0	; CF set here means the sum was below the modulus
2428
; On borrow, fall back to the saved sum.
2429	cmovc	r12,rax
2430	cmovc	r13,rbp
2431	mov	QWORD[rdi],r12
2432	cmovc	r8,rcx
2433	mov	QWORD[8+rdi],r13
2434	cmovc	r9,r10
2435	mov	QWORD[16+rdi],r8
2436	mov	QWORD[24+rdi],r9
2437
2438	DB	0F3h,0C3h		;repret
2439
2440
2441
2442
; -----------------------------------------------------------------------
; __ecp_nistz256_sub_fromq -- internal: res = (a - b), corrected once.
;
;   In:   r12,r13,r8,r9 = a (limbs, least significant first)
;         rbx           = pointer to b (4 qwords)
;         rdi           = destination (4 qwords)
;         r14, r15      = second and fourth limb of the modulus being added
;                         back (the first is -1, the third is 0); callers
;                         are expected to hold $L$poly[1] and $L$poly[3]
;   Out:  [rdi] and r12,r13,r8,r9 = result
;   Clobbers: rax, rbp, rcx, r10, r11, flags
;
; The difference is computed, r11 becomes all-ones if it borrowed, the
; modulus is added, and if there was no borrow the uncorrected difference
; is restored with cmovz (no branch).
; -----------------------------------------------------------------------
2443ALIGN	32
2444__ecp_nistz256_sub_fromq:
2445
2446	sub	r12,QWORD[rbx]
2447	sbb	r13,QWORD[8+rbx]
2448	mov	rax,r12	; rax,rbp,rcx,r10 keep a copy of the raw difference
2449	sbb	r8,QWORD[16+rbx]
2450	sbb	r9,QWORD[24+rbx]
2451	mov	rbp,r13
2452	sbb	r11,r11	; r11 = 0 if no borrow, all-ones if borrow
2453
; Add the modulus (-1, r14, 0, r15) to the difference.
2454	add	r12,-1
2455	mov	rcx,r8
2456	adc	r13,r14
2457	adc	r8,0
2458	mov	r10,r9
2459	adc	r9,r15
2460	test	r11,r11	; ZF = 1 when the subtraction did not borrow
2461
; No borrow: keep the raw difference instead of difference + modulus.
2462	cmovz	r12,rax
2463	cmovz	r13,rbp
2464	mov	QWORD[rdi],r12
2465	cmovz	r8,rcx
2466	mov	QWORD[8+rdi],r13
2467	cmovz	r9,r10
2468	mov	QWORD[16+rdi],r8
2469	mov	QWORD[24+rdi],r9
2470
2471	DB	0F3h,0C3h		;repret
2472
2473
2474
2475
; -----------------------------------------------------------------------
; __ecp_nistz256_subq -- internal: res = (a - b), corrected once, result
; returned in registers only (nothing is written to memory).
;
;   In:   rax,rbp,rcx,r10 = a (limbs, least significant first)
;         r12,r13,r8,r9   = b
;         r14, r15        = second and fourth limb of the modulus being
;                           added back (the first is -1, the third is 0);
;                           callers are expected to hold $L$poly[1] and
;                           $L$poly[3]
;   Out:  r12,r13,r8,r9   = result
;   Clobbers: rax, rbp, rcx, r10, r11, flags
; -----------------------------------------------------------------------
2476ALIGN	32
2477__ecp_nistz256_subq:
2478
2479	sub	rax,r12
2480	sbb	rbp,r13
2481	mov	r12,rax	; r12,r13,r8,r9 first receive the raw difference
2482	sbb	rcx,r8
2483	sbb	r10,r9
2484	mov	r13,rbp
2485	sbb	r11,r11	; r11 = 0 if no borrow, all-ones if borrow
2486
; rax,rbp,rcx,r10 = raw difference + modulus (-1, r14, 0, r15).
2487	add	rax,-1
2488	mov	r8,rcx
2489	adc	rbp,r14
2490	adc	rcx,0
2491	mov	r9,r10
2492	adc	r10,r15
2493	test	r11,r11
2494
; Borrow occurred: replace the raw difference by the corrected one.
2495	cmovnz	r12,rax
2496	cmovnz	r13,rbp
2497	cmovnz	r8,rcx
2498	cmovnz	r9,r10
2499
2500	DB	0F3h,0C3h		;repret
2501
2502
2503
2504
; -----------------------------------------------------------------------
; __ecp_nistz256_mul_by_2q -- internal: res = (a + a), reduced once.
;
;   In:   r12,r13,r8,r9 = a (limbs, least significant first)
;         rdi           = destination (4 qwords)
;         r14, r15      = second and fourth limb of the modulus being
;                         subtracted (the first is -1, the third is 0);
;                         callers are expected to hold $L$poly[1] and
;                         $L$poly[3]
;   Out:  [rdi] and r12,r13,r8,r9 = result
;   Clobbers: rax, rbp, rcx, r10, r11, flags
; -----------------------------------------------------------------------
2505ALIGN	32
2506__ecp_nistz256_mul_by_2q:
2507
2508	xor	r11,r11	; r11 will receive the carry out of the doubling
2509	add	r12,r12
2510	adc	r13,r13
2511	mov	rax,r12	; rax,rbp,rcx,r10 keep a copy of the unreduced value
2512	adc	r8,r8
2513	adc	r9,r9
2514	mov	rbp,r13
2515	adc	r11,0
2516
; Trial subtraction of the modulus (-1, r14, 0, r15).
2517	sub	r12,-1
2518	mov	rcx,r8
2519	sbb	r13,r14
2520	sbb	r8,0
2521	mov	r10,r9
2522	sbb	r9,r15
2523	sbb	r11,0	; CF set here means the doubled value was below the modulus
2524
; On borrow, fall back to the saved value.
2525	cmovc	r12,rax
2526	cmovc	r13,rbp
2527	mov	QWORD[rdi],r12
2528	cmovc	r8,rcx
2529	mov	QWORD[8+rdi],r13
2530	cmovc	r9,r10
2531	mov	QWORD[16+rdi],r8
2532	mov	QWORD[24+rdi],r9
2533
2534	DB	0F3h,0C3h		;repret
2535
2536
; -----------------------------------------------------------------------
; p256_point_double -- Win64 entry point.
;
;   rcx = output point, rdx = input point (moved to rdi / rsi by the
;   prologue). A point is read/written as three 32-byte fields at offsets
;   0, 32 and 64; they are called X, Y, Z in the comments below.
;
; If both bits of mask 0x80100 are set in the dword at OPENSSL_ia32cap_P+8,
; control transfers to $L$point_doublex (defined elsewhere in the file).
;
; Frame: six pushed registers plus 32*5+8 bytes. 32-byte stack slots, named
; here for readability:
;   S = [rsp+0]  M = [rsp+32]  Zsqr = [rsp+64]  in_x = [rsp+96]
;   tmp0 = [rsp+128]
; Output field pointers are parked in xmm registers across calls:
;   xmm0 = &out.X, xmm1 = &out.Y, xmm2 = &out.Z
;
; NOTE(review): __ecp_nistz256_mul_montq and __ecp_nistz256_sqr_montq are
; defined outside this view. The per-call comments assume, from their names
; and from how their results are consumed here, that they are Montgomery
; multiply / square writing to [rdi], that mul_montq leaves its result in
; r12,r13,r8,r9 and sqr_montq in r12..r15 with rsi/rbp = $L$poly[1]/[3].
; Confirm against their definitions.
;
; $L$point_double_shortcutq is also entered from p256_point_add
; ($L$add_doubleq) with rsi = input, rdi = output and the same frame size.
; -----------------------------------------------------------------------
2537global	p256_point_double
2538
2539ALIGN	32
2540p256_point_double:
2541	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2542	mov	QWORD[16+rsp],rsi
2543	mov	rax,rsp
2544$L$SEH_begin_p256_point_double:
2545	mov	rdi,rcx
2546	mov	rsi,rdx
2547
2548
2549
2550	lea	rcx,[OPENSSL_ia32cap_P]
2551	mov	rcx,QWORD[8+rcx]
2552	and	ecx,0x80100	; keep bits 8 and 19 of the capability dword
2553	cmp	ecx,0x80100	; both must be set to take the alternate path
2554	je	NEAR $L$point_doublex
2555	push	rbp
2556
2557	push	rbx
2558
2559	push	r12
2560
2561	push	r13
2562
2563	push	r14
2564
2565	push	r15
2566
2567	sub	rsp,32*5+8
2568
2569$L$point_doubleq_body:
2570
2571$L$point_double_shortcutq:
2572	movdqu	xmm0,XMMWORD[rsi]	; xmm0:xmm1 = in.X
2573	mov	rbx,rsi	; rbx = input point, kept across the first calls
2574	movdqu	xmm1,XMMWORD[16+rsi]
2575	mov	r12,QWORD[((32+0))+rsi]	; r12,r13,r8,r9 = in.Y
2576	mov	r13,QWORD[((32+8))+rsi]
2577	mov	r8,QWORD[((32+16))+rsi]
2578	mov	r9,QWORD[((32+24))+rsi]
2579	mov	r14,QWORD[(($L$poly+8))]	; modulus limbs used by the add/sub helpers
2580	mov	r15,QWORD[(($L$poly+24))]
2581	movdqa	XMMWORD[96+rsp],xmm0	; in_x = copy of in.X
2582	movdqa	XMMWORD[(96+16)+rsp],xmm1
2583	lea	r10,[32+rdi]	; &out.Y
2584	lea	r11,[64+rdi]	; &out.Z
2585DB	102,72,15,110,199	; movq xmm0,rdi  (park &out.X)
2586DB	102,73,15,110,202	; movq xmm1,r10  (park &out.Y)
2587DB	102,73,15,110,211	; movq xmm2,r11  (park &out.Z)
2588
; S = 2 * in.Y
2589	lea	rdi,[rsp]
2590	call	__ecp_nistz256_mul_by_2q
2591
; Zsqr = sqr(in.Z)
2592	mov	rax,QWORD[((64+0))+rsi]
2593	mov	r14,QWORD[((64+8))+rsi]
2594	mov	r15,QWORD[((64+16))+rsi]
2595	mov	r8,QWORD[((64+24))+rsi]
2596	lea	rsi,[((64-0))+rsi]
2597	lea	rdi,[64+rsp]
2598	call	__ecp_nistz256_sqr_montq
2599
; S = sqr(S)
2600	mov	rax,QWORD[((0+0))+rsp]
2601	mov	r14,QWORD[((8+0))+rsp]
2602	lea	rsi,[((0+0))+rsp]
2603	mov	r15,QWORD[((16+0))+rsp]
2604	mov	r8,QWORD[((24+0))+rsp]
2605	lea	rdi,[rsp]
2606	call	__ecp_nistz256_sqr_montq
2607
; out.Z = 2 * mul(in.Z, in.Y)
2608	mov	rax,QWORD[32+rbx]
2609	mov	r9,QWORD[((64+0))+rbx]
2610	mov	r10,QWORD[((64+8))+rbx]
2611	mov	r11,QWORD[((64+16))+rbx]
2612	mov	r12,QWORD[((64+24))+rbx]
2613	lea	rsi,[((64-0))+rbx]
2614	lea	rbx,[32+rbx]
2615DB	102,72,15,126,215	; movq rdi,xmm2  (rdi = &out.Z)
2616	call	__ecp_nistz256_mul_montq
2617	call	__ecp_nistz256_mul_by_2q
2618
; M = in_x + Zsqr
2619	mov	r12,QWORD[((96+0))+rsp]
2620	mov	r13,QWORD[((96+8))+rsp]
2621	lea	rbx,[64+rsp]
2622	mov	r8,QWORD[((96+16))+rsp]
2623	mov	r9,QWORD[((96+24))+rsp]
2624	lea	rdi,[32+rsp]
2625	call	__ecp_nistz256_add_toq
2626
; Zsqr = in_x - Zsqr
2627	mov	r12,QWORD[((96+0))+rsp]
2628	mov	r13,QWORD[((96+8))+rsp]
2629	lea	rbx,[64+rsp]
2630	mov	r8,QWORD[((96+16))+rsp]
2631	mov	r9,QWORD[((96+24))+rsp]
2632	lea	rdi,[64+rsp]
2633	call	__ecp_nistz256_sub_fromq
2634
; out.Y = sqr(S)
2635	mov	rax,QWORD[((0+0))+rsp]
2636	mov	r14,QWORD[((8+0))+rsp]
2637	lea	rsi,[((0+0))+rsp]
2638	mov	r15,QWORD[((16+0))+rsp]
2639	mov	r8,QWORD[((24+0))+rsp]
2640DB	102,72,15,126,207	; movq rdi,xmm1  (rdi = &out.Y)
2641	call	__ecp_nistz256_sqr_montq
; Inline halving of the value in r12..r15 (see NOTE(review) in the header
; for the assumed register state): compute value + modulus with carry in
; r9, keep the original if its low bit is clear, then shift the 257-bit
; quantity right by one and store it to [rdi].
2642	xor	r9,r9
2643	mov	rax,r12
2644	add	r12,-1
2645	mov	r10,r13
2646	adc	r13,rsi
2647	mov	rcx,r14
2648	adc	r14,0
2649	mov	r8,r15
2650	adc	r15,rbp
2651	adc	r9,0
2652	xor	rsi,rsi
2653	test	rax,1	; ZF = 1 when the original value is even
2654
2655	cmovz	r12,rax
2656	cmovz	r13,r10
2657	cmovz	r14,rcx
2658	cmovz	r15,r8
2659	cmovz	r9,rsi	; even: no carry bit to shift in
2660
2661	mov	rax,r13
2662	shr	r12,1
2663	shl	rax,63
2664	mov	r10,r14
2665	shr	r13,1
2666	or	r12,rax
2667	shl	r10,63
2668	mov	rcx,r15
2669	shr	r14,1
2670	or	r13,r10
2671	shl	rcx,63
2672	mov	QWORD[rdi],r12
2673	shr	r15,1
2674	mov	QWORD[8+rdi],r13
2675	shl	r9,63
2676	or	r14,rcx
2677	or	r15,r9
2678	mov	QWORD[16+rdi],r14
2679	mov	QWORD[24+rdi],r15
; M = mul(M, Zsqr)
2680	mov	rax,QWORD[64+rsp]
2681	lea	rbx,[64+rsp]
2682	mov	r9,QWORD[((0+32))+rsp]
2683	mov	r10,QWORD[((8+32))+rsp]
2684	lea	rsi,[((0+32))+rsp]
2685	mov	r11,QWORD[((16+32))+rsp]
2686	mov	r12,QWORD[((24+32))+rsp]
2687	lea	rdi,[32+rsp]
2688	call	__ecp_nistz256_mul_montq
2689
; M = 3 * M, done as tmp0 = 2*M followed by M = tmp0 + M
2690	lea	rdi,[128+rsp]
2691	call	__ecp_nistz256_mul_by_2q
2692
2693	lea	rbx,[32+rsp]
2694	lea	rdi,[32+rsp]
2695	call	__ecp_nistz256_add_toq
2696
; S = mul(S, in_x)
2697	mov	rax,QWORD[96+rsp]
2698	lea	rbx,[96+rsp]
2699	mov	r9,QWORD[((0+0))+rsp]
2700	mov	r10,QWORD[((8+0))+rsp]
2701	lea	rsi,[((0+0))+rsp]
2702	mov	r11,QWORD[((16+0))+rsp]
2703	mov	r12,QWORD[((24+0))+rsp]
2704	lea	rdi,[rsp]
2705	call	__ecp_nistz256_mul_montq
2706
; tmp0 = 2 * S
2707	lea	rdi,[128+rsp]
2708	call	__ecp_nistz256_mul_by_2q
2709
; out.X = sqr(M)
2710	mov	rax,QWORD[((0+32))+rsp]
2711	mov	r14,QWORD[((8+32))+rsp]
2712	lea	rsi,[((0+32))+rsp]
2713	mov	r15,QWORD[((16+32))+rsp]
2714	mov	r8,QWORD[((24+32))+rsp]
2715DB	102,72,15,126,199	; movq rdi,xmm0  (rdi = &out.X)
2716	call	__ecp_nistz256_sqr_montq
2717
; out.X = out.X - tmp0. The moves below put the value and the modulus limbs
; into the registers the sub helper reads (r12,r13,r8,r9 and r14,r15).
2718	lea	rbx,[128+rsp]
2719	mov	r8,r14
2720	mov	r9,r15
2721	mov	r14,rsi
2722	mov	r15,rbp
2723	call	__ecp_nistz256_sub_fromq
2724
; r12,r13,r8,r9 = S - out.X (register-only subtract)
2725	mov	rax,QWORD[((0+0))+rsp]
2726	mov	rbp,QWORD[((0+8))+rsp]
2727	mov	rcx,QWORD[((0+16))+rsp]
2728	mov	r10,QWORD[((0+24))+rsp]
2729	lea	rdi,[rsp]
2730	call	__ecp_nistz256_subq
2731
; Store that difference into S and set up S = mul(S, M) in the same pass.
2732	mov	rax,QWORD[32+rsp]
2733	lea	rbx,[32+rsp]
2734	mov	r14,r12
2735	xor	ecx,ecx	; sets ZF; only mov/lea follow, so both cmovz below always copy
2736	mov	QWORD[((0+0))+rsp],r12
2737	mov	r10,r13
2738	mov	QWORD[((0+8))+rsp],r13
2739	cmovz	r11,r8
2740	mov	QWORD[((0+16))+rsp],r8
2741	lea	rsi,[((0-0))+rsp]
2742	cmovz	r12,r9
2743	mov	QWORD[((0+24))+rsp],r9
2744	mov	r9,r14
2745	lea	rdi,[rsp]
2746	call	__ecp_nistz256_mul_montq
2747
; out.Y = S - out.Y
2748DB	102,72,15,126,203	; movq rbx,xmm1  (rbx = &out.Y)
2749DB	102,72,15,126,207	; movq rdi,xmm1  (rdi = &out.Y)
2750	call	__ecp_nistz256_sub_fromq
2751
; Epilogue: rsi = address just above the six pushed registers.
2752	lea	rsi,[((160+56))+rsp]
2753
2754	mov	r15,QWORD[((-48))+rsi]
2755
2756	mov	r14,QWORD[((-40))+rsi]
2757
2758	mov	r13,QWORD[((-32))+rsi]
2759
2760	mov	r12,QWORD[((-24))+rsi]
2761
2762	mov	rbx,QWORD[((-16))+rsi]
2763
2764	mov	rbp,QWORD[((-8))+rsi]
2765
2766	lea	rsp,[rsi]
2767
2768$L$point_doubleq_epilogue:
2769	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2770	mov	rsi,QWORD[16+rsp]
2771	DB	0F3h,0C3h		;repret
2772
2773$L$SEH_end_p256_point_double:
; -----------------------------------------------------------------------
; p256_point_add -- Win64 entry point.
;
;   rcx = output point, rdx = first input (in1), r8 = second input (in2);
;   the prologue moves them to rdi / rsi / rdx. A point is read/written as
;   three 32-byte fields at offsets 0, 32 and 64, called X, Y, Z below.
;
; If both bits of mask 0x80100 are set in the dword at OPENSSL_ia32cap_P+8,
; control transfers to $L$point_addx (defined elsewhere in the file).
;
; Frame: six pushed registers plus 32*18+8 bytes. 32-byte stack slots, named
; here for readability:
;   H=0      Hsqr/Z1sqr=32  R=64      Rsqr/Z2sqr=96  Hcub=128  U1=160
;   U2=192   S1=224         S2=256    res_x=288      res_y=320 res_z=352
;   in1_x=384 in1_y=416 in1_z=448     in2_x=480 in2_y=512 in2_z=544
;
; Values parked in xmm registers across calls:
;   xmm0 = output pointer, xmm1 = in1 pointer (after the second squaring)
;   xmm5 = all-ones if in1.Z is zero, xmm4 = all-ones if in2.Z is zero
;   xmm2 = xmm4 | xmm5, xmm3 = OR of the limbs of R
;
; Special cases after H and R are known:
;   H != 0 or either Z is zero -> general path ($L$add_proceedq); the final
;        masked copies return in2 when in1.Z is zero and in1 when in2.Z is
;        zero
;   H == 0 and R == 0 -> jump into p256_point_double's shortcut entry
;   H == 0 and R != 0 -> write 96 zero bytes to the output
; These two branches depend on the input values.
;
; NOTE(review): __ecp_nistz256_mul_montq and __ecp_nistz256_sqr_montq are
; defined outside this view. The per-call comments assume, from their names
; and from how their results are consumed here, that they are Montgomery
; multiply / square writing to [rdi], and that mul_montq leaves its result
; in r12,r13,r8,r9 with r14/r15 usable by the add/sub helpers. Confirm
; against their definitions.
; -----------------------------------------------------------------------
2774global	p256_point_add
2775
2776ALIGN	32
2777p256_point_add:
2778	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2779	mov	QWORD[16+rsp],rsi
2780	mov	rax,rsp
2781$L$SEH_begin_p256_point_add:
2782	mov	rdi,rcx
2783	mov	rsi,rdx
2784	mov	rdx,r8
2785
2786
2787
2788	lea	rcx,[OPENSSL_ia32cap_P]
2789	mov	rcx,QWORD[8+rcx]
2790	and	ecx,0x80100	; keep bits 8 and 19 of the capability dword
2791	cmp	ecx,0x80100	; both must be set to take the alternate path
2792	je	NEAR $L$point_addx
2793	push	rbp
2794
2795	push	rbx
2796
2797	push	r12
2798
2799	push	r13
2800
2801	push	r14
2802
2803	push	r15
2804
2805	sub	rsp,32*18+8
2806
2807$L$point_addq_body:
2808
; Copy in1 to the stack and start building the "in1.Z is zero" mask.
2809	movdqu	xmm0,XMMWORD[rsi]
2810	movdqu	xmm1,XMMWORD[16+rsi]
2811	movdqu	xmm2,XMMWORD[32+rsi]
2812	movdqu	xmm3,XMMWORD[48+rsi]
2813	movdqu	xmm4,XMMWORD[64+rsi]
2814	movdqu	xmm5,XMMWORD[80+rsi]
2815	mov	rbx,rsi	; rbx = in1
2816	mov	rsi,rdx	; rsi = in2
2817	movdqa	XMMWORD[384+rsp],xmm0
2818	movdqa	XMMWORD[(384+16)+rsp],xmm1
2819	movdqa	XMMWORD[416+rsp],xmm2
2820	movdqa	XMMWORD[(416+16)+rsp],xmm3
2821	movdqa	XMMWORD[448+rsp],xmm4
2822	movdqa	XMMWORD[(448+16)+rsp],xmm5
2823	por	xmm5,xmm4	; OR-fold the two halves of in1.Z
2824
; Copy in2 to the stack; interleaved with the OR-fold of in1.Z across dwords.
2825	movdqu	xmm0,XMMWORD[rsi]
2826	pshufd	xmm3,xmm5,0xb1
2827	movdqu	xmm1,XMMWORD[16+rsi]
2828	movdqu	xmm2,XMMWORD[32+rsi]
2829	por	xmm5,xmm3
2830	movdqu	xmm3,XMMWORD[48+rsi]
2831	mov	rax,QWORD[((64+0))+rsi]
2832	mov	r14,QWORD[((64+8))+rsi]
2833	mov	r15,QWORD[((64+16))+rsi]
2834	mov	r8,QWORD[((64+24))+rsi]
2835	movdqa	XMMWORD[480+rsp],xmm0
2836	pshufd	xmm4,xmm5,0x1e
2837	movdqa	XMMWORD[(480+16)+rsp],xmm1
2838	movdqu	xmm0,XMMWORD[64+rsi]
2839	movdqu	xmm1,XMMWORD[80+rsi]
2840	movdqa	XMMWORD[512+rsp],xmm2
2841	movdqa	XMMWORD[(512+16)+rsp],xmm3
2842	por	xmm5,xmm4
2843	pxor	xmm4,xmm4
2844	por	xmm1,xmm0	; OR-fold the two halves of in2.Z
2845DB	102,72,15,110,199	; movq xmm0,rdi  (park the output pointer)
2846
; Z2sqr = sqr(in2.Z); in2.Z is also copied to its stack slot.
2847	lea	rsi,[((64-0))+rsi]
2848	mov	QWORD[((544+0))+rsp],rax
2849	mov	QWORD[((544+8))+rsp],r14
2850	mov	QWORD[((544+16))+rsp],r15
2851	mov	QWORD[((544+24))+rsp],r8
2852	lea	rdi,[96+rsp]
2853	call	__ecp_nistz256_sqr_montq
2854
; Finish both zero masks: xmm5 for in1.Z, xmm4 for in2.Z.
2855	pcmpeqd	xmm5,xmm4
2856	pshufd	xmm4,xmm1,0xb1
2857	por	xmm4,xmm1
2858	pshufd	xmm5,xmm5,0
2859	pshufd	xmm3,xmm4,0x1e
2860	por	xmm4,xmm3
2861	pxor	xmm3,xmm3
2862	pcmpeqd	xmm4,xmm3
2863	pshufd	xmm4,xmm4,0
; Z1sqr = sqr(in1.Z)
2864	mov	rax,QWORD[((64+0))+rbx]
2865	mov	r14,QWORD[((64+8))+rbx]
2866	mov	r15,QWORD[((64+16))+rbx]
2867	mov	r8,QWORD[((64+24))+rbx]
2868DB	102,72,15,110,203	; movq xmm1,rbx  (park the in1 pointer)
2869
2870	lea	rsi,[((64-0))+rbx]
2871	lea	rdi,[32+rsp]
2872	call	__ecp_nistz256_sqr_montq
2873
; S1 = mul(Z2sqr, in2_z)
2874	mov	rax,QWORD[544+rsp]
2875	lea	rbx,[544+rsp]
2876	mov	r9,QWORD[((0+96))+rsp]
2877	mov	r10,QWORD[((8+96))+rsp]
2878	lea	rsi,[((0+96))+rsp]
2879	mov	r11,QWORD[((16+96))+rsp]
2880	mov	r12,QWORD[((24+96))+rsp]
2881	lea	rdi,[224+rsp]
2882	call	__ecp_nistz256_mul_montq
2883
; S2 = mul(Z1sqr, in1_z)
2884	mov	rax,QWORD[448+rsp]
2885	lea	rbx,[448+rsp]
2886	mov	r9,QWORD[((0+32))+rsp]
2887	mov	r10,QWORD[((8+32))+rsp]
2888	lea	rsi,[((0+32))+rsp]
2889	mov	r11,QWORD[((16+32))+rsp]
2890	mov	r12,QWORD[((24+32))+rsp]
2891	lea	rdi,[256+rsp]
2892	call	__ecp_nistz256_mul_montq
2893
; S1 = mul(S1, in1_y)
2894	mov	rax,QWORD[416+rsp]
2895	lea	rbx,[416+rsp]
2896	mov	r9,QWORD[((0+224))+rsp]
2897	mov	r10,QWORD[((8+224))+rsp]
2898	lea	rsi,[((0+224))+rsp]
2899	mov	r11,QWORD[((16+224))+rsp]
2900	mov	r12,QWORD[((24+224))+rsp]
2901	lea	rdi,[224+rsp]
2902	call	__ecp_nistz256_mul_montq
2903
; S2 = mul(S2, in2_y)
2904	mov	rax,QWORD[512+rsp]
2905	lea	rbx,[512+rsp]
2906	mov	r9,QWORD[((0+256))+rsp]
2907	mov	r10,QWORD[((8+256))+rsp]
2908	lea	rsi,[((0+256))+rsp]
2909	mov	r11,QWORD[((16+256))+rsp]
2910	mov	r12,QWORD[((24+256))+rsp]
2911	lea	rdi,[256+rsp]
2912	call	__ecp_nistz256_mul_montq
2913
; R = S2 - S1
2914	lea	rbx,[224+rsp]
2915	lea	rdi,[64+rsp]
2916	call	__ecp_nistz256_sub_fromq
2917
; r12 = OR of the limbs of R; xmm2 = "either Z is zero" mask.
2918	or	r12,r13
2919	movdqa	xmm2,xmm4
2920	or	r12,r8
2921	or	r12,r9
2922	por	xmm2,xmm5
2923DB	102,73,15,110,220	; movq xmm3,r12  (park the R-is-nonzero word)
2924
; U1 = mul(Z2sqr, in1_x)
2925	mov	rax,QWORD[384+rsp]
2926	lea	rbx,[384+rsp]
2927	mov	r9,QWORD[((0+96))+rsp]
2928	mov	r10,QWORD[((8+96))+rsp]
2929	lea	rsi,[((0+96))+rsp]
2930	mov	r11,QWORD[((16+96))+rsp]
2931	mov	r12,QWORD[((24+96))+rsp]
2932	lea	rdi,[160+rsp]
2933	call	__ecp_nistz256_mul_montq
2934
; U2 = mul(Z1sqr, in2_x)
2935	mov	rax,QWORD[480+rsp]
2936	lea	rbx,[480+rsp]
2937	mov	r9,QWORD[((0+32))+rsp]
2938	mov	r10,QWORD[((8+32))+rsp]
2939	lea	rsi,[((0+32))+rsp]
2940	mov	r11,QWORD[((16+32))+rsp]
2941	mov	r12,QWORD[((24+32))+rsp]
2942	lea	rdi,[192+rsp]
2943	call	__ecp_nistz256_mul_montq
2944
; H = U2 - U1
2945	lea	rbx,[160+rsp]
2946	lea	rdi,[rsp]
2947	call	__ecp_nistz256_sub_fromq
2948
; r12 = OR of the limbs of H.
2949	or	r12,r13
2950	or	r12,r8
2951	or	r12,r9
2952
2953DB	102,73,15,126,208	; movq r8,xmm2  (either-Z-is-zero mask)
2954DB	102,73,15,126,217	; movq r9,xmm3  (R-is-nonzero word)
2955	or	r12,r8	; nonzero when H != 0 or either Z is zero
2956DB	0x3e	; raw 0x3E prefix byte on the following jcc (branch-taken hint encoding)
2957	jnz	NEAR $L$add_proceedq
2958
2959
2960
; Here H == 0 and neither Z is zero.
2961	test	r9,r9
2962	jz	NEAR $L$add_doubleq	; R == 0 as well: hand over to the doubling code
2963
2964
2965
2966
2967
2968
; H == 0 but R != 0: write an all-zero point to the output.
2969DB	102,72,15,126,199	; movq rdi,xmm0  (rdi = output pointer)
2970	pxor	xmm0,xmm0
2971	movdqu	XMMWORD[rdi],xmm0
2972	movdqu	XMMWORD[16+rdi],xmm0
2973	movdqu	XMMWORD[32+rdi],xmm0
2974	movdqu	XMMWORD[48+rdi],xmm0
2975	movdqu	XMMWORD[64+rdi],xmm0
2976	movdqu	XMMWORD[80+rdi],xmm0
2977	jmp	NEAR $L$add_doneq
2978
2979ALIGN	32
2980$L$add_doubleq:
2981DB	102,72,15,126,206	; movq rsi,xmm1  (rsi = in1 pointer)
2982DB	102,72,15,126,199	; movq rdi,xmm0  (rdi = output pointer)
2983	add	rsp,416	; 32*18+8 - (32*5+8): shrink the frame to p256_point_double's size
2984
2985	jmp	NEAR $L$point_double_shortcutq
2986
2987
2988ALIGN	32
2989$L$add_proceedq:
; Rsqr = sqr(R)
2990	mov	rax,QWORD[((0+64))+rsp]
2991	mov	r14,QWORD[((8+64))+rsp]
2992	lea	rsi,[((0+64))+rsp]
2993	mov	r15,QWORD[((16+64))+rsp]
2994	mov	r8,QWORD[((24+64))+rsp]
2995	lea	rdi,[96+rsp]
2996	call	__ecp_nistz256_sqr_montq
2997
; res_z = mul(H, in1_z)
2998	mov	rax,QWORD[448+rsp]
2999	lea	rbx,[448+rsp]
3000	mov	r9,QWORD[((0+0))+rsp]
3001	mov	r10,QWORD[((8+0))+rsp]
3002	lea	rsi,[((0+0))+rsp]
3003	mov	r11,QWORD[((16+0))+rsp]
3004	mov	r12,QWORD[((24+0))+rsp]
3005	lea	rdi,[352+rsp]
3006	call	__ecp_nistz256_mul_montq
3007
; Hsqr = sqr(H)
3008	mov	rax,QWORD[((0+0))+rsp]
3009	mov	r14,QWORD[((8+0))+rsp]
3010	lea	rsi,[((0+0))+rsp]
3011	mov	r15,QWORD[((16+0))+rsp]
3012	mov	r8,QWORD[((24+0))+rsp]
3013	lea	rdi,[32+rsp]
3014	call	__ecp_nistz256_sqr_montq
3015
; res_z = mul(res_z, in2_z)
3016	mov	rax,QWORD[544+rsp]
3017	lea	rbx,[544+rsp]
3018	mov	r9,QWORD[((0+352))+rsp]
3019	mov	r10,QWORD[((8+352))+rsp]
3020	lea	rsi,[((0+352))+rsp]
3021	mov	r11,QWORD[((16+352))+rsp]
3022	mov	r12,QWORD[((24+352))+rsp]
3023	lea	rdi,[352+rsp]
3024	call	__ecp_nistz256_mul_montq
3025
; Hcub = mul(Hsqr, H)
3026	mov	rax,QWORD[rsp]
3027	lea	rbx,[rsp]
3028	mov	r9,QWORD[((0+32))+rsp]
3029	mov	r10,QWORD[((8+32))+rsp]
3030	lea	rsi,[((0+32))+rsp]
3031	mov	r11,QWORD[((16+32))+rsp]
3032	mov	r12,QWORD[((24+32))+rsp]
3033	lea	rdi,[128+rsp]
3034	call	__ecp_nistz256_mul_montq
3035
; U2 = mul(Hsqr, U1)
3036	mov	rax,QWORD[160+rsp]
3037	lea	rbx,[160+rsp]
3038	mov	r9,QWORD[((0+32))+rsp]
3039	mov	r10,QWORD[((8+32))+rsp]
3040	lea	rsi,[((0+32))+rsp]
3041	mov	r11,QWORD[((16+32))+rsp]
3042	mov	r12,QWORD[((24+32))+rsp]
3043	lea	rdi,[192+rsp]
3044	call	__ecp_nistz256_mul_montq
3045
3046
3047
3048
; Inline doubling of the value in r12,r13,r8,r9 (same sequence as
; __ecp_nistz256_mul_by_2q but without the store), interleaved with loading
; Rsqr into rax,rbp,rcx,r10 for the register-only subtract that follows.
3049	xor	r11,r11
3050	add	r12,r12
3051	lea	rsi,[96+rsp]	; rsi = &Rsqr
3052	adc	r13,r13
3053	mov	rax,r12
3054	adc	r8,r8
3055	adc	r9,r9
3056	mov	rbp,r13
3057	adc	r11,0
3058
3059	sub	r12,-1
3060	mov	rcx,r8
3061	sbb	r13,r14
3062	sbb	r8,0
3063	mov	r10,r9
3064	sbb	r9,r15
3065	sbb	r11,0
3066
3067	cmovc	r12,rax
3068	mov	rax,QWORD[rsi]
3069	cmovc	r13,rbp
3070	mov	rbp,QWORD[8+rsi]
3071	cmovc	r8,rcx
3072	mov	rcx,QWORD[16+rsi]
3073	cmovc	r9,r10
3074	mov	r10,QWORD[24+rsi]
3075
; r12,r13,r8,r9 = Rsqr - 2*U2
3076	call	__ecp_nistz256_subq
3077
; res_x = (Rsqr - 2*U2) - Hcub
3078	lea	rbx,[128+rsp]
3079	lea	rdi,[288+rsp]
3080	call	__ecp_nistz256_sub_fromq
3081
; res_y = U2 - res_x (register-only subtract, stored explicitly below)
3082	mov	rax,QWORD[((192+0))+rsp]
3083	mov	rbp,QWORD[((192+8))+rsp]
3084	mov	rcx,QWORD[((192+16))+rsp]
3085	mov	r10,QWORD[((192+24))+rsp]
3086	lea	rdi,[320+rsp]
3087
3088	call	__ecp_nistz256_subq
3089
3090	mov	QWORD[rdi],r12
3091	mov	QWORD[8+rdi],r13
3092	mov	QWORD[16+rdi],r8
3093	mov	QWORD[24+rdi],r9
; S2 = mul(S1, Hcub)
3094	mov	rax,QWORD[128+rsp]
3095	lea	rbx,[128+rsp]
3096	mov	r9,QWORD[((0+224))+rsp]
3097	mov	r10,QWORD[((8+224))+rsp]
3098	lea	rsi,[((0+224))+rsp]
3099	mov	r11,QWORD[((16+224))+rsp]
3100	mov	r12,QWORD[((24+224))+rsp]
3101	lea	rdi,[256+rsp]
3102	call	__ecp_nistz256_mul_montq
3103
; res_y = mul(R, res_y)
3104	mov	rax,QWORD[320+rsp]
3105	lea	rbx,[320+rsp]
3106	mov	r9,QWORD[((0+64))+rsp]
3107	mov	r10,QWORD[((8+64))+rsp]
3108	lea	rsi,[((0+64))+rsp]
3109	mov	r11,QWORD[((16+64))+rsp]
3110	mov	r12,QWORD[((24+64))+rsp]
3111	lea	rdi,[320+rsp]
3112	call	__ecp_nistz256_mul_montq
3113
; res_y = res_y - S2
3114	lea	rbx,[256+rsp]
3115	lea	rdi,[320+rsp]
3116	call	__ecp_nistz256_sub_fromq
3117
3118DB	102,72,15,126,199	; movq rdi,xmm0  (rdi = output pointer)
3119
; Branch-free output selection, one field at a time:
;   t   = (xmm5 ? in2 field : computed field)
;   out = (xmm4 ? in1 field : t)
; out.Z:
3120	movdqa	xmm0,xmm5
3121	movdqa	xmm1,xmm5
3122	pandn	xmm0,XMMWORD[352+rsp]
3123	movdqa	xmm2,xmm5
3124	pandn	xmm1,XMMWORD[((352+16))+rsp]
3125	movdqa	xmm3,xmm5
3126	pand	xmm2,XMMWORD[544+rsp]
3127	pand	xmm3,XMMWORD[((544+16))+rsp]
3128	por	xmm2,xmm0
3129	por	xmm3,xmm1
3130
3131	movdqa	xmm0,xmm4
3132	movdqa	xmm1,xmm4
3133	pandn	xmm0,xmm2
3134	movdqa	xmm2,xmm4
3135	pandn	xmm1,xmm3
3136	movdqa	xmm3,xmm4
3137	pand	xmm2,XMMWORD[448+rsp]
3138	pand	xmm3,XMMWORD[((448+16))+rsp]
3139	por	xmm2,xmm0
3140	por	xmm3,xmm1
3141	movdqu	XMMWORD[64+rdi],xmm2
3142	movdqu	XMMWORD[80+rdi],xmm3
3143
; out.X:
3144	movdqa	xmm0,xmm5
3145	movdqa	xmm1,xmm5
3146	pandn	xmm0,XMMWORD[288+rsp]
3147	movdqa	xmm2,xmm5
3148	pandn	xmm1,XMMWORD[((288+16))+rsp]
3149	movdqa	xmm3,xmm5
3150	pand	xmm2,XMMWORD[480+rsp]
3151	pand	xmm3,XMMWORD[((480+16))+rsp]
3152	por	xmm2,xmm0
3153	por	xmm3,xmm1
3154
3155	movdqa	xmm0,xmm4
3156	movdqa	xmm1,xmm4
3157	pandn	xmm0,xmm2
3158	movdqa	xmm2,xmm4
3159	pandn	xmm1,xmm3
3160	movdqa	xmm3,xmm4
3161	pand	xmm2,XMMWORD[384+rsp]
3162	pand	xmm3,XMMWORD[((384+16))+rsp]
3163	por	xmm2,xmm0
3164	por	xmm3,xmm1
3165	movdqu	XMMWORD[rdi],xmm2
3166	movdqu	XMMWORD[16+rdi],xmm3
3167
; out.Y:
3168	movdqa	xmm0,xmm5
3169	movdqa	xmm1,xmm5
3170	pandn	xmm0,XMMWORD[320+rsp]
3171	movdqa	xmm2,xmm5
3172	pandn	xmm1,XMMWORD[((320+16))+rsp]
3173	movdqa	xmm3,xmm5
3174	pand	xmm2,XMMWORD[512+rsp]
3175	pand	xmm3,XMMWORD[((512+16))+rsp]
3176	por	xmm2,xmm0
3177	por	xmm3,xmm1
3178
3179	movdqa	xmm0,xmm4
3180	movdqa	xmm1,xmm4
3181	pandn	xmm0,xmm2
3182	movdqa	xmm2,xmm4
3183	pandn	xmm1,xmm3
3184	movdqa	xmm3,xmm4
3185	pand	xmm2,XMMWORD[416+rsp]
3186	pand	xmm3,XMMWORD[((416+16))+rsp]
3187	por	xmm2,xmm0
3188	por	xmm3,xmm1
3189	movdqu	XMMWORD[32+rdi],xmm2
3190	movdqu	XMMWORD[48+rdi],xmm3
3191
3192$L$add_doneq:
; Epilogue: rsi = address just above the six pushed registers.
3193	lea	rsi,[((576+56))+rsp]
3194
3195	mov	r15,QWORD[((-48))+rsi]
3196
3197	mov	r14,QWORD[((-40))+rsi]
3198
3199	mov	r13,QWORD[((-32))+rsi]
3200
3201	mov	r12,QWORD[((-24))+rsi]
3202
3203	mov	rbx,QWORD[((-16))+rsi]
3204
3205	mov	rbp,QWORD[((-8))+rsi]
3206
3207	lea	rsp,[rsi]
3208
3209$L$point_addq_epilogue:
3210	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
3211	mov	rsi,QWORD[16+rsp]
3212	DB	0F3h,0C3h		;repret
3213
3214$L$SEH_end_p256_point_add:
3215global	p256_point_add_affine
3216
3217ALIGN	32
3218p256_point_add_affine:
3219	mov	QWORD[8+rsp],rdi	;WIN64 prologue
3220	mov	QWORD[16+rsp],rsi
3221	mov	rax,rsp
3222$L$SEH_begin_p256_point_add_affine:
3223	mov	rdi,rcx
3224	mov	rsi,rdx
3225	mov	rdx,r8
3226
3227
3228
3229	lea	rcx,[OPENSSL_ia32cap_P]
3230	mov	rcx,QWORD[8+rcx]
3231	and	ecx,0x80100
3232	cmp	ecx,0x80100
3233	je	NEAR $L$point_add_affinex
3234	push	rbp
3235
3236	push	rbx
3237
3238	push	r12
3239
3240	push	r13
3241
3242	push	r14
3243
3244	push	r15
3245
3246	sub	rsp,32*15+8
3247
3248$L$add_affineq_body:
3249
3250	movdqu	xmm0,XMMWORD[rsi]
3251	mov	rbx,rdx
3252	movdqu	xmm1,XMMWORD[16+rsi]
3253	movdqu	xmm2,XMMWORD[32+rsi]
3254	movdqu	xmm3,XMMWORD[48+rsi]
3255	movdqu	xmm4,XMMWORD[64+rsi]
3256	movdqu	xmm5,XMMWORD[80+rsi]
3257	mov	rax,QWORD[((64+0))+rsi]
3258	mov	r14,QWORD[((64+8))+rsi]
3259	mov	r15,QWORD[((64+16))+rsi]
3260	mov	r8,QWORD[((64+24))+rsi]
3261	movdqa	XMMWORD[320+rsp],xmm0
3262	movdqa	XMMWORD[(320+16)+rsp],xmm1
3263	movdqa	XMMWORD[352+rsp],xmm2
3264	movdqa	XMMWORD[(352+16)+rsp],xmm3
3265	movdqa	XMMWORD[384+rsp],xmm4
3266	movdqa	XMMWORD[(384+16)+rsp],xmm5
3267	por	xmm5,xmm4
3268
3269	movdqu	xmm0,XMMWORD[rbx]
3270	pshufd	xmm3,xmm5,0xb1
3271	movdqu	xmm1,XMMWORD[16+rbx]
3272	movdqu	xmm2,XMMWORD[32+rbx]
3273	por	xmm5,xmm3
3274	movdqu	xmm3,XMMWORD[48+rbx]
3275	movdqa	XMMWORD[416+rsp],xmm0
3276	pshufd	xmm4,xmm5,0x1e
3277	movdqa	XMMWORD[(416+16)+rsp],xmm1
3278	por	xmm1,xmm0
3279DB	102,72,15,110,199
3280	movdqa	XMMWORD[448+rsp],xmm2
3281	movdqa	XMMWORD[(448+16)+rsp],xmm3
3282	por	xmm3,xmm2
3283	por	xmm5,xmm4
3284	pxor	xmm4,xmm4
3285	por	xmm3,xmm1
3286
3287	lea	rsi,[((64-0))+rsi]
3288	lea	rdi,[32+rsp]
3289	call	__ecp_nistz256_sqr_montq
3290
3291	pcmpeqd	xmm5,xmm4
3292	pshufd	xmm4,xmm3,0xb1
3293	mov	rax,QWORD[rbx]
3294
3295	mov	r9,r12
3296	por	xmm4,xmm3
3297	pshufd	xmm5,xmm5,0
3298	pshufd	xmm3,xmm4,0x1e
3299	mov	r10,r13
3300	por	xmm4,xmm3
3301	pxor	xmm3,xmm3
3302	mov	r11,r14
3303	pcmpeqd	xmm4,xmm3
3304	pshufd	xmm4,xmm4,0
3305
3306	lea	rsi,[((32-0))+rsp]
3307	mov	r12,r15
3308	lea	rdi,[rsp]
3309	call	__ecp_nistz256_mul_montq
3310
3311	lea	rbx,[320+rsp]
3312	lea	rdi,[64+rsp]
3313	call	__ecp_nistz256_sub_fromq
3314
3315	mov	rax,QWORD[384+rsp]
3316	lea	rbx,[384+rsp]
3317	mov	r9,QWORD[((0+32))+rsp]
3318	mov	r10,QWORD[((8+32))+rsp]
3319	lea	rsi,[((0+32))+rsp]
3320	mov	r11,QWORD[((16+32))+rsp]
3321	mov	r12,QWORD[((24+32))+rsp]
3322	lea	rdi,[32+rsp]
3323	call	__ecp_nistz256_mul_montq
3324
3325	mov	rax,QWORD[384+rsp]
3326	lea	rbx,[384+rsp]
3327	mov	r9,QWORD[((0+64))+rsp]
3328	mov	r10,QWORD[((8+64))+rsp]
3329	lea	rsi,[((0+64))+rsp]
3330	mov	r11,QWORD[((16+64))+rsp]
3331	mov	r12,QWORD[((24+64))+rsp]
3332	lea	rdi,[288+rsp]
3333	call	__ecp_nistz256_mul_montq
3334
3335	mov	rax,QWORD[448+rsp]
3336	lea	rbx,[448+rsp]
3337	mov	r9,QWORD[((0+32))+rsp]
3338	mov	r10,QWORD[((8+32))+rsp]
3339	lea	rsi,[((0+32))+rsp]
3340	mov	r11,QWORD[((16+32))+rsp]
3341	mov	r12,QWORD[((24+32))+rsp]
3342	lea	rdi,[32+rsp]
3343	call	__ecp_nistz256_mul_montq
3344
3345	lea	rbx,[352+rsp]
3346	lea	rdi,[96+rsp]
3347	call	__ecp_nistz256_sub_fromq
3348
3349	mov	rax,QWORD[((0+64))+rsp]
3350	mov	r14,QWORD[((8+64))+rsp]
3351	lea	rsi,[((0+64))+rsp]
3352	mov	r15,QWORD[((16+64))+rsp]
3353	mov	r8,QWORD[((24+64))+rsp]
3354	lea	rdi,[128+rsp]
3355	call	__ecp_nistz256_sqr_montq
3356
3357	mov	rax,QWORD[((0+96))+rsp]
3358	mov	r14,QWORD[((8+96))+rsp]
3359	lea	rsi,[((0+96))+rsp]
3360	mov	r15,QWORD[((16+96))+rsp]
3361	mov	r8,QWORD[((24+96))+rsp]
3362	lea	rdi,[192+rsp]
3363	call	__ecp_nistz256_sqr_montq
3364
3365	mov	rax,QWORD[128+rsp]
3366	lea	rbx,[128+rsp]
3367	mov	r9,QWORD[((0+64))+rsp]
3368	mov	r10,QWORD[((8+64))+rsp]
3369	lea	rsi,[((0+64))+rsp]
3370	mov	r11,QWORD[((16+64))+rsp]
3371	mov	r12,QWORD[((24+64))+rsp]
3372	lea	rdi,[160+rsp]
3373	call	__ecp_nistz256_mul_montq
3374
3375	mov	rax,QWORD[320+rsp]
3376	lea	rbx,[320+rsp]
3377	mov	r9,QWORD[((0+128))+rsp]
3378	mov	r10,QWORD[((8+128))+rsp]
3379	lea	rsi,[((0+128))+rsp]
3380	mov	r11,QWORD[((16+128))+rsp]
3381	mov	r12,QWORD[((24+128))+rsp]
3382	lea	rdi,[rsp]
3383	call	__ecp_nistz256_mul_montq
3384
3385
3386
3387
3388	xor	r11,r11
3389	add	r12,r12
3390	lea	rsi,[192+rsp]
3391	adc	r13,r13
3392	mov	rax,r12
3393	adc	r8,r8
3394	adc	r9,r9
3395	mov	rbp,r13
3396	adc	r11,0
3397
3398	sub	r12,-1
3399	mov	rcx,r8
3400	sbb	r13,r14
3401	sbb	r8,0
3402	mov	r10,r9
3403	sbb	r9,r15
3404	sbb	r11,0
3405
3406	cmovc	r12,rax
3407	mov	rax,QWORD[rsi]
3408	cmovc	r13,rbp
3409	mov	rbp,QWORD[8+rsi]
3410	cmovc	r8,rcx
3411	mov	rcx,QWORD[16+rsi]
3412	cmovc	r9,r10
3413	mov	r10,QWORD[24+rsi]
3414
3415	call	__ecp_nistz256_subq
3416
3417	lea	rbx,[160+rsp]
3418	lea	rdi,[224+rsp]
3419	call	__ecp_nistz256_sub_fromq
3420
3421	mov	rax,QWORD[((0+0))+rsp]
3422	mov	rbp,QWORD[((0+8))+rsp]
3423	mov	rcx,QWORD[((0+16))+rsp]
3424	mov	r10,QWORD[((0+24))+rsp]
3425	lea	rdi,[64+rsp]
3426
3427	call	__ecp_nistz256_subq
3428
3429	mov	QWORD[rdi],r12
3430	mov	QWORD[8+rdi],r13
3431	mov	QWORD[16+rdi],r8
3432	mov	QWORD[24+rdi],r9
3433	mov	rax,QWORD[352+rsp]
3434	lea	rbx,[352+rsp]
3435	mov	r9,QWORD[((0+160))+rsp]
3436	mov	r10,QWORD[((8+160))+rsp]
3437	lea	rsi,[((0+160))+rsp]
3438	mov	r11,QWORD[((16+160))+rsp]
3439	mov	r12,QWORD[((24+160))+rsp]
3440	lea	rdi,[32+rsp]
3441	call	__ecp_nistz256_mul_montq
3442
3443	mov	rax,QWORD[96+rsp]
3444	lea	rbx,[96+rsp]
3445	mov	r9,QWORD[((0+64))+rsp]
3446	mov	r10,QWORD[((8+64))+rsp]
3447	lea	rsi,[((0+64))+rsp]
3448	mov	r11,QWORD[((16+64))+rsp]
3449	mov	r12,QWORD[((24+64))+rsp]
3450	lea	rdi,[64+rsp]
3451	call	__ecp_nistz256_mul_montq
3452
3453	lea	rbx,[32+rsp]
3454	lea	rdi,[256+rsp]
3455	call	__ecp_nistz256_sub_fromq
3456
3457DB	102,72,15,126,199
3458
3459	movdqa	xmm0,xmm5
3460	movdqa	xmm1,xmm5
3461	pandn	xmm0,XMMWORD[288+rsp]
3462	movdqa	xmm2,xmm5
3463	pandn	xmm1,XMMWORD[((288+16))+rsp]
3464	movdqa	xmm3,xmm5
3465	pand	xmm2,XMMWORD[$L$ONE_mont]
3466	pand	xmm3,XMMWORD[(($L$ONE_mont+16))]
3467	por	xmm2,xmm0
3468	por	xmm3,xmm1
3469
3470	movdqa	xmm0,xmm4
3471	movdqa	xmm1,xmm4
3472	pandn	xmm0,xmm2
3473	movdqa	xmm2,xmm4
3474	pandn	xmm1,xmm3
3475	movdqa	xmm3,xmm4
3476	pand	xmm2,XMMWORD[384+rsp]
3477	pand	xmm3,XMMWORD[((384+16))+rsp]
3478	por	xmm2,xmm0
3479	por	xmm3,xmm1
3480	movdqu	XMMWORD[64+rdi],xmm2
3481	movdqu	XMMWORD[80+rdi],xmm3
3482
3483	movdqa	xmm0,xmm5
3484	movdqa	xmm1,xmm5
3485	pandn	xmm0,XMMWORD[224+rsp]
3486	movdqa	xmm2,xmm5
3487	pandn	xmm1,XMMWORD[((224+16))+rsp]
3488	movdqa	xmm3,xmm5
3489	pand	xmm2,XMMWORD[416+rsp]
3490	pand	xmm3,XMMWORD[((416+16))+rsp]
3491	por	xmm2,xmm0
3492	por	xmm3,xmm1
3493
3494	movdqa	xmm0,xmm4
3495	movdqa	xmm1,xmm4
3496	pandn	xmm0,xmm2
3497	movdqa	xmm2,xmm4
3498	pandn	xmm1,xmm3
3499	movdqa	xmm3,xmm4
3500	pand	xmm2,XMMWORD[320+rsp]
3501	pand	xmm3,XMMWORD[((320+16))+rsp]
3502	por	xmm2,xmm0
3503	por	xmm3,xmm1
3504	movdqu	XMMWORD[rdi],xmm2
3505	movdqu	XMMWORD[16+rdi],xmm3
3506
3507	movdqa	xmm0,xmm5
3508	movdqa	xmm1,xmm5
3509	pandn	xmm0,XMMWORD[256+rsp]
3510	movdqa	xmm2,xmm5
3511	pandn	xmm1,XMMWORD[((256+16))+rsp]
3512	movdqa	xmm3,xmm5
3513	pand	xmm2,XMMWORD[448+rsp]
3514	pand	xmm3,XMMWORD[((448+16))+rsp]
3515	por	xmm2,xmm0
3516	por	xmm3,xmm1
3517
3518	movdqa	xmm0,xmm4
3519	movdqa	xmm1,xmm4
3520	pandn	xmm0,xmm2
3521	movdqa	xmm2,xmm4
3522	pandn	xmm1,xmm3
3523	movdqa	xmm3,xmm4
3524	pand	xmm2,XMMWORD[352+rsp]
3525	pand	xmm3,XMMWORD[((352+16))+rsp]
3526	por	xmm2,xmm0
3527	por	xmm3,xmm1
3528	movdqu	XMMWORD[32+rdi],xmm2
3529	movdqu	XMMWORD[48+rdi],xmm3
3530
3531	lea	rsi,[((480+56))+rsp]
3532
3533	mov	r15,QWORD[((-48))+rsi]
3534
3535	mov	r14,QWORD[((-40))+rsi]
3536
3537	mov	r13,QWORD[((-32))+rsi]
3538
3539	mov	r12,QWORD[((-24))+rsi]
3540
3541	mov	rbx,QWORD[((-16))+rsi]
3542
3543	mov	rbp,QWORD[((-8))+rsi]
3544
3545	lea	rsp,[rsi]
3546
3547$L$add_affineq_epilogue:
3548	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
3549	mov	rsi,QWORD[16+rsp]
3550	DB	0F3h,0C3h		;repret
3551
3552$L$SEH_end_p256_point_add_affine:
3553
3554ALIGN	32
; __ecp_nistz256_add_tox -- internal helper, register-based calling convention.
; Adds the four qwords at [rbx] to the 256-bit value in r12,r13,r8,r9
; (r12 is the least-significant limb), then conditionally subtracts the
; 256-bit constant whose limbs are (-1, r14, 0, r15).
;   In:   r12,r13,r8,r9 = first operand
;         rbx           -> second operand (4 qwords)
;         rdi           -> destination (4 qwords)
;         r14,r15       = limbs 1 and 3 of the subtracted constant;
;                         p256_point_doublex loads them from $L$poly+8 and
;                         $L$poly+24, and limbs 0 and 2 of $L$poly are -1 and 0.
;   Out:  result in r12,r13,r8,r9 and stored at [rdi].
;   Clobbers: rax, rbp, rcx, r10, r11, flags.
; Selection is done with cmov; the routine contains no branches.
3555__ecp_nistz256_add_tox:
3556
; xor clears CF, so the first adc behaves as a plain add; r11 collects the
; carry out of the top limb.
3557	xor	r11,r11
3558	adc	r12,QWORD[rbx]
3559	adc	r13,QWORD[8+rbx]
3560	mov	rax,r12
3561	adc	r8,QWORD[16+rbx]
3562	adc	r9,QWORD[24+rbx]
3563	mov	rbp,r13
3564	adc	r11,0
3565
; xor clears CF again (r10 is overwritten below). rax,rbp,rcx,r10 keep the
; unreduced sum while the constant is subtracted from r12,r13,r8,r9:r11.
3566	xor	r10,r10
3567	sbb	r12,-1
3568	mov	rcx,r8
3569	sbb	r13,r14
3570	sbb	r8,0
3571	mov	r10,r9
3572	sbb	r9,r15
3573	sbb	r11,0
3574
; CF set here means the subtraction borrowed: restore the unreduced sum.
3575	cmovc	r12,rax
3576	cmovc	r13,rbp
3577	mov	QWORD[rdi],r12
3578	cmovc	r8,rcx
3579	mov	QWORD[8+rdi],r13
3580	cmovc	r9,r10
3581	mov	QWORD[16+rdi],r8
3582	mov	QWORD[24+rdi],r9
3583
3584	DB	0F3h,0C3h		;repret
3585
3586
3587
3588
3589ALIGN	32
; __ecp_nistz256_sub_fromx -- internal helper, register-based calling convention.
; Subtracts the four qwords at [rbx] from the 256-bit value in r12,r13,r8,r9
; (r12 least significant); if that subtraction borrowed, the constant with
; limbs (-1, r14, 0, r15) is added back.
;   In:   r12,r13,r8,r9 = minuend
;         rbx           -> subtrahend (4 qwords)
;         rdi           -> destination (4 qwords)
;         r14,r15       = limbs 1 and 3 of the constant that is added back
;   Out:  result in r12,r13,r8,r9 and stored at [rdi].
;   Clobbers: rax, rbp, rcx, r10, r11, flags.
3590__ecp_nistz256_sub_fromx:
3591
; xor clears CF so the first sbb is a plain sub; after the chain r11 is 0 when
; there was no borrow and all-ones when there was.
3592	xor	r11,r11
3593	sbb	r12,QWORD[rbx]
3594	sbb	r13,QWORD[8+rbx]
3595	mov	rax,r12
3596	sbb	r8,QWORD[16+rbx]
3597	sbb	r9,QWORD[24+rbx]
3598	mov	rbp,r13
3599	sbb	r11,0
3600
; Speculatively add the constant; rax,rbp,rcx,r10 keep the raw difference.
3601	xor	r10,r10
3602	adc	r12,-1
3603	mov	rcx,r8
3604	adc	r13,r14
3605	adc	r8,0
3606	mov	r10,r9
3607	adc	r9,r15
3608
; bt copies bit 0 of r11 (the borrow indicator) into CF; with no borrow the
; raw difference is restored.
3609	bt	r11,0
3610	cmovnc	r12,rax
3611	cmovnc	r13,rbp
3612	mov	QWORD[rdi],r12
3613	cmovnc	r8,rcx
3614	mov	QWORD[8+rdi],r13
3615	cmovnc	r9,r10
3616	mov	QWORD[16+rdi],r8
3617	mov	QWORD[24+rdi],r9
3618
3619	DB	0F3h,0C3h		;repret
3620
3621
3622
3623
3624ALIGN	32
; __ecp_nistz256_subx -- internal helper, register-based calling convention.
; Computes (rax,rbp,rcx,r10) - (r12,r13,r8,r9), least-significant limb first,
; and adds back the constant with limbs (-1, r14, 0, r15) when the subtraction
; borrowed. Unlike __ecp_nistz256_sub_fromx it does not write to memory.
;   In:   rax,rbp,rcx,r10 = minuend
;         r12,r13,r8,r9   = subtrahend
;         r14,r15         = limbs 1 and 3 of the constant that is added back
;   Out:  result in r12,r13,r8,r9.
;   Clobbers: rax, rbp, rcx, r10, r11, flags.
3625__ecp_nistz256_subx:
3626
; xor clears CF; r11 ends up 0 (no borrow) or all-ones (borrow).
3627	xor	r11,r11
3628	sbb	rax,r12
3629	sbb	rbp,r13
3630	mov	r12,rax
3631	sbb	rcx,r8
3632	sbb	r10,r9
3633	mov	r13,rbp
3634	sbb	r11,0
3635
; r9 has already been consumed above; zeroing it only serves to clear CF.
; r12,r13,r8,r9 receive the raw difference while rax,rbp,rcx,r10 get the
; constant added.
3636	xor	r9,r9
3637	adc	rax,-1
3638	mov	r8,rcx
3639	adc	rbp,r14
3640	adc	rcx,0
3641	mov	r9,r10
3642	adc	r10,r15
3643
; CF = bit 0 of r11: on borrow take the adjusted value.
3644	bt	r11,0
3645	cmovc	r12,rax
3646	cmovc	r13,rbp
3647	cmovc	r8,rcx
3648	cmovc	r9,r10
3649
3650	DB	0F3h,0C3h		;repret
3651
3652
3653
3654
3655ALIGN	32
; __ecp_nistz256_mul_by_2x -- internal helper, register-based calling convention.
; Doubles the 256-bit value in r12,r13,r8,r9 (r12 least significant) and
; conditionally subtracts the constant with limbs (-1, r14, 0, r15).
;   In:   r12,r13,r8,r9 = operand
;         rdi           -> destination (4 qwords)
;         r14,r15       = limbs 1 and 3 of the subtracted constant
;   Out:  result in r12,r13,r8,r9 and stored at [rdi].
;   Clobbers: rax, rbp, rcx, r10, r11, flags.
3656__ecp_nistz256_mul_by_2x:
3657
; xor clears CF; each limb is added to itself with carry, and r11 collects the
; bit shifted out of the top limb.
3658	xor	r11,r11
3659	adc	r12,r12
3660	adc	r13,r13
3661	mov	rax,r12
3662	adc	r8,r8
3663	adc	r9,r9
3664	mov	rbp,r13
3665	adc	r11,0
3666
; Subtract the constant from r12,r13,r8,r9:r11, keeping the unreduced value in
; rax,rbp,rcx,r10.
3667	xor	r10,r10
3668	sbb	r12,-1
3669	mov	rcx,r8
3670	sbb	r13,r14
3671	sbb	r8,0
3672	mov	r10,r9
3673	sbb	r9,r15
3674	sbb	r11,0
3675
; Borrow (CF) means the doubled value was below the constant: undo.
3676	cmovc	r12,rax
3677	cmovc	r13,rbp
3678	mov	QWORD[rdi],r12
3679	cmovc	r8,rcx
3680	mov	QWORD[8+rdi],r13
3681	cmovc	r9,r10
3682	mov	QWORD[16+rdi],r8
3683	mov	QWORD[24+rdi],r9
3684
3685	DB	0F3h,0C3h		;repret
3686
3687
3688
3689ALIGN	32
; p256_point_doublex -- Win64 entry: rcx -> 96-byte output, rdx -> 96-byte input.
; The input is read as three consecutive 32-byte fields at offsets 0, 32, 64
; and the output is written in the same layout.
;
; The prologue stores the caller's rdi/rsi in the home space at [8+rsp] and
; [16+rsp] and moves the Win64 argument registers into rdi/rsi, which the body
; uses. rbp, rbx, r12-r15 are pushed and a frame of 32*5+8 bytes provides five
; 32-byte scratch slots at [rsp+0], +32, +64, +96, +128.
;
; $L$point_double_shortcutx is also entered from p256_point_addx
; ($L$add_doublex) with rdi/rsi already set and the frame already in place.
;
; NOTE(review): __ecp_nistz256_sqr_montx and __ecp_nistz256_mul_montx are
; defined outside this chunk. The comments below assume what the call sites
; imply: rdi is the destination, rsi is the first operand's address minus 128,
; sqr_montx takes its operand in rdx,r14,r15,r8 and leaves its result in
; r12,r13,r14,r15 with rsi/rbp holding the values the small helpers expect in
; r14/r15, and mul_montx takes operands in rdx/[rbx] and r9,r10,r11,r12 and
; leaves its result in r12,r13,r8,r9 with r14/r15 ready for the small helpers.
; Confirm against those definitions.
3690p256_point_doublex:
3691	mov	QWORD[8+rsp],rdi	;WIN64 prologue
3692	mov	QWORD[16+rsp],rsi
3693	mov	rax,rsp
3694$L$SEH_begin_p256_point_doublex:
3695	mov	rdi,rcx
3696	mov	rsi,rdx
3697
3698
3699
3700$L$point_doublex:
3701	push	rbp
3702
3703	push	rbx
3704
3705	push	r12
3706
3707	push	r13
3708
3709	push	r14
3710
3711	push	r15
3712
3713	sub	rsp,32*5+8
3714
3715$L$point_doublex_body:
3716
3717$L$point_double_shortcutx:
; Copy in[0..32) to [rsp+96], load in[32..64) into r12,r13,r8,r9 and fetch
; limbs 1 and 3 of $L$poly into r14/r15 for the add/sub helpers.
3718	movdqu	xmm0,XMMWORD[rsi]
3719	mov	rbx,rsi
3720	movdqu	xmm1,XMMWORD[16+rsi]
3721	mov	r12,QWORD[((32+0))+rsi]
3722	mov	r13,QWORD[((32+8))+rsi]
3723	mov	r8,QWORD[((32+16))+rsi]
3724	mov	r9,QWORD[((32+24))+rsi]
3725	mov	r14,QWORD[(($L$poly+8))]
3726	mov	r15,QWORD[(($L$poly+24))]
3727	movdqa	XMMWORD[96+rsp],xmm0
3728	movdqa	XMMWORD[(96+16)+rsp],xmm1
3729	lea	r10,[32+rdi]
3730	lea	r11,[64+rdi]
; The three DB lines encode movq xmm0,rdi / movq xmm1,r10 / movq xmm2,r11:
; the output pointer, out+32 and out+64 are parked in xmm0..xmm2.
3731DB	102,72,15,110,199
3732DB	102,73,15,110,202
3733DB	102,73,15,110,211
3734
; [rsp+0] <- 2 * in[32..64)
3735	lea	rdi,[rsp]
3736	call	__ecp_nistz256_mul_by_2x
3737
; [rsp+64] <- sqr_montx(in[64..96))
3738	mov	rdx,QWORD[((64+0))+rsi]
3739	mov	r14,QWORD[((64+8))+rsi]
3740	mov	r15,QWORD[((64+16))+rsi]
3741	mov	r8,QWORD[((64+24))+rsi]
3742	lea	rsi,[((64-128))+rsi]
3743	lea	rdi,[64+rsp]
3744	call	__ecp_nistz256_sqr_montx
3745
; [rsp+0] <- sqr_montx([rsp+0])
3746	mov	rdx,QWORD[((0+0))+rsp]
3747	mov	r14,QWORD[((8+0))+rsp]
3748	lea	rsi,[((-128+0))+rsp]
3749	mov	r15,QWORD[((16+0))+rsp]
3750	mov	r8,QWORD[((24+0))+rsp]
3751	lea	rdi,[rsp]
3752	call	__ecp_nistz256_sqr_montx
3753
; out[64..96) <- 2 * mul_montx(in[64..96), in[32..64)); rbx still points at
; the input, and the DB line encodes movq rdi,xmm2 (out+64).
3754	mov	rdx,QWORD[32+rbx]
3755	mov	r9,QWORD[((64+0))+rbx]
3756	mov	r10,QWORD[((64+8))+rbx]
3757	mov	r11,QWORD[((64+16))+rbx]
3758	mov	r12,QWORD[((64+24))+rbx]
3759	lea	rsi,[((64-128))+rbx]
3760	lea	rbx,[32+rbx]
3761DB	102,72,15,126,215
3762	call	__ecp_nistz256_mul_montx
3763	call	__ecp_nistz256_mul_by_2x
3764
; [rsp+32] <- [rsp+96] + [rsp+64]
3765	mov	r12,QWORD[((96+0))+rsp]
3766	mov	r13,QWORD[((96+8))+rsp]
3767	lea	rbx,[64+rsp]
3768	mov	r8,QWORD[((96+16))+rsp]
3769	mov	r9,QWORD[((96+24))+rsp]
3770	lea	rdi,[32+rsp]
3771	call	__ecp_nistz256_add_tox
3772
; [rsp+64] <- [rsp+96] - [rsp+64]
3773	mov	r12,QWORD[((96+0))+rsp]
3774	mov	r13,QWORD[((96+8))+rsp]
3775	lea	rbx,[64+rsp]
3776	mov	r8,QWORD[((96+16))+rsp]
3777	mov	r9,QWORD[((96+24))+rsp]
3778	lea	rdi,[64+rsp]
3779	call	__ecp_nistz256_sub_fromx
3780
; Square [rsp+0] with rdi = out+32 (the DB line encodes movq rdi,xmm1).
3781	mov	rdx,QWORD[((0+0))+rsp]
3782	mov	r14,QWORD[((8+0))+rsp]
3783	lea	rsi,[((-128+0))+rsp]
3784	mov	r15,QWORD[((16+0))+rsp]
3785	mov	r8,QWORD[((24+0))+rsp]
3786DB	102,72,15,126,207
3787	call	__ecp_nistz256_sqr_montx
; Inline halving of r12,r13,r14,r15: add the constant (-1, rsi, 0, rbp) with
; the carry collected in r9, but keep the original limbs (and r9 = 0) when
; bit 0 of the original low limb is clear ...
3788	xor	r9,r9
3789	mov	rax,r12
3790	add	r12,-1
3791	mov	r10,r13
3792	adc	r13,rsi
3793	mov	rcx,r14
3794	adc	r14,0
3795	mov	r8,r15
3796	adc	r15,rbp
3797	adc	r9,0
3798	xor	rsi,rsi
3799	test	rax,1
3800
3801	cmovz	r12,rax
3802	cmovz	r13,r10
3803	cmovz	r14,rcx
3804	cmovz	r15,r8
3805	cmovz	r9,rsi
3806
; ... then shift the 257-bit value r9:r15:r14:r13:r12 right by one bit and
; store the four limbs at [rdi] (out+32).
3807	mov	rax,r13
3808	shr	r12,1
3809	shl	rax,63
3810	mov	r10,r14
3811	shr	r13,1
3812	or	r12,rax
3813	shl	r10,63
3814	mov	rcx,r15
3815	shr	r14,1
3816	or	r13,r10
3817	shl	rcx,63
3818	mov	QWORD[rdi],r12
3819	shr	r15,1
3820	mov	QWORD[8+rdi],r13
3821	shl	r9,63
3822	or	r14,rcx
3823	or	r15,r9
3824	mov	QWORD[16+rdi],r14
3825	mov	QWORD[24+rdi],r15
; [rsp+32] <- mul_montx([rsp+32], [rsp+64])
3826	mov	rdx,QWORD[64+rsp]
3827	lea	rbx,[64+rsp]
3828	mov	r9,QWORD[((0+32))+rsp]
3829	mov	r10,QWORD[((8+32))+rsp]
3830	lea	rsi,[((-128+32))+rsp]
3831	mov	r11,QWORD[((16+32))+rsp]
3832	mov	r12,QWORD[((24+32))+rsp]
3833	lea	rdi,[32+rsp]
3834	call	__ecp_nistz256_mul_montx
3835
; [rsp+128] <- 2 * (value just produced); then [rsp+32] <- that + [rsp+32],
; i.e. three times the product.
3836	lea	rdi,[128+rsp]
3837	call	__ecp_nistz256_mul_by_2x
3838
3839	lea	rbx,[32+rsp]
3840	lea	rdi,[32+rsp]
3841	call	__ecp_nistz256_add_tox
3842
; [rsp+0] <- mul_montx([rsp+0], [rsp+96])
3843	mov	rdx,QWORD[96+rsp]
3844	lea	rbx,[96+rsp]
3845	mov	r9,QWORD[((0+0))+rsp]
3846	mov	r10,QWORD[((8+0))+rsp]
3847	lea	rsi,[((-128+0))+rsp]
3848	mov	r11,QWORD[((16+0))+rsp]
3849	mov	r12,QWORD[((24+0))+rsp]
3850	lea	rdi,[rsp]
3851	call	__ecp_nistz256_mul_montx
3852
; [rsp+128] <- 2 * (value just produced)
3853	lea	rdi,[128+rsp]
3854	call	__ecp_nistz256_mul_by_2x
3855
; Square [rsp+32] with rdi = out (the DB line encodes movq rdi,xmm0).
3856	mov	rdx,QWORD[((0+32))+rsp]
3857	mov	r14,QWORD[((8+32))+rsp]
3858	lea	rsi,[((-128+32))+rsp]
3859	mov	r15,QWORD[((16+32))+rsp]
3860	mov	r8,QWORD[((24+32))+rsp]
3861DB	102,72,15,126,199
3862	call	__ecp_nistz256_sqr_montx
3863
; Move the square from r12,r13,r14,r15 into the r12,r13,r8,r9 layout, reload
; r14/r15 from rsi/rbp, then out[0..32) <- square - [rsp+128].
3864	lea	rbx,[128+rsp]
3865	mov	r8,r14
3866	mov	r9,r15
3867	mov	r14,rsi
3868	mov	r15,rbp
3869	call	__ecp_nistz256_sub_fromx
3870
; r12,r13,r8,r9 <- [rsp+0] - out[0..32)
3871	mov	rax,QWORD[((0+0))+rsp]
3872	mov	rbp,QWORD[((0+8))+rsp]
3873	mov	rcx,QWORD[((0+16))+rsp]
3874	mov	r10,QWORD[((0+24))+rsp]
3875	lea	rdi,[rsp]
3876	call	__ecp_nistz256_subx
3877
; Store that difference at [rsp+0] and multiply it by [rsp+32], result to
; [rsp+0]. "xor ecx,ecx" sets ZF and the following mov/lea instructions leave
; flags untouched, so both cmovz below always perform their move; together
; with the plain movs they shuffle r12,r13,r8,r9 into r9,r10,r11,r12.
3878	mov	rdx,QWORD[32+rsp]
3879	lea	rbx,[32+rsp]
3880	mov	r14,r12
3881	xor	ecx,ecx
3882	mov	QWORD[((0+0))+rsp],r12
3883	mov	r10,r13
3884	mov	QWORD[((0+8))+rsp],r13
3885	cmovz	r11,r8
3886	mov	QWORD[((0+16))+rsp],r8
3887	lea	rsi,[((0-128))+rsp]
3888	cmovz	r12,r9
3889	mov	QWORD[((0+24))+rsp],r9
3890	mov	r9,r14
3891	lea	rdi,[rsp]
3892	call	__ecp_nistz256_mul_montx
3893
; The DB lines encode movq rbx,xmm1 / movq rdi,xmm1 (both out+32):
; out[32..64) <- product - out[32..64).
3894DB	102,72,15,126,203
3895DB	102,72,15,126,207
3896	call	__ecp_nistz256_sub_fromx
3897
; Epilogue: rsi = rsp + 160 + 8 + 6*8, the stack pointer at function entry;
; the six saved registers sit just below it in push order.
3898	lea	rsi,[((160+56))+rsp]
3899
3900	mov	r15,QWORD[((-48))+rsi]
3901
3902	mov	r14,QWORD[((-40))+rsi]
3903
3904	mov	r13,QWORD[((-32))+rsi]
3905
3906	mov	r12,QWORD[((-24))+rsi]
3907
3908	mov	rbx,QWORD[((-16))+rsi]
3909
3910	mov	rbp,QWORD[((-8))+rsi]
3911
3912	lea	rsp,[rsi]
3913
3914$L$point_doublex_epilogue:
3915	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
3916	mov	rsi,QWORD[16+rsp]
3917	DB	0F3h,0C3h		;repret
3918
3919$L$SEH_end_p256_point_doublex:
3920
3921ALIGN	32
; p256_point_addx -- Win64 entry: rcx -> 96-byte output, rdx -> first 96-byte
; input, r8 -> second 96-byte input. Each input is three consecutive 32-byte
; fields at offsets 0, 32, 64; the output uses the same layout.
;
; Frame: rbp, rbx, r12-r15 pushed, then 32*18+8 bytes giving eighteen 32-byte
; slots. Slots 384/416/448 hold a copy of the first input, 480/512/544 a copy
; of the second; slots 0..352 are scratch and intermediate results.
;
; Two broadcast masks are built and used for the final branch-free selection:
;   xmm5 = all-ones iff bytes 64..95 of the first input are all zero
;   xmm4 = all-ones iff bytes 64..95 of the second input are all zero
; Output = first input if xmm4 is set, else second input if xmm5 is set, else
; the computed result.
;
; Before the main computation the code branches on whether two intermediate
; differences are zero: if both are zero (and neither mask is set) it
; tail-jumps into p256_point_doublex's body, if only the second is zero it
; writes an all-zero output.
;
; NOTE(review): __ecp_nistz256_sqr_montx / __ecp_nistz256_mul_montx are defined
; outside this chunk; "dst <- sqr_montx(a)" / "dst <- mul_montx(a, b)" below
; describe how the call is set up (rdi = dst, rsi = a - 128, rdx/rbx = b) and
; assume the helpers leave the product in r12,r13,r8,r9 and preserve the xmm
; registers used here. Confirm against their definitions.
3922p256_point_addx:
3923	mov	QWORD[8+rsp],rdi	;WIN64 prologue
3924	mov	QWORD[16+rsp],rsi
3925	mov	rax,rsp
3926$L$SEH_begin_p256_point_addx:
3927	mov	rdi,rcx
3928	mov	rsi,rdx
3929	mov	rdx,r8
3930
3931
3932
3933$L$point_addx:
3934	push	rbp
3935
3936	push	rbx
3937
3938	push	r12
3939
3940	push	r13
3941
3942	push	r14
3943
3944	push	r15
3945
3946	sub	rsp,32*18+8
3947
3948$L$point_addx_body:
3949
; Copy the first input to [rsp+384..480); rbx keeps its address and rsi is
; switched to the second input.
3950	movdqu	xmm0,XMMWORD[rsi]
3951	movdqu	xmm1,XMMWORD[16+rsi]
3952	movdqu	xmm2,XMMWORD[32+rsi]
3953	movdqu	xmm3,XMMWORD[48+rsi]
3954	movdqu	xmm4,XMMWORD[64+rsi]
3955	movdqu	xmm5,XMMWORD[80+rsi]
3956	mov	rbx,rsi
3957	mov	rsi,rdx
3958	movdqa	XMMWORD[384+rsp],xmm0
3959	movdqa	XMMWORD[(384+16)+rsp],xmm1
3960	movdqa	XMMWORD[416+rsp],xmm2
3961	movdqa	XMMWORD[(416+16)+rsp],xmm3
3962	movdqa	XMMWORD[448+rsp],xmm4
3963	movdqa	XMMWORD[(448+16)+rsp],xmm5
3964	por	xmm5,xmm4
3965
; Copy the second input to [rsp+480..576) while OR-folding the first input's
; third field down into dword 0 of xmm5 (pshufd 0xb1 / 0x1e + por).
3966	movdqu	xmm0,XMMWORD[rsi]
3967	pshufd	xmm3,xmm5,0xb1
3968	movdqu	xmm1,XMMWORD[16+rsi]
3969	movdqu	xmm2,XMMWORD[32+rsi]
3970	por	xmm5,xmm3
3971	movdqu	xmm3,XMMWORD[48+rsi]
3972	mov	rdx,QWORD[((64+0))+rsi]
3973	mov	r14,QWORD[((64+8))+rsi]
3974	mov	r15,QWORD[((64+16))+rsi]
3975	mov	r8,QWORD[((64+24))+rsi]
3976	movdqa	XMMWORD[480+rsp],xmm0
3977	pshufd	xmm4,xmm5,0x1e
3978	movdqa	XMMWORD[(480+16)+rsp],xmm1
3979	movdqu	xmm0,XMMWORD[64+rsi]
3980	movdqu	xmm1,XMMWORD[80+rsi]
3981	movdqa	XMMWORD[512+rsp],xmm2
3982	movdqa	XMMWORD[(512+16)+rsp],xmm3
3983	por	xmm5,xmm4
3984	pxor	xmm4,xmm4
3985	por	xmm1,xmm0
; DB encodes movq xmm0,rdi: park the output pointer in xmm0.
3986DB	102,72,15,110,199
3987
; [rsp+96] <- sqr_montx(second input's third field); its limbs are also
; stored to [rsp+544].
3988	lea	rsi,[((64-128))+rsi]
3989	mov	QWORD[((544+0))+rsp],rdx
3990	mov	QWORD[((544+8))+rsp],r14
3991	mov	QWORD[((544+16))+rsp],r15
3992	mov	QWORD[((544+24))+rsp],r8
3993	lea	rdi,[96+rsp]
3994	call	__ecp_nistz256_sqr_montx
3995
; Finish the masks: xmm5 = broadcast(folded first-input field == 0),
; xmm4 = broadcast(folded second-input field == 0).
3996	pcmpeqd	xmm5,xmm4
3997	pshufd	xmm4,xmm1,0xb1
3998	por	xmm4,xmm1
3999	pshufd	xmm5,xmm5,0
4000	pshufd	xmm3,xmm4,0x1e
4001	por	xmm4,xmm3
4002	pxor	xmm3,xmm3
4003	pcmpeqd	xmm4,xmm3
4004	pshufd	xmm4,xmm4,0
; [rsp+32] <- sqr_montx(first input's third field). DB encodes
; movq xmm1,rbx: park the first input's address in xmm1.
4005	mov	rdx,QWORD[((64+0))+rbx]
4006	mov	r14,QWORD[((64+8))+rbx]
4007	mov	r15,QWORD[((64+16))+rbx]
4008	mov	r8,QWORD[((64+24))+rbx]
4009DB	102,72,15,110,203
4010
4011	lea	rsi,[((64-128))+rbx]
4012	lea	rdi,[32+rsp]
4013	call	__ecp_nistz256_sqr_montx
4014
; [rsp+224] <- mul_montx([rsp+96], [rsp+544])
4015	mov	rdx,QWORD[544+rsp]
4016	lea	rbx,[544+rsp]
4017	mov	r9,QWORD[((0+96))+rsp]
4018	mov	r10,QWORD[((8+96))+rsp]
4019	lea	rsi,[((-128+96))+rsp]
4020	mov	r11,QWORD[((16+96))+rsp]
4021	mov	r12,QWORD[((24+96))+rsp]
4022	lea	rdi,[224+rsp]
4023	call	__ecp_nistz256_mul_montx
4024
; [rsp+256] <- mul_montx([rsp+32], [rsp+448])
4025	mov	rdx,QWORD[448+rsp]
4026	lea	rbx,[448+rsp]
4027	mov	r9,QWORD[((0+32))+rsp]
4028	mov	r10,QWORD[((8+32))+rsp]
4029	lea	rsi,[((-128+32))+rsp]
4030	mov	r11,QWORD[((16+32))+rsp]
4031	mov	r12,QWORD[((24+32))+rsp]
4032	lea	rdi,[256+rsp]
4033	call	__ecp_nistz256_mul_montx
4034
; [rsp+224] <- mul_montx([rsp+224], [rsp+416])
4035	mov	rdx,QWORD[416+rsp]
4036	lea	rbx,[416+rsp]
4037	mov	r9,QWORD[((0+224))+rsp]
4038	mov	r10,QWORD[((8+224))+rsp]
4039	lea	rsi,[((-128+224))+rsp]
4040	mov	r11,QWORD[((16+224))+rsp]
4041	mov	r12,QWORD[((24+224))+rsp]
4042	lea	rdi,[224+rsp]
4043	call	__ecp_nistz256_mul_montx
4044
; [rsp+256] <- mul_montx([rsp+256], [rsp+512])
4045	mov	rdx,QWORD[512+rsp]
4046	lea	rbx,[512+rsp]
4047	mov	r9,QWORD[((0+256))+rsp]
4048	mov	r10,QWORD[((8+256))+rsp]
4049	lea	rsi,[((-128+256))+rsp]
4050	mov	r11,QWORD[((16+256))+rsp]
4051	mov	r12,QWORD[((24+256))+rsp]
4052	lea	rdi,[256+rsp]
4053	call	__ecp_nistz256_mul_montx
4054
; [rsp+64] <- (product just computed) - [rsp+224]
4055	lea	rbx,[224+rsp]
4056	lea	rdi,[64+rsp]
4057	call	__ecp_nistz256_sub_fromx
4058
; r12 = OR of the four limbs of that difference (zero iff it is zero);
; xmm2 = xmm4 | xmm5. DB encodes movq xmm3,r12 to keep the OR for later.
4059	or	r12,r13
4060	movdqa	xmm2,xmm4
4061	or	r12,r8
4062	or	r12,r9
4063	por	xmm2,xmm5
4064DB	102,73,15,110,220
4065
; [rsp+160] <- mul_montx([rsp+96], [rsp+384])
4066	mov	rdx,QWORD[384+rsp]
4067	lea	rbx,[384+rsp]
4068	mov	r9,QWORD[((0+96))+rsp]
4069	mov	r10,QWORD[((8+96))+rsp]
4070	lea	rsi,[((-128+96))+rsp]
4071	mov	r11,QWORD[((16+96))+rsp]
4072	mov	r12,QWORD[((24+96))+rsp]
4073	lea	rdi,[160+rsp]
4074	call	__ecp_nistz256_mul_montx
4075
; [rsp+192] <- mul_montx([rsp+32], [rsp+480])
4076	mov	rdx,QWORD[480+rsp]
4077	lea	rbx,[480+rsp]
4078	mov	r9,QWORD[((0+32))+rsp]
4079	mov	r10,QWORD[((8+32))+rsp]
4080	lea	rsi,[((-128+32))+rsp]
4081	mov	r11,QWORD[((16+32))+rsp]
4082	mov	r12,QWORD[((24+32))+rsp]
4083	lea	rdi,[192+rsp]
4084	call	__ecp_nistz256_mul_montx
4085
; [rsp+0] <- (product just computed) - [rsp+160]
4086	lea	rbx,[160+rsp]
4087	lea	rdi,[rsp]
4088	call	__ecp_nistz256_sub_fromx
4089
; r12 = OR of the limbs of this second difference.
4090	or	r12,r13
4091	or	r12,r8
4092	or	r12,r9
4093
; DB lines encode movq r8,xmm2 (low half of xmm4|xmm5) and movq r9,xmm3 (OR
; of the first difference). Proceed with the general addition when the second
; difference is non-zero or either mask is set. The 0x3e byte is a prefix on
; the jnz that follows (NOTE(review): presumably a branch hint/padding).
4094DB	102,73,15,126,208
4095DB	102,73,15,126,217
4096	or	r12,r8
4097DB	0x3e
4098	jnz	NEAR $L$add_proceedx
4099
4100
4101
; Second difference is zero and no mask is set: if the first difference is
; zero as well, hand over to the doubling code.
4102	test	r9,r9
4103	jz	NEAR $L$add_doublex
4104
4105
4106
4107
4108
4109
; Otherwise write 96 zero bytes to the output (DB encodes movq rdi,xmm0).
4110DB	102,72,15,126,199
4111	pxor	xmm0,xmm0
4112	movdqu	XMMWORD[rdi],xmm0
4113	movdqu	XMMWORD[16+rdi],xmm0
4114	movdqu	XMMWORD[32+rdi],xmm0
4115	movdqu	XMMWORD[48+rdi],xmm0
4116	movdqu	XMMWORD[64+rdi],xmm0
4117	movdqu	XMMWORD[80+rdi],xmm0
4118	jmp	NEAR $L$add_donex
4119
4120ALIGN	32
4121$L$add_doublex:
; DB lines encode movq rsi,xmm1 (first input) and movq rdi,xmm0 (output).
; 416 = (32*18+8) - (32*5+8): shrink this frame to p256_point_doublex's frame
; size so that its epilogue unwinds correctly.
4122DB	102,72,15,126,206
4123DB	102,72,15,126,199
4124	add	rsp,416
4125
4126	jmp	NEAR $L$point_double_shortcutx
4127
4128
4129ALIGN	32
4130$L$add_proceedx:
; [rsp+96] <- sqr_montx([rsp+64])
4131	mov	rdx,QWORD[((0+64))+rsp]
4132	mov	r14,QWORD[((8+64))+rsp]
4133	lea	rsi,[((-128+64))+rsp]
4134	mov	r15,QWORD[((16+64))+rsp]
4135	mov	r8,QWORD[((24+64))+rsp]
4136	lea	rdi,[96+rsp]
4137	call	__ecp_nistz256_sqr_montx
4138
; [rsp+352] <- mul_montx([rsp+0], [rsp+448])
4139	mov	rdx,QWORD[448+rsp]
4140	lea	rbx,[448+rsp]
4141	mov	r9,QWORD[((0+0))+rsp]
4142	mov	r10,QWORD[((8+0))+rsp]
4143	lea	rsi,[((-128+0))+rsp]
4144	mov	r11,QWORD[((16+0))+rsp]
4145	mov	r12,QWORD[((24+0))+rsp]
4146	lea	rdi,[352+rsp]
4147	call	__ecp_nistz256_mul_montx
4148
; [rsp+32] <- sqr_montx([rsp+0])
4149	mov	rdx,QWORD[((0+0))+rsp]
4150	mov	r14,QWORD[((8+0))+rsp]
4151	lea	rsi,[((-128+0))+rsp]
4152	mov	r15,QWORD[((16+0))+rsp]
4153	mov	r8,QWORD[((24+0))+rsp]
4154	lea	rdi,[32+rsp]
4155	call	__ecp_nistz256_sqr_montx
4156
; [rsp+352] <- mul_montx([rsp+352], [rsp+544])
4157	mov	rdx,QWORD[544+rsp]
4158	lea	rbx,[544+rsp]
4159	mov	r9,QWORD[((0+352))+rsp]
4160	mov	r10,QWORD[((8+352))+rsp]
4161	lea	rsi,[((-128+352))+rsp]
4162	mov	r11,QWORD[((16+352))+rsp]
4163	mov	r12,QWORD[((24+352))+rsp]
4164	lea	rdi,[352+rsp]
4165	call	__ecp_nistz256_mul_montx
4166
; [rsp+128] <- mul_montx([rsp+32], [rsp+0])
4167	mov	rdx,QWORD[rsp]
4168	lea	rbx,[rsp]
4169	mov	r9,QWORD[((0+32))+rsp]
4170	mov	r10,QWORD[((8+32))+rsp]
4171	lea	rsi,[((-128+32))+rsp]
4172	mov	r11,QWORD[((16+32))+rsp]
4173	mov	r12,QWORD[((24+32))+rsp]
4174	lea	rdi,[128+rsp]
4175	call	__ecp_nistz256_mul_montx
4176
; [rsp+192] <- mul_montx([rsp+32], [rsp+160])
4177	mov	rdx,QWORD[160+rsp]
4178	lea	rbx,[160+rsp]
4179	mov	r9,QWORD[((0+32))+rsp]
4180	mov	r10,QWORD[((8+32))+rsp]
4181	lea	rsi,[((-128+32))+rsp]
4182	mov	r11,QWORD[((16+32))+rsp]
4183	mov	r12,QWORD[((24+32))+rsp]
4184	lea	rdi,[192+rsp]
4185	call	__ecp_nistz256_mul_montx
4186
4187
4188
4189
; Inline doubling of the product in r12,r13,r8,r9 with the same conditional
; subtraction of (-1, r14, 0, r15) that __ecp_nistz256_mul_by_2x performs,
; interleaved with loading [rsp+96] into rax,rbp,rcx,r10 for the subx call.
4190	xor	r11,r11
4191	add	r12,r12
4192	lea	rsi,[96+rsp]
4193	adc	r13,r13
4194	mov	rax,r12
4195	adc	r8,r8
4196	adc	r9,r9
4197	mov	rbp,r13
4198	adc	r11,0
4199
4200	sub	r12,-1
4201	mov	rcx,r8
4202	sbb	r13,r14
4203	sbb	r8,0
4204	mov	r10,r9
4205	sbb	r9,r15
4206	sbb	r11,0
4207
4208	cmovc	r12,rax
4209	mov	rax,QWORD[rsi]
4210	cmovc	r13,rbp
4211	mov	rbp,QWORD[8+rsi]
4212	cmovc	r8,rcx
4213	mov	rcx,QWORD[16+rsi]
4214	cmovc	r9,r10
4215	mov	r10,QWORD[24+rsi]
4216
; r12,r13,r8,r9 <- [rsp+96] - doubled value
4217	call	__ecp_nistz256_subx
4218
; [rsp+288] <- that - [rsp+128]
4219	lea	rbx,[128+rsp]
4220	lea	rdi,[288+rsp]
4221	call	__ecp_nistz256_sub_fromx
4222
; [rsp+320] <- [rsp+192] - [rsp+288] (subx leaves the result in registers;
; the four stores below write it out).
4223	mov	rax,QWORD[((192+0))+rsp]
4224	mov	rbp,QWORD[((192+8))+rsp]
4225	mov	rcx,QWORD[((192+16))+rsp]
4226	mov	r10,QWORD[((192+24))+rsp]
4227	lea	rdi,[320+rsp]
4228
4229	call	__ecp_nistz256_subx
4230
4231	mov	QWORD[rdi],r12
4232	mov	QWORD[8+rdi],r13
4233	mov	QWORD[16+rdi],r8
4234	mov	QWORD[24+rdi],r9
; [rsp+256] <- mul_montx([rsp+224], [rsp+128])
4235	mov	rdx,QWORD[128+rsp]
4236	lea	rbx,[128+rsp]
4237	mov	r9,QWORD[((0+224))+rsp]
4238	mov	r10,QWORD[((8+224))+rsp]
4239	lea	rsi,[((-128+224))+rsp]
4240	mov	r11,QWORD[((16+224))+rsp]
4241	mov	r12,QWORD[((24+224))+rsp]
4242	lea	rdi,[256+rsp]
4243	call	__ecp_nistz256_mul_montx
4244
; [rsp+320] <- mul_montx([rsp+64], [rsp+320])
4245	mov	rdx,QWORD[320+rsp]
4246	lea	rbx,[320+rsp]
4247	mov	r9,QWORD[((0+64))+rsp]
4248	mov	r10,QWORD[((8+64))+rsp]
4249	lea	rsi,[((-128+64))+rsp]
4250	mov	r11,QWORD[((16+64))+rsp]
4251	mov	r12,QWORD[((24+64))+rsp]
4252	lea	rdi,[320+rsp]
4253	call	__ecp_nistz256_mul_montx
4254
; [rsp+320] <- (product just computed) - [rsp+256]
4255	lea	rbx,[256+rsp]
4256	lea	rdi,[320+rsp]
4257	call	__ecp_nistz256_sub_fromx
4258
; DB encodes movq rdi,xmm0: recover the output pointer.
4259DB	102,72,15,126,199
4260
; out[64..96): pick [rsp+352] (computed), or [rsp+544] (second input) when
; xmm5 is set, then override with [rsp+448] (first input) when xmm4 is set.
4261	movdqa	xmm0,xmm5
4262	movdqa	xmm1,xmm5
4263	pandn	xmm0,XMMWORD[352+rsp]
4264	movdqa	xmm2,xmm5
4265	pandn	xmm1,XMMWORD[((352+16))+rsp]
4266	movdqa	xmm3,xmm5
4267	pand	xmm2,XMMWORD[544+rsp]
4268	pand	xmm3,XMMWORD[((544+16))+rsp]
4269	por	xmm2,xmm0
4270	por	xmm3,xmm1
4271
4272	movdqa	xmm0,xmm4
4273	movdqa	xmm1,xmm4
4274	pandn	xmm0,xmm2
4275	movdqa	xmm2,xmm4
4276	pandn	xmm1,xmm3
4277	movdqa	xmm3,xmm4
4278	pand	xmm2,XMMWORD[448+rsp]
4279	pand	xmm3,XMMWORD[((448+16))+rsp]
4280	por	xmm2,xmm0
4281	por	xmm3,xmm1
4282	movdqu	XMMWORD[64+rdi],xmm2
4283	movdqu	XMMWORD[80+rdi],xmm3
4284
; out[0..32): same selection between [rsp+288], [rsp+480] and [rsp+384].
4285	movdqa	xmm0,xmm5
4286	movdqa	xmm1,xmm5
4287	pandn	xmm0,XMMWORD[288+rsp]
4288	movdqa	xmm2,xmm5
4289	pandn	xmm1,XMMWORD[((288+16))+rsp]
4290	movdqa	xmm3,xmm5
4291	pand	xmm2,XMMWORD[480+rsp]
4292	pand	xmm3,XMMWORD[((480+16))+rsp]
4293	por	xmm2,xmm0
4294	por	xmm3,xmm1
4295
4296	movdqa	xmm0,xmm4
4297	movdqa	xmm1,xmm4
4298	pandn	xmm0,xmm2
4299	movdqa	xmm2,xmm4
4300	pandn	xmm1,xmm3
4301	movdqa	xmm3,xmm4
4302	pand	xmm2,XMMWORD[384+rsp]
4303	pand	xmm3,XMMWORD[((384+16))+rsp]
4304	por	xmm2,xmm0
4305	por	xmm3,xmm1
4306	movdqu	XMMWORD[rdi],xmm2
4307	movdqu	XMMWORD[16+rdi],xmm3
4308
; out[32..64): same selection between [rsp+320], [rsp+512] and [rsp+416].
4309	movdqa	xmm0,xmm5
4310	movdqa	xmm1,xmm5
4311	pandn	xmm0,XMMWORD[320+rsp]
4312	movdqa	xmm2,xmm5
4313	pandn	xmm1,XMMWORD[((320+16))+rsp]
4314	movdqa	xmm3,xmm5
4315	pand	xmm2,XMMWORD[512+rsp]
4316	pand	xmm3,XMMWORD[((512+16))+rsp]
4317	por	xmm2,xmm0
4318	por	xmm3,xmm1
4319
4320	movdqa	xmm0,xmm4
4321	movdqa	xmm1,xmm4
4322	pandn	xmm0,xmm2
4323	movdqa	xmm2,xmm4
4324	pandn	xmm1,xmm3
4325	movdqa	xmm3,xmm4
4326	pand	xmm2,XMMWORD[416+rsp]
4327	pand	xmm3,XMMWORD[((416+16))+rsp]
4328	por	xmm2,xmm0
4329	por	xmm3,xmm1
4330	movdqu	XMMWORD[32+rdi],xmm2
4331	movdqu	XMMWORD[48+rdi],xmm3
4332
4333$L$add_donex:
; Epilogue: rsi = rsp + 576 + 8 + 6*8, the stack pointer at function entry.
4334	lea	rsi,[((576+56))+rsp]
4335
4336	mov	r15,QWORD[((-48))+rsi]
4337
4338	mov	r14,QWORD[((-40))+rsi]
4339
4340	mov	r13,QWORD[((-32))+rsi]
4341
4342	mov	r12,QWORD[((-24))+rsi]
4343
4344	mov	rbx,QWORD[((-16))+rsi]
4345
4346	mov	rbp,QWORD[((-8))+rsi]
4347
4348	lea	rsp,[rsi]
4349
4350$L$point_addx_epilogue:
4351	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
4352	mov	rsi,QWORD[16+rsp]
4353	DB	0F3h,0C3h		;repret
4354
4355$L$SEH_end_p256_point_addx:
4356
4357ALIGN	32
; p256_point_add_affinex -- Win64 entry: rcx -> 96-byte output, rdx -> first
; input (96 bytes, three 32-byte fields), r8 -> second input (only its first
; 64 bytes, two 32-byte fields, are read).
;
; Frame: rbp, rbx, r12-r15 pushed, then 32*15+8 bytes giving fifteen 32-byte
; slots. Slots 320/352/384 hold a copy of the first input, 416/448 a copy of
; the second; slots 0..288 are scratch and intermediate results.
;
; Two broadcast masks drive the final branch-free selection:
;   xmm5 = all-ones iff bytes 64..95 of the first input are all zero
;   xmm4 = all-ones iff all 64 bytes read from the second input are zero
; Output = first input if xmm4 is set; else, if xmm5 is set, the second input
; with $L$ONE_mont as its third field; else the computed result.
;
; NOTE(review): __ecp_nistz256_sqr_montx / __ecp_nistz256_mul_montx are defined
; outside this chunk; "dst <- sqr_montx(a)" / "dst <- mul_montx(a, b)" below
; describe how the call is set up (rdi = dst, rsi = a - 128, rdx/rbx = b) and
; assume sqr_montx returns in r12,r13,r14,r15, mul_montx returns in
; r12,r13,r8,r9, and both preserve the xmm registers used here. Confirm
; against their definitions.
4358p256_point_add_affinex:
4359	mov	QWORD[8+rsp],rdi	;WIN64 prologue
4360	mov	QWORD[16+rsp],rsi
4361	mov	rax,rsp
4362$L$SEH_begin_p256_point_add_affinex:
4363	mov	rdi,rcx
4364	mov	rsi,rdx
4365	mov	rdx,r8
4366
4367
4368
4369$L$point_add_affinex:
4370	push	rbp
4371
4372	push	rbx
4373
4374	push	r12
4375
4376	push	r13
4377
4378	push	r14
4379
4380	push	r15
4381
4382	sub	rsp,32*15+8
4383
4384$L$add_affinex_body:
4385
; Copy the first input to [rsp+320..416), load its third field into
; rdx,r14,r15,r8 for the squaring below; rbx keeps the second input's address.
4386	movdqu	xmm0,XMMWORD[rsi]
4387	mov	rbx,rdx
4388	movdqu	xmm1,XMMWORD[16+rsi]
4389	movdqu	xmm2,XMMWORD[32+rsi]
4390	movdqu	xmm3,XMMWORD[48+rsi]
4391	movdqu	xmm4,XMMWORD[64+rsi]
4392	movdqu	xmm5,XMMWORD[80+rsi]
4393	mov	rdx,QWORD[((64+0))+rsi]
4394	mov	r14,QWORD[((64+8))+rsi]
4395	mov	r15,QWORD[((64+16))+rsi]
4396	mov	r8,QWORD[((64+24))+rsi]
4397	movdqa	XMMWORD[320+rsp],xmm0
4398	movdqa	XMMWORD[(320+16)+rsp],xmm1
4399	movdqa	XMMWORD[352+rsp],xmm2
4400	movdqa	XMMWORD[(352+16)+rsp],xmm3
4401	movdqa	XMMWORD[384+rsp],xmm4
4402	movdqa	XMMWORD[(384+16)+rsp],xmm5
4403	por	xmm5,xmm4
4404
; Copy the second input to [rsp+416..480) while OR-folding both the first
; input's third field (into xmm5) and the second input's 64 bytes (into xmm3).
; The DB line encodes movq xmm0,rdi: park the output pointer in xmm0.
4405	movdqu	xmm0,XMMWORD[rbx]
4406	pshufd	xmm3,xmm5,0xb1
4407	movdqu	xmm1,XMMWORD[16+rbx]
4408	movdqu	xmm2,XMMWORD[32+rbx]
4409	por	xmm5,xmm3
4410	movdqu	xmm3,XMMWORD[48+rbx]
4411	movdqa	XMMWORD[416+rsp],xmm0
4412	pshufd	xmm4,xmm5,0x1e
4413	movdqa	XMMWORD[(416+16)+rsp],xmm1
4414	por	xmm1,xmm0
4415DB	102,72,15,110,199
4416	movdqa	XMMWORD[448+rsp],xmm2
4417	movdqa	XMMWORD[(448+16)+rsp],xmm3
4418	por	xmm3,xmm2
4419	por	xmm5,xmm4
4420	pxor	xmm4,xmm4
4421	por	xmm3,xmm1
4422
; [rsp+32] <- sqr_montx(first input's third field)
4423	lea	rsi,[((64-128))+rsi]
4424	lea	rdi,[32+rsp]
4425	call	__ecp_nistz256_sqr_montx
4426
; Finish the masks (compare the folded dword with zero and broadcast it)
; while moving the square from r12,r13,r14,r15 into r9,r10,r11,r12 as the
; register operand of the multiplication; rdx = first limb of the second
; input's first field.
4427	pcmpeqd	xmm5,xmm4
4428	pshufd	xmm4,xmm3,0xb1
4429	mov	rdx,QWORD[rbx]
4430
4431	mov	r9,r12
4432	por	xmm4,xmm3
4433	pshufd	xmm5,xmm5,0
4434	pshufd	xmm3,xmm4,0x1e
4435	mov	r10,r13
4436	por	xmm4,xmm3
4437	pxor	xmm3,xmm3
4438	mov	r11,r14
4439	pcmpeqd	xmm4,xmm3
4440	pshufd	xmm4,xmm4,0
4441
; [rsp+0] <- mul_montx([rsp+32], second input's first field)
4442	lea	rsi,[((32-128))+rsp]
4443	mov	r12,r15
4444	lea	rdi,[rsp]
4445	call	__ecp_nistz256_mul_montx
4446
; [rsp+64] <- (product just computed) - [rsp+320]
4447	lea	rbx,[320+rsp]
4448	lea	rdi,[64+rsp]
4449	call	__ecp_nistz256_sub_fromx
4450
; [rsp+32] <- mul_montx([rsp+32], [rsp+384])
4451	mov	rdx,QWORD[384+rsp]
4452	lea	rbx,[384+rsp]
4453	mov	r9,QWORD[((0+32))+rsp]
4454	mov	r10,QWORD[((8+32))+rsp]
4455	lea	rsi,[((-128+32))+rsp]
4456	mov	r11,QWORD[((16+32))+rsp]
4457	mov	r12,QWORD[((24+32))+rsp]
4458	lea	rdi,[32+rsp]
4459	call	__ecp_nistz256_mul_montx
4460
; [rsp+288] <- mul_montx([rsp+64], [rsp+384])
4461	mov	rdx,QWORD[384+rsp]
4462	lea	rbx,[384+rsp]
4463	mov	r9,QWORD[((0+64))+rsp]
4464	mov	r10,QWORD[((8+64))+rsp]
4465	lea	rsi,[((-128+64))+rsp]
4466	mov	r11,QWORD[((16+64))+rsp]
4467	mov	r12,QWORD[((24+64))+rsp]
4468	lea	rdi,[288+rsp]
4469	call	__ecp_nistz256_mul_montx
4470
; [rsp+32] <- mul_montx([rsp+32], [rsp+448])
4471	mov	rdx,QWORD[448+rsp]
4472	lea	rbx,[448+rsp]
4473	mov	r9,QWORD[((0+32))+rsp]
4474	mov	r10,QWORD[((8+32))+rsp]
4475	lea	rsi,[((-128+32))+rsp]
4476	mov	r11,QWORD[((16+32))+rsp]
4477	mov	r12,QWORD[((24+32))+rsp]
4478	lea	rdi,[32+rsp]
4479	call	__ecp_nistz256_mul_montx
4480
; [rsp+96] <- (product just computed) - [rsp+352]
4481	lea	rbx,[352+rsp]
4482	lea	rdi,[96+rsp]
4483	call	__ecp_nistz256_sub_fromx
4484
; [rsp+128] <- sqr_montx([rsp+64])
4485	mov	rdx,QWORD[((0+64))+rsp]
4486	mov	r14,QWORD[((8+64))+rsp]
4487	lea	rsi,[((-128+64))+rsp]
4488	mov	r15,QWORD[((16+64))+rsp]
4489	mov	r8,QWORD[((24+64))+rsp]
4490	lea	rdi,[128+rsp]
4491	call	__ecp_nistz256_sqr_montx
4492
; [rsp+192] <- sqr_montx([rsp+96])
4493	mov	rdx,QWORD[((0+96))+rsp]
4494	mov	r14,QWORD[((8+96))+rsp]
4495	lea	rsi,[((-128+96))+rsp]
4496	mov	r15,QWORD[((16+96))+rsp]
4497	mov	r8,QWORD[((24+96))+rsp]
4498	lea	rdi,[192+rsp]
4499	call	__ecp_nistz256_sqr_montx
4500
; [rsp+160] <- mul_montx([rsp+64], [rsp+128])
4501	mov	rdx,QWORD[128+rsp]
4502	lea	rbx,[128+rsp]
4503	mov	r9,QWORD[((0+64))+rsp]
4504	mov	r10,QWORD[((8+64))+rsp]
4505	lea	rsi,[((-128+64))+rsp]
4506	mov	r11,QWORD[((16+64))+rsp]
4507	mov	r12,QWORD[((24+64))+rsp]
4508	lea	rdi,[160+rsp]
4509	call	__ecp_nistz256_mul_montx
4510
; [rsp+0] <- mul_montx([rsp+128], [rsp+320])
4511	mov	rdx,QWORD[320+rsp]
4512	lea	rbx,[320+rsp]
4513	mov	r9,QWORD[((0+128))+rsp]
4514	mov	r10,QWORD[((8+128))+rsp]
4515	lea	rsi,[((-128+128))+rsp]
4516	mov	r11,QWORD[((16+128))+rsp]
4517	mov	r12,QWORD[((24+128))+rsp]
4518	lea	rdi,[rsp]
4519	call	__ecp_nistz256_mul_montx
4520
4521
4522
4523
; Inline doubling of the product in r12,r13,r8,r9 with the same conditional
; subtraction of (-1, r14, 0, r15) that __ecp_nistz256_mul_by_2x performs,
; interleaved with loading [rsp+192] into rax,rbp,rcx,r10 for the subx call.
4524	xor	r11,r11
4525	add	r12,r12
4526	lea	rsi,[192+rsp]
4527	adc	r13,r13
4528	mov	rax,r12
4529	adc	r8,r8
4530	adc	r9,r9
4531	mov	rbp,r13
4532	adc	r11,0
4533
4534	sub	r12,-1
4535	mov	rcx,r8
4536	sbb	r13,r14
4537	sbb	r8,0
4538	mov	r10,r9
4539	sbb	r9,r15
4540	sbb	r11,0
4541
4542	cmovc	r12,rax
4543	mov	rax,QWORD[rsi]
4544	cmovc	r13,rbp
4545	mov	rbp,QWORD[8+rsi]
4546	cmovc	r8,rcx
4547	mov	rcx,QWORD[16+rsi]
4548	cmovc	r9,r10
4549	mov	r10,QWORD[24+rsi]
4550
; r12,r13,r8,r9 <- [rsp+192] - doubled value
4551	call	__ecp_nistz256_subx
4552
; [rsp+224] <- that - [rsp+160]
4553	lea	rbx,[160+rsp]
4554	lea	rdi,[224+rsp]
4555	call	__ecp_nistz256_sub_fromx
4556
; [rsp+64] <- [rsp+0] - [rsp+224] (subx leaves the result in registers; the
; four stores below write it out).
4557	mov	rax,QWORD[((0+0))+rsp]
4558	mov	rbp,QWORD[((0+8))+rsp]
4559	mov	rcx,QWORD[((0+16))+rsp]
4560	mov	r10,QWORD[((0+24))+rsp]
4561	lea	rdi,[64+rsp]
4562
4563	call	__ecp_nistz256_subx
4564
4565	mov	QWORD[rdi],r12
4566	mov	QWORD[8+rdi],r13
4567	mov	QWORD[16+rdi],r8
4568	mov	QWORD[24+rdi],r9
; [rsp+32] <- mul_montx([rsp+160], [rsp+352])
4569	mov	rdx,QWORD[352+rsp]
4570	lea	rbx,[352+rsp]
4571	mov	r9,QWORD[((0+160))+rsp]
4572	mov	r10,QWORD[((8+160))+rsp]
4573	lea	rsi,[((-128+160))+rsp]
4574	mov	r11,QWORD[((16+160))+rsp]
4575	mov	r12,QWORD[((24+160))+rsp]
4576	lea	rdi,[32+rsp]
4577	call	__ecp_nistz256_mul_montx
4578
; [rsp+64] <- mul_montx([rsp+64], [rsp+96])
4579	mov	rdx,QWORD[96+rsp]
4580	lea	rbx,[96+rsp]
4581	mov	r9,QWORD[((0+64))+rsp]
4582	mov	r10,QWORD[((8+64))+rsp]
4583	lea	rsi,[((-128+64))+rsp]
4584	mov	r11,QWORD[((16+64))+rsp]
4585	mov	r12,QWORD[((24+64))+rsp]
4586	lea	rdi,[64+rsp]
4587	call	__ecp_nistz256_mul_montx
4588
; [rsp+256] <- (product just computed) - [rsp+32]
4589	lea	rbx,[32+rsp]
4590	lea	rdi,[256+rsp]
4591	call	__ecp_nistz256_sub_fromx
4592
; DB encodes movq rdi,xmm0: recover the output pointer.
4593DB	102,72,15,126,199
4594
; out[64..96): pick [rsp+288] (computed), or $L$ONE_mont when xmm5 is set,
; then override with [rsp+384] (first input) when xmm4 is set.
4595	movdqa	xmm0,xmm5
4596	movdqa	xmm1,xmm5
4597	pandn	xmm0,XMMWORD[288+rsp]
4598	movdqa	xmm2,xmm5
4599	pandn	xmm1,XMMWORD[((288+16))+rsp]
4600	movdqa	xmm3,xmm5
4601	pand	xmm2,XMMWORD[$L$ONE_mont]
4602	pand	xmm3,XMMWORD[(($L$ONE_mont+16))]
4603	por	xmm2,xmm0
4604	por	xmm3,xmm1
4605
4606	movdqa	xmm0,xmm4
4607	movdqa	xmm1,xmm4
4608	pandn	xmm0,xmm2
4609	movdqa	xmm2,xmm4
4610	pandn	xmm1,xmm3
4611	movdqa	xmm3,xmm4
4612	pand	xmm2,XMMWORD[384+rsp]
4613	pand	xmm3,XMMWORD[((384+16))+rsp]
4614	por	xmm2,xmm0
4615	por	xmm3,xmm1
4616	movdqu	XMMWORD[64+rdi],xmm2
4617	movdqu	XMMWORD[80+rdi],xmm3
4618
; out[0..32): same selection between [rsp+224], [rsp+416] and [rsp+320].
4619	movdqa	xmm0,xmm5
4620	movdqa	xmm1,xmm5
4621	pandn	xmm0,XMMWORD[224+rsp]
4622	movdqa	xmm2,xmm5
4623	pandn	xmm1,XMMWORD[((224+16))+rsp]
4624	movdqa	xmm3,xmm5
4625	pand	xmm2,XMMWORD[416+rsp]
4626	pand	xmm3,XMMWORD[((416+16))+rsp]
4627	por	xmm2,xmm0
4628	por	xmm3,xmm1
4629
4630	movdqa	xmm0,xmm4
4631	movdqa	xmm1,xmm4
4632	pandn	xmm0,xmm2
4633	movdqa	xmm2,xmm4
4634	pandn	xmm1,xmm3
4635	movdqa	xmm3,xmm4
4636	pand	xmm2,XMMWORD[320+rsp]
4637	pand	xmm3,XMMWORD[((320+16))+rsp]
4638	por	xmm2,xmm0
4639	por	xmm3,xmm1
4640	movdqu	XMMWORD[rdi],xmm2
4641	movdqu	XMMWORD[16+rdi],xmm3
4642
; out[32..64): same selection between [rsp+256], [rsp+448] and [rsp+352].
4643	movdqa	xmm0,xmm5
4644	movdqa	xmm1,xmm5
4645	pandn	xmm0,XMMWORD[256+rsp]
4646	movdqa	xmm2,xmm5
4647	pandn	xmm1,XMMWORD[((256+16))+rsp]
4648	movdqa	xmm3,xmm5
4649	pand	xmm2,XMMWORD[448+rsp]
4650	pand	xmm3,XMMWORD[((448+16))+rsp]
4651	por	xmm2,xmm0
4652	por	xmm3,xmm1
4653
4654	movdqa	xmm0,xmm4
4655	movdqa	xmm1,xmm4
4656	pandn	xmm0,xmm2
4657	movdqa	xmm2,xmm4
4658	pandn	xmm1,xmm3
4659	movdqa	xmm3,xmm4
4660	pand	xmm2,XMMWORD[352+rsp]
4661	pand	xmm3,XMMWORD[((352+16))+rsp]
4662	por	xmm2,xmm0
4663	por	xmm3,xmm1
4664	movdqu	XMMWORD[32+rdi],xmm2
4665	movdqu	XMMWORD[48+rdi],xmm3
4666
; Epilogue: rsi = rsp + 480 + 8 + 6*8, the stack pointer at function entry.
4667	lea	rsi,[((480+56))+rsp]
4668
4669	mov	r15,QWORD[((-48))+rsi]
4670
4671	mov	r14,QWORD[((-40))+rsi]
4672
4673	mov	r13,QWORD[((-32))+rsi]
4674
4675	mov	r12,QWORD[((-24))+rsi]
4676
4677	mov	rbx,QWORD[((-16))+rsi]
4678
4679	mov	rbp,QWORD[((-8))+rsi]
4680
4681	lea	rsp,[rsi]
4682
4683$L$add_affinex_epilogue:
4684	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
4685	mov	rsi,QWORD[16+rsp]
4686	DB	0F3h,0C3h		;repret
4687
4688$L$SEH_end_p256_point_add_affinex:
4689EXTERN	__imp_RtlVirtualUnwind
4690
4691
4692ALIGN	16
; short_handler -- Win64 language-specific exception handler for functions
; whose only frame is "push r12; push r13" ($L$SEH_info_nistz256_neg refers to
; it). It works out what the stack pointer was at function entry, patches the
; saved registers back into the CONTEXT record and then continues in
; $L$common_seh_tail (inside full_handler).
;   In:  r8 -> CONTEXT record, r9 -> DISPATCHER_CONTEXT.
;   HandlerData (two DWORDs in .xdata): image-relative addresses of the
;   function's body label and epilogue label.
; NOTE(review): the numeric offsets used below are read as the standard Win64
; layouts (CONTEXT: 120=Rax, 152=Rsp, 216=R12, 224=R13, 248=Rip;
; DISPATCHER_CONTEXT: 8=ImageBase, 56=HandlerData) -- confirm against winnt.h.
4693short_handler:
4694	push	rsi
4695	push	rdi
4696	push	rbx
4697	push	rbp
4698	push	r12
4699	push	r13
4700	push	r14
4701	push	r15
4702	pushfq
4703	sub	rsp,64
4704
; rax = context Rax (the covered functions copy rsp into rax in their
; prologue), rbx = faulting Rip.
4705	mov	rax,QWORD[120+r8]
4706	mov	rbx,QWORD[248+r8]
4707
4708	mov	rsi,QWORD[8+r9]
4709	mov	r11,QWORD[56+r9]
4710
; Rip before the body label: nothing pushed yet, rax is already the frame.
4711	mov	r10d,DWORD[r11]
4712	lea	r10,[r10*1+rsi]
4713	cmp	rbx,r10
4714	jb	NEAR $L$common_seh_tail
4715
4716	mov	rax,QWORD[152+r8]
4717
; Rip at or past the epilogue label: registers already restored, use Rsp.
4718	mov	r10d,DWORD[4+r11]
4719	lea	r10,[r10*1+rsi]
4720	cmp	rbx,r10
4721	jae	NEAR $L$common_seh_tail
4722
; Inside the body: skip the two pushed qwords and recover r12/r13 from them.
4723	lea	rax,[16+rax]
4724
4725	mov	r12,QWORD[((-8))+rax]
4726	mov	r13,QWORD[((-16))+rax]
4727	mov	QWORD[216+r8],r12
4728	mov	QWORD[224+r8],r13
4729
4730	jmp	NEAR $L$common_seh_tail
4731
4732
4733
4734ALIGN	16
; full_handler -- Win64 language-specific exception handler for functions that
; push rbp, rbx, r12-r15 and then allocate a fixed frame. It recovers those
; six registers into the CONTEXT record, then falls into $L$common_seh_tail,
; which short_handler also jumps to.
;   In:  r8 -> CONTEXT record, r9 -> DISPATCHER_CONTEXT.
;   HandlerData (three DWORDs in .xdata): image-relative addresses of the body
;   label and the epilogue label, followed by the byte offset from the body's
;   rsp to the stack pointer at function entry.
;   Out: eax = 1.
; NOTE(review): the numeric offsets used below are read as the standard Win64
; layouts (CONTEXT: 120=Rax, 144=Rbx, 152=Rsp, 160=Rbp, 168=Rsi, 176=Rdi,
; 216..240=R12..R15, 248=Rip; DISPATCHER_CONTEXT: 0=ControlPc, 8=ImageBase,
; 16=FunctionEntry, 24=EstablisherFrame, 40=ContextRecord, 56=HandlerData)
; -- confirm against winnt.h.
4735full_handler:
4736	push	rsi
4737	push	rdi
4738	push	rbx
4739	push	rbp
4740	push	r12
4741	push	r13
4742	push	r14
4743	push	r15
4744	pushfq
4745	sub	rsp,64
4746
; rax = context Rax (entry rsp while still in the prologue), rbx = Rip.
4747	mov	rax,QWORD[120+r8]
4748	mov	rbx,QWORD[248+r8]
4749
4750	mov	rsi,QWORD[8+r9]
4751	mov	r11,QWORD[56+r9]
4752
; Rip before the body label: still in the prologue, rax is the frame.
4753	mov	r10d,DWORD[r11]
4754	lea	r10,[r10*1+rsi]
4755	cmp	rbx,r10
4756	jb	NEAR $L$common_seh_tail
4757
4758	mov	rax,QWORD[152+r8]
4759
; Rip at or past the epilogue label: registers already restored, use Rsp.
4760	mov	r10d,DWORD[4+r11]
4761	lea	r10,[r10*1+rsi]
4762	cmp	rbx,r10
4763	jae	NEAR $L$common_seh_tail
4764
; Inside the body: add the frame size from HandlerData[2] to Rsp and reload
; the six pushed registers from just below that address.
4765	mov	r10d,DWORD[8+r11]
4766	lea	rax,[r10*1+rax]
4767
4768	mov	rbp,QWORD[((-8))+rax]
4769	mov	rbx,QWORD[((-16))+rax]
4770	mov	r12,QWORD[((-24))+rax]
4771	mov	r13,QWORD[((-32))+rax]
4772	mov	r14,QWORD[((-40))+rax]
4773	mov	r15,QWORD[((-48))+rax]
4774	mov	QWORD[144+r8],rbx
4775	mov	QWORD[160+r8],rbp
4776	mov	QWORD[216+r8],r12
4777	mov	QWORD[224+r8],r13
4778	mov	QWORD[232+r8],r14
4779	mov	QWORD[240+r8],r15
4780
4781$L$common_seh_tail:
; rax is the stack pointer at function entry. The WIN64 prologues store the
; caller's rdi/rsi at [8+rsp]/[16+rsp]; put them and rax back into the context.
4782	mov	rdi,QWORD[8+rax]
4783	mov	rsi,QWORD[16+rax]
4784	mov	QWORD[152+r8],rax
4785	mov	QWORD[168+r8],rsi
4786	mov	QWORD[176+r8],rdi
4787
; Copy 154 qwords from the patched context (r8) to the dispatcher's context
; record ([40+r9]). The DD stores the bytes FC F3 48 A5 = cld; rep movsq.
4788	mov	rdi,QWORD[40+r9]
4789	mov	rsi,r8
4790	mov	ecx,154
4791	DD	0xa548f3fc
4792
; Call RtlVirtualUnwind with rcx = 0, rdx/r8/r9 loaded from the dispatcher
; context at offsets 8/0/16, and four stack arguments: [40+disp], &disp[56],
; &disp[24] and 0.
4793	mov	rsi,r9
4794	xor	rcx,rcx
4795	mov	rdx,QWORD[8+rsi]
4796	mov	r8,QWORD[rsi]
4797	mov	r9,QWORD[16+rsi]
4798	mov	r10,QWORD[40+rsi]
4799	lea	r11,[56+rsi]
4800	lea	r12,[24+rsi]
4801	mov	QWORD[32+rsp],r10
4802	mov	QWORD[40+rsp],r11
4803	mov	QWORD[48+rsp],r12
4804	mov	QWORD[56+rsp],rcx
4805	call	QWORD[__imp_RtlVirtualUnwind]
4806
; Return 1 (NOTE(review): presumably ExceptionContinueSearch) and restore the
; registers saved on entry in reverse push order.
4807	mov	eax,1
4808	add	rsp,64
4809	popfq
4810	pop	r15
4811	pop	r14
4812	pop	r13
4813	pop	r12
4814	pop	rbp
4815	pop	rbx
4816	pop	rdi
4817	pop	rsi
4818	DB	0F3h,0C3h		;repret
4819
4820
; .pdata: one three-DWORD record per function that has unwind information.
; Each record holds the image-relative ("wrt ..imagebase") addresses of the
; function's $L$SEH_begin_* label, its $L$SEH_end_* label and the
; $L$SEH_info_* record in .xdata that names the handler to use.
; nistz256_select_w5/w7 share one info record, as do the two avx2 variants.
4821section	.pdata rdata align=4
4822ALIGN	4
4823	DD	$L$SEH_begin_nistz256_neg wrt ..imagebase
4824	DD	$L$SEH_end_nistz256_neg wrt ..imagebase
4825	DD	$L$SEH_info_nistz256_neg wrt ..imagebase
4826
4827	DD	$L$SEH_begin_p256_scalar_mul_mont wrt ..imagebase
4828	DD	$L$SEH_end_p256_scalar_mul_mont wrt ..imagebase
4829	DD	$L$SEH_info_p256_scalar_mul_mont wrt ..imagebase
4830
4831	DD	$L$SEH_begin_p256_scalar_sqr_rep_mont wrt ..imagebase
4832	DD	$L$SEH_end_p256_scalar_sqr_rep_mont wrt ..imagebase
4833	DD	$L$SEH_info_p256_scalar_sqr_rep_mont wrt ..imagebase
4834	DD	$L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase
4835	DD	$L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase
4836	DD	$L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase
4837
4838	DD	$L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase
4839	DD	$L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase
4840	DD	$L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase
4841	DD	$L$SEH_begin_p256_mul_mont wrt ..imagebase
4842	DD	$L$SEH_end_p256_mul_mont wrt ..imagebase
4843	DD	$L$SEH_info_p256_mul_mont wrt ..imagebase
4844
4845	DD	$L$SEH_begin_p256_sqr_mont wrt ..imagebase
4846	DD	$L$SEH_end_p256_sqr_mont wrt ..imagebase
4847	DD	$L$SEH_info_p256_sqr_mont wrt ..imagebase
4848
4849	DD	$L$SEH_begin_nistz256_select_w5 wrt ..imagebase
4850	DD	$L$SEH_end_nistz256_select_w5 wrt ..imagebase
4851	DD	$L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase
4852
4853	DD	$L$SEH_begin_nistz256_select_w7 wrt ..imagebase
4854	DD	$L$SEH_end_nistz256_select_w7 wrt ..imagebase
4855	DD	$L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase
4856	DD	$L$SEH_begin_ecp_nistz256_avx2_select_w5 wrt ..imagebase
4857	DD	$L$SEH_end_ecp_nistz256_avx2_select_w5 wrt ..imagebase
4858	DD	$L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase
4859
4860	DD	$L$SEH_begin_ecp_nistz256_avx2_select_w7 wrt ..imagebase
4861	DD	$L$SEH_end_ecp_nistz256_avx2_select_w7 wrt ..imagebase
4862	DD	$L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase
4863	DD	$L$SEH_begin_p256_point_double wrt ..imagebase
4864	DD	$L$SEH_end_p256_point_double wrt ..imagebase
4865	DD	$L$SEH_info_p256_point_double wrt ..imagebase
4866
4867	DD	$L$SEH_begin_p256_point_add wrt ..imagebase
4868	DD	$L$SEH_end_p256_point_add wrt ..imagebase
4869	DD	$L$SEH_info_p256_point_add wrt ..imagebase
4870
4871	DD	$L$SEH_begin_p256_point_add_affine wrt ..imagebase
4872	DD	$L$SEH_end_p256_point_add_affine wrt ..imagebase
4873	DD	$L$SEH_info_p256_point_add_affine wrt ..imagebase
4874	DD	$L$SEH_begin_p256_point_doublex wrt ..imagebase
4875	DD	$L$SEH_end_p256_point_doublex wrt ..imagebase
4876	DD	$L$SEH_info_p256_point_doublex wrt ..imagebase
4877
4878	DD	$L$SEH_begin_p256_point_addx wrt ..imagebase
4879	DD	$L$SEH_end_p256_point_addx wrt ..imagebase
4880	DD	$L$SEH_info_p256_point_addx wrt ..imagebase
4881
4882	DD	$L$SEH_begin_p256_point_add_affinex wrt ..imagebase
4883	DD	$L$SEH_end_p256_point_add_affinex wrt ..imagebase
4884	DD	$L$SEH_info_p256_point_add_affinex wrt ..imagebase
4885
; Unwind records (.xdata); the $L$SEH_info_* labels defined here are the
; ones named by the .pdata entries.
4886section	.xdata rdata align=8
4887ALIGN	8
; Unwind record for nistz256_neg.
; Header bytes 9,0,0,0: version 1 with flags 1 (an exception handler is
; attached); prolog size, unwind-code count and frame-register byte are 0.
; The handler RVA follows, then two DWORDs of handler-specific data.
; NOTE(review): short_handler is defined outside this chunk; presumably it
; uses the body/epilogue labels to decide whether the pushed registers
; still need restoring -- confirm against the handler.
4888$L$SEH_info_nistz256_neg:
4889DB	9,0,0,0
4890	DD	short_handler wrt ..imagebase	; handler RVA
4891	DD	$L$neg_body wrt ..imagebase,$L$neg_epilogue wrt ..imagebase	; handler data: body / epilogue labels
; Handler-based unwind records for the four scalar (group-order) Montgomery
; routines. All four have the same layout:
;   DB 9,0,0,0  - version 1, flags 1 (exception handler attached); prolog
;                 size, unwind-code count and frame-register byte are 0
;   DD handler  - image-relative address of full_handler
;   DD body,epi - image-relative body and epilogue labels of the routine
;   DD 48,0     - two further handler-data DWORDs
; NOTE(review): full_handler is defined outside this chunk; the 48 is
; presumably the frame size it uses to find the saved registers -- confirm
; against the handler and each routine's prologue.
4892$L$SEH_info_p256_scalar_mul_mont:
4893DB	9,0,0,0
4894	DD	full_handler wrt ..imagebase
4895	DD	$L$ord_mul_body wrt ..imagebase,$L$ord_mul_epilogue wrt ..imagebase
4896	DD	48,0
4897$L$SEH_info_p256_scalar_sqr_rep_mont:
4898DB	9,0,0,0
4899	DD	full_handler wrt ..imagebase
4900	DD	$L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase
4901	DD	48,0
4902$L$SEH_info_ecp_nistz256_ord_mul_montx:
4903DB	9,0,0,0
4904	DD	full_handler wrt ..imagebase
4905	DD	$L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase
4906	DD	48,0
4907$L$SEH_info_ecp_nistz256_ord_sqr_montx:
4908DB	9,0,0,0
4909	DD	full_handler wrt ..imagebase
4910	DD	$L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase
4911	DD	48,0
; Handler-based unwind records for p256_mul_mont and p256_sqr_mont.
; Layout of each record: header bytes 9,0,0,0 (version 1, flags 1 = handler
; attached, no unwind codes), the image-relative address of full_handler,
; the routine's body/epilogue labels, and the DWORD pair 48,0.
; NOTE(review): how full_handler interprets the 48 (presumably a frame size)
; cannot be seen from this chunk -- confirm against the handler.
4912$L$SEH_info_p256_mul_mont:
4913DB	9,0,0,0
4914	DD	full_handler wrt ..imagebase
4915	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
4916	DD	48,0
4917$L$SEH_info_p256_sqr_mont:
4918DB	9,0,0,0
4919	DD	full_handler wrt ..imagebase
4920	DD	$L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
4921	DD	48,0
; Raw unwind-code record (no handler) used by the .pdata entries of both
; nistz256_select_w5 and nistz256_select_w7.
; Each row after the header is: prolog offset, (operation info << 4) | opcode,
; then a 16-bit little-endian operand.
;   opcode 8 = UWOP_SAVE_XMM128: operand is the save-slot offset / 16
;   opcode 1 = UWOP_ALLOC_LARGE with info 0: operand is the allocation size / 8
; Rows are listed from the highest prolog offset down to the lowest.
4922$L$SEH_info_ecp_nistz256_select_wX:
4923DB	0x01,0x33,0x16,0x00	; version 1, flags 0; prolog size 0x33; 0x16 = 22 code slots; no frame register
4924DB	0x33,0xf8,0x09,0x00	; save xmm15 at offset 0x90
4925DB	0x2e,0xe8,0x08,0x00	; save xmm14 at offset 0x80
4926DB	0x29,0xd8,0x07,0x00	; save xmm13 at offset 0x70
4927DB	0x24,0xc8,0x06,0x00	; save xmm12 at offset 0x60
4928DB	0x1f,0xb8,0x05,0x00	; save xmm11 at offset 0x50
4929DB	0x1a,0xa8,0x04,0x00	; save xmm10 at offset 0x40
4930DB	0x15,0x98,0x03,0x00	; save xmm9 at offset 0x30
4931DB	0x10,0x88,0x02,0x00	; save xmm8 at offset 0x20
4932DB	0x0c,0x78,0x01,0x00	; save xmm7 at offset 0x10
4933DB	0x08,0x68,0x00,0x00	; save xmm6 at offset 0x00
4934DB	0x04,0x01,0x15,0x00	; stack allocation of 0x15*8 = 0xa8 bytes
4935ALIGN	8
; Raw unwind-code record (no handler) used by the .pdata entries of both
; ecp_nistz256_avx2_select_w5 and ecp_nistz256_avx2_select_w7.
; Each row after the header is: prolog offset, (operation info << 4) | opcode,
; then a 16-bit little-endian operand where the opcode takes one.
;   opcode 8 = UWOP_SAVE_XMM128: operand is the save-slot offset / 16
;   opcode 1 = UWOP_ALLOC_LARGE with info 0: operand is the allocation size / 8
;   opcode 3 = UWOP_SET_FPREG: establishes the frame register named in the header
; Rows are listed from the highest prolog offset down to the lowest.
4936$L$SEH_info_ecp_nistz256_avx2_select_wX:
4937DB	0x01,0x36,0x17,0x0b	; version 1, flags 0; prolog size 0x36; 0x17 = 23 code slots; frame register 11 (r11), frame offset 0
4938DB	0x36,0xf8,0x09,0x00	; save xmm15 at offset 0x90
4939DB	0x31,0xe8,0x08,0x00	; save xmm14 at offset 0x80
4940DB	0x2c,0xd8,0x07,0x00	; save xmm13 at offset 0x70
4941DB	0x27,0xc8,0x06,0x00	; save xmm12 at offset 0x60
4942DB	0x22,0xb8,0x05,0x00	; save xmm11 at offset 0x50
4943DB	0x1d,0xa8,0x04,0x00	; save xmm10 at offset 0x40
4944DB	0x18,0x98,0x03,0x00	; save xmm9 at offset 0x30
4945DB	0x13,0x88,0x02,0x00	; save xmm8 at offset 0x20
4946DB	0x0e,0x78,0x01,0x00	; save xmm7 at offset 0x10
4947DB	0x09,0x68,0x00,0x00	; save xmm6 at offset 0x00
4948DB	0x04,0x01,0x15,0x00	; stack allocation of 0x15*8 = 0xa8 bytes
4949DB	0x00,0xb3,0x00,0x00	; set frame register (one slot); trailing 0,0 pads the odd slot count to even
4950ALIGN	8
; Handler-based unwind records for p256_point_double, p256_point_add and
; p256_point_add_affine. Layout of each record:
;   DB 9,0,0,0   - version 1, flags 1 (exception handler attached); prolog
;                  size, unwind-code count and frame-register byte are 0
;   DD handler   - image-relative address of full_handler
;   DD body,epi  - image-relative body and epilogue labels of the routine
;   DD 32*N+56,0 - two further handler-data DWORDs (N = 5, 18, 15)
; NOTE(review): full_handler is defined outside this chunk; 32*N+56 is
; presumably each routine's stack-frame size -- confirm against the handler
; and the routines' prologues.
4951$L$SEH_info_p256_point_double:
4952DB	9,0,0,0
4953	DD	full_handler wrt ..imagebase
4954	DD	$L$point_doubleq_body wrt ..imagebase,$L$point_doubleq_epilogue wrt ..imagebase
4955	DD	32*5+56,0	; = 216
4956$L$SEH_info_p256_point_add:
4957DB	9,0,0,0
4958	DD	full_handler wrt ..imagebase
4959	DD	$L$point_addq_body wrt ..imagebase,$L$point_addq_epilogue wrt ..imagebase
4960	DD	32*18+56,0	; = 632
4961$L$SEH_info_p256_point_add_affine:
4962DB	9,0,0,0
4963	DD	full_handler wrt ..imagebase
4964	DD	$L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase
4965	DD	32*15+56,0	; = 536
4966ALIGN	8
; Handler-based unwind records for p256_point_doublex, p256_point_addx and
; p256_point_add_affinex. Each record is: header bytes 9,0,0,0 (version 1,
; flags 1 = handler attached, no unwind codes), the image-relative address
; of full_handler, the routine's body/epilogue labels, and the DWORD pair
; 32*N+56,0 with N = 5, 18, 15 -- the same three values used by the
; p256_point_double / p256_point_add / p256_point_add_affine records.
; NOTE(review): how full_handler uses 32*N+56 (presumably the stack-frame
; size) cannot be seen from this chunk -- confirm against the handler.
4967$L$SEH_info_p256_point_doublex:
4968DB	9,0,0,0
4969	DD	full_handler wrt ..imagebase
4970	DD	$L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase
4971	DD	32*5+56,0	; = 216
4972$L$SEH_info_p256_point_addx:
4973DB	9,0,0,0
4974	DD	full_handler wrt ..imagebase
4975	DD	$L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase
4976	DD	32*18+56,0	; = 632
4977$L$SEH_info_p256_point_add_affinex:
4978DB	9,0,0,0
4979	DD	full_handler wrt ..imagebase
4980	DD	$L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase
4981	DD	32*15+56,0	; = 536
4982