; 32-bit x86 Comba multiplication and squaring routines (NASM/Yasm syntax).
;
; Section setup depends on the assembler output format:
;   obj   - 32-bit "code" segment, 64-byte aligned
;   win32 - ".text", 64-byte aligned, plus the @feat.00 symbol (see below)
;   other - plain ".text"
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes @feat.00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
; Any other assembler (e.g. NASM) gets the symbol defined by hand; the
; leading '$' makes the assembler treat "@feat.00" as an identifier.
$@feat.00 equ 1
%endif
section	.text	code align=64
%else
section	.text	code
%endif
;-----------------------------------------------------------------------
; _bn_mul_comba8 - r[0..15] = a[0..7] * b[0..7]  (32-bit words, unsigned)
;
; Presumably matches a C prototype of the form
;     void bn_mul_comba8(uint32_t *r, const uint32_t *a, const uint32_t *b);
; (confirm against the C declaration).
;
; In:    [esp+4]  = r   destination, 16 dwords are written
;        [esp+8]  = a   8 dwords are read
;        [esp+12] = b   8 dwords are read
;        Arguments stay on the stack; `ret` pops nothing.
; Regs:  esi = a, edi = b; the r pointer is reloaded from [20+esp]
;        (its position after the four pushes) whenever a word is stored.
;        ebx/ecx/ebp form a rotating three-word column accumulator.
; Saved: esi, edi, ebp, ebx (pushed on entry, popped before ret)
; Clobb: eax, ecx, edx, flags
; Note:  low words of r are stored while a[] and b[] are still being
;        re-read, so r must not overlap a or b.
;
; Column-wise (Comba) schedule: for each result word k every product
; a[i]*b[j] with i+j == k is added into the accumulator, the low
; accumulator word is stored to r[k], and the register roles rotate.
;-----------------------------------------------------------------------

; mul_comba8_step c0, c1, c2, next_eax, next_edx
;   (c2:c1:c0) += eax * edx, interleaved with the loads that set up the
;   operands of the following step (or the r pointer before a store).
;   `mov` does not touch the flags, so the add/adc carry chain is intact.
%macro mul_comba8_step 5
	mul	edx
	add	%1,eax
	mov	eax,%4
	adc	%2,edx
	mov	edx,%5
	adc	%3,0
%endmacro

global	_bn_mul_comba8
align	16
_bn_mul_comba8:
L$_bn_mul_comba8_begin:
	push	esi
	mov	esi,DWORD [12+esp]		; esi = a
	push	edi
	mov	edi,DWORD [20+esp]		; edi = b
	push	ebp
	push	ebx
	xor	ebx,ebx
	mov	eax,DWORD [esi]			; eax = a[0]
	xor	ecx,ecx
	mov	edx,DWORD [edi]			; edx = b[0]
	; ################## Calculate word 0
	xor	ebp,ebp
	mul_comba8_step	ebx,ecx,ebp,DWORD [20+esp],DWORD [edi]		; a[0]*b[0]
	mov	DWORD [eax],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ################## Calculate word 1
	xor	ebx,ebx
	mul_comba8_step	ecx,ebp,ebx,DWORD [esi],DWORD [4+edi]		; a[1]*b[0]
	mul_comba8_step	ecx,ebp,ebx,DWORD [20+esp],DWORD [edi]		; a[0]*b[1]
	mov	DWORD [4+eax],ecx
	mov	eax,DWORD [8+esi]
	; saved r[1]
	; ################## Calculate word 2
	xor	ecx,ecx
	mul_comba8_step	ebp,ebx,ecx,DWORD [4+esi],DWORD [4+edi]		; a[2]*b[0]
	mul_comba8_step	ebp,ebx,ecx,DWORD [esi],DWORD [8+edi]		; a[1]*b[1]
	mul_comba8_step	ebp,ebx,ecx,DWORD [20+esp],DWORD [edi]		; a[0]*b[2]
	mov	DWORD [8+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ################## Calculate word 3
	xor	ebp,ebp
	mul_comba8_step	ebx,ecx,ebp,DWORD [8+esi],DWORD [4+edi]		; a[3]*b[0]
	mul_comba8_step	ebx,ecx,ebp,DWORD [4+esi],DWORD [8+edi]		; a[2]*b[1]
	mul_comba8_step	ebx,ecx,ebp,DWORD [esi],DWORD [12+edi]		; a[1]*b[2]
	mul_comba8_step	ebx,ecx,ebp,DWORD [20+esp],DWORD [edi]		; a[0]*b[3]
	mov	DWORD [12+eax],ebx
	mov	eax,DWORD [16+esi]
	; saved r[3]
	; ################## Calculate word 4
	xor	ebx,ebx
	mul_comba8_step	ecx,ebp,ebx,DWORD [12+esi],DWORD [4+edi]	; a[4]*b[0]
	mul_comba8_step	ecx,ebp,ebx,DWORD [8+esi],DWORD [8+edi]		; a[3]*b[1]
	mul_comba8_step	ecx,ebp,ebx,DWORD [4+esi],DWORD [12+edi]	; a[2]*b[2]
	mul_comba8_step	ecx,ebp,ebx,DWORD [esi],DWORD [16+edi]		; a[1]*b[3]
	mul_comba8_step	ecx,ebp,ebx,DWORD [20+esp],DWORD [edi]		; a[0]*b[4]
	mov	DWORD [16+eax],ecx
	mov	eax,DWORD [20+esi]
	; saved r[4]
	; ################## Calculate word 5
	xor	ecx,ecx
	mul_comba8_step	ebp,ebx,ecx,DWORD [16+esi],DWORD [4+edi]	; a[5]*b[0]
	mul_comba8_step	ebp,ebx,ecx,DWORD [12+esi],DWORD [8+edi]	; a[4]*b[1]
	mul_comba8_step	ebp,ebx,ecx,DWORD [8+esi],DWORD [12+edi]	; a[3]*b[2]
	mul_comba8_step	ebp,ebx,ecx,DWORD [4+esi],DWORD [16+edi]	; a[2]*b[3]
	mul_comba8_step	ebp,ebx,ecx,DWORD [esi],DWORD [20+edi]		; a[1]*b[4]
	mul_comba8_step	ebp,ebx,ecx,DWORD [20+esp],DWORD [edi]		; a[0]*b[5]
	mov	DWORD [20+eax],ebp
	mov	eax,DWORD [24+esi]
	; saved r[5]
	; ################## Calculate word 6
	xor	ebp,ebp
	mul_comba8_step	ebx,ecx,ebp,DWORD [20+esi],DWORD [4+edi]	; a[6]*b[0]
	mul_comba8_step	ebx,ecx,ebp,DWORD [16+esi],DWORD [8+edi]	; a[5]*b[1]
	mul_comba8_step	ebx,ecx,ebp,DWORD [12+esi],DWORD [12+edi]	; a[4]*b[2]
	mul_comba8_step	ebx,ecx,ebp,DWORD [8+esi],DWORD [16+edi]	; a[3]*b[3]
	mul_comba8_step	ebx,ecx,ebp,DWORD [4+esi],DWORD [20+edi]	; a[2]*b[4]
	mul_comba8_step	ebx,ecx,ebp,DWORD [esi],DWORD [24+edi]		; a[1]*b[5]
	mul_comba8_step	ebx,ecx,ebp,DWORD [20+esp],DWORD [edi]		; a[0]*b[6]
	mov	DWORD [24+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[6]
	; ################## Calculate word 7
	xor	ebx,ebx
	mul_comba8_step	ecx,ebp,ebx,DWORD [24+esi],DWORD [4+edi]	; a[7]*b[0]
	mul_comba8_step	ecx,ebp,ebx,DWORD [20+esi],DWORD [8+edi]	; a[6]*b[1]
	mul_comba8_step	ecx,ebp,ebx,DWORD [16+esi],DWORD [12+edi]	; a[5]*b[2]
	mul_comba8_step	ecx,ebp,ebx,DWORD [12+esi],DWORD [16+edi]	; a[4]*b[3]
	mul_comba8_step	ecx,ebp,ebx,DWORD [8+esi],DWORD [20+edi]	; a[3]*b[4]
	mul_comba8_step	ecx,ebp,ebx,DWORD [4+esi],DWORD [24+edi]	; a[2]*b[5]
	mul_comba8_step	ecx,ebp,ebx,DWORD [esi],DWORD [28+edi]		; a[1]*b[6]
	mul_comba8_step	ecx,ebp,ebx,DWORD [20+esp],DWORD [4+edi]	; a[0]*b[7]
	mov	DWORD [28+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[7]
	; ################## Calculate word 8
	xor	ecx,ecx
	mul_comba8_step	ebp,ebx,ecx,DWORD [24+esi],DWORD [8+edi]	; a[7]*b[1]
	mul_comba8_step	ebp,ebx,ecx,DWORD [20+esi],DWORD [12+edi]	; a[6]*b[2]
	mul_comba8_step	ebp,ebx,ecx,DWORD [16+esi],DWORD [16+edi]	; a[5]*b[3]
	mul_comba8_step	ebp,ebx,ecx,DWORD [12+esi],DWORD [20+edi]	; a[4]*b[4]
	mul_comba8_step	ebp,ebx,ecx,DWORD [8+esi],DWORD [24+edi]	; a[3]*b[5]
	mul_comba8_step	ebp,ebx,ecx,DWORD [4+esi],DWORD [28+edi]	; a[2]*b[6]
	mul_comba8_step	ebp,ebx,ecx,DWORD [20+esp],DWORD [8+edi]	; a[1]*b[7]
	mov	DWORD [32+eax],ebp
	mov	eax,DWORD [28+esi]
	; saved r[8]
	; ################## Calculate word 9
	xor	ebp,ebp
	mul_comba8_step	ebx,ecx,ebp,DWORD [24+esi],DWORD [12+edi]	; a[7]*b[2]
	mul_comba8_step	ebx,ecx,ebp,DWORD [20+esi],DWORD [16+edi]	; a[6]*b[3]
	mul_comba8_step	ebx,ecx,ebp,DWORD [16+esi],DWORD [20+edi]	; a[5]*b[4]
	mul_comba8_step	ebx,ecx,ebp,DWORD [12+esi],DWORD [24+edi]	; a[4]*b[5]
	mul_comba8_step	ebx,ecx,ebp,DWORD [8+esi],DWORD [28+edi]	; a[3]*b[6]
	mul_comba8_step	ebx,ecx,ebp,DWORD [20+esp],DWORD [12+edi]	; a[2]*b[7]
	mov	DWORD [36+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[9]
	; ################## Calculate word 10
	xor	ebx,ebx
	mul_comba8_step	ecx,ebp,ebx,DWORD [24+esi],DWORD [16+edi]	; a[7]*b[3]
	mul_comba8_step	ecx,ebp,ebx,DWORD [20+esi],DWORD [20+edi]	; a[6]*b[4]
	mul_comba8_step	ecx,ebp,ebx,DWORD [16+esi],DWORD [24+edi]	; a[5]*b[5]
	mul_comba8_step	ecx,ebp,ebx,DWORD [12+esi],DWORD [28+edi]	; a[4]*b[6]
	mul_comba8_step	ecx,ebp,ebx,DWORD [20+esp],DWORD [16+edi]	; a[3]*b[7]
	mov	DWORD [40+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[10]
	; ################## Calculate word 11
	xor	ecx,ecx
	mul_comba8_step	ebp,ebx,ecx,DWORD [24+esi],DWORD [20+edi]	; a[7]*b[4]
	mul_comba8_step	ebp,ebx,ecx,DWORD [20+esi],DWORD [24+edi]	; a[6]*b[5]
	mul_comba8_step	ebp,ebx,ecx,DWORD [16+esi],DWORD [28+edi]	; a[5]*b[6]
	mul_comba8_step	ebp,ebx,ecx,DWORD [20+esp],DWORD [20+edi]	; a[4]*b[7]
	mov	DWORD [44+eax],ebp
	mov	eax,DWORD [28+esi]
	; saved r[11]
	; ################## Calculate word 12
	xor	ebp,ebp
	mul_comba8_step	ebx,ecx,ebp,DWORD [24+esi],DWORD [24+edi]	; a[7]*b[5]
	mul_comba8_step	ebx,ecx,ebp,DWORD [20+esi],DWORD [28+edi]	; a[6]*b[6]
	mul_comba8_step	ebx,ecx,ebp,DWORD [20+esp],DWORD [24+edi]	; a[5]*b[7]
	mov	DWORD [48+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[12]
	; ################## Calculate word 13
	xor	ebx,ebx
	mul_comba8_step	ecx,ebp,ebx,DWORD [24+esi],DWORD [28+edi]	; a[7]*b[6]
	mul_comba8_step	ecx,ebp,ebx,DWORD [20+esp],DWORD [28+edi]	; a[6]*b[7]
	mov	DWORD [52+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[13]
	; ################## Calculate word 14
	; Last column, written out by hand: there is no following product,
	; so only the r pointer is reloaded.
	xor	ecx,ecx
	; mul a[7]*b[7]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	adc	ecx,0				; ecx is never stored after this
	mov	DWORD [56+eax],ebp
	; saved r[14]
	; save r[15]
	mov	DWORD [60+eax],ebx
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret
;-----------------------------------------------------------------------
; _bn_mul_comba4 - r[0..7] = a[0..3] * b[0..3]  (32-bit words, unsigned)
;
; Presumably matches a C prototype of the form
;     void bn_mul_comba4(uint32_t *r, const uint32_t *a, const uint32_t *b);
; (confirm against the C declaration).
;
; In:    [esp+4]  = r   destination, 8 dwords are written
;        [esp+8]  = a   4 dwords are read
;        [esp+12] = b   4 dwords are read
;        Arguments stay on the stack; `ret` pops nothing.
; Regs:  esi = a, edi = b; the r pointer is reloaded from [20+esp]
;        (its position after the four pushes) whenever a word is stored.
;        ebx/ecx/ebp form a rotating three-word column accumulator.
; Saved: esi, edi, ebp, ebx (pushed on entry, popped before ret)
; Clobb: eax, ecx, edx, flags
; Note:  low words of r are stored while a[] and b[] are still being
;        re-read, so r must not overlap a or b.
;-----------------------------------------------------------------------

; mul_comba4_step c0, c1, c2, next_eax, next_edx
;   (c2:c1:c0) += eax * edx, interleaved with the loads that set up the
;   operands of the following step (or the r pointer before a store).
;   `mov` does not touch the flags, so the add/adc carry chain is intact.
%macro mul_comba4_step 5
	mul	edx
	add	%1,eax
	mov	eax,%4
	adc	%2,edx
	mov	edx,%5
	adc	%3,0
%endmacro

global	_bn_mul_comba4
align	16
_bn_mul_comba4:
L$_bn_mul_comba4_begin:
	push	esi
	mov	esi,DWORD [12+esp]		; esi = a
	push	edi
	mov	edi,DWORD [20+esp]		; edi = b
	push	ebp
	push	ebx
	xor	ebx,ebx
	mov	eax,DWORD [esi]			; eax = a[0]
	xor	ecx,ecx
	mov	edx,DWORD [edi]			; edx = b[0]
	; ################## Calculate word 0
	xor	ebp,ebp
	mul_comba4_step	ebx,ecx,ebp,DWORD [20+esp],DWORD [edi]		; a[0]*b[0]
	mov	DWORD [eax],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ################## Calculate word 1
	xor	ebx,ebx
	mul_comba4_step	ecx,ebp,ebx,DWORD [esi],DWORD [4+edi]		; a[1]*b[0]
	mul_comba4_step	ecx,ebp,ebx,DWORD [20+esp],DWORD [edi]		; a[0]*b[1]
	mov	DWORD [4+eax],ecx
	mov	eax,DWORD [8+esi]
	; saved r[1]
	; ################## Calculate word 2
	xor	ecx,ecx
	mul_comba4_step	ebp,ebx,ecx,DWORD [4+esi],DWORD [4+edi]		; a[2]*b[0]
	mul_comba4_step	ebp,ebx,ecx,DWORD [esi],DWORD [8+edi]		; a[1]*b[1]
	mul_comba4_step	ebp,ebx,ecx,DWORD [20+esp],DWORD [edi]		; a[0]*b[2]
	mov	DWORD [8+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ################## Calculate word 3
	xor	ebp,ebp
	mul_comba4_step	ebx,ecx,ebp,DWORD [8+esi],DWORD [4+edi]		; a[3]*b[0]
	mul_comba4_step	ebx,ecx,ebp,DWORD [4+esi],DWORD [8+edi]		; a[2]*b[1]
	mul_comba4_step	ebx,ecx,ebp,DWORD [esi],DWORD [12+edi]		; a[1]*b[2]
	mul_comba4_step	ebx,ecx,ebp,DWORD [20+esp],DWORD [4+edi]	; a[0]*b[3]
	mov	DWORD [12+eax],ebx
	mov	eax,DWORD [12+esi]
	; saved r[3]
	; ################## Calculate word 4
	xor	ebx,ebx
	mul_comba4_step	ecx,ebp,ebx,DWORD [8+esi],DWORD [8+edi]		; a[3]*b[1]
	mul_comba4_step	ecx,ebp,ebx,DWORD [4+esi],DWORD [12+edi]	; a[2]*b[2]
	mul_comba4_step	ecx,ebp,ebx,DWORD [20+esp],DWORD [8+edi]	; a[1]*b[3]
	mov	DWORD [16+eax],ecx
	mov	eax,DWORD [12+esi]
	; saved r[4]
	; ################## Calculate word 5
	xor	ecx,ecx
	mul_comba4_step	ebp,ebx,ecx,DWORD [8+esi],DWORD [12+edi]	; a[3]*b[2]
	mul_comba4_step	ebp,ebx,ecx,DWORD [20+esp],DWORD [12+edi]	; a[2]*b[3]
	mov	DWORD [20+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[5]
	; ################## Calculate word 6
	; Last column, written out by hand: there is no following product,
	; so only the r pointer is reloaded.
	xor	ebp,ebp
	; mul a[3]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	adc	ebp,0				; ebp is never stored after this
	mov	DWORD [24+eax],ebx
	; saved r[6]
	; save r[7]
	mov	DWORD [28+eax],ecx
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret
;-----------------------------------------------------------------------
; _bn_sqr_comba8 - r[0..15] = a[0..7] squared  (32-bit words, unsigned)
;
; Presumably matches a C prototype of the form
;     void bn_sqr_comba8(uint32_t *r, const uint32_t *a);
; (confirm against the C declaration).
;
; In:    [esp+4] = r   destination, 16 dwords are written
;        [esp+8] = a   8 dwords are read
;        Arguments stay on the stack; `ret` pops nothing.
; Regs:  edi = r, esi = a.
;        ebx/ecx/ebp form a rotating three-word column accumulator.
; Saved: esi, edi, ebp, ebx (pushed on entry, popped before ret)
; Clobb: eax, ecx, edx, flags
; Note:  low words of r are stored while a[] is still being re-read,
;        so r must not overlap a.
;
; Column-wise (Comba) schedule.  For result word k each pair i > j with
; i+j == k is multiplied once and added twice (cross term); when k is
; even the square a[k/2]*a[k/2] is added once (diagonal term).
;-----------------------------------------------------------------------

; sqr_comba8_cross c0, c1, c2, next_eax
;   (c2:c1:c0) += 2 * (eax * edx).  The 64-bit product is doubled in
;   edx:eax first; the bit shifted out of edx is caught by the first
;   `adc c2,0`.  eax is then loaded with the operand for the next step.
;   The caller loads edx (when needed) after the macro.
%macro sqr_comba8_cross 4
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	%3,0
	add	%1,eax
	adc	%2,edx
	mov	eax,%4
	adc	%3,0
%endmacro

; sqr_comba8_diag c0, c1, c2, next_edx
;   (c2:c1:c0) += eax * eax, then edx is loaded for the next cross term.
%macro sqr_comba8_diag 4
	mul	eax
	add	%1,eax
	adc	%2,edx
	mov	edx,%4
	adc	%3,0
%endmacro

global	_bn_sqr_comba8
align	16
_bn_sqr_comba8:
L$_bn_sqr_comba8_begin:
	push	esi
	push	edi
	push	ebp
	push	ebx
	mov	edi,DWORD [20+esp]		; edi = r
	mov	esi,DWORD [24+esp]		; esi = a
	xor	ebx,ebx
	xor	ecx,ecx
	mov	eax,DWORD [esi]			; eax = a[0]
	; ############### Calculate word 0
	xor	ebp,ebp
	sqr_comba8_diag	ebx,ecx,ebp,DWORD [esi]		; a[0]*a[0]
	mov	DWORD [edi],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ############### Calculate word 1
	xor	ebx,ebx
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [8+esi]	; a[1]*a[0]
	mov	DWORD [4+edi],ecx
	mov	edx,DWORD [esi]
	; saved r[1]
	; ############### Calculate word 2
	xor	ecx,ecx
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [4+esi]	; a[2]*a[0]
	sqr_comba8_diag	ebp,ebx,ecx,DWORD [esi]		; a[1]*a[1]
	mov	DWORD [8+edi],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ############### Calculate word 3
	xor	ebp,ebp
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [8+esi]	; a[3]*a[0]
	mov	edx,DWORD [4+esi]
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [16+esi]	; a[2]*a[1]
	mov	DWORD [12+edi],ebx
	mov	edx,DWORD [esi]
	; saved r[3]
	; ############### Calculate word 4
	xor	ebx,ebx
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [12+esi]	; a[4]*a[0]
	mov	edx,DWORD [4+esi]
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [8+esi]	; a[3]*a[1]
	sqr_comba8_diag	ecx,ebp,ebx,DWORD [esi]		; a[2]*a[2]
	mov	DWORD [16+edi],ecx
	mov	eax,DWORD [20+esi]
	; saved r[4]
	; ############### Calculate word 5
	xor	ecx,ecx
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [16+esi]	; a[5]*a[0]
	mov	edx,DWORD [4+esi]
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [12+esi]	; a[4]*a[1]
	mov	edx,DWORD [8+esi]
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [24+esi]	; a[3]*a[2]
	mov	DWORD [20+edi],ebp
	mov	edx,DWORD [esi]
	; saved r[5]
	; ############### Calculate word 6
	xor	ebp,ebp
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [20+esi]	; a[6]*a[0]
	mov	edx,DWORD [4+esi]
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [16+esi]	; a[5]*a[1]
	mov	edx,DWORD [8+esi]
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [12+esi]	; a[4]*a[2]
	sqr_comba8_diag	ebx,ecx,ebp,DWORD [esi]		; a[3]*a[3]
	mov	DWORD [24+edi],ebx
	mov	eax,DWORD [28+esi]
	; saved r[6]
	; ############### Calculate word 7
	xor	ebx,ebx
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [24+esi]	; a[7]*a[0]
	mov	edx,DWORD [4+esi]
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [20+esi]	; a[6]*a[1]
	mov	edx,DWORD [8+esi]
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [16+esi]	; a[5]*a[2]
	mov	edx,DWORD [12+esi]
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [28+esi]	; a[4]*a[3]
	mov	DWORD [28+edi],ecx
	mov	edx,DWORD [4+esi]
	; saved r[7]
	; ############### Calculate word 8
	xor	ecx,ecx
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [24+esi]	; a[7]*a[1]
	mov	edx,DWORD [8+esi]
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [20+esi]	; a[6]*a[2]
	mov	edx,DWORD [12+esi]
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [16+esi]	; a[5]*a[3]
	sqr_comba8_diag	ebp,ebx,ecx,DWORD [8+esi]	; a[4]*a[4]
	mov	DWORD [32+edi],ebp
	mov	eax,DWORD [28+esi]
	; saved r[8]
	; ############### Calculate word 9
	xor	ebp,ebp
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [24+esi]	; a[7]*a[2]
	mov	edx,DWORD [12+esi]
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [20+esi]	; a[6]*a[3]
	mov	edx,DWORD [16+esi]
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [28+esi]	; a[5]*a[4]
	mov	DWORD [36+edi],ebx
	mov	edx,DWORD [12+esi]
	; saved r[9]
	; ############### Calculate word 10
	xor	ebx,ebx
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [24+esi]	; a[7]*a[3]
	mov	edx,DWORD [16+esi]
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [20+esi]	; a[6]*a[4]
	sqr_comba8_diag	ecx,ebp,ebx,DWORD [16+esi]	; a[5]*a[5]
	mov	DWORD [40+edi],ecx
	mov	eax,DWORD [28+esi]
	; saved r[10]
	; ############### Calculate word 11
	xor	ecx,ecx
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [24+esi]	; a[7]*a[4]
	mov	edx,DWORD [20+esi]
	sqr_comba8_cross	ebp,ebx,ecx,DWORD [28+esi]	; a[6]*a[5]
	mov	DWORD [44+edi],ebp
	mov	edx,DWORD [20+esi]
	; saved r[11]
	; ############### Calculate word 12
	xor	ebp,ebp
	sqr_comba8_cross	ebx,ecx,ebp,DWORD [24+esi]	; a[7]*a[5]
	sqr_comba8_diag	ebx,ecx,ebp,DWORD [24+esi]	; a[6]*a[6]
	mov	DWORD [48+edi],ebx
	mov	eax,DWORD [28+esi]
	; saved r[12]
	; ############### Calculate word 13
	xor	ebx,ebx
	sqr_comba8_cross	ecx,ebp,ebx,DWORD [28+esi]	; a[7]*a[6]
	mov	DWORD [52+edi],ecx
	; saved r[13]
	; ############### Calculate word 14
	; Last column, written out by hand: no operand is needed afterwards.
	xor	ecx,ecx
	; sqr a[7]*a[7]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	adc	ecx,0				; ecx is never stored after this
	mov	DWORD [56+edi],ebp
	; saved r[14]
	; save r[15]
	mov	DWORD [60+edi],ebx
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret
;-----------------------------------------------------------------------
; _bn_sqr_comba4 - r[0..7] = a[0..3] squared  (32-bit words, unsigned)
;
; Presumably matches a C prototype of the form
;     void bn_sqr_comba4(uint32_t *r, const uint32_t *a);
; (confirm against the C declaration).
;
; In:    [esp+4] = r   destination, 8 dwords are written
;        [esp+8] = a   4 dwords are read
;        Arguments stay on the stack; `ret` pops nothing.
; Regs:  edi = r, esi = a.
;        ebx/ecx/ebp form a rotating three-word column accumulator.
; Saved: esi, edi, ebp, ebx (pushed on entry, popped before ret)
; Clobb: eax, ecx, edx, flags
; Note:  low words of r are stored while a[] is still being re-read,
;        so r must not overlap a.
;-----------------------------------------------------------------------

; sqr_comba4_cross c0, c1, c2, next_eax
;   (c2:c1:c0) += 2 * (eax * edx).  The 64-bit product is doubled in
;   edx:eax first; the bit shifted out of edx is caught by the first
;   `adc c2,0`.  eax is then loaded with the operand for the next step.
;   The caller loads edx (when needed) after the macro.
%macro sqr_comba4_cross 4
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	%3,0
	add	%1,eax
	adc	%2,edx
	mov	eax,%4
	adc	%3,0
%endmacro

; sqr_comba4_diag c0, c1, c2, next_edx
;   (c2:c1:c0) += eax * eax, then edx is loaded for the next cross term.
%macro sqr_comba4_diag 4
	mul	eax
	add	%1,eax
	adc	%2,edx
	mov	edx,%4
	adc	%3,0
%endmacro

global	_bn_sqr_comba4
align	16
_bn_sqr_comba4:
L$_bn_sqr_comba4_begin:
	push	esi
	push	edi
	push	ebp
	push	ebx
	mov	edi,DWORD [20+esp]		; edi = r
	mov	esi,DWORD [24+esp]		; esi = a
	xor	ebx,ebx
	xor	ecx,ecx
	mov	eax,DWORD [esi]			; eax = a[0]
	; ############### Calculate word 0
	xor	ebp,ebp
	sqr_comba4_diag	ebx,ecx,ebp,DWORD [esi]		; a[0]*a[0]
	mov	DWORD [edi],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ############### Calculate word 1
	xor	ebx,ebx
	sqr_comba4_cross	ecx,ebp,ebx,DWORD [8+esi]	; a[1]*a[0]
	mov	DWORD [4+edi],ecx
	mov	edx,DWORD [esi]
	; saved r[1]
	; ############### Calculate word 2
	xor	ecx,ecx
	sqr_comba4_cross	ebp,ebx,ecx,DWORD [4+esi]	; a[2]*a[0]
	sqr_comba4_diag	ebp,ebx,ecx,DWORD [esi]		; a[1]*a[1]
	mov	DWORD [8+edi],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ############### Calculate word 3
	xor	ebp,ebp
	sqr_comba4_cross	ebx,ecx,ebp,DWORD [8+esi]	; a[3]*a[0]
	mov	edx,DWORD [4+esi]
	sqr_comba4_cross	ebx,ecx,ebp,DWORD [12+esi]	; a[2]*a[1]
	mov	DWORD [12+edi],ebx
	mov	edx,DWORD [4+esi]
	; saved r[3]
	; ############### Calculate word 4
	xor	ebx,ebx
	sqr_comba4_cross	ecx,ebp,ebx,DWORD [8+esi]	; a[3]*a[1]
	sqr_comba4_diag	ecx,ebp,ebx,DWORD [8+esi]	; a[2]*a[2]
	mov	DWORD [16+edi],ecx
	mov	eax,DWORD [12+esi]
	; saved r[4]
	; ############### Calculate word 5
	xor	ecx,ecx
	sqr_comba4_cross	ebp,ebx,ecx,DWORD [12+esi]	; a[3]*a[2]
	mov	DWORD [20+edi],ebp
	; saved r[5]
	; ############### Calculate word 6
	; Last column, written out by hand: no operand is needed afterwards.
	xor	ebp,ebp
	; sqr a[3]*a[3]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	adc	ebp,0				; ebp is never stored after this
	mov	DWORD [24+edi],ebx
	; saved r[6]
	; save r[7]
	mov	DWORD [28+edi],ecx
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret