• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
10$@feat.00 equ 1
11section	.text	code align=64
12%else
13section	.text	code
14%endif
; ---------------------------------------------------------------------
; _bn_mul_comba8: multiply two 8-word operands, producing 16 words.
; Output word k is the sum of all a[i]*b[j] with i+j == k plus the
; carries out of the previous column.
;
; Stack arguments (32-bit words; the function ends in a plain `ret`,
; so the arguments are left on the stack for the caller):
;   1st: r - destination, r[0]..r[15] are written
;   2nd: a - source, a[0]..a[7] are read
;   3rd: b - source, b[0]..b[7] are read
; NOTE(review): presumably called from C as bn_mul_comba8(r, a, b) on
; 32-bit limbs - confirm against the C declaration.
;
; Registers: esi = a, edi = b. ebx/ecx/ebp form a three-word column
; accumulator whose roles rotate by one register per output word.
; eax/edx hold the multiplicands and then the 64-bit product edx:eax
; of each `mul`; eax is also reloaded with r from the stack right
; before each store. esi, edi, ebp and ebx are saved and restored;
; eax, ecx, edx and the flags are clobbered.
; ---------------------------------------------------------------------
15global	_bn_mul_comba8
16align	16
17_bn_mul_comba8:
18L$_bn_mul_comba8_begin:
19	push	esi
20	mov	esi,DWORD [12+esp]	; esi = a (2nd argument; one push so far)
21	push	edi
22	mov	edi,DWORD [20+esp]	; edi = b (3rd argument; two pushes so far)
23	push	ebp
24	push	ebx
	; From here on r (1st argument) lives at [20+esp].
	; `mov` does not modify flags, so the loads interleaved between
	; the add/adc instructions below do not disturb the carry chain.
25	xor	ebx,ebx
26	mov	eax,DWORD [esi]
27	xor	ecx,ecx
28	mov	edx,DWORD [edi]
29	; ################## Calculate word 0
	; Accumulator for this column, low to high: ebx, ecx, ebp.
30	xor	ebp,ebp
31	; mul a[0]*b[0]
32	mul	edx
33	add	ebx,eax
34	mov	eax,DWORD [20+esp]
35	adc	ecx,edx
36	mov	edx,DWORD [edi]
37	adc	ebp,0
38	mov	DWORD [eax],ebx
39	mov	eax,DWORD [4+esi]
40	; saved r[0]
41	; ################## Calculate word 1
	; Accumulator for this column, low to high: ecx, ebp, ebx.
42	xor	ebx,ebx
43	; mul a[1]*b[0]
44	mul	edx
45	add	ecx,eax
46	mov	eax,DWORD [esi]
47	adc	ebp,edx
48	mov	edx,DWORD [4+edi]
49	adc	ebx,0
50	; mul a[0]*b[1]
51	mul	edx
52	add	ecx,eax
53	mov	eax,DWORD [20+esp]
54	adc	ebp,edx
55	mov	edx,DWORD [edi]
56	adc	ebx,0
57	mov	DWORD [4+eax],ecx
58	mov	eax,DWORD [8+esi]
59	; saved r[1]
60	; ################## Calculate word 2
	; Accumulator for this column, low to high: ebp, ebx, ecx.
61	xor	ecx,ecx
62	; mul a[2]*b[0]
63	mul	edx
64	add	ebp,eax
65	mov	eax,DWORD [4+esi]
66	adc	ebx,edx
67	mov	edx,DWORD [4+edi]
68	adc	ecx,0
69	; mul a[1]*b[1]
70	mul	edx
71	add	ebp,eax
72	mov	eax,DWORD [esi]
73	adc	ebx,edx
74	mov	edx,DWORD [8+edi]
75	adc	ecx,0
76	; mul a[0]*b[2]
77	mul	edx
78	add	ebp,eax
79	mov	eax,DWORD [20+esp]
80	adc	ebx,edx
81	mov	edx,DWORD [edi]
82	adc	ecx,0
83	mov	DWORD [8+eax],ebp
84	mov	eax,DWORD [12+esi]
85	; saved r[2]
86	; ################## Calculate word 3
87	xor	ebp,ebp
88	; mul a[3]*b[0]
89	mul	edx
90	add	ebx,eax
91	mov	eax,DWORD [8+esi]
92	adc	ecx,edx
93	mov	edx,DWORD [4+edi]
94	adc	ebp,0
95	; mul a[2]*b[1]
96	mul	edx
97	add	ebx,eax
98	mov	eax,DWORD [4+esi]
99	adc	ecx,edx
100	mov	edx,DWORD [8+edi]
101	adc	ebp,0
102	; mul a[1]*b[2]
103	mul	edx
104	add	ebx,eax
105	mov	eax,DWORD [esi]
106	adc	ecx,edx
107	mov	edx,DWORD [12+edi]
108	adc	ebp,0
109	; mul a[0]*b[3]
110	mul	edx
111	add	ebx,eax
112	mov	eax,DWORD [20+esp]
113	adc	ecx,edx
114	mov	edx,DWORD [edi]
115	adc	ebp,0
116	mov	DWORD [12+eax],ebx
117	mov	eax,DWORD [16+esi]
118	; saved r[3]
119	; ################## Calculate word 4
120	xor	ebx,ebx
121	; mul a[4]*b[0]
122	mul	edx
123	add	ecx,eax
124	mov	eax,DWORD [12+esi]
125	adc	ebp,edx
126	mov	edx,DWORD [4+edi]
127	adc	ebx,0
128	; mul a[3]*b[1]
129	mul	edx
130	add	ecx,eax
131	mov	eax,DWORD [8+esi]
132	adc	ebp,edx
133	mov	edx,DWORD [8+edi]
134	adc	ebx,0
135	; mul a[2]*b[2]
136	mul	edx
137	add	ecx,eax
138	mov	eax,DWORD [4+esi]
139	adc	ebp,edx
140	mov	edx,DWORD [12+edi]
141	adc	ebx,0
142	; mul a[1]*b[3]
143	mul	edx
144	add	ecx,eax
145	mov	eax,DWORD [esi]
146	adc	ebp,edx
147	mov	edx,DWORD [16+edi]
148	adc	ebx,0
149	; mul a[0]*b[4]
150	mul	edx
151	add	ecx,eax
152	mov	eax,DWORD [20+esp]
153	adc	ebp,edx
154	mov	edx,DWORD [edi]
155	adc	ebx,0
156	mov	DWORD [16+eax],ecx
157	mov	eax,DWORD [20+esi]
158	; saved r[4]
159	; ################## Calculate word 5
160	xor	ecx,ecx
161	; mul a[5]*b[0]
162	mul	edx
163	add	ebp,eax
164	mov	eax,DWORD [16+esi]
165	adc	ebx,edx
166	mov	edx,DWORD [4+edi]
167	adc	ecx,0
168	; mul a[4]*b[1]
169	mul	edx
170	add	ebp,eax
171	mov	eax,DWORD [12+esi]
172	adc	ebx,edx
173	mov	edx,DWORD [8+edi]
174	adc	ecx,0
175	; mul a[3]*b[2]
176	mul	edx
177	add	ebp,eax
178	mov	eax,DWORD [8+esi]
179	adc	ebx,edx
180	mov	edx,DWORD [12+edi]
181	adc	ecx,0
182	; mul a[2]*b[3]
183	mul	edx
184	add	ebp,eax
185	mov	eax,DWORD [4+esi]
186	adc	ebx,edx
187	mov	edx,DWORD [16+edi]
188	adc	ecx,0
189	; mul a[1]*b[4]
190	mul	edx
191	add	ebp,eax
192	mov	eax,DWORD [esi]
193	adc	ebx,edx
194	mov	edx,DWORD [20+edi]
195	adc	ecx,0
196	; mul a[0]*b[5]
197	mul	edx
198	add	ebp,eax
199	mov	eax,DWORD [20+esp]
200	adc	ebx,edx
201	mov	edx,DWORD [edi]
202	adc	ecx,0
203	mov	DWORD [20+eax],ebp
204	mov	eax,DWORD [24+esi]
205	; saved r[5]
206	; ################## Calculate word 6
207	xor	ebp,ebp
208	; mul a[6]*b[0]
209	mul	edx
210	add	ebx,eax
211	mov	eax,DWORD [20+esi]
212	adc	ecx,edx
213	mov	edx,DWORD [4+edi]
214	adc	ebp,0
215	; mul a[5]*b[1]
216	mul	edx
217	add	ebx,eax
218	mov	eax,DWORD [16+esi]
219	adc	ecx,edx
220	mov	edx,DWORD [8+edi]
221	adc	ebp,0
222	; mul a[4]*b[2]
223	mul	edx
224	add	ebx,eax
225	mov	eax,DWORD [12+esi]
226	adc	ecx,edx
227	mov	edx,DWORD [12+edi]
228	adc	ebp,0
229	; mul a[3]*b[3]
230	mul	edx
231	add	ebx,eax
232	mov	eax,DWORD [8+esi]
233	adc	ecx,edx
234	mov	edx,DWORD [16+edi]
235	adc	ebp,0
236	; mul a[2]*b[4]
237	mul	edx
238	add	ebx,eax
239	mov	eax,DWORD [4+esi]
240	adc	ecx,edx
241	mov	edx,DWORD [20+edi]
242	adc	ebp,0
243	; mul a[1]*b[5]
244	mul	edx
245	add	ebx,eax
246	mov	eax,DWORD [esi]
247	adc	ecx,edx
248	mov	edx,DWORD [24+edi]
249	adc	ebp,0
250	; mul a[0]*b[6]
251	mul	edx
252	add	ebx,eax
253	mov	eax,DWORD [20+esp]
254	adc	ecx,edx
255	mov	edx,DWORD [edi]
256	adc	ebp,0
257	mov	DWORD [24+eax],ebx
258	mov	eax,DWORD [28+esi]
259	; saved r[6]
260	; ################## Calculate word 7
261	xor	ebx,ebx
262	; mul a[7]*b[0]
263	mul	edx
264	add	ecx,eax
265	mov	eax,DWORD [24+esi]
266	adc	ebp,edx
267	mov	edx,DWORD [4+edi]
268	adc	ebx,0
269	; mul a[6]*b[1]
270	mul	edx
271	add	ecx,eax
272	mov	eax,DWORD [20+esi]
273	adc	ebp,edx
274	mov	edx,DWORD [8+edi]
275	adc	ebx,0
276	; mul a[5]*b[2]
277	mul	edx
278	add	ecx,eax
279	mov	eax,DWORD [16+esi]
280	adc	ebp,edx
281	mov	edx,DWORD [12+edi]
282	adc	ebx,0
283	; mul a[4]*b[3]
284	mul	edx
285	add	ecx,eax
286	mov	eax,DWORD [12+esi]
287	adc	ebp,edx
288	mov	edx,DWORD [16+edi]
289	adc	ebx,0
290	; mul a[3]*b[4]
291	mul	edx
292	add	ecx,eax
293	mov	eax,DWORD [8+esi]
294	adc	ebp,edx
295	mov	edx,DWORD [20+edi]
296	adc	ebx,0
297	; mul a[2]*b[5]
298	mul	edx
299	add	ecx,eax
300	mov	eax,DWORD [4+esi]
301	adc	ebp,edx
302	mov	edx,DWORD [24+edi]
303	adc	ebx,0
304	; mul a[1]*b[6]
305	mul	edx
306	add	ecx,eax
307	mov	eax,DWORD [esi]
308	adc	ebp,edx
309	mov	edx,DWORD [28+edi]
310	adc	ebx,0
311	; mul a[0]*b[7]
312	mul	edx
313	add	ecx,eax
314	mov	eax,DWORD [20+esp]
315	adc	ebp,edx
316	mov	edx,DWORD [4+edi]
317	adc	ebx,0
318	mov	DWORD [28+eax],ecx
319	mov	eax,DWORD [28+esi]
320	; saved r[7]
321	; ################## Calculate word 8
	; From this column on the first product starts at a[7], and the
	; lowest b index grows by one per column.
322	xor	ecx,ecx
323	; mul a[7]*b[1]
324	mul	edx
325	add	ebp,eax
326	mov	eax,DWORD [24+esi]
327	adc	ebx,edx
328	mov	edx,DWORD [8+edi]
329	adc	ecx,0
330	; mul a[6]*b[2]
331	mul	edx
332	add	ebp,eax
333	mov	eax,DWORD [20+esi]
334	adc	ebx,edx
335	mov	edx,DWORD [12+edi]
336	adc	ecx,0
337	; mul a[5]*b[3]
338	mul	edx
339	add	ebp,eax
340	mov	eax,DWORD [16+esi]
341	adc	ebx,edx
342	mov	edx,DWORD [16+edi]
343	adc	ecx,0
344	; mul a[4]*b[4]
345	mul	edx
346	add	ebp,eax
347	mov	eax,DWORD [12+esi]
348	adc	ebx,edx
349	mov	edx,DWORD [20+edi]
350	adc	ecx,0
351	; mul a[3]*b[5]
352	mul	edx
353	add	ebp,eax
354	mov	eax,DWORD [8+esi]
355	adc	ebx,edx
356	mov	edx,DWORD [24+edi]
357	adc	ecx,0
358	; mul a[2]*b[6]
359	mul	edx
360	add	ebp,eax
361	mov	eax,DWORD [4+esi]
362	adc	ebx,edx
363	mov	edx,DWORD [28+edi]
364	adc	ecx,0
365	; mul a[1]*b[7]
366	mul	edx
367	add	ebp,eax
368	mov	eax,DWORD [20+esp]
369	adc	ebx,edx
370	mov	edx,DWORD [8+edi]
371	adc	ecx,0
372	mov	DWORD [32+eax],ebp
373	mov	eax,DWORD [28+esi]
374	; saved r[8]
375	; ################## Calculate word 9
376	xor	ebp,ebp
377	; mul a[7]*b[2]
378	mul	edx
379	add	ebx,eax
380	mov	eax,DWORD [24+esi]
381	adc	ecx,edx
382	mov	edx,DWORD [12+edi]
383	adc	ebp,0
384	; mul a[6]*b[3]
385	mul	edx
386	add	ebx,eax
387	mov	eax,DWORD [20+esi]
388	adc	ecx,edx
389	mov	edx,DWORD [16+edi]
390	adc	ebp,0
391	; mul a[5]*b[4]
392	mul	edx
393	add	ebx,eax
394	mov	eax,DWORD [16+esi]
395	adc	ecx,edx
396	mov	edx,DWORD [20+edi]
397	adc	ebp,0
398	; mul a[4]*b[5]
399	mul	edx
400	add	ebx,eax
401	mov	eax,DWORD [12+esi]
402	adc	ecx,edx
403	mov	edx,DWORD [24+edi]
404	adc	ebp,0
405	; mul a[3]*b[6]
406	mul	edx
407	add	ebx,eax
408	mov	eax,DWORD [8+esi]
409	adc	ecx,edx
410	mov	edx,DWORD [28+edi]
411	adc	ebp,0
412	; mul a[2]*b[7]
413	mul	edx
414	add	ebx,eax
415	mov	eax,DWORD [20+esp]
416	adc	ecx,edx
417	mov	edx,DWORD [12+edi]
418	adc	ebp,0
419	mov	DWORD [36+eax],ebx
420	mov	eax,DWORD [28+esi]
421	; saved r[9]
422	; ################## Calculate word 10
423	xor	ebx,ebx
424	; mul a[7]*b[3]
425	mul	edx
426	add	ecx,eax
427	mov	eax,DWORD [24+esi]
428	adc	ebp,edx
429	mov	edx,DWORD [16+edi]
430	adc	ebx,0
431	; mul a[6]*b[4]
432	mul	edx
433	add	ecx,eax
434	mov	eax,DWORD [20+esi]
435	adc	ebp,edx
436	mov	edx,DWORD [20+edi]
437	adc	ebx,0
438	; mul a[5]*b[5]
439	mul	edx
440	add	ecx,eax
441	mov	eax,DWORD [16+esi]
442	adc	ebp,edx
443	mov	edx,DWORD [24+edi]
444	adc	ebx,0
445	; mul a[4]*b[6]
446	mul	edx
447	add	ecx,eax
448	mov	eax,DWORD [12+esi]
449	adc	ebp,edx
450	mov	edx,DWORD [28+edi]
451	adc	ebx,0
452	; mul a[3]*b[7]
453	mul	edx
454	add	ecx,eax
455	mov	eax,DWORD [20+esp]
456	adc	ebp,edx
457	mov	edx,DWORD [16+edi]
458	adc	ebx,0
459	mov	DWORD [40+eax],ecx
460	mov	eax,DWORD [28+esi]
461	; saved r[10]
462	; ################## Calculate word 11
463	xor	ecx,ecx
464	; mul a[7]*b[4]
465	mul	edx
466	add	ebp,eax
467	mov	eax,DWORD [24+esi]
468	adc	ebx,edx
469	mov	edx,DWORD [20+edi]
470	adc	ecx,0
471	; mul a[6]*b[5]
472	mul	edx
473	add	ebp,eax
474	mov	eax,DWORD [20+esi]
475	adc	ebx,edx
476	mov	edx,DWORD [24+edi]
477	adc	ecx,0
478	; mul a[5]*b[6]
479	mul	edx
480	add	ebp,eax
481	mov	eax,DWORD [16+esi]
482	adc	ebx,edx
483	mov	edx,DWORD [28+edi]
484	adc	ecx,0
485	; mul a[4]*b[7]
486	mul	edx
487	add	ebp,eax
488	mov	eax,DWORD [20+esp]
489	adc	ebx,edx
490	mov	edx,DWORD [20+edi]
491	adc	ecx,0
492	mov	DWORD [44+eax],ebp
493	mov	eax,DWORD [28+esi]
494	; saved r[11]
495	; ################## Calculate word 12
496	xor	ebp,ebp
497	; mul a[7]*b[5]
498	mul	edx
499	add	ebx,eax
500	mov	eax,DWORD [24+esi]
501	adc	ecx,edx
502	mov	edx,DWORD [24+edi]
503	adc	ebp,0
504	; mul a[6]*b[6]
505	mul	edx
506	add	ebx,eax
507	mov	eax,DWORD [20+esi]
508	adc	ecx,edx
509	mov	edx,DWORD [28+edi]
510	adc	ebp,0
511	; mul a[5]*b[7]
512	mul	edx
513	add	ebx,eax
514	mov	eax,DWORD [20+esp]
515	adc	ecx,edx
516	mov	edx,DWORD [24+edi]
517	adc	ebp,0
518	mov	DWORD [48+eax],ebx
519	mov	eax,DWORD [28+esi]
520	; saved r[12]
521	; ################## Calculate word 13
522	xor	ebx,ebx
523	; mul a[7]*b[6]
524	mul	edx
525	add	ecx,eax
526	mov	eax,DWORD [24+esi]
527	adc	ebp,edx
528	mov	edx,DWORD [28+edi]
529	adc	ebx,0
530	; mul a[6]*b[7]
531	mul	edx
532	add	ecx,eax
533	mov	eax,DWORD [20+esp]
534	adc	ebp,edx
535	mov	edx,DWORD [28+edi]
536	adc	ebx,0
537	mov	DWORD [52+eax],ecx
538	mov	eax,DWORD [28+esi]
539	; saved r[13]
540	; ################## Calculate word 14
	; Last column: ebp becomes r[14] and ebx becomes r[15]. The carry
	; folded into ecx below is never stored afterwards.
541	xor	ecx,ecx
542	; mul a[7]*b[7]
543	mul	edx
544	add	ebp,eax
545	mov	eax,DWORD [20+esp]
546	adc	ebx,edx
547	adc	ecx,0
548	mov	DWORD [56+eax],ebp
549	; saved r[14]
550	; save r[15]
551	mov	DWORD [60+eax],ebx
	; Restore the saved registers in reverse push order.
552	pop	ebx
553	pop	ebp
554	pop	edi
555	pop	esi
556	ret
; ---------------------------------------------------------------------
; _bn_mul_comba4: multiply two 4-word operands, producing 8 words.
; Output word k is the sum of all a[i]*b[j] with i+j == k plus the
; carries out of the previous column.
;
; Stack arguments (32-bit words; the function ends in a plain `ret`,
; so the arguments are left on the stack for the caller):
;   1st: r - destination, r[0]..r[7] are written
;   2nd: a - source, a[0]..a[3] are read
;   3rd: b - source, b[0]..b[3] are read
; NOTE(review): presumably called from C as bn_mul_comba4(r, a, b) on
; 32-bit limbs - confirm against the C declaration.
;
; Registers: esi = a, edi = b. ebx/ecx/ebp form a three-word column
; accumulator whose roles rotate by one register per output word.
; eax/edx hold the multiplicands and then the 64-bit product edx:eax
; of each `mul`; eax is also reloaded with r from the stack right
; before each store. esi, edi, ebp and ebx are saved and restored;
; eax, ecx, edx and the flags are clobbered.
; ---------------------------------------------------------------------
557global	_bn_mul_comba4
558align	16
559_bn_mul_comba4:
560L$_bn_mul_comba4_begin:
561	push	esi
562	mov	esi,DWORD [12+esp]	; esi = a (2nd argument; one push so far)
563	push	edi
564	mov	edi,DWORD [20+esp]	; edi = b (3rd argument; two pushes so far)
565	push	ebp
566	push	ebx
	; From here on r (1st argument) lives at [20+esp].
	; `mov` does not modify flags, so the loads interleaved between
	; the add/adc instructions below do not disturb the carry chain.
567	xor	ebx,ebx
568	mov	eax,DWORD [esi]
569	xor	ecx,ecx
570	mov	edx,DWORD [edi]
571	; ################## Calculate word 0
	; Accumulator for this column, low to high: ebx, ecx, ebp.
572	xor	ebp,ebp
573	; mul a[0]*b[0]
574	mul	edx
575	add	ebx,eax
576	mov	eax,DWORD [20+esp]
577	adc	ecx,edx
578	mov	edx,DWORD [edi]
579	adc	ebp,0
580	mov	DWORD [eax],ebx
581	mov	eax,DWORD [4+esi]
582	; saved r[0]
583	; ################## Calculate word 1
	; Accumulator for this column, low to high: ecx, ebp, ebx.
584	xor	ebx,ebx
585	; mul a[1]*b[0]
586	mul	edx
587	add	ecx,eax
588	mov	eax,DWORD [esi]
589	adc	ebp,edx
590	mov	edx,DWORD [4+edi]
591	adc	ebx,0
592	; mul a[0]*b[1]
593	mul	edx
594	add	ecx,eax
595	mov	eax,DWORD [20+esp]
596	adc	ebp,edx
597	mov	edx,DWORD [edi]
598	adc	ebx,0
599	mov	DWORD [4+eax],ecx
600	mov	eax,DWORD [8+esi]
601	; saved r[1]
602	; ################## Calculate word 2
	; Accumulator for this column, low to high: ebp, ebx, ecx.
603	xor	ecx,ecx
604	; mul a[2]*b[0]
605	mul	edx
606	add	ebp,eax
607	mov	eax,DWORD [4+esi]
608	adc	ebx,edx
609	mov	edx,DWORD [4+edi]
610	adc	ecx,0
611	; mul a[1]*b[1]
612	mul	edx
613	add	ebp,eax
614	mov	eax,DWORD [esi]
615	adc	ebx,edx
616	mov	edx,DWORD [8+edi]
617	adc	ecx,0
618	; mul a[0]*b[2]
619	mul	edx
620	add	ebp,eax
621	mov	eax,DWORD [20+esp]
622	adc	ebx,edx
623	mov	edx,DWORD [edi]
624	adc	ecx,0
625	mov	DWORD [8+eax],ebp
626	mov	eax,DWORD [12+esi]
627	; saved r[2]
628	; ################## Calculate word 3
629	xor	ebp,ebp
630	; mul a[3]*b[0]
631	mul	edx
632	add	ebx,eax
633	mov	eax,DWORD [8+esi]
634	adc	ecx,edx
635	mov	edx,DWORD [4+edi]
636	adc	ebp,0
637	; mul a[2]*b[1]
638	mul	edx
639	add	ebx,eax
640	mov	eax,DWORD [4+esi]
641	adc	ecx,edx
642	mov	edx,DWORD [8+edi]
643	adc	ebp,0
644	; mul a[1]*b[2]
645	mul	edx
646	add	ebx,eax
647	mov	eax,DWORD [esi]
648	adc	ecx,edx
649	mov	edx,DWORD [12+edi]
650	adc	ebp,0
651	; mul a[0]*b[3]
652	mul	edx
653	add	ebx,eax
654	mov	eax,DWORD [20+esp]
655	adc	ecx,edx
656	mov	edx,DWORD [4+edi]
657	adc	ebp,0
658	mov	DWORD [12+eax],ebx
659	mov	eax,DWORD [12+esi]
660	; saved r[3]
661	; ################## Calculate word 4
	; From this column on the first product starts at a[3], and the
	; lowest b index grows by one per column.
662	xor	ebx,ebx
663	; mul a[3]*b[1]
664	mul	edx
665	add	ecx,eax
666	mov	eax,DWORD [8+esi]
667	adc	ebp,edx
668	mov	edx,DWORD [8+edi]
669	adc	ebx,0
670	; mul a[2]*b[2]
671	mul	edx
672	add	ecx,eax
673	mov	eax,DWORD [4+esi]
674	adc	ebp,edx
675	mov	edx,DWORD [12+edi]
676	adc	ebx,0
677	; mul a[1]*b[3]
678	mul	edx
679	add	ecx,eax
680	mov	eax,DWORD [20+esp]
681	adc	ebp,edx
682	mov	edx,DWORD [8+edi]
683	adc	ebx,0
684	mov	DWORD [16+eax],ecx
685	mov	eax,DWORD [12+esi]
686	; saved r[4]
687	; ################## Calculate word 5
688	xor	ecx,ecx
689	; mul a[3]*b[2]
690	mul	edx
691	add	ebp,eax
692	mov	eax,DWORD [8+esi]
693	adc	ebx,edx
694	mov	edx,DWORD [12+edi]
695	adc	ecx,0
696	; mul a[2]*b[3]
697	mul	edx
698	add	ebp,eax
699	mov	eax,DWORD [20+esp]
700	adc	ebx,edx
701	mov	edx,DWORD [12+edi]
702	adc	ecx,0
703	mov	DWORD [20+eax],ebp
704	mov	eax,DWORD [12+esi]
705	; saved r[5]
706	; ################## Calculate word 6
	; Last column: ebx becomes r[6] and ecx becomes r[7]. The carry
	; folded into ebp below is never stored afterwards.
707	xor	ebp,ebp
708	; mul a[3]*b[3]
709	mul	edx
710	add	ebx,eax
711	mov	eax,DWORD [20+esp]
712	adc	ecx,edx
713	adc	ebp,0
714	mov	DWORD [24+eax],ebx
715	; saved r[6]
716	; save r[7]
717	mov	DWORD [28+eax],ecx
	; Restore the saved registers in reverse push order.
718	pop	ebx
719	pop	ebp
720	pop	edi
721	pop	esi
722	ret
; ---------------------------------------------------------------------
; _bn_sqr_comba8: square an 8-word operand, producing 16 words.
; Output word k is the sum of all a[i]*a[j] with i+j == k plus the
; carries out of the previous column. Each cross product (i != j) is
; computed once and doubled; each square a[i]*a[i] is added once.
;
; Stack arguments (32-bit words; the function ends in a plain `ret`,
; so the arguments are left on the stack for the caller):
;   1st: r - destination, r[0]..r[15] are written
;   2nd: a - source, a[0]..a[7] are read
; NOTE(review): presumably called from C as bn_sqr_comba8(r, a) on
; 32-bit limbs - confirm against the C declaration.
;
; Registers: edi = r, esi = a. ebx/ecx/ebp form a three-word column
; accumulator whose roles rotate by one register per output word.
; eax/edx hold the multiplicands and then the 64-bit product edx:eax
; of each `mul`. esi, edi, ebp and ebx are saved and restored;
; eax, ecx, edx and the flags are clobbered.
; ---------------------------------------------------------------------
723global	_bn_sqr_comba8
724align	16
725_bn_sqr_comba8:
726L$_bn_sqr_comba8_begin:
727	push	esi
728	push	edi
729	push	ebp
730	push	ebx
731	mov	edi,DWORD [20+esp]	; edi = r (1st argument, after four pushes)
732	mov	esi,DWORD [24+esp]	; esi = a (2nd argument)
733	xor	ebx,ebx
734	xor	ecx,ecx
735	mov	eax,DWORD [esi]
736	; ############### Calculate word 0
	; Accumulator for this column, low to high: ebx, ecx, ebp.
	; `mov` does not modify flags, so the loads and stores interleaved
	; between add/adc below do not disturb the carry chain.
737	xor	ebp,ebp
738	; sqr a[0]*a[0]
739	mul	eax
740	add	ebx,eax
741	adc	ecx,edx
742	mov	edx,DWORD [esi]
743	adc	ebp,0
744	mov	DWORD [edi],ebx
745	mov	eax,DWORD [4+esi]
746	; saved r[0]
747	; ############### Calculate word 1
	; Accumulator for this column, low to high: ecx, ebp, ebx.
748	xor	ebx,ebx
749	; sqr a[1]*a[0]
750	mul	edx
	; Double the cross product: edx:eax *= 2, and the bit shifted out
	; of edx goes into the top accumulator word.
751	add	eax,eax
752	adc	edx,edx
753	adc	ebx,0
754	add	ecx,eax
755	adc	ebp,edx
756	mov	eax,DWORD [8+esi]
757	adc	ebx,0
758	mov	DWORD [4+edi],ecx
759	mov	edx,DWORD [esi]
760	; saved r[1]
761	; ############### Calculate word 2
	; Accumulator for this column, low to high: ebp, ebx, ecx.
762	xor	ecx,ecx
763	; sqr a[2]*a[0]
764	mul	edx
765	add	eax,eax
766	adc	edx,edx
767	adc	ecx,0
768	add	ebp,eax
769	adc	ebx,edx
770	mov	eax,DWORD [4+esi]
771	adc	ecx,0
772	; sqr a[1]*a[1]
	; A true square is added once, without the doubling step.
773	mul	eax
774	add	ebp,eax
775	adc	ebx,edx
776	mov	edx,DWORD [esi]
777	adc	ecx,0
778	mov	DWORD [8+edi],ebp
779	mov	eax,DWORD [12+esi]
780	; saved r[2]
781	; ############### Calculate word 3
782	xor	ebp,ebp
783	; sqr a[3]*a[0]
784	mul	edx
785	add	eax,eax
786	adc	edx,edx
787	adc	ebp,0
788	add	ebx,eax
789	adc	ecx,edx
790	mov	eax,DWORD [8+esi]
791	adc	ebp,0
792	mov	edx,DWORD [4+esi]
793	; sqr a[2]*a[1]
794	mul	edx
795	add	eax,eax
796	adc	edx,edx
797	adc	ebp,0
798	add	ebx,eax
799	adc	ecx,edx
800	mov	eax,DWORD [16+esi]
801	adc	ebp,0
802	mov	DWORD [12+edi],ebx
803	mov	edx,DWORD [esi]
804	; saved r[3]
805	; ############### Calculate word 4
806	xor	ebx,ebx
807	; sqr a[4]*a[0]
808	mul	edx
809	add	eax,eax
810	adc	edx,edx
811	adc	ebx,0
812	add	ecx,eax
813	adc	ebp,edx
814	mov	eax,DWORD [12+esi]
815	adc	ebx,0
816	mov	edx,DWORD [4+esi]
817	; sqr a[3]*a[1]
818	mul	edx
819	add	eax,eax
820	adc	edx,edx
821	adc	ebx,0
822	add	ecx,eax
823	adc	ebp,edx
824	mov	eax,DWORD [8+esi]
825	adc	ebx,0
826	; sqr a[2]*a[2]
827	mul	eax
828	add	ecx,eax
829	adc	ebp,edx
830	mov	edx,DWORD [esi]
831	adc	ebx,0
832	mov	DWORD [16+edi],ecx
833	mov	eax,DWORD [20+esi]
834	; saved r[4]
835	; ############### Calculate word 5
836	xor	ecx,ecx
837	; sqr a[5]*a[0]
838	mul	edx
839	add	eax,eax
840	adc	edx,edx
841	adc	ecx,0
842	add	ebp,eax
843	adc	ebx,edx
844	mov	eax,DWORD [16+esi]
845	adc	ecx,0
846	mov	edx,DWORD [4+esi]
847	; sqr a[4]*a[1]
848	mul	edx
849	add	eax,eax
850	adc	edx,edx
851	adc	ecx,0
852	add	ebp,eax
853	adc	ebx,edx
854	mov	eax,DWORD [12+esi]
855	adc	ecx,0
856	mov	edx,DWORD [8+esi]
857	; sqr a[3]*a[2]
858	mul	edx
859	add	eax,eax
860	adc	edx,edx
861	adc	ecx,0
862	add	ebp,eax
863	adc	ebx,edx
864	mov	eax,DWORD [24+esi]
865	adc	ecx,0
866	mov	DWORD [20+edi],ebp
867	mov	edx,DWORD [esi]
868	; saved r[5]
869	; ############### Calculate word 6
870	xor	ebp,ebp
871	; sqr a[6]*a[0]
872	mul	edx
873	add	eax,eax
874	adc	edx,edx
875	adc	ebp,0
876	add	ebx,eax
877	adc	ecx,edx
878	mov	eax,DWORD [20+esi]
879	adc	ebp,0
880	mov	edx,DWORD [4+esi]
881	; sqr a[5]*a[1]
882	mul	edx
883	add	eax,eax
884	adc	edx,edx
885	adc	ebp,0
886	add	ebx,eax
887	adc	ecx,edx
888	mov	eax,DWORD [16+esi]
889	adc	ebp,0
890	mov	edx,DWORD [8+esi]
891	; sqr a[4]*a[2]
892	mul	edx
893	add	eax,eax
894	adc	edx,edx
895	adc	ebp,0
896	add	ebx,eax
897	adc	ecx,edx
898	mov	eax,DWORD [12+esi]
899	adc	ebp,0
900	; sqr a[3]*a[3]
901	mul	eax
902	add	ebx,eax
903	adc	ecx,edx
904	mov	edx,DWORD [esi]
905	adc	ebp,0
906	mov	DWORD [24+edi],ebx
907	mov	eax,DWORD [28+esi]
908	; saved r[6]
909	; ############### Calculate word 7
910	xor	ebx,ebx
911	; sqr a[7]*a[0]
912	mul	edx
913	add	eax,eax
914	adc	edx,edx
915	adc	ebx,0
916	add	ecx,eax
917	adc	ebp,edx
918	mov	eax,DWORD [24+esi]
919	adc	ebx,0
920	mov	edx,DWORD [4+esi]
921	; sqr a[6]*a[1]
922	mul	edx
923	add	eax,eax
924	adc	edx,edx
925	adc	ebx,0
926	add	ecx,eax
927	adc	ebp,edx
928	mov	eax,DWORD [20+esi]
929	adc	ebx,0
930	mov	edx,DWORD [8+esi]
931	; sqr a[5]*a[2]
932	mul	edx
933	add	eax,eax
934	adc	edx,edx
935	adc	ebx,0
936	add	ecx,eax
937	adc	ebp,edx
938	mov	eax,DWORD [16+esi]
939	adc	ebx,0
940	mov	edx,DWORD [12+esi]
941	; sqr a[4]*a[3]
942	mul	edx
943	add	eax,eax
944	adc	edx,edx
945	adc	ebx,0
946	add	ecx,eax
947	adc	ebp,edx
948	mov	eax,DWORD [28+esi]
949	adc	ebx,0
950	mov	DWORD [28+edi],ecx
951	mov	edx,DWORD [4+esi]
952	; saved r[7]
953	; ############### Calculate word 8
954	xor	ecx,ecx
955	; sqr a[7]*a[1]
956	mul	edx
957	add	eax,eax
958	adc	edx,edx
959	adc	ecx,0
960	add	ebp,eax
961	adc	ebx,edx
962	mov	eax,DWORD [24+esi]
963	adc	ecx,0
964	mov	edx,DWORD [8+esi]
965	; sqr a[6]*a[2]
966	mul	edx
967	add	eax,eax
968	adc	edx,edx
969	adc	ecx,0
970	add	ebp,eax
971	adc	ebx,edx
972	mov	eax,DWORD [20+esi]
973	adc	ecx,0
974	mov	edx,DWORD [12+esi]
975	; sqr a[5]*a[3]
976	mul	edx
977	add	eax,eax
978	adc	edx,edx
979	adc	ecx,0
980	add	ebp,eax
981	adc	ebx,edx
982	mov	eax,DWORD [16+esi]
983	adc	ecx,0
984	; sqr a[4]*a[4]
985	mul	eax
986	add	ebp,eax
987	adc	ebx,edx
988	mov	edx,DWORD [8+esi]
989	adc	ecx,0
990	mov	DWORD [32+edi],ebp
991	mov	eax,DWORD [28+esi]
992	; saved r[8]
993	; ############### Calculate word 9
994	xor	ebp,ebp
995	; sqr a[7]*a[2]
996	mul	edx
997	add	eax,eax
998	adc	edx,edx
999	adc	ebp,0
1000	add	ebx,eax
1001	adc	ecx,edx
1002	mov	eax,DWORD [24+esi]
1003	adc	ebp,0
1004	mov	edx,DWORD [12+esi]
1005	; sqr a[6]*a[3]
1006	mul	edx
1007	add	eax,eax
1008	adc	edx,edx
1009	adc	ebp,0
1010	add	ebx,eax
1011	adc	ecx,edx
1012	mov	eax,DWORD [20+esi]
1013	adc	ebp,0
1014	mov	edx,DWORD [16+esi]
1015	; sqr a[5]*a[4]
1016	mul	edx
1017	add	eax,eax
1018	adc	edx,edx
1019	adc	ebp,0
1020	add	ebx,eax
1021	adc	ecx,edx
1022	mov	eax,DWORD [28+esi]
1023	adc	ebp,0
1024	mov	DWORD [36+edi],ebx
1025	mov	edx,DWORD [12+esi]
1026	; saved r[9]
1027	; ############### Calculate word 10
1028	xor	ebx,ebx
1029	; sqr a[7]*a[3]
1030	mul	edx
1031	add	eax,eax
1032	adc	edx,edx
1033	adc	ebx,0
1034	add	ecx,eax
1035	adc	ebp,edx
1036	mov	eax,DWORD [24+esi]
1037	adc	ebx,0
1038	mov	edx,DWORD [16+esi]
1039	; sqr a[6]*a[4]
1040	mul	edx
1041	add	eax,eax
1042	adc	edx,edx
1043	adc	ebx,0
1044	add	ecx,eax
1045	adc	ebp,edx
1046	mov	eax,DWORD [20+esi]
1047	adc	ebx,0
1048	; sqr a[5]*a[5]
1049	mul	eax
1050	add	ecx,eax
1051	adc	ebp,edx
1052	mov	edx,DWORD [16+esi]
1053	adc	ebx,0
1054	mov	DWORD [40+edi],ecx
1055	mov	eax,DWORD [28+esi]
1056	; saved r[10]
1057	; ############### Calculate word 11
1058	xor	ecx,ecx
1059	; sqr a[7]*a[4]
1060	mul	edx
1061	add	eax,eax
1062	adc	edx,edx
1063	adc	ecx,0
1064	add	ebp,eax
1065	adc	ebx,edx
1066	mov	eax,DWORD [24+esi]
1067	adc	ecx,0
1068	mov	edx,DWORD [20+esi]
1069	; sqr a[6]*a[5]
1070	mul	edx
1071	add	eax,eax
1072	adc	edx,edx
1073	adc	ecx,0
1074	add	ebp,eax
1075	adc	ebx,edx
1076	mov	eax,DWORD [28+esi]
1077	adc	ecx,0
1078	mov	DWORD [44+edi],ebp
1079	mov	edx,DWORD [20+esi]
1080	; saved r[11]
1081	; ############### Calculate word 12
1082	xor	ebp,ebp
1083	; sqr a[7]*a[5]
1084	mul	edx
1085	add	eax,eax
1086	adc	edx,edx
1087	adc	ebp,0
1088	add	ebx,eax
1089	adc	ecx,edx
1090	mov	eax,DWORD [24+esi]
1091	adc	ebp,0
1092	; sqr a[6]*a[6]
1093	mul	eax
1094	add	ebx,eax
1095	adc	ecx,edx
1096	mov	edx,DWORD [24+esi]
1097	adc	ebp,0
1098	mov	DWORD [48+edi],ebx
1099	mov	eax,DWORD [28+esi]
1100	; saved r[12]
1101	; ############### Calculate word 13
1102	xor	ebx,ebx
1103	; sqr a[7]*a[6]
1104	mul	edx
1105	add	eax,eax
1106	adc	edx,edx
1107	adc	ebx,0
1108	add	ecx,eax
1109	adc	ebp,edx
1110	mov	eax,DWORD [28+esi]
1111	adc	ebx,0
1112	mov	DWORD [52+edi],ecx
1113	; saved r[13]
1114	; ############### Calculate word 14
	; Last column: ebp becomes r[14] and ebx becomes r[15]. The carry
	; folded into ecx below is never stored afterwards.
1115	xor	ecx,ecx
1116	; sqr a[7]*a[7]
1117	mul	eax
1118	add	ebp,eax
1119	adc	ebx,edx
1120	adc	ecx,0
1121	mov	DWORD [56+edi],ebp
1122	; saved r[14]
	; save r[15]
1123	mov	DWORD [60+edi],ebx
	; Restore the saved registers in reverse push order.
1124	pop	ebx
1125	pop	ebp
1126	pop	edi
1127	pop	esi
1128	ret
; ---------------------------------------------------------------------
; _bn_sqr_comba4: square a 4-word operand, producing 8 words.
; Output word k is the sum of all a[i]*a[j] with i+j == k plus the
; carries out of the previous column. Each cross product (i != j) is
; computed once and doubled; each square a[i]*a[i] is added once.
;
; Stack arguments (32-bit words; the function ends in a plain `ret`,
; so the arguments are left on the stack for the caller):
;   1st: r - destination, r[0]..r[7] are written
;   2nd: a - source, a[0]..a[3] are read
; NOTE(review): presumably called from C as bn_sqr_comba4(r, a) on
; 32-bit limbs - confirm against the C declaration.
;
; Registers: edi = r, esi = a. ebx/ecx/ebp form a three-word column
; accumulator whose roles rotate by one register per output word.
; eax/edx hold the multiplicands and then the 64-bit product edx:eax
; of each `mul`. esi, edi, ebp and ebx are saved and restored;
; eax, ecx, edx and the flags are clobbered.
; ---------------------------------------------------------------------
1129global	_bn_sqr_comba4
1130align	16
1131_bn_sqr_comba4:
1132L$_bn_sqr_comba4_begin:
1133	push	esi
1134	push	edi
1135	push	ebp
1136	push	ebx
1137	mov	edi,DWORD [20+esp]	; edi = r (1st argument, after four pushes)
1138	mov	esi,DWORD [24+esp]	; esi = a (2nd argument)
1139	xor	ebx,ebx
1140	xor	ecx,ecx
1141	mov	eax,DWORD [esi]
1142	; ############### Calculate word 0
	; Accumulator for this column, low to high: ebx, ecx, ebp.
	; `mov` does not modify flags, so the loads and stores interleaved
	; between add/adc below do not disturb the carry chain.
1143	xor	ebp,ebp
1144	; sqr a[0]*a[0]
1145	mul	eax
1146	add	ebx,eax
1147	adc	ecx,edx
1148	mov	edx,DWORD [esi]
1149	adc	ebp,0
1150	mov	DWORD [edi],ebx
1151	mov	eax,DWORD [4+esi]
1152	; saved r[0]
1153	; ############### Calculate word 1
	; Accumulator for this column, low to high: ecx, ebp, ebx.
1154	xor	ebx,ebx
1155	; sqr a[1]*a[0]
1156	mul	edx
	; Double the cross product: edx:eax *= 2, and the bit shifted out
	; of edx goes into the top accumulator word.
1157	add	eax,eax
1158	adc	edx,edx
1159	adc	ebx,0
1160	add	ecx,eax
1161	adc	ebp,edx
1162	mov	eax,DWORD [8+esi]
1163	adc	ebx,0
1164	mov	DWORD [4+edi],ecx
1165	mov	edx,DWORD [esi]
1166	; saved r[1]
1167	; ############### Calculate word 2
	; Accumulator for this column, low to high: ebp, ebx, ecx.
1168	xor	ecx,ecx
1169	; sqr a[2]*a[0]
1170	mul	edx
1171	add	eax,eax
1172	adc	edx,edx
1173	adc	ecx,0
1174	add	ebp,eax
1175	adc	ebx,edx
1176	mov	eax,DWORD [4+esi]
1177	adc	ecx,0
1178	; sqr a[1]*a[1]
	; A true square is added once, without the doubling step.
1179	mul	eax
1180	add	ebp,eax
1181	adc	ebx,edx
1182	mov	edx,DWORD [esi]
1183	adc	ecx,0
1184	mov	DWORD [8+edi],ebp
1185	mov	eax,DWORD [12+esi]
1186	; saved r[2]
1187	; ############### Calculate word 3
1188	xor	ebp,ebp
1189	; sqr a[3]*a[0]
1190	mul	edx
1191	add	eax,eax
1192	adc	edx,edx
1193	adc	ebp,0
1194	add	ebx,eax
1195	adc	ecx,edx
1196	mov	eax,DWORD [8+esi]
1197	adc	ebp,0
1198	mov	edx,DWORD [4+esi]
1199	; sqr a[2]*a[1]
1200	mul	edx
1201	add	eax,eax
1202	adc	edx,edx
1203	adc	ebp,0
1204	add	ebx,eax
1205	adc	ecx,edx
1206	mov	eax,DWORD [12+esi]
1207	adc	ebp,0
1208	mov	DWORD [12+edi],ebx
1209	mov	edx,DWORD [4+esi]
1210	; saved r[3]
1211	; ############### Calculate word 4
1212	xor	ebx,ebx
1213	; sqr a[3]*a[1]
1214	mul	edx
1215	add	eax,eax
1216	adc	edx,edx
1217	adc	ebx,0
1218	add	ecx,eax
1219	adc	ebp,edx
1220	mov	eax,DWORD [8+esi]
1221	adc	ebx,0
1222	; sqr a[2]*a[2]
1223	mul	eax
1224	add	ecx,eax
1225	adc	ebp,edx
1226	mov	edx,DWORD [8+esi]
1227	adc	ebx,0
1228	mov	DWORD [16+edi],ecx
1229	mov	eax,DWORD [12+esi]
1230	; saved r[4]
1231	; ############### Calculate word 5
1232	xor	ecx,ecx
1233	; sqr a[3]*a[2]
1234	mul	edx
1235	add	eax,eax
1236	adc	edx,edx
1237	adc	ecx,0
1238	add	ebp,eax
1239	adc	ebx,edx
1240	mov	eax,DWORD [12+esi]
1241	adc	ecx,0
1242	mov	DWORD [20+edi],ebp
1243	; saved r[5]
1244	; ############### Calculate word 6
	; Last column: ebx becomes r[6] and ecx becomes r[7]. The carry
	; folded into ebp below is never stored afterwards.
1245	xor	ebp,ebp
1246	; sqr a[3]*a[3]
1247	mul	eax
1248	add	ebx,eax
1249	adc	ecx,edx
1250	adc	ebp,0
1251	mov	DWORD [24+edi],ebx
1252	; saved r[6]
	; save r[7]
1253	mov	DWORD [28+edi],ecx
	; Restore the saved registers in reverse push order.
1254	pop	ebx
1255	pop	ebp
1256	pop	edi
1257	pop	esi
1258	ret
1259