; • Home
;   • Line#
;   • Scopes#
;   • Navigate#
;   • Raw
;   • Download
; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Optional symbol prefixing: the include is pulled in only when the build
; defines BORINGSSL_PREFIX.
%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif

; Everything from here to the matching %else near the end of the file is
; assembled only when the output format is win32.
%ifidn __OUTPUT_FORMAT__, win32

; Choose the code section declaration for the active output format.
%ifidn __OUTPUT_FORMAT__, obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__, win32
$@feat.00 equ 1
section .text code align=64
%else
section .text code
%endif

; The capability symbol is declared with `common` at the end of this file,
; so the extern declaration below is left disabled.
;extern	_OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; _bn_mul_add_words
;
; For i in [0, n):  t = dst[i] + src[i] * w + carry
;                   dst[i] = low 32 bits of t,  carry = high 32 bits of t
; Returns the final carry in eax (0 when n == 0).
;
; Stack arguments on entry (caller removes them; the routine ends in a
; plain ret):
;   [esp+4]   dst  dword array, read and written
;   [esp+8]   src  dword array, read only
;   [esp+12]  n    number of dwords to process
;   [esp+16]  w    32-bit multiplier
;
; Dispatch: bit 26 of the first dword of _OPENSSL_ia32cap_P selects the
; pmuludq path (labels *_sse2_*); otherwise the integer mul path is used.
;
; Clobbers eax, ecx, edx and flags.  The pmuludq path uses mm0-mm7 and
; executes emms before returning.  The integer path saves and restores
; ebp, ebx, esi and edi.
;
; Zero-count handling: the pmuludq path used to fall into its one-word
; loop with n == 0, store a word and then count down from 0xFFFFFFFF.
; It now returns a zero carry without touching memory, matching the
; integer path.  This file is generated, so the generator script needs
; the same guard.
;-----------------------------------------------------------------------
global	_bn_mul_add_words
align	16
_bn_mul_add_words:
L$_bn_mul_add_words_begin:
	lea	eax,[_OPENSSL_ia32cap_P]
	bt	DWORD [eax],26			; CF = capability bit 26
	jnc	NEAR L$000maw_non_sse2
	mov	eax,DWORD [4+esp]		; eax = dst
	mov	edx,DWORD [8+esp]		; edx = src
	mov	ecx,DWORD [12+esp]		; ecx = n
	movd	mm0,DWORD [16+esp]		; mm0 = w
	pxor	mm1,mm1				; mm1 = running carry = 0
	test	ecx,ecx
	jz	NEAR L$003maw_sse2_exit		; n == 0: return carry 0
	jmp	NEAR L$001maw_sse2_entry
align	16
	; Eight words per iteration.  Products (src[k]*w) and the matching
	; dst[k] words are summed in side registers ahead of time; mm1 carries
	; the 64-bit running sum whose low dword is stored and whose high dword
	; is shifted down as the carry into the next word.  The interleaving is
	; deliberate scheduling; keep the order.
L$002maw_sse2_unrolled:
	movd	mm3,DWORD [eax]
	paddq	mm1,mm3
	movd	mm2,DWORD [edx]
	pmuludq	mm2,mm0
	movd	mm4,DWORD [4+edx]
	pmuludq	mm4,mm0
	movd	mm6,DWORD [8+edx]
	pmuludq	mm6,mm0
	movd	mm7,DWORD [12+edx]
	pmuludq	mm7,mm0
	paddq	mm1,mm2
	movd	mm3,DWORD [4+eax]
	paddq	mm3,mm4
	movd	mm5,DWORD [8+eax]
	paddq	mm5,mm6
	movd	mm4,DWORD [12+eax]
	paddq	mm7,mm4
	movd	DWORD [eax],mm1			; dst[0]
	movd	mm2,DWORD [16+edx]
	pmuludq	mm2,mm0
	psrlq	mm1,32
	movd	mm4,DWORD [20+edx]
	pmuludq	mm4,mm0
	paddq	mm1,mm3
	movd	mm6,DWORD [24+edx]
	pmuludq	mm6,mm0
	movd	DWORD [4+eax],mm1		; dst[1]
	psrlq	mm1,32
	movd	mm3,DWORD [28+edx]
	add	edx,32				; src += 8 words
	pmuludq	mm3,mm0
	paddq	mm1,mm5
	movd	mm5,DWORD [16+eax]
	paddq	mm2,mm5
	movd	DWORD [8+eax],mm1		; dst[2]
	psrlq	mm1,32
	paddq	mm1,mm7
	movd	mm5,DWORD [20+eax]
	paddq	mm4,mm5
	movd	DWORD [12+eax],mm1		; dst[3]
	psrlq	mm1,32
	paddq	mm1,mm2
	movd	mm5,DWORD [24+eax]
	paddq	mm6,mm5
	movd	DWORD [16+eax],mm1		; dst[4]
	psrlq	mm1,32
	paddq	mm1,mm4
	movd	mm5,DWORD [28+eax]
	paddq	mm3,mm5
	movd	DWORD [20+eax],mm1		; dst[5]
	psrlq	mm1,32
	paddq	mm1,mm6
	movd	DWORD [24+eax],mm1		; dst[6]
	psrlq	mm1,32
	paddq	mm1,mm3
	movd	DWORD [28+eax],mm1		; dst[7]
	lea	eax,[32+eax]			; dst += 8 words (lea keeps flags free)
	psrlq	mm1,32
	sub	ecx,8
	jz	NEAR L$003maw_sse2_exit		; count was an exact multiple of 8
L$001maw_sse2_entry:
	test	ecx,4294967288			; at least 8 words left?
	jnz	NEAR L$002maw_sse2_unrolled
align	4
	; One word per iteration; ecx is in 1..7 whenever this loop is entered.
L$004maw_sse2_loop:
	movd	mm2,DWORD [edx]
	movd	mm3,DWORD [eax]
	pmuludq	mm2,mm0				; mm2 = src[i] * w
	lea	edx,[4+edx]
	paddq	mm1,mm3				; carry += dst[i]
	paddq	mm1,mm2				; carry += src[i] * w
	movd	DWORD [eax],mm1			; dst[i] = low dword
	sub	ecx,1				; sets ZF for the jnz below
	psrlq	mm1,32				; carry = high dword
	lea	eax,[4+eax]
	jnz	NEAR L$004maw_sse2_loop
L$003maw_sse2_exit:
	movd	eax,mm1				; return the carry
	emms
	ret
align	16
	; Integer path.  edi = dst, ebx = src, ebp = w, esi = carry,
	; ecx = words remaining.
L$000maw_non_sse2:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	esi,esi				; carry = 0
	mov	edi,DWORD [20+esp]		; dst
	mov	ecx,DWORD [28+esp]		; n
	mov	ebx,DWORD [24+esp]		; src
	and	ecx,4294967288			; n rounded down to a multiple of 8
	mov	ebp,DWORD [32+esp]		; w
	push	ecx				; extra stack slot: n is now at [32+esp]
	jz	NEAR L$005maw_finish		; ZF still comes from the and above
align	16
L$006maw_loop:
	; Eight rounds at byte offsets 0, 4, ..., 28.
%assign MAW_OFS 0
%rep 8
	mov	eax,DWORD [MAW_OFS+ebx]
	mul	ebp				; edx:eax = src word * w
	add	eax,esi				; + carry in
	adc	edx,0
	add	eax,DWORD [MAW_OFS+edi]		; + dst word
	adc	edx,0
	mov	DWORD [MAW_OFS+edi],eax
	mov	esi,edx				; carry out
%assign MAW_OFS MAW_OFS+4
%endrep
	sub	ecx,8
	lea	ebx,[32+ebx]			; lea leaves the sub's ZF intact
	lea	edi,[32+edi]
	jnz	NEAR L$006maw_loop
L$005maw_finish:
	mov	ecx,DWORD [32+esp]		; reload n (offset shifted by the push)
	and	ecx,7				; leftover words, 0..7
	jz	NEAR L$008maw_end
L$007maw_finish2:
	; Up to seven leftover words.  The first six rounds count ecx down and
	; leave as soon as it reaches zero; dec sets ZF and the two mov
	; instructions before jz do not change it.
%assign MAW_OFS 0
%rep 6
	mov	eax,DWORD [MAW_OFS+ebx]
	mul	ebp
	add	eax,esi
	adc	edx,0
	add	eax,DWORD [MAW_OFS+edi]
	adc	edx,0
	dec	ecx
	mov	DWORD [MAW_OFS+edi],eax
	mov	esi,edx
	jz	NEAR L$008maw_end
%assign MAW_OFS MAW_OFS+4
%endrep
	; Seventh leftover word (byte offset 24); nothing can follow it.
	mov	eax,DWORD [24+ebx]
	mul	ebp
	add	eax,esi
	adc	edx,0
	add	eax,DWORD [24+edi]
	adc	edx,0
	mov	DWORD [24+edi],eax
	mov	esi,edx
L$008maw_end:
	mov	eax,esi				; return the carry
	pop	ecx				; drop the slot pushed after the prologue
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _bn_mul_words
;
; For i in [0, n):  t = src[i] * w + carry
;                   dst[i] = low 32 bits of t,  carry = high 32 bits of t
; Returns the final carry in eax (0 when n == 0).
;
; Stack arguments on entry (caller removes them):
;   [esp+4]   dst  dword array, written
;   [esp+8]   src  dword array, read
;   [esp+12]  n    number of dwords to process
;   [esp+16]  w    32-bit multiplier
;
; Dispatch: bit 26 of the first dword of _OPENSSL_ia32cap_P selects the
; pmuludq path; otherwise the integer mul path is used.
;
; Clobbers eax, ecx, edx and flags.  The pmuludq path uses mm0-mm2 and
; executes emms; the integer path saves and restores ebp, ebx, esi, edi.
;
; Zero-count handling: the pmuludq loop used to run unconditionally, so
; n == 0 stored a word and then counted down from 0xFFFFFFFF.  It is now
; skipped, matching the integer path.  This file is generated, so the
; generator script needs the same guard.
;-----------------------------------------------------------------------
global	_bn_mul_words
align	16
_bn_mul_words:
L$_bn_mul_words_begin:
	lea	eax,[_OPENSSL_ia32cap_P]
	bt	DWORD [eax],26			; CF = capability bit 26
	jnc	NEAR L$009mw_non_sse2
	mov	eax,DWORD [4+esp]		; eax = dst
	mov	edx,DWORD [8+esp]		; edx = src
	mov	ecx,DWORD [12+esp]		; ecx = n
	movd	mm0,DWORD [16+esp]		; mm0 = w
	pxor	mm1,mm1				; mm1 = carry = 0
	test	ecx,ecx
	jz	NEAR L$mw_sse2_exit		; n == 0: return carry 0
align	16
L$010mw_sse2_loop:
	movd	mm2,DWORD [edx]
	pmuludq	mm2,mm0				; mm2 = src[i] * w
	lea	edx,[4+edx]
	paddq	mm1,mm2				; carry += product
	movd	DWORD [eax],mm1			; dst[i] = low dword
	sub	ecx,1				; sets ZF for the jnz below
	psrlq	mm1,32				; carry = high dword
	lea	eax,[4+eax]
	jnz	NEAR L$010mw_sse2_loop
L$mw_sse2_exit:
	movd	eax,mm1				; return the carry
	emms
	ret
align	16
	; Integer path.  edi = dst, ebx = src, ecx = w, esi = carry,
	; ebp = words remaining.
L$009mw_non_sse2:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	esi,esi				; carry = 0
	mov	edi,DWORD [20+esp]		; dst
	mov	ebx,DWORD [24+esp]		; src
	mov	ebp,DWORD [28+esp]		; n
	mov	ecx,DWORD [32+esp]		; w
	and	ebp,4294967288			; n rounded down to a multiple of 8
	jz	NEAR L$011mw_finish
L$012mw_loop:
	; Eight rounds at byte offsets 0, 4, ..., 28.
%assign MW_OFS 0
%rep 8
	mov	eax,DWORD [MW_OFS+ebx]
	mul	ecx				; edx:eax = src word * w
	add	eax,esi				; + carry in
	adc	edx,0
	mov	DWORD [MW_OFS+edi],eax
	mov	esi,edx				; carry out
%assign MW_OFS MW_OFS+4
%endrep
	add	ebx,32
	add	edi,32
	sub	ebp,8
	jnz	NEAR L$012mw_loop
L$011mw_finish:
	mov	ebp,DWORD [28+esp]		; reload n
	and	ebp,7				; leftover words, 0..7
	jz	NEAR L$014mw_end
L$013mw_finish2:
	; Up to seven leftover words; the first six rounds leave as soon as
	; the leftover count reaches zero.
%assign MW_OFS 0
%rep 6
	mov	eax,DWORD [MW_OFS+ebx]
	mul	ecx
	add	eax,esi
	adc	edx,0
	mov	DWORD [MW_OFS+edi],eax
	mov	esi,edx
	dec	ebp
	jz	NEAR L$014mw_end
%assign MW_OFS MW_OFS+4
%endrep
	; Seventh leftover word (byte offset 24); nothing can follow it.
	mov	eax,DWORD [24+ebx]
	mul	ecx
	add	eax,esi
	adc	edx,0
	mov	DWORD [24+edi],eax
	mov	esi,edx
L$014mw_end:
	mov	eax,esi				; return the carry
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _bn_sqr_words
;
; For i in [0, n): stores the 64-bit square of src[i] as two dwords,
; low half at dst[2*i] and high half at dst[2*i + 1].
; eax is not set to any meaningful result on return.
;
; Stack arguments on entry (caller removes them):
;   [esp+4]   dst  dword array, 2*n dwords written
;   [esp+8]   src  dword array, n dwords read
;   [esp+12]  n    number of input dwords
;
; Dispatch: bit 26 of the first dword of _OPENSSL_ia32cap_P selects the
; pmuludq path; otherwise the integer mul path is used.
;
; Clobbers eax, ecx, edx and flags.  The pmuludq path uses mm0 and
; executes emms; the integer path saves and restores ebp, ebx, esi, edi.
;
; Zero-count handling: the pmuludq loop used to run unconditionally, so
; n == 0 stored eight bytes and then counted down from 0xFFFFFFFF.  It is
; now skipped, matching the integer path.  This file is generated, so the
; generator script needs the same guard.
;-----------------------------------------------------------------------
global	_bn_sqr_words
align	16
_bn_sqr_words:
L$_bn_sqr_words_begin:
	lea	eax,[_OPENSSL_ia32cap_P]
	bt	DWORD [eax],26			; CF = capability bit 26
	jnc	NEAR L$015sqr_non_sse2
	mov	eax,DWORD [4+esp]		; eax = dst
	mov	edx,DWORD [8+esp]		; edx = src
	mov	ecx,DWORD [12+esp]		; ecx = n
	test	ecx,ecx
	jz	NEAR L$sqr_sse2_exit		; n == 0: nothing to store
align	16
L$016sqr_sse2_loop:
	movd	mm0,DWORD [edx]
	pmuludq	mm0,mm0				; mm0 = src[i] squared (64 bits)
	lea	edx,[4+edx]
	movq	[eax],mm0			; dst[2i], dst[2i+1]
	sub	ecx,1				; sets ZF for the jnz below
	lea	eax,[8+eax]
	jnz	NEAR L$016sqr_sse2_loop
L$sqr_sse2_exit:
	emms
	ret
align	16
	; Integer path.  esi = dst, edi = src, ebx = words remaining.
L$015sqr_non_sse2:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	esi,DWORD [20+esp]		; dst
	mov	edi,DWORD [24+esp]		; src
	mov	ebx,DWORD [28+esp]		; n
	and	ebx,4294967288			; n rounded down to a multiple of 8
	jz	NEAR L$017sw_finish
L$018sw_loop:
	; Eight rounds: source byte offsets 0, 4, ..., 28; destination byte
	; offsets 0, 8, ..., 56.
%assign SQR_SRC 0
%assign SQR_DST 0
%rep 8
	mov	eax,DWORD [SQR_SRC+edi]
	mul	eax				; edx:eax = word squared
	mov	DWORD [SQR_DST+esi],eax
	mov	DWORD [SQR_DST+4+esi],edx
%assign SQR_SRC SQR_SRC+4
%assign SQR_DST SQR_DST+8
%endrep
	add	edi,32
	add	esi,64
	sub	ebx,8
	jnz	NEAR L$018sw_loop
L$017sw_finish:
	mov	ebx,DWORD [28+esp]		; reload n
	and	ebx,7				; leftover words, 0..7
	jz	NEAR L$019sw_end
	; Up to seven leftover words; the first six rounds leave as soon as
	; the leftover count reaches zero (dec sets ZF, the following mov does
	; not change it).
%assign SQR_SRC 0
%assign SQR_DST 0
%rep 6
	mov	eax,DWORD [SQR_SRC+edi]
	mul	eax
	mov	DWORD [SQR_DST+esi],eax
	dec	ebx
	mov	DWORD [SQR_DST+4+esi],edx
	jz	NEAR L$019sw_end
%assign SQR_SRC SQR_SRC+4
%assign SQR_DST SQR_DST+8
%endrep
	; Seventh leftover word (source offset 24, destination offset 48).
	mov	eax,DWORD [24+edi]
	mul	eax
	mov	DWORD [48+esi],eax
	mov	DWORD [52+esi],edx
L$019sw_end:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _bn_div_words
;
; Divides the 64-bit value hi:lo by d using a single unsigned div.
; Returns the quotient in eax; div also leaves the remainder in edx.
;
; Stack arguments on entry (caller removes them):
;   [esp+4]   hi  upper 32 bits of the dividend
;   [esp+8]   lo  lower 32 bits of the dividend
;   [esp+12]  d   divisor
;
; No checks are made: the div instruction faults when d is zero or when
; the quotient does not fit in 32 bits (hi >= d).
; Clobbers eax, ecx, edx and flags.
;-----------------------------------------------------------------------
global	_bn_div_words
align	16
_bn_div_words:
L$_bn_div_words_begin:
	mov	ecx,DWORD [esp+12]		; divisor
	mov	eax,DWORD [esp+8]		; dividend, low half
	mov	edx,DWORD [esp+4]		; dividend, high half
	div	ecx				; eax = quotient, edx = remainder
	ret
;-----------------------------------------------------------------------
; _bn_add_words
;
; For i in [0, n):  dst[i] = a[i] + b[i] + carry   (32-bit words)
; Returns the carry out of the last word (0 or 1) in eax; 0 when n == 0.
;
; Stack arguments on entry (caller removes them):
;   [esp+4]   dst  dword array, written
;   [esp+8]   a    dword array, read
;   [esp+12]  b    dword array, read
;   [esp+16]  n    number of dwords
;
; Register roles: ebx = dst, esi = a, edi = b, ebp = words remaining,
; eax = carry, ecx/edx = scratch.  ebp, ebx, esi and edi are saved and
; restored.
;
; Each round zeroes eax with mov rather than xor because the carry flag
; produced by the preceding add must survive for the adc that follows.
;-----------------------------------------------------------------------
global	_bn_add_words
align	16
_bn_add_words:
L$_bn_add_words_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	ebx,DWORD [esp+20]		; dst
	mov	esi,DWORD [esp+24]		; a
	mov	edi,DWORD [esp+28]		; b
	mov	ebp,DWORD [esp+32]		; n
	xor	eax,eax				; carry = 0
	and	ebp,0xFFFFFFF8			; n rounded down to a multiple of 8
	jz	NEAR L$020aw_finish
L$021aw_loop:
	; Eight rounds at byte offsets 0, 4, ..., 28.
%assign AW_OFS 0
%rep 8
	mov	ecx,DWORD [esi+AW_OFS]
	mov	edx,DWORD [edi+AW_OFS]
	add	ecx,eax				; a word + carry in
	mov	eax,0				; keeps CF from the add
	adc	eax,eax				; eax = carry of first add
	add	ecx,edx				; + b word
	adc	eax,0				; fold in carry of second add
	mov	DWORD [ebx+AW_OFS],ecx
%assign AW_OFS AW_OFS+4
%endrep
	add	esi,32
	add	edi,32
	add	ebx,32
	sub	ebp,8
	jnz	NEAR L$021aw_loop
L$020aw_finish:
	mov	ebp,DWORD [esp+32]		; reload n
	and	ebp,7				; leftover words, 0..7
	jz	NEAR L$022aw_end
	; Up to seven leftover words; the first six rounds leave as soon as
	; the leftover count reaches zero (dec sets ZF, the store after it
	; does not change flags).
%assign AW_OFS 0
%rep 6
	mov	ecx,DWORD [esi+AW_OFS]
	mov	edx,DWORD [edi+AW_OFS]
	add	ecx,eax
	mov	eax,0
	adc	eax,eax
	add	ecx,edx
	adc	eax,0
	dec	ebp
	mov	DWORD [ebx+AW_OFS],ecx
	jz	NEAR L$022aw_end
%assign AW_OFS AW_OFS+4
%endrep
	; Seventh leftover word (byte offset 24); nothing can follow it.
	mov	ecx,DWORD [esi+24]
	mov	edx,DWORD [edi+24]
	add	ecx,eax
	mov	eax,0
	adc	eax,eax
	add	ecx,edx
	adc	eax,0
	mov	DWORD [ebx+24],ecx
L$022aw_end:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _bn_sub_words
;
; For i in [0, n):  dst[i] = a[i] - b[i] - borrow   (32-bit words)
; Returns the borrow out of the last word (0 or 1) in eax; 0 when n == 0.
;
; Stack arguments on entry (caller removes them):
;   [esp+4]   dst  dword array, written
;   [esp+8]   a    dword array, read (minuend)
;   [esp+12]  b    dword array, read (subtrahend)
;   [esp+16]  n    number of dwords
;
; Register roles: ebx = dst, esi = a, edi = b, ebp = words remaining,
; eax = borrow, ecx/edx = scratch.  ebp, ebx, esi and edi are saved and
; restored.
;
; Each round zeroes eax with mov rather than xor because the carry flag
; (the borrow of the preceding sub) must survive for the adc that
; follows.
;-----------------------------------------------------------------------
global	_bn_sub_words
align	16
_bn_sub_words:
L$_bn_sub_words_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	ebx,DWORD [esp+20]		; dst
	mov	esi,DWORD [esp+24]		; a
	mov	edi,DWORD [esp+28]		; b
	mov	ebp,DWORD [esp+32]		; n
	xor	eax,eax				; borrow = 0
	and	ebp,0xFFFFFFF8			; n rounded down to a multiple of 8
	jz	NEAR L$023aw_finish
L$024aw_loop:
	; Eight rounds at byte offsets 0, 4, ..., 28.
%assign SW_OFS 0
%rep 8
	mov	ecx,DWORD [esi+SW_OFS]
	mov	edx,DWORD [edi+SW_OFS]
	sub	ecx,eax				; a word - borrow in
	mov	eax,0				; keeps CF from the sub
	adc	eax,eax				; eax = borrow of first sub
	sub	ecx,edx				; - b word
	adc	eax,0				; fold in borrow of second sub
	mov	DWORD [ebx+SW_OFS],ecx
%assign SW_OFS SW_OFS+4
%endrep
	add	esi,32
	add	edi,32
	add	ebx,32
	sub	ebp,8
	jnz	NEAR L$024aw_loop
L$023aw_finish:
	mov	ebp,DWORD [esp+32]		; reload n
	and	ebp,7				; leftover words, 0..7
	jz	NEAR L$025aw_end
	; Up to seven leftover words; the first six rounds leave as soon as
	; the leftover count reaches zero (dec sets ZF, the store after it
	; does not change flags).
%assign SW_OFS 0
%rep 6
	mov	ecx,DWORD [esi+SW_OFS]
	mov	edx,DWORD [edi+SW_OFS]
	sub	ecx,eax
	mov	eax,0
	adc	eax,eax
	sub	ecx,edx
	adc	eax,0
	dec	ebp
	mov	DWORD [ebx+SW_OFS],ecx
	jz	NEAR L$025aw_end
%assign SW_OFS SW_OFS+4
%endrep
	; Seventh leftover word (byte offset 24); nothing can follow it.
	mov	ecx,DWORD [esi+24]
	mov	edx,DWORD [edi+24]
	sub	ecx,eax
	mov	eax,0
	adc	eax,eax
	sub	ecx,edx
	adc	eax,0
	mov	DWORD [ebx+24],ecx
L$025aw_end:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; Storage for the capability words that the dispatch code in this file tests
; (bit 26 of the first dword): a 16-byte common symbol.
section	.bss
common	_OPENSSL_ia32cap_P 16

%else
; Any output format other than win32 gets a single `ret` instead of the code
; above.
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif