• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; Select the code section according to the NASM output format.
; NOTE(review): every statement in this listing has a decimal number fused to
; its start (e.g. "1%ifidn"); this looks like line-number residue from a
; listing tool and would need stripping before the file assembles - confirm.
; obj format: segment "code", 32-bit, class "code", 64-byte aligned.
1%ifidn __OUTPUT_FORMAT__,obj
2section	code	use32 class=code align=64
; win32 format: defines the symbol $@feat.00 as 1 and uses a 64-byte aligned
; .text section.
; NOTE(review): $@feat.00 is presumably the COFF feature-flag symbol
; (safe-SEH marker) - confirm against the NASM win32 documentation.
3%elifidn __OUTPUT_FORMAT__,win32
4$@feat.00 equ 1
5section	.text	code align=64
; any other output format: plain .text code section.
6%else
7section	.text	code
8%endif
; The extern declaration is left commented out; _OPENSSL_ia32cap_P is declared
; as a common symbol in the .bss segment at the end of this file instead.
9;extern	_OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; _bn_mul_mont - exported entry point, 32-bit x86, all arguments on the stack.
; The identification string stored after the routine reads
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
; NOTE(review): presumably OpenSSL's bn_mul_mont(rp, ap, bp, np, n0, num);
;               confirm names and types against the C prototype.
; In (offsets are valid after the four pushes below):
;   [20+esp] arg0, [24+esp] arg1, [28+esp] arg2, [32+esp] arg3 - pointers
;   [36+esp] arg4 - pointer; only the dword it points to is used
;   [40+esp] arg5 - word count, compared signed against 4
; Out: eax = 0 when the count is below 4 (nothing else is done);
;      the normal exit path at the end of the routine returns eax = 1.
; ebp, ebx, esi and edi are pushed here and popped before ret.
;-----------------------------------------------------------------------
10global	_bn_mul_mont
11align	16
12_bn_mul_mont:
13L$_bn_mul_mont_begin:
14	push	ebp
15	push	ebx
16	push	esi
17	push	edi
; eax = 0 is the return value for the early exit taken when count < 4.
18	xor	eax,eax
19	mov	edi,DWORD [40+esp]
20	cmp	edi,4
21	jl	NEAR L$000just_leave
; esi = address of the arg0 stack slot, edx = address of the arg1 stack slot.
22	lea	esi,[20+esp]
23	lea	edx,[24+esp]
; ebp = esp - 32 - 4*(count+2): candidate base of the local frame.
; The two negations leave edi = count+2 afterwards.
24	add	edi,2
25	neg	edi
26	lea	ebp,[edi*4+esp-32]
27	neg	edi
; Lower ebp by (ebp - edx) mod 2048, then by a further 2048 when bit 11 of
; (edx xor ebp) is clear, and finally round down to a 64-byte boundary.
; NOTE(review): the reason for this placement relative to the argument area
; is not stated in this file (possibly cache/aliasing related) - confirm.
28	mov	eax,ebp
29	sub	eax,edx
30	and	eax,2047
31	sub	ebp,eax
32	xor	edx,ebp
33	and	edx,2048
34	xor	edx,2048
35	sub	ebp,edx
36	and	ebp,-64
; eax = (esp - ebp) rounded down to a multiple of 4096; edx keeps the
; original esp; esp moves to ebp + eax, then one dword is read there.
37	mov	eax,esp
38	sub	eax,ebp
39	and	eax,-4096
40	mov	edx,esp
41	lea	esp,[eax*1+ebp]
42	mov	eax,DWORD [esp]
43	cmp	esp,ebp
44	ja	NEAR L$001page_walk
45	jmp	NEAR L$002page_walk_done
46align	16
; Step esp down 4096 bytes at a time, reading one dword per step, until it
; is no longer above ebp (unsigned compare).
; NOTE(review): presumably a stack probe so guard pages are touched in order.
47L$001page_walk:
48	lea	esp,[esp-4096]
49	mov	eax,DWORD [esp]
50	cmp	esp,ebp
51	ja	NEAR L$001page_walk
52L$002page_walk_done:
; Fetch the five leading arguments through esi (which still points at the
; caller's argument area); arg4 is dereferenced so that its first dword,
; not the pointer, is kept.
53	mov	eax,DWORD [esi]
54	mov	ebx,DWORD [4+esi]
55	mov	ecx,DWORD [8+esi]
56	mov	ebp,DWORD [12+esi]
57	mov	esi,DWORD [16+esi]
58	mov	esi,DWORD [esi]
; New frame layout: [4+esp]=arg0, [8+esp]=arg1, [12+esp]=arg2, [16+esp]=arg3,
; [20+esp]=dword read through arg4, [24+esp]=original esp (stored below).
59	mov	DWORD [4+esp],eax
60	mov	DWORD [8+esp],ebx
61	mov	DWORD [12+esp],ecx
62	mov	DWORD [16+esp],ebp
63	mov	DWORD [20+esp],esi
; edi still holds count+2, so ebx = count-1 (index of the last word).
64	lea	ebx,[edi-3]
65	mov	DWORD [24+esp],edx
; Test bit 26 of the first dword of _OPENSSL_ia32cap_P; when it is clear the
; integer-only code at L$003non_sse2 is used, otherwise execution falls
; through into the MMX-register code that follows.
66	lea	eax,[_OPENSSL_ia32cap_P]
67	bt	DWORD [eax],26
68	jnc	NEAR L$003non_sse2
; ---- Path using pmuludq on MMX registers -------------------------------
; Taken when the capability bit tested just above is set.
; On entry ebx = count-1 (set by the entry code). Frame slots read here:
; [8+esp] -> esi (called A below), [12+esp] -> edi (B), [16+esp] -> ebp (N),
; [20+esp] = dword multiplier, T = dword scratch vector starting at [32+esp].
; NOTE(review): A/B/N presumably correspond to ap/bp/np - confirm.
; mm7 = 0x00000000FFFFFFFF, used to isolate the low dword of 64-bit sums.
69	mov	eax,-1
70	movd	mm7,eax
71	mov	esi,DWORD [8+esp]
72	mov	edi,DWORD [12+esp]
73	mov	ebp,DWORD [16+esp]
; edx = outer index (word of B), ecx = inner index (word of A and N).
74	xor	edx,edx
75	xor	ecx,ecx
76	movd	mm4,DWORD [edi]
77	movd	mm5,DWORD [esi]
78	movd	mm3,DWORD [ebp]
; mm5 = A[0]*B[0]; mm2 keeps the full product (A*B carry chain),
; mm0 its low dword.
79	pmuludq	mm5,mm4
80	movq	mm2,mm5
81	movq	mm0,mm5
82	pand	mm0,mm7
; mm5 = low dword of the product times the dword at [20+esp]; pmuludq only
; uses the low dword of each operand. mm5 is the per-pass multiplier for N.
83	pmuludq	mm5,[20+esp]
; mm3 = N[0]*mm5 + low dword; its low dword is never stored, only the
; upper half survives the shift below as the N carry chain.
84	pmuludq	mm3,mm5
85	paddq	mm3,mm0
86	movd	mm1,DWORD [4+ebp]
87	movd	mm0,DWORD [4+esi]
88	psrlq	mm2,32
89	psrlq	mm3,32
90	inc	ecx
91align	16
; First pass (B[0]): for each j, accumulate A[j]*B[0] into mm2 and
; N[j]*mm5 plus the low dword of mm2 into mm3, then store the low dword of
; mm3 to [28+ecx*4+esp], i.e. T[j-1]. Operands for j+1 are preloaded.
; Runs while ecx < ebx.
92L$0041st:
93	pmuludq	mm0,mm4
94	pmuludq	mm1,mm5
95	paddq	mm2,mm0
96	paddq	mm3,mm1
97	movq	mm0,mm2
98	pand	mm0,mm7
99	movd	mm1,DWORD [4+ecx*4+ebp]
100	paddq	mm3,mm0
101	movd	mm0,DWORD [4+ecx*4+esi]
102	psrlq	mm2,32
103	movd	DWORD [28+ecx*4+esp],mm3
104	psrlq	mm3,32
105	lea	ecx,[1+ecx]
106	cmp	ecx,ebx
107	jl	NEAR L$0041st
; Last word of the first pass (ecx = ebx = count-1): same step without
; preloading, then the two carry chains are summed and written as one
; qword covering T[count-1] and T[count].
108	pmuludq	mm0,mm4
109	pmuludq	mm1,mm5
110	paddq	mm2,mm0
111	paddq	mm3,mm1
112	movq	mm0,mm2
113	pand	mm0,mm7
114	paddq	mm3,mm0
115	movd	DWORD [28+ecx*4+esp],mm3
116	psrlq	mm2,32
117	psrlq	mm3,32
118	paddq	mm3,mm2
119	movq	[32+ebx*4+esp],mm3
120	inc	edx
; Remaining passes: one per word B[edx], for edx = 1 .. count-1.
121L$005outer:
122	xor	ecx,ecx
123	movd	mm4,DWORD [edx*4+edi]
124	movd	mm5,DWORD [esi]
125	movd	mm6,DWORD [32+esp]
126	movd	mm3,DWORD [ebp]
; mm5 = A[0]*B[edx] + T[0]; new multiplier = low dword * dword [20+esp].
127	pmuludq	mm5,mm4
128	paddq	mm5,mm6
129	movq	mm0,mm5
130	movq	mm2,mm5
131	pand	mm0,mm7
132	pmuludq	mm5,[20+esp]
133	pmuludq	mm3,mm5
134	paddq	mm3,mm0
135	movd	mm6,DWORD [36+esp]
136	movd	mm1,DWORD [4+ebp]
137	movd	mm0,DWORD [4+esi]
138	psrlq	mm2,32
139	psrlq	mm3,32
; Fold T[1] into the A*B carry chain before entering the inner loop.
140	paddq	mm2,mm6
141	inc	ecx
; ebx becomes the inner-loop trip counter (count-2); it is restored from
; ecx after the loop.
142	dec	ebx
; Inner loop: as the first pass, but T[j+1] (read from [36+ecx*4+esp]) is
; also added to the A*B chain. The result is stored at [28+ecx*4+esp].
; The jnz tests the flags of "dec ebx"; the lea in between leaves them alone.
143L$006inner:
144	pmuludq	mm0,mm4
145	pmuludq	mm1,mm5
146	paddq	mm2,mm0
147	paddq	mm3,mm1
148	movq	mm0,mm2
149	movd	mm6,DWORD [36+ecx*4+esp]
150	pand	mm0,mm7
151	movd	mm1,DWORD [4+ecx*4+ebp]
152	paddq	mm3,mm0
153	movd	mm0,DWORD [4+ecx*4+esi]
154	psrlq	mm2,32
155	movd	DWORD [28+ecx*4+esp],mm3
156	psrlq	mm3,32
157	paddq	mm2,mm6
158	dec	ebx
159	lea	ecx,[1+ecx]
160	jnz	NEAR L$006inner
; ebx = count-1 again. Finish the last word, then add both carry chains and
; the old T[count] (read from [36+ebx*4+esp]) and store the qword
; T[count-1]:T[count].
161	mov	ebx,ecx
162	pmuludq	mm0,mm4
163	pmuludq	mm1,mm5
164	paddq	mm2,mm0
165	paddq	mm3,mm1
166	movq	mm0,mm2
167	pand	mm0,mm7
168	paddq	mm3,mm0
169	movd	DWORD [28+ecx*4+esp],mm3
170	psrlq	mm2,32
171	psrlq	mm3,32
172	movd	mm6,DWORD [36+ebx*4+esp]
173	paddq	mm3,mm2
174	paddq	mm3,mm6
175	movq	[32+ebx*4+esp],mm3
176	lea	edx,[1+edx]
177	cmp	edx,ebx
178	jle	NEAR L$005outer
; Leave MMX state before continuing with the shared integer tail.
179	emms
180	jmp	NEAR L$007common_tail
181align	16
; ---- Integer-only path ---------------------------------------------------
; On entry ebx = count-1. Frame slots (written by the entry code):
; [8+esp] = arg1 pointer (A), [12+esp] = arg2 pointer (B),
; [16+esp] = arg3 pointer (N), [20+esp] = dword multiplier,
; T = dword scratch vector starting at [32+esp].
; NOTE(review): A/B/N presumably correspond to ap/bp/np - confirm.
182L$003non_sse2:
183	mov	esi,DWORD [8+esp]
184	lea	ebp,[1+ebx]
185	mov	edi,DWORD [12+esp]
186	xor	ecx,ecx
187	mov	edx,esi
; ebp = (count & 1) | (A - B): zero only when count is even and both
; pointers are equal. eax = address just past the last word of B.
188	and	ebp,1
189	sub	edx,edi
190	lea	eax,[4+ebx*4+edi]
191	or	ebp,edx
; mov does not alter flags, so the jz below still tests the "or" result.
192	mov	edi,DWORD [edi]
193	jz	NEAR L$008bn_sqr_mont
; General multiply: [28+esp] = end-of-B pointer, edi = B[0], eax = A[0],
; edx = running carry (starts at 0).
194	mov	DWORD [28+esp],eax
195	mov	eax,DWORD [esi]
196	xor	edx,edx
197align	16
; First pass: T[j] = low(A[j]*B[0] + carry) for j = 0 .. count-2.
; ecx is incremented before the store, hence the 28 displacement.
198L$009mull:
199	mov	ebp,edx
200	mul	edi
201	add	ebp,eax
202	lea	ecx,[1+ecx]
203	adc	edx,0
204	mov	eax,DWORD [ecx*4+esi]
205	cmp	ecx,ebx
206	mov	DWORD [28+ecx*4+esp],ebp
207	jl	NEAR L$009mull
; Last word of the first pass; meanwhile edi is reloaded with the dword at
; [20+esp] and multiplied by T[0] to form the multiplier for N, and esi is
; switched to N.
208	mov	ebp,edx
209	mul	edi
210	mov	edi,DWORD [20+esp]
211	add	eax,ebp
212	mov	esi,DWORD [16+esp]
213	adc	edx,0
214	imul	edi,DWORD [32+esp]
; T[count-1] = low word, T[count] = carry, T[count+1] = 0.
215	mov	DWORD [32+ebx*4+esp],eax
216	xor	ecx,ecx
217	mov	DWORD [36+ebx*4+esp],edx
218	mov	DWORD [40+ebx*4+esp],ecx
; N[0]*multiplier + T[0]: the low word is discarded (eax is overwritten
; right after the add); only the carry into edx is kept.
219	mov	eax,DWORD [esi]
220	mul	edi
221	add	eax,DWORD [32+esp]
222	mov	eax,DWORD [4+esi]
223	adc	edx,0
224	inc	ecx
225	jmp	NEAR L$0102ndmadd
226align	16
; Multiply-accumulate pass for one further word of B (edi), in place:
; T[j] = low(T[j] + A[j]*edi + carry) for j = 0 .. count-2.
227L$0111stmadd:
228	mov	ebp,edx
229	mul	edi
230	add	ebp,DWORD [32+ecx*4+esp]
231	lea	ecx,[1+ecx]
232	adc	edx,0
233	add	ebp,eax
234	mov	eax,DWORD [ecx*4+esi]
235	adc	edx,0
236	cmp	ecx,ebx
237	mov	DWORD [28+ecx*4+esp],ebp
238	jl	NEAR L$0111stmadd
; Last word of the pass, interleaved with setting up the N pass:
; edi = dword [20+esp] * T[0], esi = N.
239	mov	ebp,edx
240	mul	edi
241	add	eax,DWORD [32+ebx*4+esp]
242	mov	edi,DWORD [20+esp]
243	adc	edx,0
244	mov	esi,DWORD [16+esp]
245	add	ebp,eax
246	adc	edx,0
247	imul	edi,DWORD [32+esp]
; ecx is cleared first so that "adc ecx,0" captures the carry out of the
; add into T[count]; that carry becomes T[count+1].
248	xor	ecx,ecx
249	add	edx,DWORD [36+ebx*4+esp]
250	mov	DWORD [32+ebx*4+esp],ebp
251	adc	ecx,0
252	mov	eax,DWORD [esi]
253	mov	DWORD [36+ebx*4+esp],edx
254	mov	DWORD [40+ebx*4+esp],ecx
; N[0]*multiplier + T[0]: low word discarded, carry kept in edx.
255	mul	edi
256	add	eax,DWORD [32+esp]
257	mov	eax,DWORD [4+esi]
258	adc	edx,0
259	mov	ecx,1
260align	16
; N pass with a one-word shift down:
; T[j-1] = low(T[j] + N[j]*edi + carry) for j = 1 .. count-2.
; The store uses displacement 24 because ecx has already been incremented.
261L$0102ndmadd:
262	mov	ebp,edx
263	mul	edi
264	add	ebp,DWORD [32+ecx*4+esp]
265	lea	ecx,[1+ecx]
266	adc	edx,0
267	add	ebp,eax
268	mov	eax,DWORD [ecx*4+esi]
269	adc	edx,0
270	cmp	ecx,ebx
271	mov	DWORD [24+ecx*4+esp],ebp
272	jl	NEAR L$0102ndmadd
; Last word: result goes to T[count-2]; then the two top words are shifted
; down with the final carry: T[count-1] = T[count] + carry,
; T[count] = T[count+1] + carry-out.
273	mov	ebp,edx
274	mul	edi
275	add	ebp,DWORD [32+ebx*4+esp]
276	adc	edx,0
277	add	ebp,eax
278	adc	edx,0
279	mov	DWORD [28+ebx*4+esp],ebp
280	xor	eax,eax
; ecx = current position in B ([12+esp]); advance it by one word and stop
; when it reaches the end pointer kept in [28+esp].
281	mov	ecx,DWORD [12+esp]
282	add	edx,DWORD [36+ebx*4+esp]
283	adc	eax,DWORD [40+ebx*4+esp]
284	lea	ecx,[4+ecx]
285	mov	DWORD [32+ebx*4+esp],edx
286	cmp	ecx,DWORD [28+esp]
287	mov	DWORD [36+ebx*4+esp],eax
288	je	NEAR L$007common_tail
; Set up the next pass: edi = next word of B, esi = A, index and carry
; cleared, eax = A[0].
289	mov	edi,DWORD [ecx]
290	mov	esi,DWORD [8+esp]
291	mov	DWORD [12+esp],ecx
292	xor	ecx,ecx
293	xor	edx,edx
294	mov	eax,DWORD [esi]
295	jmp	NEAR L$0111stmadd
296align	16
; ---- Squaring path --------------------------------------------------------
; Reached from the integer-only code only when ((ebx+1) & 1) | (A - B) is
; zero, i.e. the word count is even and both input pointers are equal.
; On entry: esi = A, edi = A[0], ecx = 0, ebx = count-1.
; Frame slots used here: [esp] = count-1 (loop bound), [12+esp] = current
; row index i (replaces the pointer kept there before), [16+esp] = N
; pointer, [20+esp] = dword multiplier, T = scratch vector at [32+esp].
297L$008bn_sqr_mont:
298	mov	DWORD [esp],ebx
299	mov	DWORD [12+esp],ecx
; T[0] = low(A[0]^2). The high word is carried forward halved (edx >> 1)
; with its low bit in ebx, because the cross products below are doubled.
300	mov	eax,edi
301	mul	edi
302	mov	DWORD [32+esp],eax
303	mov	ebx,edx
304	shr	edx,1
305	and	ebx,1
306	inc	ecx
307align	16
; Row 0: for j = 1 .. count-2 compute A[j]*A[0] + carry, store twice its
; low word plus the bit in ebx to T[j], and keep the bit shifted out of
; the doubling in ebx for the next word.
308L$012sqr:
309	mov	eax,DWORD [ecx*4+esi]
310	mov	ebp,edx
311	mul	edi
312	add	eax,ebp
313	lea	ecx,[1+ecx]
314	adc	edx,0
315	lea	ebp,[eax*2+ebx]
316	shr	eax,31
317	cmp	ecx,DWORD [esp]
318	mov	ebx,eax
319	mov	DWORD [28+ecx*4+esp],ebp
320	jl	NEAR L$012sqr
; Last word of row 0 (ecx = count-1), interleaved with the setup of the
; N pass: edi = dword [20+esp] * T[0], esi = N.
321	mov	eax,DWORD [ecx*4+esi]
322	mov	ebp,edx
323	mul	edi
324	add	eax,ebp
325	mov	edi,DWORD [20+esp]
326	adc	edx,0
327	mov	esi,DWORD [16+esp]
328	lea	ebp,[eax*2+ebx]
329	imul	edi,DWORD [32+esp]
330	shr	eax,31
; T[count-1], T[count] and T[count+1] receive the doubled top words.
331	mov	DWORD [32+ecx*4+esp],ebp
332	lea	ebp,[edx*2+eax]
333	mov	eax,DWORD [esi]
334	shr	edx,31
335	mov	DWORD [36+ecx*4+esp],ebp
336	mov	DWORD [40+ecx*4+esp],edx
; N[0]*multiplier + T[0]: low word discarded, carry kept in edx.
; ebx = count-1 again (mov does not disturb the carry used by adc).
337	mul	edi
338	add	eax,DWORD [32+esp]
339	mov	ebx,ecx
340	adc	edx,0
341	mov	eax,DWORD [4+esi]
342	mov	ecx,1
343align	16
; N pass, two words per iteration, shifting T down by one word:
; T[j-1] = low(T[j] + N[j]*edi + carry), then the same for j+1.
; ecx starts at 1 and advances by 2 until it reaches ebx.
344L$0133rdmadd:
345	mov	ebp,edx
346	mul	edi
347	add	ebp,DWORD [32+ecx*4+esp]
348	adc	edx,0
349	add	ebp,eax
350	mov	eax,DWORD [4+ecx*4+esi]
351	adc	edx,0
352	mov	DWORD [28+ecx*4+esp],ebp
353	mov	ebp,edx
354	mul	edi
355	add	ebp,DWORD [36+ecx*4+esp]
356	lea	ecx,[2+ecx]
357	adc	edx,0
358	add	ebp,eax
359	mov	eax,DWORD [ecx*4+esi]
360	adc	edx,0
361	cmp	ecx,ebx
362	mov	DWORD [24+ecx*4+esp],ebp
363	jl	NEAR L$0133rdmadd
; Last word of the N pass goes to T[count-2]; the top words are then
; shifted down with the final carry.
364	mov	ebp,edx
365	mul	edi
366	add	ebp,DWORD [32+ebx*4+esp]
367	adc	edx,0
368	add	ebp,eax
369	adc	edx,0
370	mov	DWORD [28+ebx*4+esp],ebp
; ecx = row index i; finished once i equals count-1.
371	mov	ecx,DWORD [12+esp]
372	xor	eax,eax
373	mov	esi,DWORD [8+esp]
374	add	edx,DWORD [36+ebx*4+esp]
375	adc	eax,DWORD [40+ebx*4+esp]
376	mov	DWORD [32+ebx*4+esp],edx
377	cmp	ecx,ebx
378	mov	DWORD [36+ebx*4+esp],eax
379	je	NEAR L$007common_tail
; Next row: i = i+1, edi = A[i], T[i] += A[i]^2 (diagonal term, high word
; carried in edx).
380	mov	edi,DWORD [4+ecx*4+esi]
381	lea	ecx,[1+ecx]
382	mov	eax,edi
383	mov	DWORD [12+esp],ecx
384	mul	edi
385	add	eax,DWORD [32+ecx*4+esp]
386	adc	edx,0
387	mov	DWORD [32+ecx*4+esp],eax
388	xor	ebp,ebp
; If i is the last index there are no cross products left for this row.
; The lea keeps the compare flags intact while ecx moves on to i+1.
389	cmp	ecx,ebx
390	lea	ecx,[1+ecx]
391	je	NEAR L$014sqrlast
; Halve the carry and keep its low bit in ebx, as in row 0.
; ebx no longer holds count-1 from here on; the loop bound is read
; from [esp].
392	mov	ebx,edx
393	shr	edx,1
394	and	ebx,1
395align	16
; Cross products of row i: T[j] = low(T[j] + 2*(A[j]*A[i] + carry) + ebx)
; for j = i+1 .. count-1; ebx collects the bit shifted out of the doubling
; plus the carries of the two additions.
396L$015sqradd:
397	mov	eax,DWORD [ecx*4+esi]
398	mov	ebp,edx
399	mul	edi
400	add	eax,ebp
401	lea	ebp,[eax*1+eax]
402	adc	edx,0
403	shr	eax,31
404	add	ebp,DWORD [32+ecx*4+esp]
405	lea	ecx,[1+ecx]
406	adc	eax,0
407	add	ebp,ebx
408	adc	eax,0
409	cmp	ecx,DWORD [esp]
410	mov	DWORD [28+ecx*4+esp],ebp
411	mov	ebx,eax
412	jle	NEAR L$015sqradd
; Double the remaining halved carry: edx = 2*edx + ebx, ebp = overflow.
413	mov	ebp,edx
414	add	edx,edx
415	shr	ebp,31
416	add	edx,ebx
417	adc	ebp,0
; End of a row (ecx = count on both ways in): add the carry into T[count],
; put the overflow in T[count+1], form the multiplier for N from T[0],
; restore ebx = count-1 and run the N pass again.
418L$014sqrlast:
419	mov	edi,DWORD [20+esp]
420	mov	esi,DWORD [16+esp]
421	imul	edi,DWORD [32+esp]
422	add	edx,DWORD [32+ecx*4+esp]
423	mov	eax,DWORD [esi]
424	adc	ebp,0
425	mov	DWORD [32+ecx*4+esp],edx
426	mov	DWORD [36+ecx*4+esp],ebp
427	mul	edi
428	add	eax,DWORD [32+esp]
429	lea	ebx,[ecx-1]
430	adc	edx,0
431	mov	ecx,1
432	mov	eax,DWORD [4+esi]
433	jmp	NEAR L$0133rdmadd
434align	16
; ---- Shared tail: conditional subtraction, result copy, epilogue ---------
; On entry ebx = count-1 on every path that jumps here.
; ebp = pointer kept in [16+esp] (N), edi = pointer kept in [4+esp]
; (arg0, the output), esi = address of the scratch vector T at [32+esp].
435L$007common_tail:
436	mov	ebp,DWORD [16+esp]
437	mov	edi,DWORD [4+esp]
438	lea	esi,[32+esp]
439	mov	eax,DWORD [esi]
440	mov	ecx,ebx
; xor also clears CF, so the first sbb starts without a borrow.
441	xor	edx,edx
442align	16
; out[i] = T[i] - N[i] - borrow for i = 0 .. count-1.
; dec leaves CF untouched and mov/lea do not alter flags, so the borrow
; chain survives the loop control; jge tests the result of "dec ecx".
443L$016sub:
444	sbb	eax,DWORD [edx*4+ebp]
445	mov	DWORD [edx*4+edi],eax
446	dec	ecx
447	mov	eax,DWORD [4+edx*4+esi]
448	lea	edx,[1+edx]
449	jge	NEAR L$016sub
; eax = T[count] - final borrow, used below as an AND mask, edx = its
; complement.
; NOTE(review): the masks are only meaningful if eax ends up 0 or all-ones,
; which relies on T[count] being 0 or 1 here - not checked in this code.
450	sbb	eax,0
451	mov	edx,-1
452	xor	edx,eax
453	jmp	NEAR L$017copy
454align	16
; Branch-free select, walking ebx from count-1 down to 0:
; out[i] = (T[i] & eax) | (out[i] & edx).
; Each scratch word is overwritten with ecx, which is -1 after the
; subtraction loop has exited.
455L$017copy:
456	mov	esi,DWORD [32+ebx*4+esp]
457	mov	ebp,DWORD [ebx*4+edi]
458	mov	DWORD [32+ebx*4+esp],ecx
459	and	esi,eax
460	and	ebp,edx
461	or	ebp,esi
462	mov	DWORD [ebx*4+edi],ebp
463	dec	ebx
464	jge	NEAR L$017copy
; Restore the stack pointer saved in [24+esp] by the entry code; return 1.
465	mov	esp,DWORD [24+esp]
466	mov	eax,1
; Also the target of the early exit for count < 4, which arrives with
; eax = 0 and the original esp.
467L$000just_leave:
468	pop	edi
469	pop	esi
470	pop	ebx
471	pop	ebp
472	ret
; NUL-terminated ASCII identification string, emitted in the code section
; directly after the routine:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
473db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
474db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
475db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
476db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
477db	111,114,103,62,0
; _OPENSSL_ia32cap_P is declared as a 16-byte common symbol in .bss; the
; routine above tests bit 26 of its first dword to choose a code path.
478segment	.bss
479common	_OPENSSL_ia32cap_P 16
480