• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#if defined(__i386__)
/*
 * bn_mul_mont -- word-serial Montgomery multiplication for 32-bit x86
 * (GNU as, AT&T syntax, stack-passed arguments).
 *
 * Arguments as read by the code (offsets are relative to %esp after the
 * four register pushes below):
 *   20(%esp) arg1  destination vector, written by the common tail
 *   24(%esp) arg2  first operand vector
 *   28(%esp) arg3  second operand vector
 *   32(%esp) arg4  modulus vector (subtracted in the common tail)
 *   36(%esp) arg5  pointer to one 32-bit word, loaded once and used as the
 *                  per-word reduction multiplier
 *   40(%esp) arg6  word count
 * NOTE(review): these presumably correspond to the usual OpenSSL prototype
 * bn_mul_mont(rp, ap, bp, np, n0, num); the names rp/ap/bp/np/n0/num are
 * used in the comments below as shorthand -- confirm against the C header.
 *
 * Returns (%eax): 0 without touching anything when num < 4 (signed
 * compare), 1 after a completed multiplication.
 *
 * Private stack frame built below (offsets from the new %esp):
 *    0  num-1                      (written on the squaring path only)
 *    4  rp
 *    8  ap
 *   12  bp; the generic path advances it word by word, the squaring path
 *       reuses the slot as the current word index
 *   16  np
 *   20  n0 word (*arg5)
 *   24  caller's %esp, restored before returning
 *   28  generic path only: &bp[num], the end-of-operand sentinel
 *   32  tp[0 .. num+1], the temporary accumulator vector
 *
 * Three code paths are selected at run time: an SSE2 path (pmuludq on MMX
 * registers), a generic integer path, and an integer squaring path taken
 * when ap == bp and num is even. All three finish in .L008common_tail.
 */
2.text
3.globl	bn_mul_mont
4.hidden	bn_mul_mont
5.type	bn_mul_mont,@function
6.align	16
7bn_mul_mont:
8.L_bn_mul_mont_begin:
/* Save the callee-saved registers; they are popped at .L000just_leave. */
9	pushl	%ebp
10	pushl	%ebx
11	pushl	%esi
12	pushl	%edi
/* %eax = 0 is the return value if we bail out; %edi = num (arg6). */
13	xorl	%eax,%eax
14	movl	40(%esp),%edi
/* Fewer than 4 words: return 0 immediately. */
15	cmpl	$4,%edi
16	jl	.L000just_leave
/* %esi = address of arg1 (argument block), %edx = address of arg2. */
17	leal	20(%esp),%esi
18	leal	24(%esp),%edx
/*
 * Candidate frame base: %ebp = %esp - 32 - 4*(num+2), i.e. 32 bytes of
 * fixed slots plus num+2 words for tp. %edi is negated only for the
 * address computation and negated back, leaving %edi = num+2.
 */
19	addl	$2,%edi
20	negl	%edi
21	leal	-32(%esp,%edi,4),%ebp
22	negl	%edi
/*
 * Move the frame further down depending on its distance from the argument
 * block modulo 2048, then on bit 11 of (frame XOR argument address).
 * NOTE(review): presumably this keeps the frame from aliasing the caller's
 * data in cache; the intent is not stated in this file.
 */
23	movl	%ebp,%eax
24	subl	%edx,%eax
25	andl	$2047,%eax
26	subl	%eax,%ebp
27	xorl	%ebp,%edx
28	andl	$2048,%edx
29	xorl	$2048,%edx
30	subl	%edx,%ebp
/* Round the frame base down to a 64-byte boundary. */
31	andl	$-64,%ebp
/*
 * Move %esp down to the new frame one 4096-byte page at a time, reading a
 * word from each page on the way. %eax = whole pages between the old %esp
 * and the frame; %edx keeps the caller's %esp for the epilogue.
 */
32	movl	%esp,%eax
33	subl	%ebp,%eax
34	andl	$-4096,%eax
35	movl	%esp,%edx
36	leal	(%ebp,%eax,1),%esp
37	movl	(%esp),%eax
38	cmpl	%ebp,%esp
39	ja	.L001page_walk
40	jmp	.L002page_walk_done
41.align	16
42.L001page_walk:
/* Step one page lower and touch it; repeat while %esp is above the frame. */
43	leal	-4096(%esp),%esp
44	movl	(%esp),%eax
45	cmpl	%ebp,%esp
46	ja	.L001page_walk
47.L002page_walk_done:
/*
 * Copy the arguments into the private frame. %esi still points at the
 * caller's argument block; arg5 is dereferenced so the frame holds the n0
 * word itself rather than a pointer to it.
 */
48	movl	(%esi),%eax
49	movl	4(%esi),%ebx
50	movl	8(%esi),%ecx
51	movl	12(%esi),%ebp
52	movl	16(%esi),%esi
53	movl	(%esi),%esi
54	movl	%eax,4(%esp)
55	movl	%ebx,8(%esp)
56	movl	%ecx,12(%esp)
57	movl	%ebp,16(%esp)
58	movl	%esi,20(%esp)
/* %ebx = (num+2) - 3 = num-1, the index of the last word. */
59	leal	-3(%edi),%ebx
/* Save the caller's %esp in the frame. */
60	movl	%edx,24(%esp)
/*
 * Position-independent lookup of OPENSSL_ia32cap_P: call/pop yields the
 * current address, and the symbol is addressed relative to it. Bit 26 of
 * its first word selects the SSE2 path; if it is clear, use integer code.
 */
61	call	.L003PIC_me_up
62.L003PIC_me_up:
63	popl	%eax
64	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
65	btl	$26,(%eax)
66	jnc	.L004non_sse2
/*
 * ---- SSE2 path ------------------------------------------------------
 * Register roles:
 *   %mm7 = 0x00000000ffffffff (mask for the low 32 bits)
 *   %mm4 = current bp word, %mm5 = m (reduction multiplier for this row)
 *   %mm2 = running ap*bp sum/carry, %mm3 = running np*m sum/carry
 *   %esi = ap, %edi = bp, %ebp = np
 *   %edx = outer index i, %ecx = inner index j, %ebx = num-1
 */
67	movl	$-1,%eax
68	movd	%eax,%mm7
69	movl	8(%esp),%esi
70	movl	12(%esp),%edi
71	movl	16(%esp),%ebp
72	xorl	%edx,%edx
73	xorl	%ecx,%ecx
/* First row (i = 0), word 0: %mm5 = ap[0]*bp[0]. */
74	movd	(%edi),%mm4
75	movd	(%esi),%mm5
76	movd	(%ebp),%mm3
77	pmuludq	%mm4,%mm5
78	movq	%mm5,%mm2
79	movq	%mm5,%mm0
80	pand	%mm7,%mm0
/* m = low32(ap[0]*bp[0]) * n0; pmuludq only uses the low 32 bits of m. */
81	pmuludq	20(%esp),%mm5
/* %mm3 = np[0]*m + low32(ap[0]*bp[0]); only its upper half is kept. */
82	pmuludq	%mm5,%mm3
83	paddq	%mm0,%mm3
/* Preload np[1] and ap[1]; reduce both accumulators to their carries. */
84	movd	4(%ebp),%mm1
85	movd	4(%esi),%mm0
86	psrlq	$32,%mm2
87	psrlq	$32,%mm3
88	incl	%ecx
89.align	16
90.L0051st:
/*
 * First-row inner loop, j = 1 .. num-2:
 *   %mm2 += ap[j]*bp[0];  %mm3 += np[j]*m + low32(%mm2)
 * The low word of %mm3 is stored at 28(%esp,j,4) = tp[j-1], one word
 * lower than j. np[j+1] and ap[j+1] are loaded for the next pass.
 */
91	pmuludq	%mm4,%mm0
92	pmuludq	%mm5,%mm1
93	paddq	%mm0,%mm2
94	paddq	%mm1,%mm3
95	movq	%mm2,%mm0
96	pand	%mm7,%mm0
97	movd	4(%ebp,%ecx,4),%mm1
98	paddq	%mm0,%mm3
99	movd	4(%esi,%ecx,4),%mm0
100	psrlq	$32,%mm2
101	movd	%mm3,28(%esp,%ecx,4)
102	psrlq	$32,%mm3
103	leal	1(%ecx),%ecx
104	cmpl	%ebx,%ecx
105	jl	.L0051st
/* Last word of the first row (j = num-1): same step, no further preload. */
106	pmuludq	%mm4,%mm0
107	pmuludq	%mm5,%mm1
108	paddq	%mm0,%mm2
109	paddq	%mm1,%mm3
110	movq	%mm2,%mm0
111	pand	%mm7,%mm0
112	paddq	%mm0,%mm3
113	movd	%mm3,28(%esp,%ecx,4)
114	psrlq	$32,%mm2
115	psrlq	$32,%mm3
/* Sum of both carries is stored as 64 bits: tp[num-1] and tp[num]. */
116	paddq	%mm2,%mm3
117	movq	%mm3,32(%esp,%ebx,4)
/* i = 1: continue with the remaining bp words. */
118	incl	%edx
119.L006outer:
/*
 * Outer loop, one pass per bp[i]. Word 0 first:
 *   %mm5 = ap[0]*bp[i] + tp[0];  m = low32(%mm5) * n0
 */
120	xorl	%ecx,%ecx
121	movd	(%edi,%edx,4),%mm4
122	movd	(%esi),%mm5
123	movd	32(%esp),%mm6
124	movd	(%ebp),%mm3
125	pmuludq	%mm4,%mm5
126	paddq	%mm6,%mm5
127	movq	%mm5,%mm0
128	movq	%mm5,%mm2
129	pand	%mm7,%mm0
130	pmuludq	20(%esp),%mm5
131	pmuludq	%mm5,%mm3
132	paddq	%mm0,%mm3
/* Preload tp[1], np[1], ap[1]; fold tp[1] into the ap*bp carry. */
133	movd	36(%esp),%mm6
134	movd	4(%ebp),%mm1
135	movd	4(%esi),%mm0
136	psrlq	$32,%mm2
137	psrlq	$32,%mm3
138	paddq	%mm6,%mm2
139	incl	%ecx
/* %ebx becomes a down-counter (num-2) for the inner loop. */
140	decl	%ebx
141.L007inner:
/*
 * Inner loop: as .L0051st, but additionally adds the previous row's
 * tp[j+1] (loaded into %mm6) to the ap*bp accumulator. leal is used for
 * j++ so the zero flag set by decl reaches the jnz.
 */
142	pmuludq	%mm4,%mm0
143	pmuludq	%mm5,%mm1
144	paddq	%mm0,%mm2
145	paddq	%mm1,%mm3
146	movq	%mm2,%mm0
147	movd	36(%esp,%ecx,4),%mm6
148	pand	%mm7,%mm0
149	movd	4(%ebp,%ecx,4),%mm1
150	paddq	%mm0,%mm3
151	movd	4(%esi,%ecx,4),%mm0
152	psrlq	$32,%mm2
153	movd	%mm3,28(%esp,%ecx,4)
154	psrlq	$32,%mm3
155	paddq	%mm6,%mm2
156	decl	%ebx
157	leal	1(%ecx),%ecx
158	jnz	.L007inner
/* %ecx is now num-1; restore %ebx from it. Then handle the last word. */
159	movl	%ecx,%ebx
160	pmuludq	%mm4,%mm0
161	pmuludq	%mm5,%mm1
162	paddq	%mm0,%mm2
163	paddq	%mm1,%mm3
164	movq	%mm2,%mm0
165	pand	%mm7,%mm0
166	paddq	%mm0,%mm3
167	movd	%mm3,28(%esp,%ecx,4)
168	psrlq	$32,%mm2
169	psrlq	$32,%mm3
/* New top: both carries plus the old tp[num], stored to tp[num-1..num]. */
170	movd	36(%esp,%ebx,4),%mm6
171	paddq	%mm2,%mm3
172	paddq	%mm6,%mm3
173	movq	%mm3,32(%esp,%ebx,4)
/* Next bp word; loop while i <= num-1. */
174	leal	1(%edx),%edx
175	cmpl	%ebx,%edx
176	jle	.L006outer
/* Leave MMX state before returning to integer code. */
177	emms
178	jmp	.L008common_tail
179.align	16
180.L004non_sse2:
/*
 * ---- Integer paths --------------------------------------------------
 * Decide between generic multiplication and squaring:
 *   %ebp = (num & 1) | (ap - bp), which is zero only when ap == bp and
 *   num is even. The movl that loads bp[0] does not alter flags, so jz
 *   still sees the result of the orl.
 * %eax = &bp[num] (end sentinel), %edi = bp[0], %ecx = 0.
 */
181	movl	8(%esp),%esi
182	leal	1(%ebx),%ebp
183	movl	12(%esp),%edi
184	xorl	%ecx,%ecx
185	movl	%esi,%edx
186	andl	$1,%ebp
187	subl	%edi,%edx
188	leal	4(%edi,%ebx,4),%eax
189	orl	%edx,%ebp
190	movl	(%edi),%edi
191	jz	.L009bn_sqr_mont
/* Generic path: remember the sentinel and start with ap[0], carry = 0. */
192	movl	%eax,28(%esp)
193	movl	(%esi),%eax
194	xorl	%edx,%edx
195.align	16
196.L010mull:
/*
 * First row: tp[j] = low(ap[j]*bp[0] + carry), carry kept in %edx.
 * The store uses 28(%esp,%ecx,4) after j++, i.e. the slot of the word
 * just processed.
 */
197	movl	%edx,%ebp
198	mull	%edi
199	addl	%eax,%ebp
200	leal	1(%ecx),%ecx
201	adcl	$0,%edx
202	movl	(%esi,%ecx,4),%eax
203	cmpl	%ebx,%ecx
204	movl	%ebp,28(%esp,%ecx,4)
205	jl	.L010mull
/*
 * Last word of the first row, interleaved with setting up the reduction:
 *   %edi = m = n0 * tp[0] (low 32 bits), %esi = np
 *   tp[num-1] = low word, tp[num] = carry, tp[num+1] = 0
 */
206	movl	%edx,%ebp
207	mull	%edi
208	movl	20(%esp),%edi
209	addl	%ebp,%eax
210	movl	16(%esp),%esi
211	adcl	$0,%edx
212	imull	32(%esp),%edi
213	movl	%eax,32(%esp,%ebx,4)
214	xorl	%ecx,%ecx
215	movl	%edx,36(%esp,%ebx,4)
216	movl	%ecx,40(%esp,%ebx,4)
/*
 * Reduction word 0: np[0]*m + tp[0]. The low word of the sum is
 * overwritten by the next load; only the carry into %edx is kept.
 */
217	movl	(%esi),%eax
218	mull	%edi
219	addl	32(%esp),%eax
220	movl	4(%esi),%eax
221	adcl	$0,%edx
222	incl	%ecx
223	jmp	.L0112ndmadd
224.align	16
225.L0121stmadd:
/*
 * Multiply-accumulate row for bp[i], i > 0:
 *   tp[j] = low(ap[j]*bp[i] + tp[j] + carry), updated in place.
 */
226	movl	%edx,%ebp
227	mull	%edi
228	addl	32(%esp,%ecx,4),%ebp
229	leal	1(%ecx),%ecx
230	adcl	$0,%edx
231	addl	%eax,%ebp
232	movl	(%esi,%ecx,4),%eax
233	adcl	$0,%edx
234	cmpl	%ebx,%ecx
235	movl	%ebp,28(%esp,%ecx,4)
236	jl	.L0121stmadd
/*
 * Last word of the row, interleaved with reduction setup: switch %esi to
 * np, compute m = n0 * tp[0], and propagate the carry into tp[num] and
 * tp[num+1].
 */
237	movl	%edx,%ebp
238	mull	%edi
239	addl	32(%esp,%ebx,4),%eax
240	movl	20(%esp),%edi
241	adcl	$0,%edx
242	movl	16(%esp),%esi
243	addl	%eax,%ebp
244	adcl	$0,%edx
245	imull	32(%esp),%edi
246	xorl	%ecx,%ecx
247	addl	36(%esp,%ebx,4),%edx
248	movl	%ebp,32(%esp,%ebx,4)
249	adcl	$0,%ecx
250	movl	(%esi),%eax
251	movl	%edx,36(%esp,%ebx,4)
252	movl	%ecx,40(%esp,%ebx,4)
/* Reduction word 0: only the carry out of np[0]*m + tp[0] is kept. */
253	mull	%edi
254	addl	32(%esp),%eax
255	movl	4(%esi),%eax
256	adcl	$0,%edx
257	movl	$1,%ecx
258.align	16
259.L0112ndmadd:
/*
 * Reduction row: tp[j-1] = low(np[j]*m + tp[j] + carry).
 * The store at 24(%esp,%ecx,4) happens after j++, so the result lands one
 * word below the word just processed (the division by the word base).
 */
260	movl	%edx,%ebp
261	mull	%edi
262	addl	32(%esp,%ecx,4),%ebp
263	leal	1(%ecx),%ecx
264	adcl	$0,%edx
265	addl	%eax,%ebp
266	movl	(%esi,%ecx,4),%eax
267	adcl	$0,%edx
268	cmpl	%ebx,%ecx
269	movl	%ebp,24(%esp,%ecx,4)
270	jl	.L0112ndmadd
/* Last reduction word: result goes to tp[num-2]. */
271	movl	%edx,%ebp
272	mull	%edi
273	addl	32(%esp,%ebx,4),%ebp
274	adcl	$0,%edx
275	addl	%eax,%ebp
276	adcl	$0,%edx
277	movl	%ebp,28(%esp,%ebx,4)
/*
 * Fold the carry with the old tp[num] and tp[num+1] into the new
 * tp[num-1] and tp[num], while advancing the stored bp pointer by one
 * word (leal keeps the carry flag intact between addl and the compare).
 */
278	xorl	%eax,%eax
279	movl	12(%esp),%ecx
280	addl	36(%esp,%ebx,4),%edx
281	adcl	40(%esp,%ebx,4),%eax
282	leal	4(%ecx),%ecx
283	movl	%edx,32(%esp,%ebx,4)
284	cmpl	28(%esp),%ecx
285	movl	%eax,36(%esp,%ebx,4)
/* Reached &bp[num]: every bp word has been consumed. */
286	je	.L008common_tail
/* Otherwise load the next bp word and run another multiply row on ap. */
287	movl	(%ecx),%edi
288	movl	8(%esp),%esi
289	movl	%ecx,12(%esp)
290	xorl	%ecx,%ecx
291	xorl	%edx,%edx
292	movl	(%esi),%eax
293	jmp	.L0121stmadd
294.align	16
295.L009bn_sqr_mont:
/*
 * ---- Squaring path (ap == bp, num even) ------------------------------
 * On entry %esi = ap, %edi = ap[0], %ecx = 0, %ebx = num-1.
 * Frame slot 0 keeps num-1; slot 12 is reused as the current word index.
 * tp[0] = low(ap[0]^2). The high word is split into its bit 0 (%ebx) and
 * the rest shifted right by one (%edx), because the cross products that
 * follow are accumulated first and then doubled.
 */
296	movl	%ebx,(%esp)
297	movl	%ecx,12(%esp)
298	movl	%edi,%eax
299	mull	%edi
300	movl	%eax,32(%esp)
301	movl	%edx,%ebx
302	shrl	$1,%edx
303	andl	$1,%ebx
304	incl	%ecx
305.align	16
306.L013sqr:
/*
 * First row of cross products: tp[j] = 2*(ap[j]*ap[0] + carry) + %ebx,
 * where %ebx carries the bit shifted out of the previous doubled word.
 */
307	movl	(%esi,%ecx,4),%eax
308	movl	%edx,%ebp
309	mull	%edi
310	addl	%ebp,%eax
311	leal	1(%ecx),%ecx
312	adcl	$0,%edx
313	leal	(%ebx,%eax,2),%ebp
314	shrl	$31,%eax
315	cmpl	(%esp),%ecx
316	movl	%eax,%ebx
317	movl	%ebp,28(%esp,%ecx,4)
318	jl	.L013sqr
/*
 * Last word of the first row, interleaved with reduction setup:
 * %edi = m = n0 * tp[0], %esi = np; the doubled low word, the doubled
 * high word and the final shifted-out bit go to tp[num-1 .. num+1].
 */
319	movl	(%esi,%ecx,4),%eax
320	movl	%edx,%ebp
321	mull	%edi
322	addl	%ebp,%eax
323	movl	20(%esp),%edi
324	adcl	$0,%edx
325	movl	16(%esp),%esi
326	leal	(%ebx,%eax,2),%ebp
327	imull	32(%esp),%edi
328	shrl	$31,%eax
329	movl	%ebp,32(%esp,%ecx,4)
330	leal	(%eax,%edx,2),%ebp
331	movl	(%esi),%eax
332	shrl	$31,%edx
333	movl	%ebp,36(%esp,%ecx,4)
334	movl	%edx,40(%esp,%ecx,4)
/* Reduction word 0 (carry only); %ebx = num-1 again; start at j = 1. */
335	mull	%edi
336	addl	32(%esp),%eax
337	movl	%ecx,%ebx
338	adcl	$0,%edx
339	movl	4(%esi),%eax
340	movl	$1,%ecx
341.align	16
342.L0143rdmadd:
/*
 * Reduction row, unrolled two words per pass:
 *   tp[j-1] = low(np[j]*m   + tp[j]   + carry)
 *   tp[j]   = low(np[j+1]*m + tp[j+1] + carry)
 * j starts at 1 and steps by 2; with num even it lands exactly on num-1.
 */
343	movl	%edx,%ebp
344	mull	%edi
345	addl	32(%esp,%ecx,4),%ebp
346	adcl	$0,%edx
347	addl	%eax,%ebp
348	movl	4(%esi,%ecx,4),%eax
349	adcl	$0,%edx
350	movl	%ebp,28(%esp,%ecx,4)
351	movl	%edx,%ebp
352	mull	%edi
353	addl	36(%esp,%ecx,4),%ebp
354	leal	2(%ecx),%ecx
355	adcl	$0,%edx
356	addl	%eax,%ebp
357	movl	(%esi,%ecx,4),%eax
358	adcl	$0,%edx
359	cmpl	%ebx,%ecx
360	movl	%ebp,24(%esp,%ecx,4)
361	jl	.L0143rdmadd
/* Last reduction word: result goes to tp[num-2]. */
362	movl	%edx,%ebp
363	mull	%edi
364	addl	32(%esp,%ebx,4),%ebp
365	adcl	$0,%edx
366	addl	%eax,%ebp
367	adcl	$0,%edx
368	movl	%ebp,28(%esp,%ebx,4)
/*
 * Fold the carry with the old tp[num] and tp[num+1] into the new
 * tp[num-1] and tp[num]; reload the word index (%ecx) and ap (%esi).
 */
369	movl	12(%esp),%ecx
370	xorl	%eax,%eax
371	movl	8(%esp),%esi
372	addl	36(%esp,%ebx,4),%edx
373	adcl	40(%esp,%ebx,4),%eax
374	movl	%edx,32(%esp,%ebx,4)
375	cmpl	%ebx,%ecx
376	movl	%eax,36(%esp,%ebx,4)
/* Index == num-1: every word has been processed. */
377	je	.L008common_tail
/*
 * Next word i: %edi = ap[i]; add its square to tp[i]. The cmpl result
 * survives the following leal, so je tests whether i is the last word
 * (in which case there are no cross products left).
 */
378	movl	4(%esi,%ecx,4),%edi
379	leal	1(%ecx),%ecx
380	movl	%edi,%eax
381	movl	%ecx,12(%esp)
382	mull	%edi
383	addl	32(%esp,%ecx,4),%eax
384	adcl	$0,%edx
385	movl	%eax,32(%esp,%ecx,4)
386	xorl	%ebp,%ebp
387	cmpl	%ebx,%ecx
388	leal	1(%ecx),%ecx
389	je	.L015sqrlast
/* Split the square's high word as before: bit 0 -> %ebx, rest -> %edx. */
390	movl	%edx,%ebx
391	shrl	$1,%edx
392	andl	$1,%ebx
393.align	16
394.L016sqradd:
/*
 * Cross products for row i, j = i+1 .. num-1:
 *   tp[j] += 2*(ap[j]*ap[i] + carry) + %ebx
 * %eax collects the bit shifted out by the doubling plus the two
 * addition carries and becomes the next %ebx. The leal that doubles %eax
 * leaves the carry from the preceding addl for the adcl into %edx.
 */
395	movl	(%esi,%ecx,4),%eax
396	movl	%edx,%ebp
397	mull	%edi
398	addl	%ebp,%eax
399	leal	(%eax,%eax,1),%ebp
400	adcl	$0,%edx
401	shrl	$31,%eax
402	addl	32(%esp,%ecx,4),%ebp
403	leal	1(%ecx),%ecx
404	adcl	$0,%eax
405	addl	%ebx,%ebp
406	adcl	$0,%eax
407	cmpl	(%esp),%ecx
408	movl	%ebp,28(%esp,%ecx,4)
409	movl	%eax,%ebx
410	jle	.L016sqradd
/* Double the final carry: %edx = 2*%edx + %ebx, overflow into %ebp. */
411	movl	%edx,%ebp
412	addl	%edx,%edx
413	shrl	$31,%ebp
414	addl	%ebx,%edx
415	adcl	$0,%ebp
416.L015sqrlast:
/*
 * Finish the row and set up the reduction: %edi = m = n0 * tp[0],
 * %esi = np; add the carry pair (%ebp:%edx) into tp[%ecx] and store the
 * overflow in the word above; then restart .L0143rdmadd with
 * %ebx = %ecx-1 and j = 1.
 */
417	movl	20(%esp),%edi
418	movl	16(%esp),%esi
419	imull	32(%esp),%edi
420	addl	32(%esp,%ecx,4),%edx
421	movl	(%esi),%eax
422	adcl	$0,%ebp
423	movl	%edx,32(%esp,%ecx,4)
424	movl	%ebp,36(%esp,%ecx,4)
425	mull	%edi
426	addl	32(%esp),%eax
427	leal	-1(%ecx),%ebx
428	adcl	$0,%edx
429	movl	$1,%ecx
430	movl	4(%esi),%eax
431	jmp	.L0143rdmadd
432.align	16
433.L008common_tail:
/*
 * ---- Final conditional subtraction and copy-out ----------------------
 * %ebp = np, %edi = rp, %esi = &tp[0], %ecx = num-1 (down-counter),
 * %edx = word index. xorl also clears the carry flag, so the first sbbl
 * starts without a borrow.
 */
434	movl	16(%esp),%ebp
435	movl	4(%esp),%edi
436	leal	32(%esp),%esi
437	movl	(%esi),%eax
438	movl	%ebx,%ecx
439	xorl	%edx,%edx
440.align	16
441.L017sub:
/*
 * rp[j] = tp[j] - np[j] - borrow, for j = 0 .. num-1. decl, movl and leal
 * leave the carry flag alone, so the borrow chains between iterations;
 * jge tests the sign of the counter set by decl.
 */
442	sbbl	(%ebp,%edx,4),%eax
443	movl	%eax,(%edi,%edx,4)
444	decl	%ecx
445	movl	4(%esi,%edx,4),%eax
446	leal	1(%edx),%edx
447	jge	.L017sub
/*
 * %eax = tp[num] - borrow, used as a select mask (expected to be 0 or
 * all-ones): %esi = (tp & mask) | (rp & ~mask). An all-ones mask picks
 * the unsubtracted tp; zero picks the difference already stored in rp.
 */
448	sbbl	$0,%eax
449	andl	%eax,%esi
450	notl	%eax
451	movl	%edi,%ebp
452	andl	%eax,%ebp
453	orl	%ebp,%esi
454.align	16
455.L018copy:
/*
 * Copy the selected vector into rp from word num-1 down to 0 and
 * overwrite each tp word with %ecx, which is -1 after the loop above, so
 * the intermediate result does not stay on the stack.
 */
456	movl	(%esi,%ebx,4),%eax
457	movl	%eax,(%edi,%ebx,4)
458	movl	%ecx,32(%esp,%ebx,4)
459	decl	%ebx
460	jge	.L018copy
/* Restore the caller's stack pointer and report success (%eax = 1). */
461	movl	24(%esp),%esp
462	movl	$1,%eax
463.L000just_leave:
/* Common epilogue; the early exit arrives here with %eax = 0. */
464	popl	%edi
465	popl	%esi
466	popl	%ebx
467	popl	%ebp
468	ret
469.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string emitted right after the
 * function body, with no section switch in between:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 * No code visible in this file refers to it.
 */
470.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
471.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
472.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
473.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
474.byte	111,114,103,62,0
475#endif
476