/*
 * Source-viewer navigation residue (not part of the assembly source):
 *   Home | Line# | Scopes# | Navigate# | Raw | Download
 */
/*
 * bn_mul_mont -- word-by-word Montgomery multiplication for 32-bit x86.
 * Syntax: GNU as, AT&T operand order.  Arguments are passed on the stack.
 *
 * Stack arguments, as seen after the four register pushes in the prologue:
 *   20(%esp)  arg1  pointer to the output words (written by .L017sub/.L018copy)
 *   24(%esp)  arg2  pointer to the first multiplicand words
 *   28(%esp)  arg3  pointer to the second multiplicand words
 *   32(%esp)  arg4  pointer to the words used in the reduction and subtracted
 *                   in the tail (presumably the modulus)
 *   36(%esp)  arg5  pointer; only the first word it points to is loaded
 *   40(%esp)  arg6  word count, compared as a signed value
 * NOTE(review): this looks like OpenSSL's (rp, ap, bp, np, n0, num) argument
 * order -- confirm against the C prototype.
 *
 * Return value: %eax = 0 (and nothing is written) when arg6 < 4, else 1.
 *
 * Registers: %ebp, %ebx, %esi and %edi are pushed on entry and popped on
 * exit.  The SSE2 path uses MMX registers mm0-mm7 and issues emms before
 * joining the common tail.
 *
 * Local frame (relative to the relocated %esp):
 *    0(%esp)  scratch (loop bound on the squaring path)
 *    4(%esp)  copy of arg1        8(%esp)  copy of arg2
 *   12(%esp)  copy of arg3 (reused as a cursor/index by the integer paths)
 *   16(%esp)  copy of arg4       20(%esp)  word loaded through arg5
 *   24(%esp)  caller-side %esp, restored before the epilogue
 *   28(%esp)  scratch (end pointer on the generic integer path)
 *   32(%esp)  start of the temporary result vector
 */
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
/*
 * endbr32 landing pad.  The guard has to be a C-preprocessor conditional:
 * the NASM spelling (percent-sign ifdef) is rejected by GNU as.  If the file
 * is assembled without preprocessing, the two guard lines are plain comments
 * to GNU as and the four bytes are always emitted.
 */
#ifdef __CET__

.byte	243,15,30,251
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			/* default return value: 0 */
	movl	40(%esp),%edi			/* edi = arg6 (word count) */
	cmpl	$4,%edi
	jl	.L000just_leave			/* fewer than 4 words: return 0 */
	leal	20(%esp),%esi			/* esi = address of arg1 on the stack */
	leal	24(%esp),%edx			/* edx = address of arg2 on the stack */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp		/* ebp = esp - 32 - 4*(count+2): frame bottom */
	negl	%edi				/* edi = count + 2 again */
	/*
	 * Adjust the frame address using bits 0-11 of its distance from the
	 * argument area, then round down to a 64-byte boundary.
	 * NOTE(review): presumably avoids cache aliasing with the caller's
	 * data -- confirm against the generator script.
	 */
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp
	andl	$-64,%ebp
	/*
	 * Move %esp down to the new frame one 4096-byte step at a time,
	 * reading a word at every step so each intervening page is touched.
	 */
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax
	movl	%esp,%edx			/* edx = caller-side esp, stored at 24(%esp) below */
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:
	/* Copy the arguments into the local frame. */
	movl	(%esi),%eax			/* arg1 */
	movl	4(%esi),%ebx			/* arg2 */
	movl	8(%esi),%ecx			/* arg3 */
	movl	12(%esi),%ebp			/* arg4 */
	movl	16(%esi),%esi			/* arg5 (pointer) ... */
	movl	(%esi),%esi			/* ... replaced by the word it points to */
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			/* ebx = count - 1 (index of the last word) */
	movl	%edx,24(%esp)			/* remember the caller-side esp */
	/* Position-independent address of OPENSSL_ia32cap_P via call/pop. */
	call	.L003PIC_me_up
.L003PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	btl	$26,(%eax)			/* bit 26 of the first capability word selects the SSE2 path */
	jnc	.L004non_sse2

	/*
	 * SSE2 path (pmuludq on MMX registers).
	 *   mm7 = 0x00000000ffffffff mask      mm4 = current word of arg3
	 *   mm5 = per-iteration multiplier for arg4 (low product word times
	 *         the word stored at 20(%esp))
	 *   mm2 / mm3 = running 64-bit accumulators, shifted right by 32
	 *         after each word to carry into the next one
	 *   esi = arg2, edi = arg3, ebp = arg4, edx = outer index, ecx = inner index
	 */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0051st:
	/* First pass: the temporary vector has no previous contents to add. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0051st
	/* Last word of the first pass, then store the two top words as one quadword. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
.L006outer:
	/* Remaining passes: one per further word of arg3, accumulating into the temporary vector. */
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx				/* ebx doubles as the inner countdown; restored from ecx below */
.L007inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx			/* lea keeps the flags set by decl for the jnz */
	jnz	.L007inner
	movl	%ecx,%ebx			/* ebx = count - 1 again */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L006outer
	emms					/* leave MMX state before returning to x87-capable code */
	jmp	.L008common_tail
.align	16
.L004non_sse2:
	/*
	 * Integer-only path.  Takes the dedicated squaring route when arg2 and
	 * arg3 are the same pointer and the word count is even; otherwise
	 * falls through to the generic multiply.
	 */
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp			/* ebp = count */
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp				/* ebp = count & 1 */
	subl	%edi,%edx			/* edx = arg2 - arg3 */
	leal	4(%edi,%ebx,4),%eax		/* eax = arg3 + 4*count: end of arg3 */
	orl	%edx,%ebp			/* ZF set only if count is even and arg2 == arg3 */
	movl	(%edi),%edi			/* edi = first word of arg3 (mov leaves ZF intact) */
	jz	.L009bn_sqr_mont
	movl	%eax,28(%esp)			/* end pointer tested at the bottom of each outer pass */
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L010mull:
	/* Temporary vector = arg2[] * first word of arg3; edx carries between words. */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L010mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi			/* switch the source pointer to arg4 */
	adcl	$0,%edx
	imull	32(%esp),%edi			/* edi = lowest temporary word * word at 20(%esp) */
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of the lowest word is kept */
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0112ndmadd
.align	16
.L0121stmadd:
	/* Temporary vector += arg2[] * current word of arg3. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0121stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi			/* switch the source pointer to arg4 */
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi			/* edi = lowest temporary word * word at 20(%esp) */
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of the lowest word is kept */
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0112ndmadd:
	/*
	 * Temporary vector += arg4[] * edi, storing each result one word lower
	 * (24(%esp,%ecx,4) after the index increment), i.e. shifted down a word.
	 */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0112ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx			/* ecx = cursor into arg3 */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx			/* advance to the next word of arg3 */
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx			/* reached the end pointer saved earlier? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0121stmadd
.align	16
.L009bn_sqr_mont:
	/*
	 * Squaring route.  (%esp) holds count - 1 and 12(%esp) holds the
	 * current outer index.  Cross products are doubled on the fly; ebx
	 * carries the bit shifted out of each doubled word.
	 */
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi				/* edx:eax = square of the first word */
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L013sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp		/* doubled word plus the bit carried in */
	shrl	$31,%eax			/* bit shifted out by the doubling */
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L013sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi			/* switch the source pointer to arg4 */
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi			/* edi = lowest temporary word * word at 20(%esp) */
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of the lowest word is kept */
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0143rdmadd:
	/* Reduction step for the squaring route, two words of arg4 per iteration. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0143rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx			/* ecx = current outer index */
	xorl	%eax,%eax
	movl	8(%esp),%esi			/* back to the operand being squared */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx			/* was that the last outer index? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	movl	4(%esi,%ecx,4),%edi		/* edi = next operand word */
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi				/* square of that word */
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx			/* lea keeps the flags from cmpl for the je */
	je	.L015sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L016sqradd:
	/* Add the doubled cross products for the current operand word. */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L016sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L015sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi			/* switch the source pointer to arg4 */
	imull	32(%esp),%edi			/* edi = lowest temporary word * word at 20(%esp) */
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of the lowest word is kept */
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0143rdmadd
.align	16
.L008common_tail:
	/*
	 * Final step shared by all paths: write (temporary - arg4[]) to the
	 * output, then use the resulting borrow to pick, word by word and
	 * without branching on the data, between that difference and the
	 * unreduced temporary value.
	 */
	movl	16(%esp),%ebp			/* ebp = arg4 */
	movl	4(%esp),%edi			/* edi = arg1 (output) */
	leal	32(%esp),%esi			/* esi = temporary vector */
	movl	(%esi),%eax
	movl	%ebx,%ecx			/* ecx = count - 1, counted down to -1 */
	xorl	%edx,%edx			/* edx = 0 and CF = 0 for the first sbbl */
.align	16
.L017sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx				/* dec leaves CF untouched for the next sbbl */
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L017sub
	sbbl	$0,%eax				/* fold the final borrow into the top temporary word */
	movl	$-1,%edx
	xorl	%eax,%edx			/* edx = bitwise complement of eax */
	jmp	.L018copy
.align	16
.L018copy:
	movl	32(%esp,%ebx,4),%esi		/* temporary word */
	movl	(%edi,%ebx,4),%ebp		/* difference word already in the output */
	movl	%ecx,32(%esp,%ebx,4)		/* overwrite the temporary word so it does not linger on the stack */
	andl	%eax,%esi
	andl	%edx,%ebp
	orl	%esi,%ebp			/* masked select of one of the two words */
	movl	%ebp,(%edi,%ebx,4)
	decl	%ebx
	jge	.L018copy
	movl	24(%esp),%esp			/* back to the caller-side stack pointer */
	movl	$1,%eax				/* report success */
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
/*
 * Common symbol: 16 bytes, 4-byte aligned.  bn_mul_mont tests bit 26 of its
 * first word to choose between the SSE2 and the integer-only code path.
 */
.comm	OPENSSL_ia32cap_P,16,4
481
482	.section ".note.gnu.property", "a"
483	.p2align 2
484	.long 1f - 0f
485	.long 4f - 1f
486	.long 5
4870:
488	.asciz "GNU"
4891:
490	.p2align 2
491	.long 0xc0000002
492	.long 3f - 2f
4932:
494	.long 3
4953:
496	.p2align 2
4974:
498