• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__i386__)
5#if defined(BORINGSSL_PREFIX)
6#include <boringssl_prefix_symbols_asm.h>
7#endif
8.text
# Function bn_mul_mont, 32-bit x86. The ASCII tag emitted after the function
# reads "Montgomery Multiplication for x86".
#
# Calling convention as used by this code: six 4-byte arguments on the stack,
# result in eax. The names below are reviewer shorthand; presumably they mirror
# the C prototype, which is not visible in this file (TODO confirm):
#   arg1 "rp"   pointer the final num words are written through
#   arg2 "ap"   first multiplicand words
#   arg3 "bp"   second multiplicand words
#   arg4 "np"   modulus words
#   arg5 "n0"   pointer; only the first 32-bit word behind it is read
#   arg6 "num"  word count, compared as a signed value against 4
# Returns 0 right away when num < 4, otherwise 1.
# ebp, ebx, esi and edi are pushed on entry and popped before ret.
#
# Frame layout after allocation (offsets from the new esp):
#   esp+0    num-1 (written only on the squaring path)
#   esp+4    rp
#   esp+8    ap
#   esp+12   bp (generic path: advancing pointer; squaring path: index i)
#   esp+16   np
#   esp+20   the word read through n0
#   esp+24   esp value to restore on exit
#   esp+28   generic path only: address of bp[num], the end marker
#   esp+32   scratch vector "tp", num+2 words
9.globl	bn_mul_mont
10.hidden	bn_mul_mont
11.type	bn_mul_mont,@function
12.align	16
13bn_mul_mont:
14.L_bn_mul_mont_begin:
	# Save callee-saved registers. Four pushes plus the return address put
	# arg1 at 20(%esp) and arg6 at 40(%esp).
15	pushl	%ebp
16	pushl	%ebx
17	pushl	%esi
18	pushl	%edi
	# Preload the early-exit return value 0, then leave when num < 4.
19	xorl	%eax,%eax
20	movl	40(%esp),%edi
21	cmpl	$4,%edi
22	jl	.L000just_leave
	# Keep esi = address of the arg1 slot, edx = address of the arg2 slot.
23	leal	20(%esp),%esi
24	leal	24(%esp),%edx
	# Tentative frame base: ebp = esp - 32 - 4*(num+2). After the second negl
	# edi holds num+2.
25	addl	$2,%edi
26	negl	%edi
27	leal	-32(%esp,%edi,4),%ebp
28	negl	%edi
	# Lower ebp until it is congruent to edx modulo 2048.
29	movl	%ebp,%eax
30	subl	%edx,%eax
31	andl	$2047,%eax
32	subl	%eax,%ebp
	# Then subtract another 2048 when bit 11 of ebp and edx still match, so
	# the two addresses end up differing in bit 11.
	# NOTE(review): presumably a cache-aliasing measure; the rationale is not
	# stated in this file.
33	xorl	%ebp,%edx
34	andl	$2048,%edx
35	xorl	$2048,%edx
36	subl	%edx,%ebp
	# Round the frame base down to a 64-byte boundary.
37	andl	$-64,%ebp
	# Move esp down to ebp in 4096-byte steps, reading one word at every
	# step so each page between the old and new esp is touched in order.
	# edx keeps the pre-allocation esp for the epilogue.
38	movl	%esp,%eax
39	subl	%ebp,%eax
40	andl	$-4096,%eax
41	movl	%esp,%edx
42	leal	(%ebp,%eax,1),%esp
43	movl	(%esp),%eax
44	cmpl	%ebp,%esp
45	ja	.L001page_walk
46	jmp	.L002page_walk_done
47.align	16
48.L001page_walk:
49	leal	-4096(%esp),%esp
50	movl	(%esp),%eax
51	cmpl	%ebp,%esp
52	ja	.L001page_walk
53.L002page_walk_done:
	# Copy the arguments into the new frame (esi still addresses the arg1
	# slot of the caller). arg5 is dereferenced once; the word it points to
	# is what lands in 20(%esp).
54	movl	(%esi),%eax
55	movl	4(%esi),%ebx
56	movl	8(%esi),%ecx
57	movl	12(%esi),%ebp
58	movl	16(%esi),%esi
59	movl	(%esi),%esi
60	movl	%eax,4(%esp)
61	movl	%ebx,8(%esp)
62	movl	%ecx,12(%esp)
63	movl	%ebp,16(%esp)
64	movl	%esi,20(%esp)
	# From here ebx = num-1 (edi held num+2) and 24(%esp) = esp to restore.
65	leal	-3(%edi),%ebx
66	movl	%edx,24(%esp)
	# Position-independent address of OPENSSL_ia32cap_P: call/pop yields the
	# run-time address of the local label, leal adds the assembly-time
	# distance from that label to the symbol.
67	call	.L003PIC_me_up
68.L003PIC_me_up:
69	popl	%eax
70	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	# Bit 26 of the first capability word selects the MMX-register path that
	# follows; when clear, the integer-only code at .L004non_sse2 is used
	# (going by that label name, the bit signals SSE2 support).
71	btl	$26,(%eax)
72	jnc	.L004non_sse2
	# ---- MMX-register path ----
	# Register roles: mm7 = 0x00000000ffffffff mask, esi = ap, edi = bp,
	# ebp = np, edx = outer index i, ecx = inner index j, mm4 = bp[i],
	# mm5 = per-pass multiplier m, mm2 and mm3 = running 64-bit sums of the
	# ap*bp[i] column and the np*m column.
73	movl	$-1,%eax
74	movd	%eax,%mm7
75	movl	8(%esp),%esi
76	movl	12(%esp),%edi
77	movl	16(%esp),%ebp
78	xorl	%edx,%edx
79	xorl	%ecx,%ecx
	# First pass (i = 0). mm5 = ap[0]*bp[0]; mm0 keeps its low word. pmuludq
	# multiplies only the low 32 bits of each operand, so the next step
	# yields m = low(ap[0]*bp[0]) * word at 20(%esp). mm3 = np[0]*m + mm0.
80	movd	(%edi),%mm4
81	movd	(%esi),%mm5
82	movd	(%ebp),%mm3
83	pmuludq	%mm4,%mm5
84	movq	%mm5,%mm2
85	movq	%mm5,%mm0
86	pand	%mm7,%mm0
87	pmuludq	20(%esp),%mm5
88	pmuludq	%mm5,%mm3
89	paddq	%mm0,%mm3
	# Preload np[1] and ap[1]; reduce both sums to their carries (upper
	# halves) and start the inner index at 1.
90	movd	4(%ebp),%mm1
91	movd	4(%esi),%mm0
92	psrlq	$32,%mm2
93	psrlq	$32,%mm3
94	incl	%ecx
95.align	16
96.L0051st:
	# For j = 1 .. num-2: add ap[j]*bp[0] and np[j]*m to the carries and
	# store the low word of the combined sum at tp[j-1] (offset 28 plus 4*j).
	# The operands for j+1 are loaded inside the same iteration.
97	pmuludq	%mm4,%mm0
98	pmuludq	%mm5,%mm1
99	paddq	%mm0,%mm2
100	paddq	%mm1,%mm3
101	movq	%mm2,%mm0
102	pand	%mm7,%mm0
103	movd	4(%ebp,%ecx,4),%mm1
104	paddq	%mm0,%mm3
105	movd	4(%esi,%ecx,4),%mm0
106	psrlq	$32,%mm2
107	movd	%mm3,28(%esp,%ecx,4)
108	psrlq	$32,%mm3
109	leal	1(%ecx),%ecx
110	cmpl	%ebx,%ecx
111	jl	.L0051st
	# Last column (j = num-1) goes to tp[num-2]; the two remaining carries
	# are summed and stored as one 64-bit value covering tp[num-1] and
	# tp[num]. Then advance to i = 1.
112	pmuludq	%mm4,%mm0
113	pmuludq	%mm5,%mm1
114	paddq	%mm0,%mm2
115	paddq	%mm1,%mm3
116	movq	%mm2,%mm0
117	pand	%mm7,%mm0
118	paddq	%mm0,%mm3
119	movd	%mm3,28(%esp,%ecx,4)
120	psrlq	$32,%mm2
121	psrlq	$32,%mm3
122	paddq	%mm2,%mm3
123	movq	%mm3,32(%esp,%ebx,4)
124	incl	%edx
125.L006outer:
	# Passes i = 1 .. num-1. Same shape as the first pass, except that every
	# column also adds the tp word left by the previous pass (via mm6).
	# Column 0: mm5 = ap[0]*bp[i] + tp[0], m = low(mm5) * word at 20(%esp).
126	xorl	%ecx,%ecx
127	movd	(%edi,%edx,4),%mm4
128	movd	(%esi),%mm5
129	movd	32(%esp),%mm6
130	movd	(%ebp),%mm3
131	pmuludq	%mm4,%mm5
132	paddq	%mm6,%mm5
133	movq	%mm5,%mm0
134	movq	%mm5,%mm2
135	pand	%mm7,%mm0
136	pmuludq	20(%esp),%mm5
137	pmuludq	%mm5,%mm3
138	paddq	%mm0,%mm3
139	movd	36(%esp),%mm6
140	movd	4(%ebp),%mm1
141	movd	4(%esi),%mm0
142	psrlq	$32,%mm2
143	psrlq	$32,%mm3
144	paddq	%mm6,%mm2
	# ebx doubles as the down-counter of the inner loop (num-2 iterations).
145	incl	%ecx
146	decl	%ebx
147.L007inner:
	# Columns j = 1 .. num-2: result word stored at tp[j-1], tp[j+1] is
	# fetched for the following column.
148	pmuludq	%mm4,%mm0
149	pmuludq	%mm5,%mm1
150	paddq	%mm0,%mm2
151	paddq	%mm1,%mm3
152	movq	%mm2,%mm0
153	movd	36(%esp,%ecx,4),%mm6
154	pand	%mm7,%mm0
155	movd	4(%ebp,%ecx,4),%mm1
156	paddq	%mm0,%mm3
157	movd	4(%esi,%ecx,4),%mm0
158	psrlq	$32,%mm2
159	movd	%mm3,28(%esp,%ecx,4)
160	psrlq	$32,%mm3
161	paddq	%mm6,%mm2
	# The leal does not modify flags, so jnz tests the result of decl.
162	decl	%ebx
163	leal	1(%ecx),%ecx
164	jnz	.L007inner
	# Restore ebx = num-1 from ecx, finish the last column, then fold both
	# carries and the old tp[num] into the 64-bit value at tp[num-1].
165	movl	%ecx,%ebx
166	pmuludq	%mm4,%mm0
167	pmuludq	%mm5,%mm1
168	paddq	%mm0,%mm2
169	paddq	%mm1,%mm3
170	movq	%mm2,%mm0
171	pand	%mm7,%mm0
172	paddq	%mm0,%mm3
173	movd	%mm3,28(%esp,%ecx,4)
174	psrlq	$32,%mm2
175	psrlq	$32,%mm3
176	movd	36(%esp,%ebx,4),%mm6
177	paddq	%mm2,%mm3
178	paddq	%mm6,%mm3
179	movq	%mm3,32(%esp,%ebx,4)
	# Next i; repeat while i <= num-1.
180	leal	1(%edx),%edx
181	cmpl	%ebx,%edx
182	jle	.L006outer
	# Leave MMX state before returning to integer code.
183	emms
184	jmp	.L008common_tail
185.align	16
186.L004non_sse2:
	# ---- Integer-only path ----
	# esi = ap, edi = bp. ebp becomes (num & 1) | (ap - bp), which is zero
	# exactly when num is even and ap == bp; that case is sent to the
	# squaring code. Meanwhile eax = address of bp[num] and edi is replaced
	# by bp[0]; the movl between orl and jz leaves the flags untouched.
187	movl	8(%esp),%esi
188	leal	1(%ebx),%ebp
189	movl	12(%esp),%edi
190	xorl	%ecx,%ecx
191	movl	%esi,%edx
192	andl	$1,%ebp
193	subl	%edi,%edx
194	leal	4(%edi,%ebx,4),%eax
195	orl	%edx,%ebp
196	movl	(%edi),%edi
197	jz	.L009bn_sqr_mont
	# Generic multiply. 28(%esp) = address of bp[num], used as end marker.
198	movl	%eax,28(%esp)
199	movl	(%esi),%eax
200	xorl	%edx,%edx
201.align	16
202.L010mull:
	# First pass, j = 0 .. num-2: tp[j] = low(ap[j]*bp[0] + carry), edx keeps
	# the new carry. The store uses the already incremented ecx, hence the
	# base offset 28 instead of 32.
203	movl	%edx,%ebp
204	mull	%edi
205	addl	%eax,%ebp
206	leal	1(%ecx),%ecx
207	adcl	$0,%edx
208	movl	(%esi,%ecx,4),%eax
209	cmpl	%ebx,%ecx
210	movl	%ebp,28(%esp,%ecx,4)
211	jl	.L010mull
	# Last product ap[num-1]*bp[0] gives tp[num-1] and tp[num]; tp[num+1] is
	# cleared. Interleaved with that: esi = np and edi = low 32 bits of
	# tp[0] * word at 20(%esp), the multiplier m for the reduction.
	# The reduction starts with np[0]*m + tp[0]; only the carry out of that
	# addition is kept, since eax is reloaded with np[1] right after.
212	movl	%edx,%ebp
213	mull	%edi
214	movl	20(%esp),%edi
215	addl	%ebp,%eax
216	movl	16(%esp),%esi
217	adcl	$0,%edx
218	imull	32(%esp),%edi
219	movl	%eax,32(%esp,%ebx,4)
220	xorl	%ecx,%ecx
221	movl	%edx,36(%esp,%ebx,4)
222	movl	%ecx,40(%esp,%ebx,4)
223	movl	(%esi),%eax
224	mull	%edi
225	addl	32(%esp),%eax
226	movl	4(%esi),%eax
227	adcl	$0,%edx
228	incl	%ecx
229	jmp	.L0112ndmadd
230.align	16
231.L0121stmadd:
	# Later passes, multiply half. For j = 0 .. num-2:
	# tp[j] = low(tp[j] + ap[j]*bp[i] + carry), with edi = bp[i].
232	movl	%edx,%ebp
233	mull	%edi
234	addl	32(%esp,%ecx,4),%ebp
235	leal	1(%ecx),%ecx
236	adcl	$0,%edx
237	addl	%eax,%ebp
238	movl	(%esi,%ecx,4),%eax
239	adcl	$0,%edx
240	cmpl	%ebx,%ecx
241	movl	%ebp,28(%esp,%ecx,4)
242	jl	.L0121stmadd
	# Top word of the multiply half: updates tp[num-1], tp[num] and puts the
	# carry out of tp[num] into tp[num+1]. Interleaved: esi = np, edi = new
	# multiplier m from tp[0], then np[0]*m + tp[0] to obtain the first
	# reduction carry; eax = np[1], ecx = 1 for the loop below.
243	movl	%edx,%ebp
244	mull	%edi
245	addl	32(%esp,%ebx,4),%eax
246	movl	20(%esp),%edi
247	adcl	$0,%edx
248	movl	16(%esp),%esi
249	addl	%eax,%ebp
250	adcl	$0,%edx
251	imull	32(%esp),%edi
252	xorl	%ecx,%ecx
253	addl	36(%esp,%ebx,4),%edx
254	movl	%ebp,32(%esp,%ebx,4)
255	adcl	$0,%ecx
256	movl	(%esi),%eax
257	movl	%edx,36(%esp,%ebx,4)
258	movl	%ecx,40(%esp,%ebx,4)
259	mull	%edi
260	addl	32(%esp),%eax
261	movl	4(%esi),%eax
262	adcl	$0,%edx
263	movl	$1,%ecx
264.align	16
265.L0112ndmadd:
	# Reduce half. For j = 1 .. num-2:
	# tp[j-1] = low(tp[j] + np[j]*m + carry), so the vector is written back
	# shifted down by one word (offset 24 with the incremented ecx).
266	movl	%edx,%ebp
267	mull	%edi
268	addl	32(%esp,%ecx,4),%ebp
269	leal	1(%ecx),%ecx
270	adcl	$0,%edx
271	addl	%eax,%ebp
272	movl	(%esi,%ecx,4),%eax
273	adcl	$0,%edx
274	cmpl	%ebx,%ecx
275	movl	%ebp,24(%esp,%ecx,4)
276	jl	.L0112ndmadd
	# Column num-1 goes to tp[num-2]; the carry plus old tp[num] and
	# tp[num+1] become the new tp[num-1] and tp[num].
277	movl	%edx,%ebp
278	mull	%edi
279	addl	32(%esp,%ebx,4),%ebp
280	adcl	$0,%edx
281	addl	%eax,%ebp
282	adcl	$0,%edx
283	movl	%ebp,28(%esp,%ebx,4)
284	xorl	%eax,%eax
285	movl	12(%esp),%ecx
286	addl	36(%esp,%ebx,4),%edx
287	adcl	40(%esp,%ebx,4),%eax
	# Advance the bp pointer by one word; finished once it reaches the end
	# marker in 28(%esp). The movl after cmpl does not change the flags.
288	leal	4(%ecx),%ecx
289	movl	%edx,32(%esp,%ebx,4)
290	cmpl	28(%esp),%ecx
291	movl	%eax,36(%esp,%ebx,4)
292	je	.L008common_tail
	# Set up the next pass: edi = next bp word, esi = ap, ecx = 0, carry = 0,
	# eax = ap[0].
293	movl	(%ecx),%edi
294	movl	8(%esp),%esi
295	movl	%ecx,12(%esp)
296	xorl	%ecx,%ecx
297	xorl	%edx,%edx
298	movl	(%esi),%eax
299	jmp	.L0121stmadd
300.align	16
301.L009bn_sqr_mont:
	# ---- Squaring path (reached only when ap == bp and num is even) ----
	# (%esp) = num-1, 12(%esp) = index i = 0, edi = ap[0].
	# tp[0] = low(ap[0]*ap[0]). The high word is split into edx = high >> 1
	# and ebx = high & 1, because the cross products that follow are doubled
	# while they are accumulated.
302	movl	%ebx,(%esp)
303	movl	%ecx,12(%esp)
304	movl	%edi,%eax
305	mull	%edi
306	movl	%eax,32(%esp)
307	movl	%edx,%ebx
308	shrl	$1,%edx
309	andl	$1,%ebx
310	incl	%ecx
311.align	16
312.L013sqr:
	# For j = 1 .. num-2: tp[j] = 2*low(ap[j]*ap[0] + carry) + ebx, where
	# ebx is the bit shifted out of the previous word.
313	movl	(%esi,%ecx,4),%eax
314	movl	%edx,%ebp
315	mull	%edi
316	addl	%ebp,%eax
317	leal	1(%ecx),%ecx
318	adcl	$0,%edx
319	leal	(%ebx,%eax,2),%ebp
320	shrl	$31,%eax
321	cmpl	(%esp),%ecx
322	movl	%eax,%ebx
323	movl	%ebp,28(%esp,%ecx,4)
324	jl	.L013sqr
	# Column num-1: doubled result into tp[num-1], tp[num] and the top bit
	# into tp[num+1]. Interleaved: edi = multiplier m from tp[0] and the
	# word at 20(%esp), esi = np, np[0]*m + tp[0] for the first reduction
	# carry, ebx = num-1, eax = np[1], ecx = 1.
325	movl	(%esi,%ecx,4),%eax
326	movl	%edx,%ebp
327	mull	%edi
328	addl	%ebp,%eax
329	movl	20(%esp),%edi
330	adcl	$0,%edx
331	movl	16(%esp),%esi
332	leal	(%ebx,%eax,2),%ebp
333	imull	32(%esp),%edi
334	shrl	$31,%eax
335	movl	%ebp,32(%esp,%ecx,4)
336	leal	(%eax,%edx,2),%ebp
337	movl	(%esi),%eax
338	shrl	$31,%edx
339	movl	%ebp,36(%esp,%ecx,4)
340	movl	%edx,40(%esp,%ecx,4)
341	mull	%edi
342	addl	32(%esp),%eax
343	movl	%ecx,%ebx
344	adcl	$0,%edx
345	movl	4(%esi),%eax
346	movl	$1,%ecx
347.align	16
348.L0143rdmadd:
	# Reduction loop of the squaring path, two words per iteration:
	# tp[j-1] = low(tp[j] + np[j]*m + carry) for j and then j+1.
	# ecx starts at 1 and advances by 2 until it reaches ebx = num-1.
349	movl	%edx,%ebp
350	mull	%edi
351	addl	32(%esp,%ecx,4),%ebp
352	adcl	$0,%edx
353	addl	%eax,%ebp
354	movl	4(%esi,%ecx,4),%eax
355	adcl	$0,%edx
356	movl	%ebp,28(%esp,%ecx,4)
357	movl	%edx,%ebp
358	mull	%edi
359	addl	36(%esp,%ecx,4),%ebp
360	leal	2(%ecx),%ecx
361	adcl	$0,%edx
362	addl	%eax,%ebp
363	movl	(%esi,%ecx,4),%eax
364	adcl	$0,%edx
365	cmpl	%ebx,%ecx
366	movl	%ebp,24(%esp,%ecx,4)
367	jl	.L0143rdmadd
	# Column num-1 goes to tp[num-2]; carry plus old tp[num] and tp[num+1]
	# become the new tp[num-1] and tp[num]. ecx = i, esi = ap again.
	# Finished when i == num-1 (the movl after cmpl keeps the flags).
368	movl	%edx,%ebp
369	mull	%edi
370	addl	32(%esp,%ebx,4),%ebp
371	adcl	$0,%edx
372	addl	%eax,%ebp
373	adcl	$0,%edx
374	movl	%ebp,28(%esp,%ebx,4)
375	movl	12(%esp),%ecx
376	xorl	%eax,%eax
377	movl	8(%esp),%esi
378	addl	36(%esp,%ebx,4),%edx
379	adcl	40(%esp,%ebx,4),%eax
380	movl	%edx,32(%esp,%ebx,4)
381	cmpl	%ebx,%ecx
382	movl	%eax,36(%esp,%ebx,4)
383	je	.L008common_tail
	# Next i: edi = ap[i], tp[i] += low(ap[i]*ap[i]), edx = high word.
	# When i is already num-1 there are no cross products left, so skip to
	# .L015sqrlast with ebp = 0 (leal keeps the flags set by cmpl).
384	movl	4(%esi,%ecx,4),%edi
385	leal	1(%ecx),%ecx
386	movl	%edi,%eax
387	movl	%ecx,12(%esp)
388	mull	%edi
389	addl	32(%esp,%ecx,4),%eax
390	adcl	$0,%edx
391	movl	%eax,32(%esp,%ecx,4)
392	xorl	%ebp,%ebp
393	cmpl	%ebx,%ecx
394	leal	1(%ecx),%ecx
395	je	.L015sqrlast
	# Split the high word of the square as before: edx = high >> 1,
	# ebx = high & 1.
396	movl	%edx,%ebx
397	shrl	$1,%edx
398	andl	$1,%ebx
399.align	16
400.L016sqradd:
	# For j = i+1 .. num-1: tp[j] += 2*low(ap[j]*ap[i] + carry) + ebx.
	# eax collects the bit shifted out by the doubling plus the carries of
	# the two additions and becomes ebx for the next word. The leal after
	# addl keeps CF, so adcl still sees the carry of that addl.
401	movl	(%esi,%ecx,4),%eax
402	movl	%edx,%ebp
403	mull	%edi
404	addl	%ebp,%eax
405	leal	(%eax,%eax,1),%ebp
406	adcl	$0,%edx
407	shrl	$31,%eax
408	addl	32(%esp,%ecx,4),%ebp
409	leal	1(%ecx),%ecx
410	adcl	$0,%eax
411	addl	%ebx,%ebp
412	adcl	$0,%eax
413	cmpl	(%esp),%ecx
414	movl	%ebp,28(%esp,%ecx,4)
415	movl	%eax,%ebx
416	jle	.L016sqradd
	# Double the final carry word: edx = 2*edx + ebx, ebp = bits carried out.
417	movl	%edx,%ebp
418	addl	%edx,%edx
419	shrl	$31,%ebp
420	addl	%ebx,%edx
421	adcl	$0,%ebp
422.L015sqrlast:
	# Fold edx and ebp into tp[num] and tp[num+1] (ecx = num here), then set
	# up the reduction exactly as before: edi = m, esi = np, first carry
	# from np[0]*m + tp[0], ebx = num-1, ecx = 1, eax = np[1].
423	movl	20(%esp),%edi
424	movl	16(%esp),%esi
425	imull	32(%esp),%edi
426	addl	32(%esp,%ecx,4),%edx
427	movl	(%esi),%eax
428	adcl	$0,%ebp
429	movl	%edx,32(%esp,%ecx,4)
430	movl	%ebp,36(%esp,%ecx,4)
431	mull	%edi
432	addl	32(%esp),%eax
433	leal	-1(%ecx),%ebx
434	adcl	$0,%edx
435	movl	$1,%ecx
436	movl	4(%esi),%eax
437	jmp	.L0143rdmadd
438.align	16
439.L008common_tail:
	# ---- Final subtraction and selection, shared by all paths ----
	# ebp = np, edi = rp, esi = address of tp[0], ecx = num-1 as counter,
	# edx = word index. The xorl also clears CF, so the first sbbl starts
	# without a borrow.
440	movl	16(%esp),%ebp
441	movl	4(%esp),%edi
442	leal	32(%esp),%esi
443	movl	(%esi),%eax
444	movl	%ebx,%ecx
445	xorl	%edx,%edx
446.align	16
447.L017sub:
	# For all num words: rp[j] = tp[j] - np[j] - borrow. decl, movl and leal
	# leave CF alone, so the borrow survives into the next sbbl; jge tests
	# the sign of the decl result.
448	sbbl	(%ebp,%edx,4),%eax
449	movl	%eax,(%edi,%edx,4)
450	decl	%ecx
451	movl	4(%esi,%edx,4),%eax
452	leal	1(%edx),%edx
453	jge	.L017sub
	# Here eax = tp[num] minus the final borrow, and edx = bitwise NOT of eax.
	# NOTE(review): the selection below treats eax as a mask, so it is
	# expected to be either 0 or all ones at this point.
454	sbbl	$0,%eax
455	movl	$-1,%edx
456	xorl	%eax,%edx
457	jmp	.L018copy
458.align	16
459.L018copy:
	# Branch-free select, from word num-1 down to 0:
	# rp[k] = (tp[k] AND eax) OR (rp[k] AND edx), i.e. keep the subtracted
	# value when eax is 0 and take tp[k] when eax is all ones.
	# Each tp[k] is overwritten with ecx (left at -1 by the loop above), so
	# the intermediate values do not stay on the stack.
460	movl	32(%esp,%ebx,4),%esi
461	movl	(%edi,%ebx,4),%ebp
462	movl	%ecx,32(%esp,%ebx,4)
463	andl	%eax,%esi
464	andl	%edx,%ebp
465	orl	%esi,%ebp
466	movl	%ebp,(%edi,%ebx,4)
467	decl	%ebx
468	jge	.L018copy
	# Drop the frame by restoring the saved esp and report success.
469	movl	24(%esp),%esp
470	movl	$1,%eax
471.L000just_leave:
	# Common exit; eax is 0 when reached from the num < 4 check.
472	popl	%edi
473	popl	%esi
474	popl	%ebx
475	popl	%ebp
476	ret
477.size	bn_mul_mont,.-.L_bn_mul_mont_begin
# NUL-terminated ASCII tag placed after the function. The bytes spell
# "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
478.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
479.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
480.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
481.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
482.byte	111,114,103,62,0
483#endif
484.section	.note.GNU-stack,"",@progbits
485