• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol-prefixing include, pulled in only when BORINGSSL_PREFIX
; is defined at assembly time.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Pick the code section declaration that matches the NASM output format:
; "obj" gets a 32-bit class=code segment, "win32" gets .text aligned to 64,
; everything else gets a plain .text code section.
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): defining @feat.00 as 1 looks like the NASM idiom that marks
; a win32 object as SafeSEH-compatible -- confirm against the NASM manual.
10$@feat.00 equ 1
11section	.text	code align=64
12%else
13section	.text	code
14%endif
; The extern below is commented out; the symbol is instead declared with a
; "common" directive in the .bss segment at the end of this file.
15;extern	_OPENSSL_ia32cap_P
; Export the entry point and start it on a 16-byte boundary.
16global	_bn_mul_mont
17align	16
;-----------------------------------------------------------------------
; _bn_mul_mont -- Montgomery multiplication for 32-bit x86 (the routine is
; identified by the ASCII string stored right after its final ret).
;
; Calling convention, as visible in the code: all six arguments are read
; from the stack; ebp, ebx, esi and edi are pushed on entry and popped
; before ret, so after the pushes the arguments sit at [20+esp]..[40+esp].
;
; Arguments, named after how the code uses them (the C prototype is not
; visible in this file -- NOTE(review): confirm names against the header):
;   arg1 "rp"  destination dword array, written only in the common tail
;   arg2 "ap"  first multiplicand dword array
;   arg3 "bp"  second multiplicand dword array
;   arg4 "np"  dword array that is multiplied by m and finally subtracted
;              (presumably the modulus)
;   arg5 "n0"  pointer; only the first dword behind it is used
;   arg6 "num" number of dwords in each array
;
; Returns eax = 0 without doing any work when num < 4 (signed compare),
; otherwise eax = 1 after num dwords have been written to rp.
;
; Scratch frame (esp is lowered to a 64-byte aligned address):
;   [ 0+esp]   squaring path only: num-1
;   [ 4+esp]   rp
;   [ 8+esp]   ap
;   [12+esp]   bp; the scalar multiply path advances it as a running
;              pointer, the squaring path reuses the slot as index i
;   [16+esp]   np
;   [20+esp]   n0 word (already dereferenced)
;   [24+esp]   caller's esp, restored just before returning
;   [28+esp]   scalar multiply path only: address of bp[num] (end marker)
;   [32+esp]   tp[0..num+1], the temporary result vector
;
; Three code paths share the common tail:
;   * MMX/pmuludq path, taken when bit 26 of the first dword of
;     _OPENSSL_ia32cap_P is set
;   * scalar multiply path (L$009mull / L$0111stmadd / L$0102ndmadd)
;   * scalar squaring path (L$008bn_sqr_mont), taken when ap == bp and
;     num is even
;-----------------------------------------------------------------------
18_bn_mul_mont:
19L$_bn_mul_mont_begin:
20	push	ebp
21	push	ebx
22	push	esi
23	push	edi
24	xor	eax,eax	; return value 0 for the early-exit path
25	mov	edi,DWORD [40+esp]	; edi = num (6th argument)
26	cmp	edi,4
27	jl	NEAR L$000just_leave	; num < 4: return 0
28	lea	esi,[20+esp]	; esi = address of the first argument slot
29	lea	edx,[24+esp]	; edx = address of the second argument slot
; Compute the frame base in ebp: room for num+2 dwords plus 32 bytes of
; fixed slots below the current esp.
30	add	edi,2
31	neg	edi
32	lea	ebp,[edi*4+esp-32]	; ebp = esp - 32 - 4*(num+2)
33	neg	edi	; edi = num+2 again
; Adjust ebp relative to edx: first make (ebp - edx) a multiple of 2048,
; then, if both still agree in bit 11, move ebp down by another 2048.
; NOTE(review): presumably this is to reduce cache aliasing between the
; frame and the caller's data -- confirm with the generator's comments.
34	mov	eax,ebp
35	sub	eax,edx
36	and	eax,2047
37	sub	ebp,eax
38	xor	edx,ebp
39	and	edx,2048
40	xor	edx,2048	; edx = 2048 when bit 11 matched, else 0
41	sub	ebp,edx
42	and	ebp,-64	; round the frame base down to a multiple of 64
; Lower esp to ebp in 4096-byte steps, reading one dword at each step.
; esp first jumps to ebp plus a whole number of 4096-byte steps, so the
; walk below ends exactly at esp == ebp.
; NOTE(review): presumably a stack probe so that guard pages are touched
; in order -- confirm.
43	mov	eax,esp
44	sub	eax,ebp
45	and	eax,-4096	; eax = (esp - ebp) rounded down to 4096
46	mov	edx,esp	; edx = esp to restore on exit
47	lea	esp,[eax*1+ebp]
48	mov	eax,DWORD [esp]	; read from the new top of stack
49	cmp	esp,ebp
50	ja	NEAR L$001page_walk
51	jmp	NEAR L$002page_walk_done
52align	16
53L$001page_walk:
54	lea	esp,[esp-4096]	; one step down
55	mov	eax,DWORD [esp]	; read from it
56	cmp	esp,ebp
57	ja	NEAR L$001page_walk
58L$002page_walk_done:
; Copy the arguments into the new frame. esi still points at the first
; argument slot on the caller's side of the stack.
59	mov	eax,DWORD [esi]	; rp
60	mov	ebx,DWORD [4+esi]	; ap
61	mov	ecx,DWORD [8+esi]	; bp
62	mov	ebp,DWORD [12+esi]	; np
63	mov	esi,DWORD [16+esi]	; n0 pointer
64	mov	esi,DWORD [esi]	; n0 word itself
65	mov	DWORD [4+esp],eax
66	mov	DWORD [8+esp],ebx
67	mov	DWORD [12+esp],ecx
68	mov	DWORD [16+esp],ebp
69	mov	DWORD [20+esp],esi
70	lea	ebx,[edi-3]	; ebx = (num+2) - 3 = num-1
71	mov	DWORD [24+esp],edx	; remember the caller's esp
; Dispatch on bit 26 of the first dword of _OPENSSL_ia32cap_P.
72	lea	eax,[_OPENSSL_ia32cap_P]
73	bt	DWORD [eax],26
74	jnc	NEAR L$003non_sse2	; bit clear: use the scalar code
; ---- MMX register / pmuludq path -----------------------------------
; Register roles: esi = ap, edi = bp, ebp = np, edx = outer index i,
; ecx = inner index j, ebx = num-1, mm7 = mask selecting the low dword,
; mm4 = bp[i], mm5 = m in its low dword, mm2 = running ap[]*bp[i] sum and
; carry, mm3 = running np[]*m sum and carry.
75	mov	eax,-1
76	movd	mm7,eax	; mm7 = 0x00000000FFFFFFFF
77	mov	esi,DWORD [8+esp]
78	mov	edi,DWORD [12+esp]
79	mov	ebp,DWORD [16+esp]
80	xor	edx,edx	; i = 0
81	xor	ecx,ecx	; j = 0
82	movd	mm4,DWORD [edi]	; bp[0]
83	movd	mm5,DWORD [esi]	; ap[0]
84	movd	mm3,DWORD [ebp]	; np[0]
85	pmuludq	mm5,mm4	; ap[0]*bp[0]
86	movq	mm2,mm5
87	movq	mm0,mm5
88	pand	mm0,mm7	; low dword of ap[0]*bp[0]
89	pmuludq	mm5,[20+esp]	; low dword of the product times n0; low dword of the result is m
90	pmuludq	mm3,mm5	; np[0]*m
91	paddq	mm3,mm0	; plus low dword of ap[0]*bp[0]
92	movd	mm1,DWORD [4+ebp]	; preload np[1]
93	movd	mm0,DWORD [4+esi]	; preload ap[1]
94	psrlq	mm2,32	; keep only the carry of each sum
95	psrlq	mm3,32
96	inc	ecx	; j = 1
97align	16
; First pass (i = 0): for j = 1..num-2 accumulate ap[j]*bp[0] and np[j]*m
; and store the low dword of the combined sum to tp[j-1].
98L$0041st:
99	pmuludq	mm0,mm4	; ap[j]*bp[0]
100	pmuludq	mm1,mm5	; np[j]*m
101	paddq	mm2,mm0
102	paddq	mm3,mm1
103	movq	mm0,mm2
104	pand	mm0,mm7	; low dword of the ap*bp sum
105	movd	mm1,DWORD [4+ecx*4+ebp]	; preload np[j+1]
106	paddq	mm3,mm0
107	movd	mm0,DWORD [4+ecx*4+esi]	; preload ap[j+1]
108	psrlq	mm2,32
109	movd	DWORD [28+ecx*4+esp],mm3	; tp[j-1]
110	psrlq	mm3,32
111	lea	ecx,[1+ecx]
112	cmp	ecx,ebx
113	jl	NEAR L$0041st	; loop while j < num-1
; Last column of the first pass (j = num-1), then store both carries.
114	pmuludq	mm0,mm4
115	pmuludq	mm1,mm5
116	paddq	mm2,mm0
117	paddq	mm3,mm1
118	movq	mm0,mm2
119	pand	mm0,mm7
120	paddq	mm3,mm0
121	movd	DWORD [28+ecx*4+esp],mm3	; tp[num-2]
122	psrlq	mm2,32
123	psrlq	mm3,32
124	paddq	mm3,mm2	; sum of the two carries
125	movq	[32+ebx*4+esp],mm3	; 64-bit store: tp[num-1] and tp[num]
126	inc	edx	; i = 1
; Outer loop over bp[i]: tp = (tp + ap[]*bp[i] + np[]*m) shifted down by
; one dword, with m recomputed from the new lowest dword.
127L$005outer:
128	xor	ecx,ecx	; j = 0
129	movd	mm4,DWORD [edx*4+edi]	; bp[i]
130	movd	mm5,DWORD [esi]	; ap[0]
131	movd	mm6,DWORD [32+esp]	; tp[0]
132	movd	mm3,DWORD [ebp]	; np[0]
133	pmuludq	mm5,mm4
134	paddq	mm5,mm6	; ap[0]*bp[i] + tp[0]
135	movq	mm0,mm5
136	movq	mm2,mm5
137	pand	mm0,mm7
138	pmuludq	mm5,[20+esp]	; low dword times n0; low dword of the result is m
139	pmuludq	mm3,mm5	; np[0]*m
140	paddq	mm3,mm0
141	movd	mm6,DWORD [36+esp]	; tp[1]
142	movd	mm1,DWORD [4+ebp]	; preload np[1]
143	movd	mm0,DWORD [4+esi]	; preload ap[1]
144	psrlq	mm2,32
145	psrlq	mm3,32
146	paddq	mm2,mm6	; fold tp[1] into the ap*bp carry
147	inc	ecx	; j = 1
148	dec	ebx	; ebx = num-2, used as the inner loop counter
149L$006inner:
150	pmuludq	mm0,mm4	; ap[j]*bp[i]
151	pmuludq	mm1,mm5	; np[j]*m
152	paddq	mm2,mm0
153	paddq	mm3,mm1
154	movq	mm0,mm2
155	movd	mm6,DWORD [36+ecx*4+esp]	; tp[j+1]
156	pand	mm0,mm7
157	movd	mm1,DWORD [4+ecx*4+ebp]	; preload np[j+1]
158	paddq	mm3,mm0
159	movd	mm0,DWORD [4+ecx*4+esi]	; preload ap[j+1]
160	psrlq	mm2,32
161	movd	DWORD [28+ecx*4+esp],mm3	; tp[j-1]
162	psrlq	mm3,32
163	paddq	mm2,mm6	; fold tp[j+1] into the carry
164	dec	ebx
165	lea	ecx,[1+ecx]	; lea leaves the flags from dec intact
166	jnz	NEAR L$006inner	; loop until the counter reaches zero
167	mov	ebx,ecx	; ebx = num-1 again
; Last column of this outer iteration (j = num-1).
168	pmuludq	mm0,mm4
169	pmuludq	mm1,mm5
170	paddq	mm2,mm0
171	paddq	mm3,mm1
172	movq	mm0,mm2
173	pand	mm0,mm7
174	paddq	mm3,mm0
175	movd	DWORD [28+ecx*4+esp],mm3	; tp[num-2]
176	psrlq	mm2,32
177	psrlq	mm3,32
178	movd	mm6,DWORD [36+ebx*4+esp]	; tp[num]
179	paddq	mm3,mm2
180	paddq	mm3,mm6
181	movq	[32+ebx*4+esp],mm3	; 64-bit store: tp[num-1] and tp[num]
182	lea	edx,[1+edx]	; i++
183	cmp	edx,ebx
184	jle	NEAR L$005outer	; loop while i <= num-1
185	emms	; leave MMX state before returning to the caller
186	jmp	NEAR L$007common_tail
187align	16
; ---- scalar paths --------------------------------------------------
; Choose between generic multiply and squaring: ebp becomes zero only
; when num is even (ebx+1 = num, masked with 1) and ap == bp.
188L$003non_sse2:
189	mov	esi,DWORD [8+esp]	; ap
190	lea	ebp,[1+ebx]	; num
191	mov	edi,DWORD [12+esp]	; bp
192	xor	ecx,ecx	; j = 0
193	mov	edx,esi
194	and	ebp,1	; num & 1
195	sub	edx,edi	; ap - bp
196	lea	eax,[4+ebx*4+edi]	; address of bp[num]
197	or	ebp,edx	; ZF set iff num is even and ap == bp
198	mov	edi,DWORD [edi]	; bp[0] (mov keeps the flags)
199	jz	NEAR L$008bn_sqr_mont
200	mov	DWORD [28+esp],eax	; end-of-bp marker
201	mov	eax,DWORD [esi]	; ap[0]
202	xor	edx,edx	; carry = 0
203align	16
; tp[0..num-2] = low dwords of ap[j]*bp[0] with carry propagation;
; edx carries the high dword into the next column.
204L$009mull:
205	mov	ebp,edx	; previous carry
206	mul	edi	; edx:eax = ap[j]*bp[0]
207	add	ebp,eax
208	lea	ecx,[1+ecx]
209	adc	edx,0
210	mov	eax,DWORD [ecx*4+esi]	; next ap word
211	cmp	ecx,ebx
212	mov	DWORD [28+ecx*4+esp],ebp	; tp[j] (ecx is already j+1)
213	jl	NEAR L$009mull
; Last column, then set up m = n0*tp[0] (low 32 bits) for the reduction.
214	mov	ebp,edx
215	mul	edi
216	mov	edi,DWORD [20+esp]	; n0
217	add	eax,ebp
218	mov	esi,DWORD [16+esp]	; esi = np from here on
219	adc	edx,0
220	imul	edi,DWORD [32+esp]	; edi = m = n0*tp[0]
221	mov	DWORD [32+ebx*4+esp],eax	; tp[num-1]
222	xor	ecx,ecx
223	mov	DWORD [36+ebx*4+esp],edx	; tp[num]
224	mov	DWORD [40+ebx*4+esp],ecx	; tp[num+1] = 0
225	mov	eax,DWORD [esi]	; np[0]
226	mul	edi	; np[0]*m
227	add	eax,DWORD [32+esp]	; only the carry is kept; eax is overwritten next
228	mov	eax,DWORD [4+esi]	; np[1]
229	adc	edx,0
230	inc	ecx	; j = 1
231	jmp	NEAR L$0102ndmadd
232align	16
; tp[] += ap[]*bp[i], in place (entered with ecx = 0, edx = 0,
; eax = ap[0], edi = bp[i], esi = ap).
233L$0111stmadd:
234	mov	ebp,edx	; previous carry
235	mul	edi	; ap[j]*bp[i]
236	add	ebp,DWORD [32+ecx*4+esp]	; + tp[j]
237	lea	ecx,[1+ecx]
238	adc	edx,0
239	add	ebp,eax
240	mov	eax,DWORD [ecx*4+esi]	; next ap word
241	adc	edx,0
242	cmp	ecx,ebx
243	mov	DWORD [28+ecx*4+esp],ebp	; tp[j] (ecx is already j+1)
244	jl	NEAR L$0111stmadd
; Last column of the multiply-add, then recompute m from the new tp[0].
245	mov	ebp,edx
246	mul	edi
247	add	eax,DWORD [32+ebx*4+esp]	; + tp[num-1]
248	mov	edi,DWORD [20+esp]	; n0
249	adc	edx,0
250	mov	esi,DWORD [16+esp]	; esi = np
251	add	ebp,eax
252	adc	edx,0
253	imul	edi,DWORD [32+esp]	; edi = m = n0*tp[0]
254	xor	ecx,ecx
255	add	edx,DWORD [36+ebx*4+esp]	; + tp[num]
256	mov	DWORD [32+ebx*4+esp],ebp	; tp[num-1]
257	adc	ecx,0	; carry out of the top word (mov kept CF)
258	mov	eax,DWORD [esi]	; np[0]
259	mov	DWORD [36+ebx*4+esp],edx	; tp[num]
260	mov	DWORD [40+ebx*4+esp],ecx	; tp[num+1]
261	mul	edi	; np[0]*m
262	add	eax,DWORD [32+esp]	; only the carry is kept; eax is overwritten next
263	mov	eax,DWORD [4+esi]	; np[1]
264	adc	edx,0
265	mov	ecx,1	; j = 1
266align	16
; tp[] = (tp[] + np[]*m) shifted down by one dword: reads tp[j], writes
; tp[j-1].
267L$0102ndmadd:
268	mov	ebp,edx	; previous carry
269	mul	edi	; np[j]*m
270	add	ebp,DWORD [32+ecx*4+esp]	; + tp[j]
271	lea	ecx,[1+ecx]
272	adc	edx,0
273	add	ebp,eax
274	mov	eax,DWORD [ecx*4+esi]	; next np word
275	adc	edx,0
276	cmp	ecx,ebx
277	mov	DWORD [24+ecx*4+esp],ebp	; tp[j-1] (ecx is already j+1)
278	jl	NEAR L$0102ndmadd
; Last column of the reduction, fold in the two top words, then either
; finish or advance to the next bp word.
279	mov	ebp,edx
280	mul	edi
281	add	ebp,DWORD [32+ebx*4+esp]	; + tp[num-1]
282	adc	edx,0
283	add	ebp,eax
284	adc	edx,0
285	mov	DWORD [28+ebx*4+esp],ebp	; tp[num-2]
286	xor	eax,eax
287	mov	ecx,DWORD [12+esp]	; current bp pointer
288	add	edx,DWORD [36+ebx*4+esp]	; + tp[num]
289	adc	eax,DWORD [40+ebx*4+esp]	; + tp[num+1] + carry
290	lea	ecx,[4+ecx]	; advance to the next bp word
291	mov	DWORD [32+ebx*4+esp],edx	; tp[num-1]
292	cmp	ecx,DWORD [28+esp]	; reached bp[num]?
293	mov	DWORD [36+ebx*4+esp],eax	; tp[num]
294	je	NEAR L$007common_tail
295	mov	edi,DWORD [ecx]	; edi = next bp word
296	mov	esi,DWORD [8+esp]	; esi = ap
297	mov	DWORD [12+esp],ecx	; save the advanced bp pointer
298	xor	ecx,ecx	; j = 0
299	xor	edx,edx	; carry = 0
300	mov	eax,DWORD [esi]	; ap[0]
301	jmp	NEAR L$0111stmadd
302align	16
; ---- scalar squaring path (ap == bp, num even) -----------------------
; On entry esi = ap, edi = ap[0], ecx = 0, ebx = num-1.
; [esp] holds num-1 and [12+esp] holds the outer index i.
303L$008bn_sqr_mont:
304	mov	DWORD [esp],ebx	; [esp] = num-1
305	mov	DWORD [12+esp],ecx	; i = 0
306	mov	eax,edi
307	mul	edi	; ap[0]*ap[0]
308	mov	DWORD [32+esp],eax	; tp[0]
309	mov	ebx,edx
310	shr	edx,1	; halve the carry: the loop below doubles every sum
311	and	ebx,1	; ebx = bit that the shift dropped
312	inc	ecx	; j = 1
313align	16
; tp[j] = low dword of 2*(ap[j]*ap[0] + carry) + ebx; ebx then takes the
; bit shifted out by the doubling.
314L$012sqr:
315	mov	eax,DWORD [ecx*4+esi]	; ap[j]
316	mov	ebp,edx
317	mul	edi	; ap[j]*ap[0]
318	add	eax,ebp
319	lea	ecx,[1+ecx]
320	adc	edx,0
321	lea	ebp,[eax*2+ebx]	; doubled low dword plus previous top bit
322	shr	eax,31	; top bit lost by the doubling
323	cmp	ecx,DWORD [esp]
324	mov	ebx,eax
325	mov	DWORD [28+ecx*4+esp],ebp	; tp[j] (ecx is already j+1)
326	jl	NEAR L$012sqr
; Last column (j = num-1), top words, and m = n0*tp[0] for the reduction.
327	mov	eax,DWORD [ecx*4+esi]
328	mov	ebp,edx
329	mul	edi
330	add	eax,ebp
331	mov	edi,DWORD [20+esp]	; n0
332	adc	edx,0
333	mov	esi,DWORD [16+esp]	; esi = np
334	lea	ebp,[eax*2+ebx]
335	imul	edi,DWORD [32+esp]	; edi = m = n0*tp[0]
336	shr	eax,31
337	mov	DWORD [32+ecx*4+esp],ebp	; tp[num-1]
338	lea	ebp,[edx*2+eax]	; doubled high dword plus carried bit
339	mov	eax,DWORD [esi]	; np[0]
340	shr	edx,31
341	mov	DWORD [36+ecx*4+esp],ebp	; tp[num]
342	mov	DWORD [40+ecx*4+esp],edx	; tp[num+1]
343	mul	edi	; np[0]*m
344	add	eax,DWORD [32+esp]	; only the carry is kept; eax is overwritten below
345	mov	ebx,ecx	; ebx = num-1
346	adc	edx,0
347	mov	eax,DWORD [4+esi]	; np[1]
348	mov	ecx,1	; j = 1
349align	16
; Reduction for the squaring path: tp[] = (tp[] + np[]*m) shifted down by
; one dword, two columns per iteration.
350L$0133rdmadd:
351	mov	ebp,edx
352	mul	edi	; np[j]*m
353	add	ebp,DWORD [32+ecx*4+esp]	; + tp[j]
354	adc	edx,0
355	add	ebp,eax
356	mov	eax,DWORD [4+ecx*4+esi]	; np[j+1]
357	adc	edx,0
358	mov	DWORD [28+ecx*4+esp],ebp	; tp[j-1]
359	mov	ebp,edx
360	mul	edi	; np[j+1]*m
361	add	ebp,DWORD [36+ecx*4+esp]	; + tp[j+1]
362	lea	ecx,[2+ecx]
363	adc	edx,0
364	add	ebp,eax
365	mov	eax,DWORD [ecx*4+esi]	; np word for the next column
366	adc	edx,0
367	cmp	ecx,ebx
368	mov	DWORD [24+ecx*4+esp],ebp	; tp[j] (ecx is already j+2)
369	jl	NEAR L$0133rdmadd
; Last column of the reduction, fold in the top words, then either finish
; or start the next squaring row.
370	mov	ebp,edx
371	mul	edi
372	add	ebp,DWORD [32+ebx*4+esp]	; + tp[num-1]
373	adc	edx,0
374	add	ebp,eax
375	adc	edx,0
376	mov	DWORD [28+ebx*4+esp],ebp	; tp[num-2]
377	mov	ecx,DWORD [12+esp]	; i
378	xor	eax,eax
379	mov	esi,DWORD [8+esp]	; esi = ap
380	add	edx,DWORD [36+ebx*4+esp]	; + tp[num]
381	adc	eax,DWORD [40+ebx*4+esp]	; + tp[num+1] + carry
382	mov	DWORD [32+ebx*4+esp],edx	; tp[num-1]
383	cmp	ecx,ebx
384	mov	DWORD [36+ebx*4+esp],eax	; tp[num]
385	je	NEAR L$007common_tail	; i == num-1: all rows done
; Next row: add ap[i]*ap[i] at tp[i], then the doubled cross products.
386	mov	edi,DWORD [4+ecx*4+esi]	; ap[i+1]
387	lea	ecx,[1+ecx]	; i++
388	mov	eax,edi
389	mov	DWORD [12+esp],ecx	; save i
390	mul	edi	; ap[i]*ap[i]
391	add	eax,DWORD [32+ecx*4+esp]	; + tp[i]
392	adc	edx,0
393	mov	DWORD [32+ecx*4+esp],eax	; tp[i]
394	xor	ebp,ebp
395	cmp	ecx,ebx
396	lea	ecx,[1+ecx]	; j = i+1 (lea keeps the flags from cmp)
397	je	NEAR L$014sqrlast	; i == num-1: no cross products left
398	mov	ebx,edx
399	shr	edx,1	; halve the carry: the loop below doubles every sum
400	and	ebx,1	; ebx = bit that the shift dropped
401align	16
; tp[j] += 2*(ap[j]*ap[i] + carry) + ebx; eax collects the bit shifted out
; by the doubling plus the carries of the two additions.
402L$015sqradd:
403	mov	eax,DWORD [ecx*4+esi]	; ap[j]
404	mov	ebp,edx
405	mul	edi	; ap[j]*ap[i]
406	add	eax,ebp
407	lea	ebp,[eax*1+eax]	; doubled low dword (lea keeps CF from add)
408	adc	edx,0
409	shr	eax,31	; top bit lost by the doubling
410	add	ebp,DWORD [32+ecx*4+esp]	; + tp[j]
411	lea	ecx,[1+ecx]
412	adc	eax,0
413	add	ebp,ebx	; + bits carried from the previous column
414	adc	eax,0
415	cmp	ecx,DWORD [esp]
416	mov	DWORD [28+ecx*4+esp],ebp	; tp[j] (ecx is already j+1)
417	mov	ebx,eax
418	jle	NEAR L$015sqradd	; loop while j <= num-1
419	mov	ebp,edx
420	add	edx,edx	; double the final high dword
421	shr	ebp,31	; its lost top bit
422	add	edx,ebx
423	adc	ebp,0
; Store the two top words of the row and set up the reduction
; (m = n0*tp[0]); entered with ecx = num, edx:ebp = words to add.
424L$014sqrlast:
425	mov	edi,DWORD [20+esp]	; n0
426	mov	esi,DWORD [16+esp]	; esi = np
427	imul	edi,DWORD [32+esp]	; edi = m = n0*tp[0]
428	add	edx,DWORD [32+ecx*4+esp]
429	mov	eax,DWORD [esi]	; np[0]
430	adc	ebp,0
431	mov	DWORD [32+ecx*4+esp],edx
432	mov	DWORD [36+ecx*4+esp],ebp
433	mul	edi	; np[0]*m
434	add	eax,DWORD [32+esp]	; only the carry is kept; eax is overwritten below
435	lea	ebx,[ecx-1]	; ebx = num-1
436	adc	edx,0
437	mov	ecx,1	; j = 1
438	mov	eax,DWORD [4+esi]	; np[1]
439	jmp	NEAR L$0133rdmadd
440align	16
; ---- common tail -----------------------------------------------------
; rp[] = tp[] - np[] over num dwords with a borrow chain, followed by a
; branch-free select between tp[] and that difference.
441L$007common_tail:
442	mov	ebp,DWORD [16+esp]	; np
443	mov	edi,DWORD [4+esp]	; rp
444	lea	esi,[32+esp]	; &tp[0]
445	mov	eax,DWORD [esi]	; tp[0]
446	mov	ecx,ebx	; counter = num-1
447	xor	edx,edx	; index = 0, and CF = 0 for the first sbb
448align	16
449L$016sub:
450	sbb	eax,DWORD [edx*4+ebp]	; tp[k] - np[k] - borrow
451	mov	DWORD [edx*4+edi],eax	; rp[k]
452	dec	ecx	; dec leaves CF (the borrow) untouched
453	mov	eax,DWORD [4+edx*4+esi]	; tp[k+1]
454	lea	edx,[1+edx]
455	jge	NEAR L$016sub	; num iterations; ecx ends at -1
456	sbb	eax,0	; eax = tp[num] - final borrow
; eax and its complement edx are used as AND masks below.
; NOTE(review): this presumes eax is either 0 or all ones here -- confirm.
457	mov	edx,-1
458	xor	edx,eax	; edx = ~eax
459	jmp	NEAR L$017copy
460align	16
; For k = num-1 down to 0: rp[k] = (tp[k] & eax) | (rp[k] & edx), and the
; tp[k] slot is overwritten with ecx (left at -1 by the loop above).
461L$017copy:
462	mov	esi,DWORD [32+ebx*4+esp]	; tp[k]
463	mov	ebp,DWORD [ebx*4+edi]	; rp[k] (the difference)
464	mov	DWORD [32+ebx*4+esp],ecx	; overwrite the scratch word
465	and	esi,eax
466	and	ebp,edx
467	or	ebp,esi
468	mov	DWORD [ebx*4+edi],ebp
469	dec	ebx
470	jge	NEAR L$017copy
471	mov	esp,DWORD [24+esp]	; back to the caller's stack
472	mov	eax,1	; return value 1
473L$000just_leave:
474	pop	edi
475	pop	esi
476	pop	ebx
477	pop	ebp
478	ret
; NUL-terminated ASCII identification string, emitted in the code section
; right after the function:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
479db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
480db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
481db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
482db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
483db	111,114,103,62,0
; Declare _OPENSSL_ia32cap_P as a 16-byte common symbol in .bss; the code
; above reads only its first dword.
484segment	.bss
485common	_OPENSSL_ia32cap_P 16
486