• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol prefixing: when BORINGSSL_PREFIX is defined, pull in the
; include file named below before anything else is assembled.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Everything from here to the matching %else near the end of the file is
; assembled only when the output format is win32.
7%ifidn __OUTPUT_FORMAT__, win32
; Section selection.  Because this chain sits inside the win32-only guard
; above, the "obj" arm and the final %else arm cannot be selected here; only
; the %elifidn win32 arm is ever taken.
8%ifidn __OUTPUT_FORMAT__,obj
9section	code	use32 class=code align=64
10%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): @feat.00 with bit 0 set is presumably the SafeSEH marker
; symbol described in the NASM manual's win32 chapter - confirm.
11$@feat.00 equ 1
12section	.text	code align=64
13%else
14section	.text	code
15%endif
; The extern declaration below is commented out; _OPENSSL_ia32cap_P is
; instead declared with `common` in the .bss segment at the end of this file.
16;extern	_OPENSSL_ia32cap_P
; ---------------------------------------------------------------------------
; _bn_mul_mont - 32-bit x86 routine taking six dword stack arguments.  The
; identification string emitted after the routine reads "Montgomery
; Multiplication for x86".
;
; Stack arguments as seen after the four pushes below (4 saved registers +
; return address = 20 bytes):
;   [20+esp] arg0  pointer; the final loops store the result words through it
;   [24+esp] arg1  pointer; words of the first multiplicand
;   [28+esp] arg2  pointer; words of the second multiplicand (one per pass)
;   [32+esp] arg3  pointer; words that the tail subtracts from the result
;   [36+esp] arg4  pointer to a single dword, dereferenced once
;   [40+esp] arg5  word count, called "num" in these comments
; NOTE(review): presumably rp, ap, bp, np, n0, num of the C prototype -
; confirm against the declaring header.
;
; Returns eax = 0 without doing any work when num < 4 (signed compare) and
; eax = 1 otherwise.  ebp, ebx, esi and edi are saved here and restored at
; L$000just_leave.
; ---------------------------------------------------------------------------
17global	_bn_mul_mont
18align	16
19_bn_mul_mont:
20L$_bn_mul_mont_begin:
21	push	ebp
22	push	ebx
23	push	esi
24	push	edi
; eax = 0 is the return value of the early exit.
25	xor	eax,eax
; edi = num (arg5); leave immediately when num < 4.
26	mov	edi,DWORD [40+esp]
27	cmp	edi,4
28	jl	NEAR L$000just_leave
; esi = address of arg0 on the caller's stack (used after the frame is set
; up to reload every argument), edx = address of arg1.
29	lea	esi,[20+esp]
30	lea	edx,[24+esp]
; ebp = esp - 32 - 4*(num+2): candidate frame base leaving room for 32 bytes
; of fixed slots plus num+2 temporary words.  edi is negated only for the
; lea and ends up holding num+2.
31	add	edi,2
32	neg	edi
33	lea	ebp,[edi*4+esp-32]
34	neg	edi
; Adjust the frame base relative to the argument area (edx): first make
; (ebp - edx) a multiple of 2048, then lower ebp by a further 2048 when bit
; 11 of (ebp xor edx) is clear, and finally round down to a 64-byte boundary.
; NOTE(review): presumably this avoids address aliasing between the frame
; and the inputs - confirm with the generating Perl script.
35	mov	eax,ebp
36	sub	eax,edx
37	and	eax,2047
38	sub	ebp,eax
39	xor	edx,ebp
40	and	edx,2048
41	xor	edx,2048
42	sub	ebp,edx
43	and	ebp,-64
; eax = (esp - ebp) rounded down to a multiple of 4096.  edx keeps the
; original esp so that it can be stored in the frame and restored on exit.
; esp is first moved to ebp + eax and that location is read.
44	mov	eax,esp
45	sub	eax,ebp
46	and	eax,-4096
47	mov	edx,esp
48	lea	esp,[eax*1+ebp]
49	mov	eax,DWORD [esp]
50	cmp	esp,ebp
51	ja	NEAR L$001page_walk
52	jmp	NEAR L$002page_walk_done
53align	16
; Lower esp 4096 bytes at a time, reading one dword at every step, until esp
; equals ebp (unsigned compare).
; NOTE(review): presumably this touches each stack page in order so guard
; pages are hit one at a time - confirm.
54L$001page_walk:
55	lea	esp,[esp-4096]
56	mov	eax,DWORD [esp]
57	cmp	esp,ebp
58	ja	NEAR L$001page_walk
; The frame is in place (esp = frame base).  Still live from the entry code:
; esi = address of arg0 on the caller's stack, edx = original esp,
; edi = num+2 where num is the word count (arg5).
59L$002page_walk_done:
; Load the five pointer arguments arg0..arg4; arg4 is dereferenced so that
; esi ends up holding the dword it points to.
60	mov	eax,DWORD [esi]
61	mov	ebx,DWORD [4+esi]
62	mov	ecx,DWORD [8+esi]
63	mov	ebp,DWORD [12+esi]
64	mov	esi,DWORD [16+esi]
65	mov	esi,DWORD [esi]
; Spill them into the fixed frame slots:
;   [4+esp]=arg0  [8+esp]=arg1  [12+esp]=arg2  [16+esp]=arg3  [20+esp]=*arg4
66	mov	DWORD [4+esp],eax
67	mov	DWORD [8+esp],ebx
68	mov	DWORD [12+esp],ecx
69	mov	DWORD [16+esp],ebp
70	mov	DWORD [20+esp],esi
; ebx = (num+2)-3 = num-1, the index of the last word.
71	lea	ebx,[edi-3]
; [24+esp] = original esp; it is reloaded into esp just before the final pops.
72	mov	DWORD [24+esp],edx
; Test bit 26 of the first dword of _OPENSSL_ia32cap_P; when it is clear the
; integer-only code at L$003non_sse2 is used.
; NOTE(review): bit 26 is presumably the CPUID SSE2 feature flag, matching
; the label name - confirm.
73	lea	eax,[_OPENSSL_ia32cap_P]
74	bt	DWORD [eax],26
75	jnc	NEAR L$003non_sse2
; ---- path using the MMX registers with pmuludq/paddq ----
; mm7 = 0x00000000FFFFFFFF (movd zero-extends), used to isolate the low
; dword of a 64-bit sum.
76	mov	eax,-1
77	movd	mm7,eax
; esi = arg1 words, edi = arg2 words, ebp = arg3 words.
; edx = index into arg2 (0), ecx = word index (0).
78	mov	esi,DWORD [8+esp]
79	mov	edi,DWORD [12+esp]
80	mov	ebp,DWORD [16+esp]
81	xor	edx,edx
82	xor	ecx,ecx
; mm4 = arg2[0], mm5 = arg1[0], mm3 = arg3[0].
83	movd	mm4,DWORD [edi]
84	movd	mm5,DWORD [esi]
85	movd	mm3,DWORD [ebp]
; mm5 = arg1[0]*arg2[0]; mm2 and mm0 receive copies, mm0 masked to its low
; dword.
86	pmuludq	mm5,mm4
87	movq	mm2,mm5
88	movq	mm0,mm5
89	pand	mm0,mm7
; mm5 = (low dword of the product) * (dword at [20+esp]).  pmuludq multiplies
; only the low dwords of its operands, so the upper half of this qword load
; is ignored.
90	pmuludq	mm5,[20+esp]
; mm3 = arg3[0]*(low dword of mm5) + low dword of the first product.
91	pmuludq	mm3,mm5
92	paddq	mm3,mm0
; Preload word 1 of arg3 and arg1, reduce both accumulators to their carries
; (upper dwords) and start the word loop at index 1.
93	movd	mm1,DWORD [4+ebp]
94	movd	mm0,DWORD [4+esi]
95	psrlq	mm2,32
96	psrlq	mm3,32
97	inc	ecx
98align	16
; First pass of the MMX path.  Notation: tp[i] is the dword at
; [32+i*4+esp]; esi/ebp point at the arg1/arg3 words; mm4 holds the current
; arg2 word and mm5 the per-pass multiplier for the arg3 words; mm7 masks the
; low dword; ebx = num-1 where num is the word count.
;
; Each iteration (entered with ecx = 1, running while ecx < num-1) handles
; the words preloaded in mm0/mm1:
;   mm2 += arg1 word * mm4
;   mm3 += arg3 word * mm5 + low dword of mm2
;   low dword of mm3 -> [28+ecx*4+esp], i.e. tp[ecx-1]
;   mm2, mm3 >>= 32 (only the carries are kept)
; while the next arg3/arg1 words are preloaded into mm1/mm0.
99L$0041st:
100	pmuludq	mm0,mm4
101	pmuludq	mm1,mm5
102	paddq	mm2,mm0
103	paddq	mm3,mm1
104	movq	mm0,mm2
105	pand	mm0,mm7
106	movd	mm1,DWORD [4+ecx*4+ebp]
107	paddq	mm3,mm0
108	movd	mm0,DWORD [4+ecx*4+esi]
109	psrlq	mm2,32
110	movd	DWORD [28+ecx*4+esp],mm3
111	psrlq	mm3,32
112	lea	ecx,[1+ecx]
113	cmp	ecx,ebx
114	jl	NEAR L$0041st
; Last word (ecx = num-1): the same step without any further preloading.
115	pmuludq	mm0,mm4
116	pmuludq	mm1,mm5
117	paddq	mm2,mm0
118	paddq	mm3,mm1
119	movq	mm0,mm2
120	pand	mm0,mm7
121	paddq	mm3,mm0
122	movd	DWORD [28+ecx*4+esp],mm3
123	psrlq	mm2,32
124	psrlq	mm3,32
; Add the two carries and store the 64-bit sum as the top two words,
; tp[num-1] and tp[num].
125	paddq	mm3,mm2
126	movq	[32+ebx*4+esp],mm3
; edx = 1: index of the next arg2 word, consumed by L$005outer.
127	inc	edx
; Remaining passes of the MMX path: one pass per arg2 word, edx = 1..num-1
; (num = word count, ebx = num-1 at the top of every pass).
; Notation: tp[i] is the dword at [32+i*4+esp]; esi/edi/ebp point at the
; arg1/arg2/arg3 words; mm7 masks the low dword of a 64-bit value.
128L$005outer:
129	xor	ecx,ecx
; mm4 = arg2[edx], mm5 = arg1[0], mm6 = tp[0], mm3 = arg3[0].
130	movd	mm4,DWORD [edx*4+edi]
131	movd	mm5,DWORD [esi]
132	movd	mm6,DWORD [32+esp]
133	movd	mm3,DWORD [ebp]
; mm5 = arg1[0]*arg2[edx] + tp[0]; copies go to mm0 (masked to the low dword)
; and mm2.
134	pmuludq	mm5,mm4
135	paddq	mm5,mm6
136	movq	mm0,mm5
137	movq	mm2,mm5
138	pand	mm0,mm7
; mm5 = (low dword of that sum) * (dword at [20+esp]); only the low dwords of
; pmuludq operands take part.  mm3 = arg3[0]*mm5 + low dword of the sum.
139	pmuludq	mm5,[20+esp]
140	pmuludq	mm3,mm5
141	paddq	mm3,mm0
; Preload tp[1] and word 1 of arg3/arg1, keep only the carries in mm2/mm3 and
; fold tp[1] into mm2.
142	movd	mm6,DWORD [36+esp]
143	movd	mm1,DWORD [4+ebp]
144	movd	mm0,DWORD [4+esi]
145	psrlq	mm2,32
146	psrlq	mm3,32
147	paddq	mm2,mm6
; ecx = 1; ebx becomes the inner-loop countdown (num-2).
148	inc	ecx
149	dec	ebx
; Inner loop: multiply-accumulate the preloaded arg1/arg3 words, store the
; low dword of mm3 to [28+ecx*4+esp] (= tp[ecx-1]), keep the carries, and add
; the old tp[ecx+1] (mm6) into mm2.  The jnz tests the flags of `dec ebx`;
; the lea in between leaves the flags untouched.
150L$006inner:
151	pmuludq	mm0,mm4
152	pmuludq	mm1,mm5
153	paddq	mm2,mm0
154	paddq	mm3,mm1
155	movq	mm0,mm2
156	movd	mm6,DWORD [36+ecx*4+esp]
157	pand	mm0,mm7
158	movd	mm1,DWORD [4+ecx*4+ebp]
159	paddq	mm3,mm0
160	movd	mm0,DWORD [4+ecx*4+esi]
161	psrlq	mm2,32
162	movd	DWORD [28+ecx*4+esp],mm3
163	psrlq	mm3,32
164	paddq	mm2,mm6
165	dec	ebx
166	lea	ecx,[1+ecx]
167	jnz	NEAR L$006inner
; ecx is now num-1; restore ebx to that value.
168	mov	ebx,ecx
; Last word of the pass, without further preloading.
169	pmuludq	mm0,mm4
170	pmuludq	mm1,mm5
171	paddq	mm2,mm0
172	paddq	mm3,mm1
173	movq	mm0,mm2
174	pand	mm0,mm7
175	paddq	mm3,mm0
176	movd	DWORD [28+ecx*4+esp],mm3
177	psrlq	mm2,32
178	psrlq	mm3,32
; New top two words (tp[num-1], tp[num]) = carry in mm3 + carry in mm2 +
; old tp[num].
179	movd	mm6,DWORD [36+ebx*4+esp]
180	paddq	mm3,mm2
181	paddq	mm3,mm6
182	movq	[32+ebx*4+esp],mm3
; Next arg2 word; repeat while edx <= num-1.
183	lea	edx,[1+edx]
184	cmp	edx,ebx
185	jle	NEAR L$005outer
; Clear the MMX state and continue with the shared final subtraction/copy.
186	emms
187	jmp	NEAR L$007common_tail
188align	16
; Integer-only path.  On entry ebx = num-1 (num = word count) and the frame
; slots hold: [8+esp]=arg1, [12+esp]=arg2, [16+esp]=arg3, [20+esp]=*arg4.
; tp[i] denotes the dword at [32+i*4+esp].
189L$003non_sse2:
; esi = arg1, edi = arg2, ecx = 0.
; ebp = (num & 1) | (arg1 - arg2): zero only when arg1 and arg2 are the same
; pointer and num is even; that case is sent to the squaring code.
; eax = arg2 + 4*num, one past the last arg2 word.
; edi is then replaced by arg2[0]; the mov does not disturb the flags of the
; `or`, which the jz tests.
190	mov	esi,DWORD [8+esp]
191	lea	ebp,[1+ebx]
192	mov	edi,DWORD [12+esp]
193	xor	ecx,ecx
194	mov	edx,esi
195	and	ebp,1
196	sub	edx,edi
197	lea	eax,[4+ebx*4+edi]
198	or	ebp,edx
199	mov	edi,DWORD [edi]
200	jz	NEAR L$008bn_sqr_mont
; [28+esp] = end-of-arg2 marker compared against at the end of every pass.
; eax = arg1[0], edx = carry (0).
201	mov	DWORD [28+esp],eax
202	mov	eax,DWORD [esi]
203	xor	edx,edx
204align	16
; First pass: tp[j] = low dword of (arg1[j]*edi + carry) for j = 0..num-2,
; carry in edx.  ecx is incremented before the store, so [28+ecx*4+esp] is
; tp[j].  lea and mov leave the flags alone, so the adc sees the carry of the
; add and the jl sees the result of the cmp.
205L$009mull:
206	mov	ebp,edx
207	mul	edi
208	add	ebp,eax
209	lea	ecx,[1+ecx]
210	adc	edx,0
211	mov	eax,DWORD [ecx*4+esi]
212	cmp	ecx,ebx
213	mov	DWORD [28+ecx*4+esp],ebp
214	jl	NEAR L$009mull
; Last word (j = num-1): product + carry goes to tp[num-1] (low) and tp[num]
; (high); tp[num+1] is cleared.  Interleaved with that: esi = arg3 and
; edi = low 32 bits of [20+esp]*tp[0], the multiplier for the arg3 words.
215	mov	ebp,edx
216	mul	edi
217	mov	edi,DWORD [20+esp]
218	add	eax,ebp
219	mov	esi,DWORD [16+esp]
220	adc	edx,0
221	imul	edi,DWORD [32+esp]
222	mov	DWORD [32+ebx*4+esp],eax
223	xor	ecx,ecx
224	mov	DWORD [36+ebx*4+esp],edx
225	mov	DWORD [40+ebx*4+esp],ecx
; arg3[0]*edi + tp[0]: only the carry out (edx) is kept; eax is reloaded with
; arg3[1] before the adc, which still sees the carry of the add.
226	mov	eax,DWORD [esi]
227	mul	edi
228	add	eax,DWORD [32+esp]
229	mov	eax,DWORD [4+esi]
230	adc	edx,0
; Continue at word 1 in the loop that adds the arg3 multiple.
231	inc	ecx
232	jmp	NEAR L$0102ndmadd
233align	16
; Integer path, passes after the first: add arg1 * (current arg2 word) into
; the temporary vector.  tp[i] denotes the dword at [32+i*4+esp]; ebx = num-1
; (num = word count).  Entered with esi = arg1, edi = current arg2 word,
; eax = arg1[0], ecx = 0, edx = 0.
;
; Loop: tp[j] = low dword of (tp[j] + arg1[j]*edi + carry) for j = 0..num-2,
; carry in edx.  ecx is incremented before the store, so [28+ecx*4+esp] is
; tp[j].
234L$0111stmadd:
235	mov	ebp,edx
236	mul	edi
237	add	ebp,DWORD [32+ecx*4+esp]
238	lea	ecx,[1+ecx]
239	adc	edx,0
240	add	ebp,eax
241	mov	eax,DWORD [ecx*4+esi]
242	adc	edx,0
243	cmp	ecx,ebx
244	mov	DWORD [28+ecx*4+esp],ebp
245	jl	NEAR L$0111stmadd
; Last word (j = num-1): the sum goes to tp[num-1], its carry is added into
; tp[num], and the carry out of that addition becomes tp[num+1] (collected in
; ecx).  Interleaved with that: esi = arg3 and edi = low 32 bits of
; [20+esp]*tp[0], the multiplier for the arg3 words.
246	mov	ebp,edx
247	mul	edi
248	add	eax,DWORD [32+ebx*4+esp]
249	mov	edi,DWORD [20+esp]
250	adc	edx,0
251	mov	esi,DWORD [16+esp]
252	add	ebp,eax
253	adc	edx,0
254	imul	edi,DWORD [32+esp]
255	xor	ecx,ecx
256	add	edx,DWORD [36+ebx*4+esp]
257	mov	DWORD [32+ebx*4+esp],ebp
258	adc	ecx,0
259	mov	eax,DWORD [esi]
260	mov	DWORD [36+ebx*4+esp],edx
261	mov	DWORD [40+ebx*4+esp],ecx
; arg3[0]*edi + tp[0]: only the carry out (edx) is kept; eax is reloaded with
; arg3[1] and ecx set to 1 for the loop that follows.
262	mul	edi
263	add	eax,DWORD [32+esp]
264	mov	eax,DWORD [4+esi]
265	adc	edx,0
266	mov	ecx,1
267align	16
; Integer path: add arg3 * edi into the temporary vector and move it down by
; one word.  tp[i] denotes the dword at [32+i*4+esp]; ebx = num-1 (num = word
; count).  Entered with esi = arg3, edi = multiplier ([20+esp]*tp[0]),
; eax = arg3[1], ecx = 1, edx = carry.
;
; Loop: tp[j-1] = low dword of (tp[j] + arg3[j]*edi + carry) for
; j = 1..num-2.  ecx is incremented before the store, so [24+ecx*4+esp] is
; tp[j-1].
268L$0102ndmadd:
269	mov	ebp,edx
270	mul	edi
271	add	ebp,DWORD [32+ecx*4+esp]
272	lea	ecx,[1+ecx]
273	adc	edx,0
274	add	ebp,eax
275	mov	eax,DWORD [ecx*4+esi]
276	adc	edx,0
277	cmp	ecx,ebx
278	mov	DWORD [24+ecx*4+esp],ebp
279	jl	NEAR L$0102ndmadd
; Last word (j = num-1) -> tp[num-2].
280	mov	ebp,edx
281	mul	edi
282	add	ebp,DWORD [32+ebx*4+esp]
283	adc	edx,0
284	add	ebp,eax
285	adc	edx,0
286	mov	DWORD [28+ebx*4+esp],ebp
; tp[num-1] = carry + old tp[num]; tp[num] = carry out + old tp[num+1].
; Interleaved: ecx = arg2 cursor from [12+esp] advanced by one word, compared
; with the end marker at [28+esp]; when they match all arg2 words are done.
287	xor	eax,eax
288	mov	ecx,DWORD [12+esp]
289	add	edx,DWORD [36+ebx*4+esp]
290	adc	eax,DWORD [40+ebx*4+esp]
291	lea	ecx,[4+ecx]
292	mov	DWORD [32+ebx*4+esp],edx
293	cmp	ecx,DWORD [28+esp]
294	mov	DWORD [36+ebx*4+esp],eax
295	je	NEAR L$007common_tail
; Otherwise set up the next pass: edi = next arg2 word, esi = arg1, save the
; advanced cursor, ecx = 0, edx = 0, eax = arg1[0].
296	mov	edi,DWORD [ecx]
297	mov	esi,DWORD [8+esp]
298	mov	DWORD [12+esp],ecx
299	xor	ecx,ecx
300	xor	edx,edx
301	mov	eax,DWORD [esi]
302	jmp	NEAR L$0111stmadd
303align	16
; Squaring variant of the integer path, reached when arg1 and arg2 are the
; same pointer and num (the word count) is even.  On entry esi = arg1,
; edi = arg1[0], ecx = 0, ebx = num-1.  a[] denotes the arg1 words and tp[i]
; the dword at [32+i*4+esp].
304L$008bn_sqr_mont:
; [esp] = num-1 (loop bound); [12+esp] = 0, the index i of the word being
; squared (this reuses the slot that held arg2).
305	mov	DWORD [esp],ebx
306	mov	DWORD [12+esp],ecx
; tp[0] = low dword of a[0]*a[0].  The high dword is split into edx = high>>1
; (carry for the doubled cross products) and ebx = high&1 (bit merged into
; the next stored word).
307	mov	eax,edi
308	mul	edi
309	mov	DWORD [32+esp],eax
310	mov	ebx,edx
311	shr	edx,1
312	and	ebx,1
313	inc	ecx
314align	16
; For j = 1..num-2: edx:eax = a[j]*a[0] + carry; tp[j] = 2*eax + ebx (low 32
; bits); ebx = bit 31 of eax, i.e. the bit shifted out by the doubling.  ecx
; is incremented before the store, so [28+ecx*4+esp] is tp[j].
315L$012sqr:
316	mov	eax,DWORD [ecx*4+esi]
317	mov	ebp,edx
318	mul	edi
319	add	eax,ebp
320	lea	ecx,[1+ecx]
321	adc	edx,0
322	lea	ebp,[eax*2+ebx]
323	shr	eax,31
324	cmp	ecx,DWORD [esp]
325	mov	ebx,eax
326	mov	DWORD [28+ecx*4+esp],ebp
327	jl	NEAR L$012sqr
; Last word (j = num-1): tp[num-1] = 2*low + ebx, tp[num] = 2*high + bit 31
; of low, tp[num+1] = bit 31 of high.  Interleaved with that: esi = arg3 and
; edi = low 32 bits of [20+esp]*tp[0], the multiplier for the arg3 words.
328	mov	eax,DWORD [ecx*4+esi]
329	mov	ebp,edx
330	mul	edi
331	add	eax,ebp
332	mov	edi,DWORD [20+esp]
333	adc	edx,0
334	mov	esi,DWORD [16+esp]
335	lea	ebp,[eax*2+ebx]
336	imul	edi,DWORD [32+esp]
337	shr	eax,31
338	mov	DWORD [32+ecx*4+esp],ebp
339	lea	ebp,[edx*2+eax]
340	mov	eax,DWORD [esi]
341	shr	edx,31
342	mov	DWORD [36+ecx*4+esp],ebp
343	mov	DWORD [40+ecx*4+esp],edx
; arg3[0]*edi + tp[0]: only the carry out (edx) is kept.  ebx = num-1 again;
; the mov between add and adc leaves the carry flag intact.  eax = arg3[1],
; ecx = 1 for the loop that follows.
344	mul	edi
345	add	eax,DWORD [32+esp]
346	mov	ebx,ecx
347	adc	edx,0
348	mov	eax,DWORD [4+esi]
349	mov	ecx,1
350align	16
; Squaring path: add arg3 * edi into the temporary vector and move it down by
; one word, two words per iteration.  tp[i] denotes the dword at
; [32+i*4+esp]; ebx = num-1 (num = word count).  Entered with esi = arg3,
; edi = multiplier ([20+esp]*tp[0]), eax = arg3[1], ecx = 1, edx = carry.
;
; Each half computes tp[j-1] = low dword of (tp[j] + arg3[j]*edi + carry),
; first for j = ecx and then for j = ecx+1.  ecx advances by 2 and therefore
; stays odd; the loop ends when it reaches num-1, which is odd because this
; path is only entered with an even num.
351L$0133rdmadd:
352	mov	ebp,edx
353	mul	edi
354	add	ebp,DWORD [32+ecx*4+esp]
355	adc	edx,0
356	add	ebp,eax
357	mov	eax,DWORD [4+ecx*4+esi]
358	adc	edx,0
359	mov	DWORD [28+ecx*4+esp],ebp
360	mov	ebp,edx
361	mul	edi
362	add	ebp,DWORD [36+ecx*4+esp]
363	lea	ecx,[2+ecx]
364	adc	edx,0
365	add	ebp,eax
366	mov	eax,DWORD [ecx*4+esi]
367	adc	edx,0
368	cmp	ecx,ebx
369	mov	DWORD [24+ecx*4+esp],ebp
370	jl	NEAR L$0133rdmadd
; Last word (j = num-1) -> tp[num-2].
371	mov	ebp,edx
372	mul	edi
373	add	ebp,DWORD [32+ebx*4+esp]
374	adc	edx,0
375	add	ebp,eax
376	adc	edx,0
377	mov	DWORD [28+ebx*4+esp],ebp
; tp[num-1] = carry + old tp[num]; tp[num] = carry out + old tp[num+1].
; Interleaved: ecx = index i of the word just processed ([12+esp]) and
; esi = arg1; when i == num-1 every word has been handled.
378	mov	ecx,DWORD [12+esp]
379	xor	eax,eax
380	mov	esi,DWORD [8+esp]
381	add	edx,DWORD [36+ebx*4+esp]
382	adc	eax,DWORD [40+ebx*4+esp]
383	mov	DWORD [32+ebx*4+esp],edx
384	cmp	ecx,ebx
385	mov	DWORD [36+ebx*4+esp],eax
386	je	NEAR L$007common_tail
; Next word: i = i+1 (saved back to [12+esp]), edi = arg1[i];
; tp[i] += low dword of arg1[i]^2, with the high dword plus carry left in edx.
387	mov	edi,DWORD [4+ecx*4+esi]
388	lea	ecx,[1+ecx]
389	mov	eax,edi
390	mov	DWORD [12+esp],ecx
391	mul	edi
392	add	eax,DWORD [32+ecx*4+esp]
393	adc	edx,0
394	mov	DWORD [32+ecx*4+esp],eax
; ebp = 0.  If i is already the last index there are no cross products: go
; straight to L$014sqrlast with ecx = i+1 (the lea keeps the cmp flags).
395	xor	ebp,ebp
396	cmp	ecx,ebx
397	lea	ecx,[1+ecx]
398	je	NEAR L$014sqrlast
; Otherwise split edx into edx>>1 (carry) and ebx = edx&1 for the doubled
; cross products handled by the loop that follows.
399	mov	ebx,edx
400	shr	edx,1
401	and	ebx,1
402align	16
; Squaring path: accumulate the doubled cross products of word i with the
; higher words.  a[] denotes the arg1 words (esi), edi = a[i], tp[k] the
; dword at [32+k*4+esp], [esp] = num-1 (num = word count).  Entered with
; ecx = i+1, edx = carry, ebx = low bit carried over.
;
; For j = ecx .. num-1: edx:eax = a[j]*a[i] + carry;
; tp[j] = tp[j] + 2*eax + ebx (low 32 bits); the next ebx collects bit 31 of
; eax plus the carries of the two additions.  lea leaves the flags alone, so
; `adc edx,0` still sees the carry of `add eax,ebp`.
403L$015sqradd:
404	mov	eax,DWORD [ecx*4+esi]
405	mov	ebp,edx
406	mul	edi
407	add	eax,ebp
408	lea	ebp,[eax*1+eax]
409	adc	edx,0
410	shr	eax,31
411	add	ebp,DWORD [32+ecx*4+esp]
412	lea	ecx,[1+ecx]
413	adc	eax,0
414	add	ebp,ebx
415	adc	eax,0
416	cmp	ecx,DWORD [esp]
417	mov	DWORD [28+ecx*4+esp],ebp
418	mov	ebx,eax
419	jle	NEAR L$015sqradd
; ebp:edx = 2*edx + ebx, the doubled final carry.
420	mov	ebp,edx
421	add	edx,edx
422	shr	ebp,31
423	add	edx,ebx
424	adc	ebp,0
; Reached with ecx = num on both ways in, and ebp:edx = value still to be
; added at tp[ecx].  edi = low 32 bits of [20+esp]*tp[0] (multiplier for the
; arg3 words), esi = arg3; tp[ecx] += edx, and ebp plus the carry becomes
; tp[ecx+1].
425L$014sqrlast:
426	mov	edi,DWORD [20+esp]
427	mov	esi,DWORD [16+esp]
428	imul	edi,DWORD [32+esp]
429	add	edx,DWORD [32+ecx*4+esp]
430	mov	eax,DWORD [esi]
431	adc	ebp,0
432	mov	DWORD [32+ecx*4+esp],edx
433	mov	DWORD [36+ecx*4+esp],ebp
; arg3[0]*edi + tp[0]: only the carry out (edx) is kept.  ebx = ecx-1, ecx = 1
; and eax = arg3[1] are the entry state of L$0133rdmadd.
434	mul	edi
435	add	eax,DWORD [32+esp]
436	lea	ebx,[ecx-1]
437	adc	edx,0
438	mov	ecx,1
439	mov	eax,DWORD [4+esi]
440	jmp	NEAR L$0133rdmadd
441align	16
; Shared tail of all paths.  On entry ebx = num-1 (num = word count) and the
; accumulated value is in tp[0..num], where tp[i] is the dword at
; [32+i*4+esp].
442L$007common_tail:
; ebp = arg3 words, edi = arg0 (output words), esi = &tp[0], eax = tp[0],
; ecx = countdown starting at num-1, edx = word index 0.  `xor edx,edx` also
; clears CF, so the first sbb starts without a borrow.
443	mov	ebp,DWORD [16+esp]
444	mov	edi,DWORD [4+esp]
445	lea	esi,[32+esp]
446	mov	eax,DWORD [esi]
447	mov	ecx,ebx
448	xor	edx,edx
449align	16
; out[i] = tp[i] - arg3[i] - borrow for i = 0..num-1.  The borrow lives in CF
; across iterations: dec, mov and lea do not modify CF, and jge only tests
; the sign/overflow flags produced by `dec ecx`.  The loop ends with ecx = -1
; and eax = tp[num].
450L$016sub:
451	sbb	eax,DWORD [edx*4+ebp]
452	mov	DWORD [edx*4+edi],eax
453	dec	ecx
454	mov	eax,DWORD [4+edx*4+esi]
455	lea	edx,[1+edx]
456	jge	NEAR L$016sub
; eax = tp[num] - final borrow, edx = NOT eax; the two serve as selection
; masks below.
; NOTE(review): this relies on eax being either 0 or all ones here, i.e. on
; tp[num] never exceeding the borrow - confirm against the algorithm bounds.
457	sbb	eax,0
458	mov	edx,-1
459	xor	edx,eax
460	jmp	NEAR L$017copy
461align	16
; For ebx = num-1 down to 0:
;   out[ebx] = (tp[ebx] AND eax) OR (out[ebx] AND edx)
; which picks either the unreduced tp word or the difference stored above
; without branching on the data.  Each tp word is overwritten with ecx (left
; at -1 by the countdown above) after it has been read.
462L$017copy:
463	mov	esi,DWORD [32+ebx*4+esp]
464	mov	ebp,DWORD [ebx*4+edi]
465	mov	DWORD [32+ebx*4+esp],ecx
466	and	esi,eax
467	and	ebp,edx
468	or	ebp,esi
469	mov	DWORD [ebx*4+edi],ebp
470	dec	ebx
471	jge	NEAR L$017copy
; Restore the caller's esp saved in the frame and return 1.
472	mov	esp,DWORD [24+esp]
473	mov	eax,1
; Common exit; also the target of the early exit taken with eax = 0 when
; num < 4.  Restores the registers pushed at entry.
474L$000just_leave:
475	pop	edi
476	pop	esi
477	pop	ebx
478	pop	ebp
479	ret
; NUL-terminated ASCII identification string:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
480db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
481db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
482db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
483db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
484db	111,114,103,62,0
; _OPENSSL_ia32cap_P is declared here as a 16-byte common symbol in .bss; its
; first dword is read by the feature test in _bn_mul_mont.
485segment	.bss
486common	_OPENSSL_ia32cap_P 16
; Any output format other than win32: the file consists of this single ret.
487%else
488; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
489ret
490%endif
491