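; NOTE: the six lines below are navigation links left over from the web
; source viewer this file was copied from. They are not part of the
; program and are commented out so the assembler ignores them.
; • Home
;   • Line#
;   • Scopes#
;   • Navigate#
;   • Raw
;   • Download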
;-----------------------------------------------------------------------
; Code-section selection, keyed on the NASM output format.
;   obj    : 32-bit section of class "code", 64-byte aligned
;   win32  : .text, 64-byte aligned, plus the @feat.00 marker below
;   other  : plain .text
; Syntax: NASM (Intel operand order), 32-bit x86.
;-----------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): per the NASM manual's win32 chapter, defining the absolute
; symbol @feat.00 with bit 0 set flags the object as safe-SEH aware for the
; Microsoft linker -- confirm this is still wanted for this build.
$@feat.00 equ 1
section	.text	code align=64
%else
section	.text	code
%endif
; No `extern` is required for _OPENSSL_ia32cap_P: the symbol is declared
; with `common` in the .bss segment at the end of this file.
;extern	_OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; __mul_1x1_mmx -- 32 x 32 -> 64-bit carry-less (XOR-accumulated)
;                  multiplication using MMX for the 64-bit accumulator.
;
; Internal helper with a register-based convention (not a C ABI):
;   In:    eax = a, ebx = b
;   Out:   mm0 = a * b as a 64-bit carry-less product
;   Clobb: ebx, ecx, edx, ebp, esi, edi, mm1-mm5, flags (eax is kept)
;   Stack: reserves 36 bytes; dwords at [esp+0] .. [esp+28] hold
;          tab[i] = carry-less (a & 0x3fffffff) * i, for i = 0..7
;
; Method: bits 30 and 31 of a are masked out of the table so every entry
; fits in 32 bits; their contributions (b << 30, b << 31) are produced
; separately in mm4 / mm5.  b is then consumed in eleven 3-bit windows
; (shift 0, 3, ..., 30); each window indexes the table and the entry is
; shifted into place and XORed into mm0.  The code is straight-line with
; no branches.  The caller must save any of the clobbered registers it
; needs and is responsible for executing emms afterwards.
;-----------------------------------------------------------------------
align	16
__mul_1x1_mmx:
	sub	esp,36
	mov	ecx,eax
	lea	edx,[eax*1+eax]		; edx = a << 1
	and	ecx,0x3fffffff		; ecx = a1 = a with bits 30,31 cleared
	lea	ebp,[edx*1+edx]		; ebp = a4 = a << 2 (bits 30,31 fall off)
	mov	DWORD [esp],0		; tab[0] = 0
	and	edx,0x7fffffff		; edx = a2 = a1 << 1
	movd	mm2,eax			; mm2 = a, used to derive the top-bit masks
	movd	mm3,ebx			; mm3 = b, zero-extended to 64 bits
	mov	DWORD [4+esp],ecx	; tab[1] = a1
	xor	ecx,edx			; ecx = a1^a2
	pxor	mm5,mm5
	pxor	mm4,mm4
	mov	DWORD [8+esp],edx	; tab[2] = a2
	xor	edx,ebp			; edx = a2^a4
	mov	DWORD [12+esp],ecx	; tab[3] = a1^a2
	pcmpgtd	mm5,mm2			; low dword = all-ones iff bit 31 of a is set
	paddd	mm2,mm2			; bring bit 30 of a into the sign position
	xor	ecx,edx			; ecx = a1^a4
	mov	DWORD [16+esp],ebp	; tab[4] = a4
	xor	ebp,edx			; ebp = a2
	pand	mm5,mm3			; mm5 = bit31(a) ? b : 0
	pcmpgtd	mm4,mm2			; low dword = all-ones iff bit 30 of a is set
	mov	DWORD [20+esp],ecx	; tab[5] = a1^a4
	xor	ebp,ecx			; ebp = a1^a2^a4
	psllq	mm5,31			; mm5 = bit31(a) ? b << 31 : 0
	pand	mm4,mm3			; mm4 = bit30(a) ? b : 0
	mov	DWORD [24+esp],edx	; tab[6] = a2^a4
	mov	esi,7
	mov	DWORD [28+esp],ebp	; tab[7] = a1^a2^a4
	mov	ebp,esi			; ebp = 7: window mask for the rest of the routine
	and	esi,ebx			; esi = window 0 of b
	shr	ebx,3
	mov	edi,ebp
	psllq	mm4,30			; mm4 = bit30(a) ? b << 30 : 0
	and	edi,ebx			; edi = window 1
	shr	ebx,3
	movd	mm0,DWORD [esi*4+esp]	; mm0 = tab[window 0]
	mov	esi,ebp
	and	esi,ebx			; esi = window 2
	shr	ebx,3
	movd	mm2,DWORD [edi*4+esp]	; window 1 ...
	mov	edi,ebp
	psllq	mm2,3			; ... at bit 3
	and	edi,ebx			; edi = window 3
	shr	ebx,3
	pxor	mm0,mm2
	movd	mm1,DWORD [esi*4+esp]	; window 2 ...
	mov	esi,ebp
	psllq	mm1,6			; ... at bit 6
	and	esi,ebx			; esi = window 4
	shr	ebx,3
	pxor	mm0,mm1
	movd	mm2,DWORD [edi*4+esp]	; window 3 ...
	mov	edi,ebp
	psllq	mm2,9			; ... at bit 9
	and	edi,ebx			; edi = window 5
	shr	ebx,3
	pxor	mm0,mm2
	movd	mm1,DWORD [esi*4+esp]	; window 4 ...
	mov	esi,ebp
	psllq	mm1,12			; ... at bit 12
	and	esi,ebx			; esi = window 6
	shr	ebx,3
	pxor	mm0,mm1
	movd	mm2,DWORD [edi*4+esp]	; window 5 ...
	mov	edi,ebp
	psllq	mm2,15			; ... at bit 15
	and	edi,ebx			; edi = window 7
	shr	ebx,3
	pxor	mm0,mm2
	movd	mm1,DWORD [esi*4+esp]	; window 6 ...
	mov	esi,ebp
	psllq	mm1,18			; ... at bit 18
	and	esi,ebx			; esi = window 8
	shr	ebx,3
	pxor	mm0,mm1
	movd	mm2,DWORD [edi*4+esp]	; window 7 ...
	mov	edi,ebp
	psllq	mm2,21			; ... at bit 21
	and	edi,ebx			; edi = window 9
	shr	ebx,3
	pxor	mm0,mm2
	movd	mm1,DWORD [esi*4+esp]	; window 8 ...
	mov	esi,ebp
	psllq	mm1,24			; ... at bit 24
	and	esi,ebx			; esi = window 10 (only bits 30,31 of b remain)
	shr	ebx,3
	pxor	mm0,mm1
	movd	mm2,DWORD [edi*4+esp]	; window 9 ...
	pxor	mm0,mm4			; fold in the bit-30-of-a term
	psllq	mm2,27			; ... at bit 27
	pxor	mm0,mm2
	movd	mm1,DWORD [esi*4+esp]	; window 10 ...
	pxor	mm0,mm5			; fold in the bit-31-of-a term
	psllq	mm1,30			; ... at bit 30
	add	esp,36			; release the table (last read is done)
	pxor	mm0,mm1
	ret
;-----------------------------------------------------------------------
; __mul_1x1_ialu -- 32 x 32 -> 64-bit carry-less (XOR-accumulated)
;                   multiplication using integer registers only.
;
; Internal helper with a register-based convention (not a C ABI):
;   In:    eax = a, ebx = b
;   Out:   edx:eax = a * b as a 64-bit carry-less product
;          (eax = low 32 bits, edx = high 32 bits)
;   Clobb: ebx, ecx, ebp, esi, edi, flags
;   Stack: reserves 36 bytes; dwords at [esp+0] .. [esp+28] hold
;          tab[i] = carry-less (a & 0x3fffffff) * i, for i = 0..7
;
; Method: bits 30 and 31 of a are masked out of the table so every entry
; fits in 32 bits; their contributions (b << 30, b << 31) seed the
; edx:eax accumulator.  b is then consumed in eleven 3-bit windows
; (shift 0, 3, ..., 30).  For a window at bit k the table entry t adds
; (t << k) to eax and (t >> (32-k)) to edx.  The code is straight-line
; with no branches.  The caller must save any clobbered registers it needs.
;-----------------------------------------------------------------------
align	16
__mul_1x1_ialu:
	sub	esp,36
	mov	ecx,eax
	lea	edx,[eax*1+eax]		; edx = a << 1
	lea	ebp,[eax*4]		; ebp = a4 = a << 2 (bits 30,31 fall off)
	and	ecx,0x3fffffff		; ecx = a1 = a with bits 30,31 cleared
	lea	edi,[eax*1+eax]		; edi = a << 1: bit 30 of a in the sign position
	sar	eax,31			; eax = all-ones iff bit 31 of a is set
	mov	DWORD [esp],0		; tab[0] = 0
	and	edx,0x7fffffff		; edx = a2 = a1 << 1
	mov	DWORD [4+esp],ecx	; tab[1] = a1
	xor	ecx,edx			; ecx = a1^a2
	mov	DWORD [8+esp],edx	; tab[2] = a2
	xor	edx,ebp			; edx = a2^a4
	mov	DWORD [12+esp],ecx	; tab[3] = a1^a2
	xor	ecx,edx			; ecx = a1^a4
	mov	DWORD [16+esp],ebp	; tab[4] = a4
	xor	ebp,edx			; ebp = a2
	mov	DWORD [20+esp],ecx	; tab[5] = a1^a4
	xor	ebp,ecx			; ebp = a1^a2^a4
	sar	edi,31			; edi = all-ones iff bit 30 of a is set
	and	eax,ebx			; eax = bit31(a) ? b : 0
	mov	DWORD [24+esp],edx	; tab[6] = a2^a4
	and	edi,ebx			; edi = bit30(a) ? b : 0
	mov	DWORD [28+esp],ebp	; tab[7] = a1^a2^a4
	mov	edx,eax
	shl	eax,31			; eax = low  half of (bit-31 term) << 31
	mov	ecx,edi
	shr	edx,1			; edx = high half of (bit-31 term) << 31
	mov	esi,7
	shl	edi,30			; edi = low  half of (bit-30 term) << 30
	and	esi,ebx			; esi = window 0 of b
	shr	ecx,2			; ecx = high half of (bit-30 term) << 30
	xor	eax,edi
	shr	ebx,3
	mov	edi,7
	and	edi,ebx			; edi = window 1
	shr	ebx,3
	xor	edx,ecx
	xor	eax,DWORD [esi*4+esp]	; window 0 at bit 0: low half only
	mov	esi,7
	and	esi,ebx			; esi = window 2
	shr	ebx,3
	mov	ebp,DWORD [edi*4+esp]	; window 1 at bit 3
	mov	edi,7
	mov	ecx,ebp
	shl	ebp,3
	and	edi,ebx			; edi = window 3
	shr	ecx,29
	xor	eax,ebp
	shr	ebx,3
	xor	edx,ecx
	mov	ecx,DWORD [esi*4+esp]	; window 2 at bit 6
	mov	esi,7
	mov	ebp,ecx
	shl	ecx,6
	and	esi,ebx			; esi = window 4
	shr	ebp,26
	xor	eax,ecx
	shr	ebx,3
	xor	edx,ebp
	mov	ebp,DWORD [edi*4+esp]	; window 3 at bit 9
	mov	edi,7
	mov	ecx,ebp
	shl	ebp,9
	and	edi,ebx			; edi = window 5
	shr	ecx,23
	xor	eax,ebp
	shr	ebx,3
	xor	edx,ecx
	mov	ecx,DWORD [esi*4+esp]	; window 4 at bit 12
	mov	esi,7
	mov	ebp,ecx
	shl	ecx,12
	and	esi,ebx			; esi = window 6
	shr	ebp,20
	xor	eax,ecx
	shr	ebx,3
	xor	edx,ebp
	mov	ebp,DWORD [edi*4+esp]	; window 5 at bit 15
	mov	edi,7
	mov	ecx,ebp
	shl	ebp,15
	and	edi,ebx			; edi = window 7
	shr	ecx,17
	xor	eax,ebp
	shr	ebx,3
	xor	edx,ecx
	mov	ecx,DWORD [esi*4+esp]	; window 6 at bit 18
	mov	esi,7
	mov	ebp,ecx
	shl	ecx,18
	and	esi,ebx			; esi = window 8
	shr	ebp,14
	xor	eax,ecx
	shr	ebx,3
	xor	edx,ebp
	mov	ebp,DWORD [edi*4+esp]	; window 7 at bit 21
	mov	edi,7
	mov	ecx,ebp
	shl	ebp,21
	and	edi,ebx			; edi = window 9
	shr	ecx,11
	xor	eax,ebp
	shr	ebx,3
	xor	edx,ecx
	mov	ecx,DWORD [esi*4+esp]	; window 8 at bit 24
	mov	esi,7
	mov	ebp,ecx
	shl	ecx,24
	and	esi,ebx			; esi = window 10 (only bits 30,31 of b remain)
	shr	ebp,8
	xor	eax,ecx
	shr	ebx,3
	xor	edx,ebp
	mov	ebp,DWORD [edi*4+esp]	; window 9 at bit 27
	mov	ecx,ebp
	shl	ebp,27
	mov	edi,DWORD [esi*4+esp]	; window 10 at bit 30
	shr	ecx,5
	mov	esi,edi
	xor	eax,ebp
	shl	edi,30
	xor	edx,ecx
	shr	esi,2
	xor	eax,edi
	xor	edx,esi
	add	esp,36			; release the table
	ret
;-----------------------------------------------------------------------
; _bn_GF2m_mul_2x2 -- 64 x 64 -> 128-bit carry-less multiplication.
;
; Stack-argument entry point; returns with a plain `ret`, so the caller
; removes the arguments.  On entry:
;   [esp+4]  = r   pointer to 16 bytes that receive the product
;   [esp+8]  = a1  high 32 bits of the first operand
;   [esp+12] = a0  low  32 bits of the first operand
;   [esp+16] = b1  high 32 bits of the second operand
;   [esp+20] = b0  low  32 bits of the second operand
; NOTE(review): presumably the C prototype is
;   void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
;                        BN_ULONG b1, BN_ULONG b0);
; -- confirm against the C caller.
;
; Out:   r[0..3] = (a1:a0) * (b1:b0), least-significant dword first.
; Saves: ebp, ebx, esi, edi are pushed/popped on the MMX and integer
;        paths; the first path touches only eax, edx and xmm0.
; Clobb: eax, ecx, edx, flags; mm0-mm7 on the MMX path (emms is executed
;        before returning); xmm0 on the first path.
;
; Dispatch on two dwords read from _OPENSSL_ia32cap_P:
;   dword 0 bit 23 clear                      -> L$000ialu (integer only)
;   dword 0 bit 24 clear or dword 1 bit 1 clear -> L$001mmx
;   otherwise                                 -> single-instruction path
; NOTE(review): these bits presumably mirror the CPUID MMX / FXSR /
; PCLMULQDQ feature flags -- confirm with the OPENSSL_ia32cap docs.
;
; The MMX and integer paths use three 32x32 products (Karatsuba):
;   hi  = a1*b1,  lo = a0*b0,  mid = (a0^a1)*(b0^b1) ^ hi ^ lo
;   result = hi << 64  ^  mid << 32  ^  lo
;-----------------------------------------------------------------------
global	_bn_GF2m_mul_2x2
align	16
_bn_GF2m_mul_2x2:
L$_bn_GF2m_mul_2x2_begin:
	lea	edx,[_OPENSSL_ia32cap_P]
	mov	eax,DWORD [edx]		; eax = capability dword 0
	mov	edx,DWORD [4+edx]	; edx = capability dword 1
	test	eax,1<<23
	jz	NEAR L$000ialu
	test	eax,1<<24
	jz	NEAR L$001mmx
	test	edx,1<<1
	jz	NEAR L$001mmx
	; Single-instruction path: no registers need saving.
	movups	xmm0,[8+esp]		; xmm0 dwords = { a1, a0, b1, b0 }
	shufps	xmm0,xmm0,0xb1		; swap within each pair -> { a0, a1, b0, b1 }
					; i.e. low qword = a1:a0, high qword = b1:b0
db	0x66,0x0f,0x3a,0x44,0xc0,0x01	; pclmulqdq xmm0,xmm0,1 (hand-encoded):
					; xmm0 = high qword * low qword
	mov	eax,DWORD [4+esp]	; eax = r
	movups	[eax],xmm0		; store the 128-bit product
	ret
align	16
L$001mmx:
	push	ebp
	push	ebx
	push	esi
	push	edi
	; Four pushes: r is now at [esp+20], a1/a0 at [esp+24]/[esp+28],
	; b1/b0 at [esp+32]/[esp+36].
	mov	eax,DWORD [24+esp]
	mov	ebx,DWORD [32+esp]
	call	__mul_1x1_mmx		; mm0 = a1*b1
	movq	mm7,mm0			; mm7 = hi
	mov	eax,DWORD [28+esp]
	mov	ebx,DWORD [36+esp]
	call	__mul_1x1_mmx		; mm0 = a0*b0
	movq	mm6,mm0			; mm6 = lo
	mov	eax,DWORD [24+esp]
	mov	ebx,DWORD [32+esp]
	xor	eax,DWORD [28+esp]	; eax = a1^a0
	xor	ebx,DWORD [36+esp]	; ebx = b1^b0
	call	__mul_1x1_mmx		; mm0 = (a0^a1)*(b0^b1)
	pxor	mm0,mm7
	mov	eax,DWORD [20+esp]	; eax = r (read before the pops move esp)
	pxor	mm0,mm6			; mm0 = mid
	movq	mm2,mm0
	psllq	mm0,32			; low  64 bits get mid << 32
	pop	edi
	psrlq	mm2,32			; high 64 bits get mid >> 32
	pop	esi
	pxor	mm0,mm6			; low  half = lo ^ (mid << 32)
	pop	ebx
	pxor	mm2,mm7			; high half = hi ^ (mid >> 32)
	movq	[eax],mm0
	pop	ebp
	movq	[8+eax],mm2
	emms				; leave MMX state so x87 code can run
	ret
align	16
L$000ialu:
	push	ebp
	push	ebx
	push	esi
	push	edi
	sub	esp,20
	; Four pushes + 20 bytes of locals: r is now at [esp+40],
	; a1/a0 at [esp+44]/[esp+48], b1/b0 at [esp+52]/[esp+56].
	; Locals: [esp+0]/[esp+4] = lo, [esp+8]/[esp+12] = hi.
	mov	eax,DWORD [44+esp]
	mov	ebx,DWORD [52+esp]
	call	__mul_1x1_ialu		; edx:eax = a1*b1
	mov	DWORD [8+esp],eax	; hi.low
	mov	DWORD [12+esp],edx	; hi.high
	mov	eax,DWORD [48+esp]
	mov	ebx,DWORD [56+esp]
	call	__mul_1x1_ialu		; edx:eax = a0*b0
	mov	DWORD [esp],eax		; lo.low
	mov	DWORD [4+esp],edx	; lo.high
	mov	eax,DWORD [44+esp]
	mov	ebx,DWORD [52+esp]
	xor	eax,DWORD [48+esp]	; eax = a1^a0
	xor	ebx,DWORD [56+esp]	; ebx = b1^b0
	call	__mul_1x1_ialu		; edx:eax = m = (a0^a1)*(b0^b1)
	mov	ebp,DWORD [40+esp]	; ebp = r
	mov	ebx,DWORD [esp]		; ebx = lo.low
	mov	ecx,DWORD [4+esp]	; ecx = lo.high
	mov	edi,DWORD [8+esp]	; edi = hi.low
	mov	esi,DWORD [12+esp]	; esi = hi.high
	; Recombine; the net effect of the XOR chain below is
	;   r[0] = lo.low
	;   r[1] = m.low  ^ lo.low  ^ lo.high ^ hi.low
	;   r[2] = m.high ^ lo.high ^ hi.low  ^ hi.high
	;   r[3] = hi.high
	xor	eax,edx
	xor	edx,ecx
	xor	eax,ebx
	mov	DWORD [ebp],ebx		; r[0]
	xor	edx,edi
	mov	DWORD [12+ebp],esi	; r[3]
	xor	eax,esi
	add	esp,20
	xor	edx,esi
	pop	edi
	xor	eax,edx			; m.high and hi.high cancel out here
	pop	esi
	mov	DWORD [8+ebp],edx	; r[2]
	pop	ebx
	mov	DWORD [4+ebp],eax	; r[1]
	pop	ebp
	ret
; NUL-terminated ASCII identification string placed after the code:
;   "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
db	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
db	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
db	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
db	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
db	62,0
; _OPENSSL_ia32cap_P: 16-byte common symbol.  _bn_GF2m_mul_2x2 reads its
; first two dwords to choose a code path; declaring it `common` lets the
; linker merge it with a definition of the same name in another object.
segment	.bss
common	_OPENSSL_ia32cap_P 16