• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env perl

# Locate the perlasm helpers relative to this script: $dir is the directory
# prefix of $0 (including the trailing separator), or empty when $0 carries
# no directory component.  The conditional avoids picking up a stale $1
# when the match fails.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# First command-line argument is handed to asm_init together with the
# module name.
&asm_init($ARGV[0],"x86cpuid");

# $sse2 enables emission of the SSE2 register wipe in OPENSSL_wipe_cpu.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid
#
# Emits a routine that probes the processor with CPUID and returns the
# (adjusted) edx word of CPUID leaf 1 in eax and the ecx word in edx.
# When EFLAGS bit 21 cannot be toggled the CPUID instruction is never
# executed and eax is returned as 0.
#
# Adjustments applied to the edx word:
#   - bit 20 is set on Intel parts whose family ID is 15 (see "P4?" below);
#   - bit 28 (hyper-threading) is cleared when the core/thread counts
#     gathered below indicate that it does not apply.
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	# Try to flip EFLAGS bit 21; if it sticks, CPUID is available.
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");
	&bt	("ecx",21);
	&jnc	(&label("done"));
	&xor	("eax","eax");
	&cpuid	();
	&mov	("edi","eax");		# max value for standard query level

	# Compare the vendor string against "GenuineIntel"; ebp accumulates
	# the mismatches.
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	# Same test against "AuthenticAMD"; esi accumulates the mismatches.
	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));	# neither vendor: take the generic path

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000008);
	&jb	(&label("intel"));	# extended leaf 0x80000008 not available

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("done"));
	&shr	("ebx",16);
	&and	("ebx",0xff);		# ebx = bits 23:16 of leaf-1 ebx
	&cmp	("ebx","esi");
	&ja	(&label("done"));	# count exceeds number of cores: keep bit 28
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("done"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("edi",-1);		# mov leaves the flags from cmp intact
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("edi","eax");
	&shr	("edi",14);
	&and	("edi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&cpuid	();
	&cmp	("ebp",0);
	&jne	(&label("notP4"));	# not an Intel CPU
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notP4"));
	&or	("edx",1<<20);		# use reserved bit to engage RC4_CHAR
&set_label("notP4");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("done"));
	&and	("edx",0xefffffff);
	&cmp	("edi",0);
	&je	(&label("done"));	# edi==0: leave the bit cleared

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("done"));	# bits 23:16 of ebx above 1: keep the bit set
	&and	("edx",0xefffffff);	# clear hyper-threading bit otherwise
&set_label("done");
	&mov	("eax","edx");
	&mov	("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");

# The capability word is defined outside this module.
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc
#
# Returns the time-stamp counter in edx:eax when bit 4 of the word at
# OPENSSL_ia32cap_P is set; otherwise returns 0 in both registers without
# executing RDTSC.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");	# ecx = address of the word
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# Returns in edx:eax the difference between two RDTSC readings taken
# around a HLT instruction, or 0 when any precondition fails: no TSC
# (bit 4 of OPENSSL_ia32cap_P clear), low two bits of %cs non-zero, or
# EFLAGS bit 9 clear.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");		# statement terminator was missing here
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# save the first reading on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit subtract: second - first
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);		# drop the saved reading
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# Arguments (on the stack): a segment selector and an offset.  The routine
# loads the selector into %ds, then counts in eax how many times it reads
# the 32-bit word at that offset before the value changes.  Returns 0 in
# edx:eax when EFLAGS bit 9 is clear.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");		# statement terminator was missing here
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));	# 1st arg: selector
	&mov	("ecx",&DWP(8,"esp"));	# 2nd arg: offset
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));	# reference value to compare against
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop	%ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu
#
# Zeroes eax and edx, optionally the xmm0-xmm7 registers (only emitted when
# the script was invoked with -DOPENSSL_IA32_SSE2, and only executed when
# capability bit 26 is set), and the x87 register bank.  Returns in eax the
# address of the caller's first stack argument slot (esp+4 at entry).
#
# The capability word is loaded into ecx once and the bit tests are made
# against that register.  Testing &DWP(0,"ecx") after the load would treat
# the capability value as a pointer.
#
# NOTE(review): the x87 guard tests bit 1 of the capability word; confirm
# that this is the intended bit index.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# ecx = capability word (no longer a pointer)
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&bt	("ecx",26);
		&jnc	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add
#
# Arguments (on the stack): pointer to a 32-bit value, increment.
# Adds the increment to the pointed-to value with a lock cmpxchg retry
# loop and returns the new value in eax.  ebx is saved and restored.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));	# current value, the cmpxchg comparand
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));	# ebx = eax + ecx, the candidate new value
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));	# value changed underneath us: retry
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# This function can become handy under Win32 in situations when
# we don't know which calling convention, __stdcall or __cdecl(*),
# indirect callee is using. In C it can be deployed as
#
#ifdef OPENSSL_CPUID_OBJ
#	type OPENSSL_indirect_call(void *f,...);
#	...
#	OPENSSL_indirect_call(func,[up to $max arguments]);
#endif
#
# (*)	it's designed to work even for __fastcall if number of
#	arguments is 1 or 2!
#
# The emitted code copies $max argument slots from the caller's frame to a
# fresh stack area (the first two also stay in ecx and edx), calls through
# the pointer passed as first argument, and restores esp from ebp so that
# it does not matter whether the callee popped its arguments.
&function_begin_B("OPENSSL_indirect_call");
	{
	# Both variables are lexical; the former "my $i,$max=7;" only
	# declared $i and assigned a package-global $max.
	my $max=7;		# $max has to be chosen as 4*n-1
				# in order to preserve eventual
				# stack alignment
	&push	("ebp");
	&mov	("ebp","esp");
	&sub	("esp",$max*4);
	&mov	("ecx",&DWP(12,"ebp"));
	&mov	(&DWP(0,"esp"),"ecx");
	&mov	("edx",&DWP(16,"ebp"));
	&mov	(&DWP(4,"esp"),"edx");
	for(my $i=2;$i<$max;$i++)
		{
		# Some copies will be redundant/bogus...
		&mov	("eax",&DWP(12+$i*4,"ebp"));
		&mov	(&DWP(0+$i*4,"esp"),"eax");
		}
	&call_ptr	(&DWP(8,"ebp"));# make the call...
	&mov	("esp","ebp");	# ... and just restore the stack pointer
				# without paying attention to what we called,
				# (__cdecl *func) or (__stdcall *one).
	&pop	("ebp");
	&ret	();
	}
&function_end_B("OPENSSL_indirect_call");

# OPENSSL_cleanse
#
# Arguments: wparam(0) = pointer, wparam(1) = byte count.
# Stores zeros over the given range.  Counts below 7 are handled one byte
# at a time; larger counts are first brought to a 4-byte aligned address
# byte by byte, then cleared a dword at a time, with any remainder handed
# back to the byte loop.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");		# the fill value
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));	# nothing to do
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags set by sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");	# one byte at a time until edx is aligned
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# non-zero while at least 4 bytes remain
	&lea	("edx",&DWP(4,"edx"));	# lea keeps the flags set by test
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));	# 1..3 trailing bytes
	&ret	();
&function_end_B("OPENSSL_cleanse");

# Register OPENSSL_cpuid_setup through the perlasm initseg helper, then
# let perlasm finish the output.
&initseg("OPENSSL_cpuid_setup");

&asm_finish();