• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-
2  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/types.h>
31 #include "npx.h"
32 #include "fenv.h"
33 
/*
 * Bit-position differences between the x87 control word and the SSE
 * control word: in the SSE word the rounding control bits sit 3
 * positions higher and the exception mask bits sit 7 positions higher.
 */
#define	_SSE_ROUND_SHIFT	3	/* x87 -> SSE rounding control offset */
#define	_SSE_EMASK_SHIFT	7	/* x87 -> SSE exception mask offset */
41 
42 const fenv_t __fe_dfl_env = {
43 	__INITIAL_NPXCW__, /*__control*/
44 	0x0000,            /*__mxcsr_hi*/
45 	0x0000,            /*__status*/
46 	0x1f80,            /*__mxcsr_lo*/
47 	0xffffffff,        /*__tag*/
48 	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
49 	  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/
50 };
51 
/*
 * One-instruction wrappers for the x87 and SSE control/status state.
 * The "load" wrappers take the object itself; the "store" wrappers take
 * a pointer to the destination.
 */

/* x87: load the control word / the whole environment. */
#define	__fldcw(__cw)		__asm __volatile("fldcw %0" : : "m" (__cw))
#define	__fldenv(__env)		__asm __volatile("fldenv %0" : : "m" (__env))
/* Same instruction as __fldenv(), with st..st(7) listed as clobbered. */
#define	__fldenvx(__env)	__asm __volatile("fldenv %0" : : "m" (__env)  \
				: "st", "st(1)", "st(2)", "st(3)", "st(4)",   \
				"st(5)", "st(6)", "st(7)")

/* x87: clear exceptions, store environment / control word / status word. */
#define	__fnclex()		__asm __volatile("fnclex")
#define	__fnstenv(__env)	__asm __volatile("fnstenv %0" : "=m" (*(__env)))
#define	__fnstcw(__cw)		__asm __volatile("fnstcw %0" : "=m" (*(__cw)))
#define	__fnstsw(__sw)		__asm __volatile("fnstsw %0" : "=am" (*(__sw)))
#define	__fwait()		__asm __volatile("fwait")

/* SSE: load / store the MXCSR register. */
#define	__ldmxcsr(__csr)	__asm __volatile("ldmxcsr %0" : : "m" (__csr))
#define	__stmxcsr(__csr)	__asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
64 
/*
 * SSE availability.  When the compiler already targets SSE (__SSE__ is
 * defined) the answer is a compile-time constant.  Otherwise the first
 * __HAS_SSE() evaluation calls __test_sse() and later evaluations reuse
 * the result cached in __has_sse.
 */
enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };

#ifdef __SSE__
#define	__HAS_SSE()	1
enum __sse_support __has_sse = __SSE_YES;
#else
#define	__HAS_SSE()	(__has_sse == __SSE_YES ||			\
			 (__has_sse == __SSE_UNK && __test_sse()))
enum __sse_support __has_sse = __SSE_UNK;
#endif
80 
#ifndef __SSE__
/* Read / write the EFLAGS register through the stack. */
#define	getfl(x)	__asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x)))
#define	setfl(x)	__asm __volatile("pushl %0\n\tpopfl" : : "g" (x))
/* Run CPUID leaf 1 and store EDX in *x; EBX is saved and restored. */
#define	cpuid_dx(x)	__asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t"  \
					 "cpuid\n\tpopl %%ebx"		      \
					: "=d" (*(x)) : : "eax", "ecx")

/*
 * Probe the running processor for SSE and record the answer in
 * __has_sse.
 *
 * ldmxcsr/stmxcsr must be used to get correct results whenever any part
 * of the program was compiled for SSE floating-point, but those
 * instructions cannot be executed on older processors, hence the
 * run-time check.
 *
 * Returns 1 when SSE is reported, 0 otherwise.
 */
int
__test_sse(void)
{
	enum {
		EFLAGS_TOGGLE_BIT = 0x200000,	/* flipped to detect a 486 */
		CPUID_DX_SSE_BIT = 0x2000000	/* tested in the CPUID result */
	};
	int before, after;
	int dx_features;
	int found = 0;

	/*
	 * Am I a 486?  Flip the bit, write the flags back and read them
	 * again; if the value read differs from the original, the bit is
	 * writable and this is not a 486.
	 */
	getfl(&before);
	after = before ^ EFLAGS_TOGGLE_BIT;
	setfl(after);
	getfl(&after);
	if (before != after) {
		/* Not a 486, so CPUID should work. */
		cpuid_dx(&dx_features);
		if (dx_features & CPUID_DX_SSE_BIT) {
			found = 1;
		}
	}

	__has_sse = found ? __SSE_YES : __SSE_NO;
	return (found);
}
#endif /* !__SSE__ */
117 
118 int
fesetexceptflag(const fexcept_t * flagp,int excepts)119 fesetexceptflag(const fexcept_t *flagp, int excepts)
120 {
121 	fenv_t env;
122 	__uint32_t mxcsr;
123 
124 	excepts &= FE_ALL_EXCEPT;
125 	if (excepts) { /* Do nothing if excepts is 0 */
126 		__fnstenv(&env);
127 		env.__status &= ~excepts;
128 		env.__status |= *flagp & excepts;
129 		__fnclex();
130 		__fldenv(env);
131 		if (__HAS_SSE()) {
132 			__stmxcsr(&mxcsr);
133 			mxcsr &= ~excepts;
134 			mxcsr |= *flagp & excepts;
135 			__ldmxcsr(mxcsr);
136 		}
137 	}
138 
139 	return (0);
140 }
141 
142 int
feraiseexcept(int excepts)143 feraiseexcept(int excepts)
144 {
145 	fexcept_t ex = excepts;
146 
147 	fesetexceptflag(&ex, excepts);
148 	__fwait();
149 	return (0);
150 }
151 
152 int
fegetenv(fenv_t * envp)153 fegetenv(fenv_t *envp)
154 {
155 	__uint32_t mxcsr;
156 
157 	__fnstenv(envp);
158 	/*
159 	 * fnstenv masks all exceptions, so we need to restore
160 	 * the old control word to avoid this side effect.
161 	 */
162 	__fldcw(envp->__control);
163 	if (__HAS_SSE()) {
164 		__stmxcsr(&mxcsr);
165 		envp->__mxcsr_hi = mxcsr >> 16;
166 		envp->__mxcsr_lo = mxcsr & 0xffff;
167 	}
168 	return (0);
169 }
170 
171 int
feholdexcept(fenv_t * envp)172 feholdexcept(fenv_t *envp)
173 {
174 	__uint32_t mxcsr;
175 	fenv_t env;
176 
177 	__fnstenv(&env);
178 	*envp = env;
179 	env.__status &= ~FE_ALL_EXCEPT;
180 	env.__control |= FE_ALL_EXCEPT;
181 	__fnclex();
182 	__fldenv(env);
183 	if (__HAS_SSE()) {
184 		__stmxcsr(&mxcsr);
185 		envp->__mxcsr_hi = mxcsr >> 16;
186 		envp->__mxcsr_lo = mxcsr & 0xffff;
187 		mxcsr &= ~FE_ALL_EXCEPT;
188 		mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
189 		__ldmxcsr(mxcsr);
190 	}
191 	return (0);
192 }
193 
194 int
feupdateenv(const fenv_t * envp)195 feupdateenv(const fenv_t *envp)
196 {
197 	__uint32_t mxcsr;
198 	__uint16_t status;
199 
200 	__fnstsw(&status);
201 	if (__HAS_SSE()) {
202 		__stmxcsr(&mxcsr);
203 	} else {
204 		mxcsr = 0;
205 	}
206 	fesetenv(envp);
207 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
208 	return (0);
209 }
210 
211 int
feenableexcept(int mask)212 feenableexcept(int mask)
213 {
214 	__uint32_t mxcsr;
215 	__uint16_t control, omask;
216 
217 	mask &= FE_ALL_EXCEPT;
218 	__fnstcw(&control);
219 	if (__HAS_SSE()) {
220 		__stmxcsr(&mxcsr);
221 	} else {
222 		mxcsr = 0;
223 	}
224 	omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
225 	if (mask) {
226 		control &= ~mask;
227 		__fldcw(control);
228 		if (__HAS_SSE()) {
229 			mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
230 			__ldmxcsr(mxcsr);
231 		}
232 	}
233 	return (omask);
234 }
235 
236 int
fedisableexcept(int mask)237 fedisableexcept(int mask)
238 {
239 	__uint32_t mxcsr;
240 	__uint16_t control, omask;
241 
242 	mask &= FE_ALL_EXCEPT;
243 	__fnstcw(&control);
244 	if (__HAS_SSE()) {
245 		__stmxcsr(&mxcsr);
246 	} else {
247 		mxcsr = 0;
248 	}
249 	omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
250 	if (mask) {
251 		control |= mask;
252 		__fldcw(control);
253 		if (__HAS_SSE()) {
254 			mxcsr |= mask << _SSE_EMASK_SHIFT;
255 			__ldmxcsr(mxcsr);
256 		}
257 	}
258 	return (omask);
259 }
260 
261 int
feclearexcept(int excepts)262 feclearexcept(int excepts)
263 {
264 	fenv_t env;
265 	__uint32_t mxcsr;
266 
267 	excepts &= FE_ALL_EXCEPT;
268 	if (excepts) { /* Do nothing if excepts is 0 */
269 		__fnstenv(&env);
270 		env.__status &= ~excepts;
271 		__fnclex();
272 		__fldenv(env);
273 		if (__HAS_SSE()) {
274 			__stmxcsr(&mxcsr);
275 			mxcsr &= ~excepts;
276 			__ldmxcsr(mxcsr);
277 		}
278 	}
279 	return (0);
280 }
281 
282 int
fegetexceptflag(fexcept_t * flagp,int excepts)283 fegetexceptflag(fexcept_t *flagp, int excepts)
284 {
285 	__uint32_t mxcsr;
286 	__uint16_t status;
287 
288 	excepts &= FE_ALL_EXCEPT;
289 	__fnstsw(&status);
290 	if (__HAS_SSE()) {
291 		__stmxcsr(&mxcsr);
292 	} else {
293 		mxcsr = 0;
294 	}
295 	*flagp = (status | mxcsr) & excepts;
296 	return (0);
297 }
298 
299 int
fetestexcept(int excepts)300 fetestexcept(int excepts)
301 {
302 	__uint32_t mxcsr;
303 	__uint16_t status;
304 
305 	excepts &= FE_ALL_EXCEPT;
306 	if (excepts) { /* Do nothing if excepts is 0 */
307 		__fnstsw(&status);
308 		if (__HAS_SSE()) {
309 			__stmxcsr(&mxcsr);
310 		} else {
311 			mxcsr = 0;
312 		}
313 		return ((status | mxcsr) & excepts);
314 	}
315 	return (0);
316 }
317 
318 int
fegetround(void)319 fegetround(void)
320 {
321 	__uint16_t control;
322 
323 	/*
324 	 * We assume that the x87 and the SSE unit agree on the
325 	 * rounding mode.  Reading the control word on the x87 turns
326 	 * out to be about 5 times faster than reading it on the SSE
327 	 * unit on an Opteron 244.
328 	 */
329 	__fnstcw(&control);
330 	return (control & _ROUND_MASK);
331 }
332 
333 int
fesetround(int round)334 fesetround(int round)
335 {
336 	__uint32_t mxcsr;
337 	__uint16_t control;
338 
339 	if (round & ~_ROUND_MASK) {
340 		return (-1);
341 	} else {
342 		__fnstcw(&control);
343 		control &= ~_ROUND_MASK;
344 		control |= round;
345 		__fldcw(control);
346 		if (__HAS_SSE()) {
347 			__stmxcsr(&mxcsr);
348 			mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
349 			mxcsr |= round << _SSE_ROUND_SHIFT;
350 			__ldmxcsr(mxcsr);
351 		}
352 		return (0);
353 	}
354 }
355 
356 int
fesetenv(const fenv_t * envp)357 fesetenv(const fenv_t *envp)
358 {
359 	fenv_t env = *envp;
360 	__uint32_t mxcsr;
361 
362 	mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo);
363 	env.__mxcsr_hi = 0xffff;
364 	env.__mxcsr_lo = 0xffff;
365 	/*
366 	 * XXX Using fldenvx() instead of fldenv() tells the compiler that this
367 	 * instruction clobbers the i387 register stack.  This happens because
368 	 * we restore the tag word from the saved environment.  Normally, this
369 	 * would happen anyway and we wouldn't care, because the ABI allows
370 	 * function calls to clobber the i387 regs.  However, fesetenv() is
371 	 * inlined, so we need to be more careful.
372 	 */
373 	__fldenvx(env);
374 	if (__HAS_SSE()) {
375 		__ldmxcsr(mxcsr);
376 	}
377 	return (0);
378 }
379 
380 int
fegetexcept(void)381 fegetexcept(void)
382 {
383 	__uint16_t control;
384 
385 	/*
386 	 * We assume that the masks for the x87 and the SSE unit are
387 	 * the same.
388 	 */
389 	__fnstcw(&control);
390 	return (~control & FE_ALL_EXCEPT);
391 }
392