1 /*-
2 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/types.h>
31 #include "npx.h"
32 #include "fenv.h"
33
/*
 * Bit-position differences between the x87 control word and the SSE
 * control word: relative to the x87 layout, the SSE rounding-control
 * bits sit 3 bits higher and the exception-mask bits sit 7 bits higher.
 * Shift an x87-style value left by these amounts to address the
 * corresponding SSE bits.
 */
#define _SSE_ROUND_SHIFT 3
#define _SSE_EMASK_SHIFT 7
41
42 const fenv_t __fe_dfl_env = {
43 __INITIAL_NPXCW__, /*__control*/
44 0x0000, /*__mxcsr_hi*/
45 0x0000, /*__status*/
46 0x1f80, /*__mxcsr_lo*/
47 0xffffffff, /*__tag*/
48 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
49 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/
50 };
51
/*
 * One-instruction wrappers around the x87 and SSE control/status
 * instructions.
 *
 * The "load" macros (__fldcw, __fldenv, __fldenvx, __ldmxcsr) take the
 * object itself and pass it as a memory operand, so the argument must be
 * an lvalue.  The "store" macros (__fnstenv, __fnstcw, __fnstsw,
 * __stmxcsr) take a pointer and write through it.
 */
/* Load the x87 control word from __cw. */
#define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw))
/* Load the x87 environment from __env. */
#define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env))
/*
 * Same instruction as __fldenv, but all eight x87 stack registers are
 * additionally declared clobbered.
 */
#define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \
	: "st", "st(1)", "st(2)", "st(3)", "st(4)", \
	"st(5)", "st(6)", "st(7)")
/* Clear the x87 exception flags. */
#define __fnclex() __asm __volatile("fnclex")
/* Store the x87 environment into *__env. */
#define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env)))
/* Store the x87 control word into *__cw. */
#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
/* Store the x87 status word into *__sw (via %ax or directly to memory). */
#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
/* Issue an fwait instruction. */
#define __fwait() __asm __volatile("fwait")
/* Load MXCSR from __csr. */
#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr))
/* Store MXCSR into *__csr. */
#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
64
/*
 * SSE availability, as cached in __has_sse.  __SSE_UNK means the
 * processor has not been probed yet.
 */
enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };

#ifdef __SSE__
/* Built with SSE enabled: the answer is known at compile time. */
#define __HAS_SSE() 1
enum __sse_support __has_sse = __SSE_YES;
#else
/*
 * Otherwise consult the cached answer, and run __test_sse() the first
 * time (while the cache still reads __SSE_UNK).
 */
#define __HAS_SSE() (__has_sse == __SSE_YES || \
	(__has_sse == __SSE_UNK && __test_sse()))
enum __sse_support __has_sse = __SSE_UNK;
#endif
80
81 #ifndef __SSE__
/* Read EFLAGS into *x: push the flags, then pop them into the operand. */
#define getfl(x) __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x)))
/* Write x to EFLAGS: push the operand, then pop it into the flags. */
#define setfl(x) __asm __volatile("pushl %0\n\tpopfl" : : "g" (x))
/*
 * Execute CPUID with %eax = 1 and store the resulting %edx in *x.
 * %ebx is saved and restored around the instruction; %eax and %ecx are
 * declared clobbered.
 */
#define cpuid_dx(x) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t" \
	"cpuid\n\tpopl %%ebx" \
	: "=d" (*(x)) : : "eax", "ecx")
87
88 /*
89 * Test for SSE support on this processor. We need to do this because
90 * we need to use ldmxcsr/stmxcsr to get correct results if any part
91 * of the program was compiled to use SSE floating-point, but we can't
92 * use SSE on older processors.
93 */
94 int
__test_sse(void)95 __test_sse(void)
96 {
97 int flag, nflag;
98 int dx_features;
99
100 /* Am I a 486? */
101 getfl(&flag);
102 nflag = flag ^ 0x200000;
103 setfl(nflag);
104 getfl(&nflag);
105 if (flag != nflag) {
106 /* Not a 486, so CPUID should work. */
107 cpuid_dx(&dx_features);
108 if (dx_features & 0x2000000) {
109 __has_sse = __SSE_YES;
110 return (1);
111 }
112 }
113 __has_sse = __SSE_NO;
114 return (0);
115 }
116 #endif /* __SSE__ */
117
118 int
fesetexceptflag(const fexcept_t * flagp,int excepts)119 fesetexceptflag(const fexcept_t *flagp, int excepts)
120 {
121 fenv_t env;
122 __uint32_t mxcsr;
123
124 excepts &= FE_ALL_EXCEPT;
125 if (excepts) { /* Do nothing if excepts is 0 */
126 __fnstenv(&env);
127 env.__status &= ~excepts;
128 env.__status |= *flagp & excepts;
129 __fnclex();
130 __fldenv(env);
131 if (__HAS_SSE()) {
132 __stmxcsr(&mxcsr);
133 mxcsr &= ~excepts;
134 mxcsr |= *flagp & excepts;
135 __ldmxcsr(mxcsr);
136 }
137 }
138
139 return (0);
140 }
141
142 int
feraiseexcept(int excepts)143 feraiseexcept(int excepts)
144 {
145 fexcept_t ex = excepts;
146
147 fesetexceptflag(&ex, excepts);
148 __fwait();
149 return (0);
150 }
151
152 int
fegetenv(fenv_t * envp)153 fegetenv(fenv_t *envp)
154 {
155 __uint32_t mxcsr;
156
157 __fnstenv(envp);
158 /*
159 * fnstenv masks all exceptions, so we need to restore
160 * the old control word to avoid this side effect.
161 */
162 __fldcw(envp->__control);
163 if (__HAS_SSE()) {
164 __stmxcsr(&mxcsr);
165 envp->__mxcsr_hi = mxcsr >> 16;
166 envp->__mxcsr_lo = mxcsr & 0xffff;
167 }
168 return (0);
169 }
170
171 int
feholdexcept(fenv_t * envp)172 feholdexcept(fenv_t *envp)
173 {
174 __uint32_t mxcsr;
175 fenv_t env;
176
177 __fnstenv(&env);
178 *envp = env;
179 env.__status &= ~FE_ALL_EXCEPT;
180 env.__control |= FE_ALL_EXCEPT;
181 __fnclex();
182 __fldenv(env);
183 if (__HAS_SSE()) {
184 __stmxcsr(&mxcsr);
185 envp->__mxcsr_hi = mxcsr >> 16;
186 envp->__mxcsr_lo = mxcsr & 0xffff;
187 mxcsr &= ~FE_ALL_EXCEPT;
188 mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
189 __ldmxcsr(mxcsr);
190 }
191 return (0);
192 }
193
194 int
feupdateenv(const fenv_t * envp)195 feupdateenv(const fenv_t *envp)
196 {
197 __uint32_t mxcsr;
198 __uint16_t status;
199
200 __fnstsw(&status);
201 if (__HAS_SSE()) {
202 __stmxcsr(&mxcsr);
203 } else {
204 mxcsr = 0;
205 }
206 fesetenv(envp);
207 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
208 return (0);
209 }
210
211 int
feenableexcept(int mask)212 feenableexcept(int mask)
213 {
214 __uint32_t mxcsr;
215 __uint16_t control, omask;
216
217 mask &= FE_ALL_EXCEPT;
218 __fnstcw(&control);
219 if (__HAS_SSE()) {
220 __stmxcsr(&mxcsr);
221 } else {
222 mxcsr = 0;
223 }
224 omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
225 if (mask) {
226 control &= ~mask;
227 __fldcw(control);
228 if (__HAS_SSE()) {
229 mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
230 __ldmxcsr(mxcsr);
231 }
232 }
233 return (omask);
234 }
235
236 int
fedisableexcept(int mask)237 fedisableexcept(int mask)
238 {
239 __uint32_t mxcsr;
240 __uint16_t control, omask;
241
242 mask &= FE_ALL_EXCEPT;
243 __fnstcw(&control);
244 if (__HAS_SSE()) {
245 __stmxcsr(&mxcsr);
246 } else {
247 mxcsr = 0;
248 }
249 omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
250 if (mask) {
251 control |= mask;
252 __fldcw(control);
253 if (__HAS_SSE()) {
254 mxcsr |= mask << _SSE_EMASK_SHIFT;
255 __ldmxcsr(mxcsr);
256 }
257 }
258 return (omask);
259 }
260
261 int
feclearexcept(int excepts)262 feclearexcept(int excepts)
263 {
264 fenv_t env;
265 __uint32_t mxcsr;
266
267 excepts &= FE_ALL_EXCEPT;
268 if (excepts) { /* Do nothing if excepts is 0 */
269 __fnstenv(&env);
270 env.__status &= ~excepts;
271 __fnclex();
272 __fldenv(env);
273 if (__HAS_SSE()) {
274 __stmxcsr(&mxcsr);
275 mxcsr &= ~excepts;
276 __ldmxcsr(mxcsr);
277 }
278 }
279 return (0);
280 }
281
282 int
fegetexceptflag(fexcept_t * flagp,int excepts)283 fegetexceptflag(fexcept_t *flagp, int excepts)
284 {
285 __uint32_t mxcsr;
286 __uint16_t status;
287
288 excepts &= FE_ALL_EXCEPT;
289 __fnstsw(&status);
290 if (__HAS_SSE()) {
291 __stmxcsr(&mxcsr);
292 } else {
293 mxcsr = 0;
294 }
295 *flagp = (status | mxcsr) & excepts;
296 return (0);
297 }
298
299 int
fetestexcept(int excepts)300 fetestexcept(int excepts)
301 {
302 __uint32_t mxcsr;
303 __uint16_t status;
304
305 excepts &= FE_ALL_EXCEPT;
306 if (excepts) { /* Do nothing if excepts is 0 */
307 __fnstsw(&status);
308 if (__HAS_SSE()) {
309 __stmxcsr(&mxcsr);
310 } else {
311 mxcsr = 0;
312 }
313 return ((status | mxcsr) & excepts);
314 }
315 return (0);
316 }
317
318 int
fegetround(void)319 fegetround(void)
320 {
321 __uint16_t control;
322
323 /*
324 * We assume that the x87 and the SSE unit agree on the
325 * rounding mode. Reading the control word on the x87 turns
326 * out to be about 5 times faster than reading it on the SSE
327 * unit on an Opteron 244.
328 */
329 __fnstcw(&control);
330 return (control & _ROUND_MASK);
331 }
332
333 int
fesetround(int round)334 fesetround(int round)
335 {
336 __uint32_t mxcsr;
337 __uint16_t control;
338
339 if (round & ~_ROUND_MASK) {
340 return (-1);
341 } else {
342 __fnstcw(&control);
343 control &= ~_ROUND_MASK;
344 control |= round;
345 __fldcw(control);
346 if (__HAS_SSE()) {
347 __stmxcsr(&mxcsr);
348 mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
349 mxcsr |= round << _SSE_ROUND_SHIFT;
350 __ldmxcsr(mxcsr);
351 }
352 return (0);
353 }
354 }
355
356 int
fesetenv(const fenv_t * envp)357 fesetenv(const fenv_t *envp)
358 {
359 fenv_t env = *envp;
360 __uint32_t mxcsr;
361
362 mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo);
363 env.__mxcsr_hi = 0xffff;
364 env.__mxcsr_lo = 0xffff;
365 /*
366 * XXX Using fldenvx() instead of fldenv() tells the compiler that this
367 * instruction clobbers the i387 register stack. This happens because
368 * we restore the tag word from the saved environment. Normally, this
369 * would happen anyway and we wouldn't care, because the ABI allows
370 * function calls to clobber the i387 regs. However, fesetenv() is
371 * inlined, so we need to be more careful.
372 */
373 __fldenvx(env);
374 if (__HAS_SSE()) {
375 __ldmxcsr(mxcsr);
376 }
377 return (0);
378 }
379
380 int
fegetexcept(void)381 fegetexcept(void)
382 {
383 __uint16_t control;
384
385 /*
386 * We assume that the masks for the x87 and the SSE unit are
387 * the same.
388 */
389 __fnstcw(&control);
390 return (~control & FE_ALL_EXCEPT);
391 }
392