• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
#include "rounding_mode.h"

#include <stdint.h>
17 
#if (defined(__arm__) || defined(__aarch64__))
// Bit masks applied to the floating-point control word. The same masks are
// used for the AArch32 FPSCR and the AArch64 FPCR accessed below.
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode field (two bits)

#define _ARM_FE_FTZ 0x1000000
#define _ARM_FE_NFTZ 0x0

// _FPU_GETCW(cw) reads the floating-point control register into cw;
// _FPU_SETCW(cw) writes cw back to it.
//
// The asm statements are volatile because the control register is state the
// optimizer cannot see: without it the compiler may merge a read with an
// earlier one or move it across a write. The write accepts a register
// operand only, since MSR/VMSR to FPCR/FPSCR has no immediate encoding.
#if defined(__aarch64__)
#define _FPU_GETCW(cw) __asm__ __volatile__("MRS %0,FPCR" : "=r"(cw))
#define _FPU_SETCW(cw) __asm__ __volatile__("MSR FPCR,%0" : : "r"(cw))
#else
#define _FPU_GETCW(cw) __asm__ __volatile__("VMRS %0,FPSCR" : "=r"(cw))
#define _FPU_SETCW(cw) __asm__ __volatile__("VMSR FPSCR,%0" : : "r"(cw))
#endif
#endif
32 
33 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
// Rounding-mode encodings, already shifted into the two-bit field selected
// by FPSCR_ROUND_MASK (bits 23:22 of the control word).
#define _ARM_FE_TONEAREST (0 << 22)
#define _ARM_FE_UPWARD (1 << 22)
#define _ARM_FE_DOWNWARD (2 << 22)
#define _ARM_FE_TOWARDZERO (3 << 22)
// Switch the FPU to the rounding mode selected by `r` for a conversion whose
// destination type is `outType`, and return the mode that was active on
// entry (as reported by get_round()).
RoundingMode set_round(RoundingMode r, Type outType)
{
    // Hardware encodings indexed by RoundingMode. The two tables differ only
    // in their first entry: to-nearest for float/double destinations,
    // toward-zero for every other destination type.
    static const int modesForFloat[kRoundingModeCount] = {
        _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
        _ARM_FE_TOWARDZERO
    };
    static const int modesForOther[kRoundingModeCount] = {
        _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
        _ARM_FE_TOWARDZERO
    };
    const int *table = (outType == kfloat || outType == kdouble)
        ? modesForFloat
        : modesForOther;

    // Remember the current mode before the control word is modified.
    const RoundingMode previous = get_round();

    // Replace only the rounding field; every other control bit is preserved.
    int controlWord = 0;
    _FPU_GETCW(controlWord);
    controlWord = table[r] | (controlWord & ~FPSCR_ROUND_MASK);
    _FPU_SETCW(controlWord);

    return previous;
}
59 
get_round(void)60 RoundingMode get_round(void)
61 {
62     int fpscr;
63     int oldRound;
64 
65     _FPU_GETCW(fpscr);
66     oldRound = (fpscr & FPSCR_ROUND_MASK);
67 
68     switch (oldRound)
69     {
70         case _ARM_FE_TONEAREST: return kRoundToNearestEven;
71         case _ARM_FE_UPWARD: return kRoundUp;
72         case _ARM_FE_DOWNWARD: return kRoundDown;
73         case _ARM_FE_TOWARDZERO: return kRoundTowardZero;
74     }
75 
76     return kDefaultRoundingMode;
77 }
78 
79 #elif !(defined(_WIN32) && defined(_MSC_VER))
// Switch the C floating-point environment to the rounding mode selected by
// `r` for a conversion whose destination type is `outType`, and return the
// mode that was active on entry. Aborts if the previous mode is not one of
// the four standard <fenv.h> rounding directions.
RoundingMode set_round(RoundingMode r, Type outType)
{
    // <fenv.h> modes indexed by RoundingMode. The two tables differ only in
    // their first entry: to-nearest for float/double destinations,
    // toward-zero for every other destination type.
    static const int modesForFloat[kRoundingModeCount] = {
        FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
    };
    static const int modesForOther[kRoundingModeCount] = {
        FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
    };
    const int *table = (outType == kfloat || outType == kdouble)
        ? modesForFloat
        : modesForOther;

    // Capture the old mode first, then install the new one.
    const int previous = fegetround();
    fesetround(table[r]);

    if (previous == FE_TONEAREST) return kRoundToNearestEven;
    if (previous == FE_UPWARD) return kRoundUp;
    if (previous == FE_DOWNWARD) return kRoundDown;
    if (previous == FE_TOWARDZERO) return kRoundTowardZero;

    abort(); // ??!
    return kDefaultRoundingMode; // never happens
}
103 
get_round(void)104 RoundingMode get_round(void)
105 {
106     int oldRound = fegetround();
107 
108     switch (oldRound)
109     {
110         case FE_TONEAREST: return kRoundToNearestEven;
111         case FE_UPWARD: return kRoundUp;
112         case FE_DOWNWARD: return kRoundDown;
113         case FE_TOWARDZERO: return kRoundTowardZero;
114     }
115 
116     return kDefaultRoundingMode;
117 }
118 
119 #else
// Switch the FPU control word to the rounding mode selected by `r` for a
// conversion whose destination type is `outType`, and return the mode that
// was active on entry.
//
// If the current control word cannot be read, an error is logged, the mode is
// left untouched and kDefaultRoundingMode is returned. If the new mode cannot
// be installed, an error is logged and the previous mode is still returned.
RoundingMode set_round(RoundingMode r, Type outType)
{
    // Control-word rounding values indexed by RoundingMode. The two tables
    // differ only in their first entry: _RC_NEAR for float/double
    // destinations, _RC_CHOP for every other destination type.
    static const int flt_rounds[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR,
                                                        _RC_UP, _RC_DOWN,
                                                        _RC_CHOP };
    static const int int_rounds[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR,
                                                        _RC_UP, _RC_DOWN,
                                                        _RC_CHOP };
    const int *p =
        (outType == kfloat || outType == kdouble) ? flt_rounds : int_rounds;
    unsigned int control = 0;

    // A zero mask makes _controlfp_s read the control word without changing
    // it.
    int err = _controlfp_s(&control, 0, 0);
    if (err)
    {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",
                   __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    RoundingMode old;
    switch (control & _MCW_RC)
    {
        case _RC_NEAR: old = kRoundToNearestEven; break;
        case _RC_UP: old = kRoundUp; break;
        case _RC_DOWN: old = kRoundDown; break;
        case _RC_CHOP: old = kRoundTowardZero; break;
        default: old = kDefaultRoundingMode; break;
    }

    // Install the new mode; only the rounding-control bits are touched.
    err = _controlfp_s(&control, p[r], _MCW_RC);
    if (err)
    {
        vlog_error("\t\tERROR: -- cannot set rounding mode in %s:%d\n",
                   __FILE__, __LINE__);
    }

    return old;
}
153 
get_round(void)154 RoundingMode get_round(void)
155 {
156     unsigned int oldRound;
157 
158     int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
159     oldRound &= _MCW_RC;
160     return (oldRound == _RC_NEAR)
161         ? kRoundToNearestEven
162         : (oldRound == _RC_UP) ? kRoundUp
163                                : (oldRound == _RC_DOWN)
164                 ? kRoundDown
165                 : (oldRound == _RC_CHOP) ? kRoundTowardZero
166                                          : kDefaultRoundingMode;
167 }
168 
169 #endif
170 
171 //
172 // FlushToZero() sets the host processor into ftz mode.  It is intended to have
173 // a remote effect on the behavior of the code in basic_test_conversions.c. Some
174 // host processors may not support this mode, in which case you'll need to do
175 // some clamping in software by testing against FLT_MIN or DBL_MIN in that file.
176 //
177 // Note: IEEE-754 says conversions are basic operations.  As such they do *NOT*
178 // have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to
179 // zero for subnormal inputs or outputs when FTZ mode is on like other basic
180 // operators do (e.g. add, subtract, multiply, divide, etc.)
181 //
182 // Configuring hardware to FTZ mode varies by platform.
183 // CAUTION: Some C implementations may also fail to behave properly in this
184 // mode.
185 //
186 //  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
187 //  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR --
188 //  provided that SSE/SSE2
189 //          is used for floating point computation! If your OS uses x87, you'll
190 //          need to figure out how to turn that off for the conversions code in
191 //          basic_test_conversions.c so that they flush to zero properly.
192 //          Otherwise, you'll need to add appropriate software clamping to
193 //          basic_test_conversions.c, in which case these functions are at
194 //          liberty to do nothing.
195 //
196 #if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
197 #include <xmmintrin.h>
198 #elif defined(__PPC__)
199 #include <fpu_control.h>
200 #endif
// Put the host FPU into flush-to-zero mode.
//
// Returns an opaque token intended for UnFlushToZero(). On x86 the token
// carries the MXCSR value that was in effect before this call; on ARM and
// PowerPC no state is saved and the token is NULL.
void *FlushToZero(void)
{
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
    // MXCSR bit 15 (FZ) flushes subnormal results to zero; bit 6 (DAZ) treats
    // subnormal inputs as zero.
    const unsigned int kMxcsrFlushBits = 0x8000u | 0x0040u;
    const unsigned int savedCsr = _mm_getcsr();
    _mm_setcsr(savedCsr | kMxcsrFlushBits);
    // Encode the previous MXCSR in the pointer value itself. An explicit
    // integer-to-pointer conversion gives every byte of the result a defined
    // value, unlike punning through a union whose int member is narrower
    // than the pointer.
    return (void *)(uintptr_t)savedCsr;
#elif defined(__arm__) || defined(__aarch64__)
    int fpscr;
    _FPU_GETCW(fpscr);
    _FPU_SETCW(fpscr | FPSCR_FZ);
    return NULL;
#elif defined(__PPC__)
    fpu_control_t flags = 0;
    _FPU_GETCW(flags);
    flags |= _FPU_MASK_NI;
    _FPU_SETCW(flags);
    return NULL;
#else
#error Unknown arch
#endif
#else
#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
229 
230 // Undo the effects of FlushToZero above, restoring the host to default
231 // behavior, using the information passed in p.
// Leave flush-to-zero mode.
//
// On x86, `p` is the token returned by FlushToZero() and the MXCSR is restored
// to the value it carries. On ARM and PowerPC `p` is not used: the
// flush-to-zero / non-IEEE bit is simply cleared.
void UnFlushToZero(void *p)
{
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
    // The saved MXCSR travels in the pointer value; convert it back with an
    // explicit cast rather than reading an int member out of a union, so the
    // result does not depend on how the two members overlap in memory.
    _mm_setcsr((unsigned int)(uintptr_t)p);
#elif defined(__arm__) || defined(__aarch64__)
    int fpscr;
    _FPU_GETCW(fpscr);
    _FPU_SETCW(fpscr & ~FPSCR_FZ);
#elif defined(__PPC__)
    fpu_control_t flags = 0;
    _FPU_GETCW(flags);
    flags &= ~_FPU_MASK_NI;
    _FPU_SETCW(flags);
#else
#error Unknown arch
#endif
#else
#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
257