1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
#include "rounding_mode.h"

#include <stdint.h>
17
#if (defined(__arm__) || defined(__aarch64__))
// Bit positions inside the ARM floating-point control register
// (FPSCR on 32-bit ARM, FPCR on AArch64).
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode field (bits 23:22)

#define _ARM_FE_FTZ 0x1000000
#define _ARM_FE_NFTZ 0x0

// _FPU_GETCW(cw) reads the control register into the lvalue cw;
// _FPU_SETCW(cw) writes cw back to it.
//
// Both are marked volatile: an asm statement that has outputs is otherwise
// treated as a pure function of its inputs, so a read could be merged with
// an earlier read or moved across an intervening write. The write uses the
// "r" constraint because MSR/VMSR only accept a register source operand.
#if defined(__aarch64__)
#define _FPU_GETCW(cw) __asm__ __volatile__("MRS %0,FPCR" : "=r"(cw))
#define _FPU_SETCW(cw) __asm__ __volatile__("MSR FPCR,%0" : : "r"(cw))
#else
#define _FPU_GETCW(cw) __asm__ __volatile__("VMRS %0,FPSCR" : "=r"(cw))
#define _FPU_SETCW(cw) __asm__ __volatile__("VMSR FPSCR,%0" : : "r"(cw))
#endif
#endif
32
33 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
// Rounding-mode encodings, already shifted into the two-bit rounding field
// selected by FPSCR_ROUND_MASK (bits 23:22 of the control register).
#define _ARM_FE_TONEAREST (0 << 22)
#define _ARM_FE_UPWARD (1 << 22)
#define _ARM_FE_DOWNWARD (2 << 22)
#define _ARM_FE_TOWARDZERO (3 << 22)
// Switch the host rounding mode (ARM control-register variant).
//
// r       - requested mode; used directly as an index into the tables below.
// outType - destination type of the conversion; kfloat and kdouble select
//           the floating-point table, everything else the integer table.
//
// Returns the rounding mode that was in effect before the change, as
// reported by get_round().
RoundingMode set_round(RoundingMode r, Type outType)
{
    // Rounding-field bits per RoundingMode index. The two tables differ only
    // in entry 0: to-nearest for float/double output, toward-zero otherwise.
    static const int float_bits[kRoundingModeCount] = {
        _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
        _ARM_FE_TOWARDZERO
    };
    static const int integer_bits[kRoundingModeCount] = {
        _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
        _ARM_FE_TOWARDZERO
    };
    const int *table = (outType == kfloat || outType == kdouble)
        ? float_bits
        : integer_bits;

    // Capture the previous mode before touching the register.
    const RoundingMode previous = get_round();

    // Read-modify-write: replace only the rounding field and keep every
    // other control bit as it was.
    int control = 0;
    _FPU_GETCW(control);
    control &= ~FPSCR_ROUND_MASK;
    control |= table[r];
    _FPU_SETCW(control);

    return previous;
}
59
get_round(void)60 RoundingMode get_round(void)
61 {
62 int fpscr;
63 int oldRound;
64
65 _FPU_GETCW(fpscr);
66 oldRound = (fpscr & FPSCR_ROUND_MASK);
67
68 switch (oldRound)
69 {
70 case _ARM_FE_TONEAREST: return kRoundToNearestEven;
71 case _ARM_FE_UPWARD: return kRoundUp;
72 case _ARM_FE_DOWNWARD: return kRoundDown;
73 case _ARM_FE_TOWARDZERO: return kRoundTowardZero;
74 }
75
76 return kDefaultRoundingMode;
77 }
78
79 #elif !(defined(_WIN32) && defined(_MSC_VER))
// Switch the host rounding mode (<fenv.h> variant).
//
// r       - requested mode; used directly as an index into the tables below.
// outType - destination type of the conversion; kfloat and kdouble select
//           the floating-point table, everything else the integer table.
//
// Returns the rounding mode that was in effect before the change. Aborts
// the process if the previous mode is not one of the four FE_* modes
// handled here.
RoundingMode set_round(RoundingMode r, Type outType)
{
    // fenv rounding constant per RoundingMode index. The two tables differ
    // only in entry 0: to-nearest for float/double output, toward-zero
    // otherwise.
    static const int float_modes[kRoundingModeCount] = {
        FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
    };
    static const int integer_modes[kRoundingModeCount] = {
        FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
    };
    const int *table = (outType == kfloat || outType == kdouble)
        ? float_modes
        : integer_modes;

    // Remember the old mode first, then install the new one.
    const int previous = fegetround();
    fesetround(table[r]);

    if (previous == FE_TONEAREST) return kRoundToNearestEven;
    if (previous == FE_UPWARD) return kRoundUp;
    if (previous == FE_DOWNWARD) return kRoundDown;
    if (previous == FE_TOWARDZERO) return kRoundTowardZero;

    // The previous mode was none of the four known values.
    abort();
    return kDefaultRoundingMode; // not reached
}
103
get_round(void)104 RoundingMode get_round(void)
105 {
106 int oldRound = fegetround();
107
108 switch (oldRound)
109 {
110 case FE_TONEAREST: return kRoundToNearestEven;
111 case FE_UPWARD: return kRoundUp;
112 case FE_DOWNWARD: return kRoundDown;
113 case FE_TOWARDZERO: return kRoundTowardZero;
114 }
115
116 return kDefaultRoundingMode;
117 }
118
119 #else
// Switch the host rounding mode (MSVC _controlfp_s variant).
//
// r       - requested mode; used directly as an index into the tables below.
// outType - destination type of the conversion; kfloat and kdouble select
//           the floating-point table, everything else the integer table.
//
// Returns the rounding mode that was in effect before the change. If the
// current control word cannot be read, an error is logged, nothing is
// changed and kDefaultRoundingMode is returned. If the new mode cannot be
// installed, an error is logged and the previous mode is still returned.
RoundingMode set_round(RoundingMode r, Type outType)
{
    // _RC_* constant per RoundingMode index. The two tables differ only in
    // entry 0: to-nearest for float/double output, chop otherwise.
    static const int flt_rounds[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR,
                                                        _RC_UP, _RC_DOWN,
                                                        _RC_CHOP };
    static const int int_rounds[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR,
                                                        _RC_UP, _RC_DOWN,
                                                        _RC_CHOP };
    const int *p =
        (outType == kfloat || outType == kdouble) ? flt_rounds : int_rounds;
    unsigned int control = 0;

    // A zero mask changes nothing; it only fetches the current control word.
    int err = _controlfp_s(&control, 0, 0);
    if (err)
    {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",
                   __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    RoundingMode old;
    switch (control & _MCW_RC)
    {
        case _RC_NEAR: old = kRoundToNearestEven; break;
        case _RC_UP: old = kRoundUp; break;
        case _RC_DOWN: old = kRoundDown; break;
        case _RC_CHOP: old = kRoundTowardZero; break;
        default: old = kDefaultRoundingMode; break;
    }

    // Install the new mode, touching only the rounding-control bits. The
    // result used to be ignored; report a failure the same way as above.
    err = _controlfp_s(&control, p[r], _MCW_RC);
    if (err)
    {
        vlog_error("\t\tERROR: -- cannot set rounding mode in %s:%d\n",
                   __FILE__, __LINE__);
    }

    return old;
}
153
get_round(void)154 RoundingMode get_round(void)
155 {
156 unsigned int oldRound;
157
158 int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
159 oldRound &= _MCW_RC;
160 return (oldRound == _RC_NEAR)
161 ? kRoundToNearestEven
162 : (oldRound == _RC_UP) ? kRoundUp
163 : (oldRound == _RC_DOWN)
164 ? kRoundDown
165 : (oldRound == _RC_CHOP) ? kRoundTowardZero
166 : kDefaultRoundingMode;
167 }
168
169 #endif
170
171 //
// FlushToZero() sets the host processor into ftz mode. It is intended to have
// a remote effect on the behavior of the code in basic_test_conversions.c. Some
// host processors may not support this mode, in which case you'll need to do
// some clamping in software by testing against FLT_MIN or DBL_MIN in that file.
176 //
177 // Note: IEEE-754 says conversions are basic operations. As such they do *NOT*
178 // have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to
179 // zero for subnormal inputs or outputs when FTZ mode is on like other basic
180 // operators do (e.g. add, subtract, multiply, divide, etc.)
181 //
182 // Configuring hardware to FTZ mode varies by platform.
183 // CAUTION: Some C implementations may also fail to behave properly in this
184 // mode.
185 //
186 // On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
187 // On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR --
188 // provided that SSE/SSE2
189 // is used for floating point computation! If your OS uses x87, you'll
190 // need to figure out how to turn that off for the conversions code in
191 // basic_test_conversions.c so that they flush to zero properly.
// Otherwise, you'll need to add appropriate software clamping to
// basic_test_conversions.c, in which case these functions are at
// liberty to do nothing.
195 //
196 #if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
197 #include <xmmintrin.h>
198 #elif defined(__PPC__)
199 #include <fpu_control.h>
200 #endif
// Put the host FPU into flush-to-zero mode.
//
// Returns an opaque token to pass to UnFlushToZero(). On x86 the token
// carries the MXCSR value that was in effect before the call; on ARM and
// PowerPC it is NULL.
void *FlushToZero(void)
{
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
    const unsigned int saved_csr = _mm_getcsr();

    // 0x8040 is MXCSR bits 15 and 6, the FZ and DAZ bits.
    _mm_setcsr(saved_csr | 0x8040);

    // Carry the old MXCSR in the pointer through an integer conversion, so
    // every bit of the returned value is well defined. A union pun would
    // leave the bytes beyond the int member unspecified on 64-bit hosts.
    return (void *)(uintptr_t)saved_csr;
#elif defined(__arm__) || defined(__aarch64__)
    int fpscr;
    _FPU_GETCW(fpscr);
    _FPU_SETCW(fpscr | FPSCR_FZ);
    return NULL;
#elif defined(__PPC__)
    fpu_control_t flags = 0;
    _FPU_GETCW(flags);
    flags |= _FPU_MASK_NI; // non-IEEE mode
    _FPU_SETCW(flags);
    return NULL;
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
229
// Undo the effects of FlushToZero above, using the token it returned.
//
// p - value returned by FlushToZero(). On x86 its low 32 bits are written
//     back to MXCSR; on ARM and PowerPC it is ignored and the flush-to-zero
//     (ARM) or non-IEEE (PowerPC) bit is simply cleared.
void UnFlushToZero(void *p)
{
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
    // Recover the saved MXCSR with an explicit pointer-to-integer
    // conversion instead of reading an inactive union member.
    _mm_setcsr((unsigned int)(uintptr_t)p);
#elif defined(__arm__) || defined(__aarch64__)
    int fpscr;
    _FPU_GETCW(fpscr);
    _FPU_SETCW(fpscr & ~FPSCR_FZ);
#elif defined(__PPC__)
    fpu_control_t flags = 0;
    _FPU_GETCW(flags);
    flags &= ~_FPU_MASK_NI;
    _FPU_SETCW(flags);
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
257