1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
#include "rounding_mode.h"

#include <stddef.h>
17
18 #if (defined( __arm__ ) || defined(__aarch64__))
19 #define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
20 #define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
21
22 #define _ARM_FE_FTZ 0x1000000
23 #define _ARM_FE_NFTZ 0x0
24 #if defined(__aarch64__)
25 #define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
26 #define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
27 #else
28 #define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
29 #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
30 #endif
31 #endif
32
33 #if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
34 #define _ARM_FE_TONEAREST 0x0
35 #define _ARM_FE_UPWARD 0x400000
36 #define _ARM_FE_DOWNWARD 0x800000
37 #define _ARM_FE_TOWARDZERO 0xc00000
// Install rounding mode r in the ARM floating-point control register and return the mode that
// was active beforehand (as reported by get_round()).
//
// The register encoding is looked up in a table indexed by r. Two tables exist because entry 0
// differs by destination type: round-to-nearest for float/double outputs, round-toward-zero for
// everything else. NOTE(review): entry 0 presumably corresponds to kDefaultRoundingMode --
// confirm against the RoundingMode enum. r is not range-checked.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int float_modes[ kRoundingModeCount ] = {
        _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO
    };
    static const int integer_modes[ kRoundingModeCount ] = {
        _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO
    };

    const int *table = ( outType == kfloat || outType == kdouble ) ? float_modes : integer_modes;
    const RoundingMode previous = get_round();

    // Replace only the rounding field; every other control bit is written back unchanged.
    int control = 0;
    _FPU_GETCW( control );
    _FPU_SETCW( table[r] | ( control & ~FPSCR_ROUND_MASK ) );

    return previous;
}
56
get_round(void)57 RoundingMode get_round( void )
58 {
59 int fpscr;
60 int oldRound;
61
62 _FPU_GETCW(fpscr);
63 oldRound = (fpscr & FPSCR_ROUND_MASK);
64
65 switch( oldRound )
66 {
67 case _ARM_FE_TONEAREST:
68 return kRoundToNearestEven;
69 case _ARM_FE_UPWARD:
70 return kRoundUp;
71 case _ARM_FE_DOWNWARD:
72 return kRoundDown;
73 case _ARM_FE_TOWARDZERO:
74 return kRoundTowardZero;
75 }
76
77 return kDefaultRoundingMode;
78 }
79
80 #elif !(defined(_WIN32) && defined(_MSC_VER))
// Install rounding mode r through the C99 <fenv.h> interface and return the mode that was
// active beforehand.
//
// The fenv constant is looked up in a table indexed by r. Two tables exist because entry 0
// differs by destination type: FE_TONEAREST for float/double outputs, FE_TOWARDZERO for
// everything else. NOTE(review): entry 0 presumably corresponds to kDefaultRoundingMode --
// confirm against the RoundingMode enum. r is not range-checked.
//
// Aborts the process if the previously active mode is not one of the four standard fenv modes.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int float_modes[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
    static const int integer_modes[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };

    const int *table = ( outType == kfloat || outType == kdouble ) ? float_modes : integer_modes;

    // Capture the old mode before overwriting it.
    const int previous = fegetround();
    fesetround( table[r] );

    if( previous == FE_TONEAREST )
        return kRoundToNearestEven;
    if( previous == FE_UPWARD )
        return kRoundUp;
    if( previous == FE_DOWNWARD )
        return kRoundDown;
    if( previous == FE_TOWARDZERO )
        return kRoundTowardZero;

    abort();    // ??!
    return kDefaultRoundingMode;    // never happens
}
106
get_round(void)107 RoundingMode get_round( void )
108 {
109 int oldRound = fegetround();
110
111 switch( oldRound )
112 {
113 case FE_TONEAREST:
114 return kRoundToNearestEven;
115 case FE_UPWARD:
116 return kRoundUp;
117 case FE_DOWNWARD:
118 return kRoundDown;
119 case FE_TOWARDZERO:
120 return kRoundTowardZero;
121 }
122
123 return kDefaultRoundingMode;
124 }
125
126 #else
// Install rounding mode r through the MSVC floating-point control word and return the mode
// that was active beforehand.
//
// The _RC_* value is looked up in a table indexed by r. Two tables exist because entry 0
// differs by destination type: _RC_NEAR for float/double outputs, _RC_CHOP for everything
// else. NOTE(review): entry 0 presumably corresponds to kDefaultRoundingMode -- confirm
// against the RoundingMode enum. r is not range-checked.
//
// If the current control word cannot be read, an error is logged, nothing is changed and
// kDefaultRoundingMode is returned. If the new mode cannot be installed, an error is logged
// and the previous mode is still returned.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
    unsigned int oldRound = 0;

    // A zero mask makes _controlfp_s a pure query: it only stores the current control word.
    int err = _controlfp_s(&oldRound, 0, 0);
    if (err) {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    oldRound &= _MCW_RC;

    RoundingMode old =
        (oldRound == _RC_NEAR)? kRoundToNearestEven :
        (oldRound == _RC_UP)?   kRoundUp :
        (oldRound == _RC_DOWN)? kRoundDown :
        (oldRound == _RC_CHOP)? kRoundTowardZero :
        kDefaultRoundingMode;

    // Update only the rounding-control bits; report a failure instead of silently ignoring it.
    err = _controlfp_s(&oldRound, p[r], _MCW_RC);
    if (err) {
        vlog_error("\t\tERROR: -- cannot set rounding mode in %s:%d\n", __FILE__, __LINE__);
    }

    return old;
}
152
get_round(void)153 RoundingMode get_round( void )
154 {
155 unsigned int oldRound;
156
157 int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
158 oldRound &= _MCW_RC;
159 return
160 (oldRound == _RC_NEAR)? kRoundToNearestEven :
161 (oldRound == _RC_UP)? kRoundUp :
162 (oldRound == _RC_DOWN)? kRoundDown :
163 (oldRound == _RC_CHOP)? kRoundTowardZero:
164 kDefaultRoundingMode;
165 }
166
167 #endif
168
169 //
170 // FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
172 // software by testing against FLT_MIN or DBL_MIN in that file.
173 //
174 // Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
175 // the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
176 // operators do (e.g. add, subtract, multiply, divide, etc.)
177 //
178 // Configuring hardware to FTZ mode varies by platform.
179 // CAUTION: Some C implementations may also fail to behave properly in this mode.
180 //
181 // On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
182 // On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
183 // is used for floating point computation! If your OS uses x87, you'll need to figure out how
184 // to turn that off for the conversions code in basic_test_conversions.c so that they flush to
185 // zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
// in which case, these functions are at liberty to do nothing.
187 //
188 #if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
189 #include <xmmintrin.h>
190 #elif defined( __PPC__ )
191 #include <fpu_control.h>
192 #endif
// Switch the host FPU into flush-to-zero mode.
//
// Returns an opaque token to hand to UnFlushToZero(). On x86 the token carries the MXCSR value
// that was in effect before the call, stored in the low bits of the pointer with all remaining
// bits zero. On ARM and PowerPC no state is saved and the token is NULL.
void *FlushToZero( void )
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
    const unsigned int csr = _mm_getcsr();
    // 0x8040 sets the FZ and DAZ bits of MXCSR (bit 15 and bit 6).
    _mm_setcsr( csr | 0x8040 );
    // Widen through an integer type so every bit of the returned token is well defined.
    return (void *)(size_t)csr;
#elif defined( __arm__ ) || defined(__aarch64__)
    int fpscr;
    _FPU_GETCW(fpscr);
    _FPU_SETCW(fpscr | FPSCR_FZ);
    return NULL;
#elif defined( __PPC__ )
    fpu_control_t flags = 0;
    _FPU_GETCW(flags);
    flags |= _FPU_MASK_NI;      // non-IEEE mode
    _FPU_SETCW(flags);
    return NULL;
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
218
// Reverse FlushToZero(). p is the token that FlushToZero() returned.
// On x86 the MXCSR is reloaded from the integer stored in p. On ARM and PowerPC p is not
// consulted: the flush-to-zero (ARM) or non-IEEE (PowerPC) control bit is simply cleared.
void UnFlushToZero( void *p )
{
#if !( defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) )
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#elif defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER )
    // Reinterpret the token as the integer control word it was built from.
    union { void *token; int csr; } saved;
    saved.token = p;
    _mm_setcsr( saved.csr );
#elif defined( __arm__ ) || defined( __aarch64__ )
    int control;
    _FPU_GETCW( control );
    control &= ~FPSCR_FZ;
    _FPU_SETCW( control );
#elif defined( __PPC__ )
    fpu_control_t control = 0;
    _FPU_GETCW( control );
    control &= ~_FPU_MASK_NI;
    _FPU_SETCW( control );
#else
#error Unknown arch
#endif
}
242