• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "rounding_mode.h"
17 
#if (defined( __arm__ ) || defined(__aarch64__))
    // Fields of the ARM floating-point control register
    // (read/written as FPSCR on 32-bit ARM and as FPCR on AArch64 by the macros below).
    #define FPSCR_FZ    (1 << 24)       // Flush-To-Zero mode bit
    #define FPSCR_ROUND_MASK (3 << 22)  // Rounding-mode field (bits 23:22)

    #define _ARM_FE_FTZ     0x1000000
    #define _ARM_FE_NFTZ    0x0

    // _FPU_GETCW(cw): read the control register into the lvalue cw.
    // _FPU_SETCW(cw): write the value of cw to the control register.
    //
    // Both statements are marked volatile so the compiler cannot merge, hoist or drop a
    // read that sits on either side of a write.  The write operand uses the "r" constraint
    // only: the MSR/VMSR forms used here take a general-purpose register, never an immediate.
    #if defined(__aarch64__)
        #define _FPU_GETCW(cw) __asm__ __volatile__ ("MRS %0,FPCR" : "=r" (cw))
        #define _FPU_SETCW(cw) __asm__ __volatile__ ("MSR FPCR,%0" : : "r" (cw))
    #else
        #define _FPU_GETCW(cw) __asm__ __volatile__ ("VMRS %0,FPSCR" : "=r" (cw))
        #define _FPU_SETCW(cw) __asm__ __volatile__ ("VMSR FPSCR,%0" : : "r" (cw))
    #endif
#endif
32 
33 #if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
34 #define _ARM_FE_TONEAREST           0x0
35 #define _ARM_FE_UPWARD              0x400000
36 #define _ARM_FE_DOWNWARD            0x800000
37 #define _ARM_FE_TOWARDZERO          0xc00000
// ARM implementation: install the hardware rounding mode that corresponds to r for a
// conversion whose destination type is outType, and return the mode that was active before.
//
// r indexes a kRoundingModeCount-entry table.  The float/double table and the integer table
// differ only in their first entry (to-nearest vs. toward-zero); presumably that slot is the
// "default" rounding mode -- confirm against the RoundingMode enum in rounding_mode.h.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int toFloatModes[ kRoundingModeCount ] = {
        _ARM_FE_TONEAREST,
        _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
    static const int toIntModes[ kRoundingModeCount ] = {
        _ARM_FE_TOWARDZERO,
        _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };

    const int *modes = ( outType == kfloat || outType == kdouble ) ? toFloatModes : toIntModes;

    // Capture the current mode before touching the register.
    const RoundingMode previous = get_round();

    // Read-modify-write: replace only the rounding field, keep every other control bit.
    int ctrl = 0;
    _FPU_GETCW( ctrl );
    ctrl &= ~FPSCR_ROUND_MASK;
    ctrl |= modes[ r ];
    _FPU_SETCW( ctrl );

    return previous;
}
56 
get_round(void)57 RoundingMode get_round( void )
58 {
59     int fpscr;
60     int oldRound;
61 
62     _FPU_GETCW(fpscr);
63     oldRound = (fpscr & FPSCR_ROUND_MASK);
64 
65     switch( oldRound )
66     {
67         case _ARM_FE_TONEAREST:
68             return kRoundToNearestEven;
69         case _ARM_FE_UPWARD:
70             return kRoundUp;
71         case _ARM_FE_DOWNWARD:
72             return kRoundDown;
73         case _ARM_FE_TOWARDZERO:
74             return kRoundTowardZero;
75     }
76 
77     return kDefaultRoundingMode;
78 }
79 
80 #elif !(defined(_WIN32) && defined(_MSC_VER))
// <fenv.h> implementation: install the rounding mode that corresponds to r for a conversion
// whose destination type is outType, and return the mode that was active before the call.
//
// The float/double table and the integer table differ only in their first entry
// (FE_TONEAREST vs. FE_TOWARDZERO).  Aborts if the previously active mode is not one of the
// four FE_* modes handled below.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int toFloatModes[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
    static const int toIntModes[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };

    const int *modes = ( outType == kfloat || outType == kdouble ) ? toFloatModes : toIntModes;

    // Sample the old mode first, then switch.
    const int previous = fegetround();
    fesetround( modes[ r ] );

    if( previous == FE_TONEAREST )
        return kRoundToNearestEven;
    if( previous == FE_UPWARD )
        return kRoundUp;
    if( previous == FE_DOWNWARD )
        return kRoundDown;
    if( previous == FE_TOWARDZERO )
        return kRoundTowardZero;

    abort();    // previous mode was none of the above
    return kDefaultRoundingMode;    // not reached
}
106 
get_round(void)107 RoundingMode get_round( void )
108 {
109     int oldRound = fegetround();
110 
111     switch( oldRound )
112     {
113         case FE_TONEAREST:
114             return kRoundToNearestEven;
115         case FE_UPWARD:
116             return kRoundUp;
117         case FE_DOWNWARD:
118             return kRoundDown;
119         case FE_TOWARDZERO:
120             return kRoundTowardZero;
121     }
122 
123     return kDefaultRoundingMode;
124 }
125 
126 #else
// MSVC implementation: install the rounding mode that corresponds to r for a conversion
// whose destination type is outType, and return the mode that was active before the call.
//
// The float/double table and the integer table differ only in their first entry
// (_RC_NEAR vs. _RC_CHOP).
//
// If the current control word cannot be read, an error is logged, nothing is changed and
// kDefaultRoundingMode is returned.  If the new mode cannot be written, an error is logged
// and the previously active mode is still returned.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
    unsigned int oldRound = 0;

    // A zero mask makes _controlfp_s a pure query: it only stores the current control word.
    int err = _controlfp_s(&oldRound, 0, 0);
    if (err) {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    // Keep only the rounding-control bits.
    oldRound &= _MCW_RC;

    RoundingMode old =
        (oldRound == _RC_NEAR)? kRoundToNearestEven :
        (oldRound == _RC_UP)?   kRoundUp :
        (oldRound == _RC_DOWN)? kRoundDown :
        (oldRound == _RC_CHOP)? kRoundTowardZero:
        kDefaultRoundingMode;

    // Change only the rounding-control field; report (rather than silently ignore) a failure.
    err = _controlfp_s(&oldRound, p[r], _MCW_RC);
    if (err) {
        vlog_error("\t\tERROR: -- cannot set rounding mode in %s:%d\n", __FILE__, __LINE__);
    }

    return old;
}
152 
get_round(void)153 RoundingMode get_round( void )
154 {
155     unsigned int oldRound;
156 
157     int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
158     oldRound &= _MCW_RC;
159     return
160         (oldRound == _RC_NEAR)? kRoundToNearestEven :
161         (oldRound == _RC_UP)?   kRoundUp :
162         (oldRound == _RC_DOWN)? kRoundDown :
163         (oldRound == _RC_CHOP)? kRoundTowardZero:
164         kDefaultRoundingMode;
165 }
166 
167 #endif
168 
169 //
170 // FlushToZero() sets the host processor into ftz mode.  It is intended to have a remote effect on the behavior of the code in
// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
172 // software by testing against FLT_MIN or DBL_MIN in that file.
173 //
174 // Note: IEEE-754 says conversions are basic operations.  As such they do *NOT* have the behavior in section 7.5.3 of
175 // the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
176 // operators do (e.g. add, subtract, multiply, divide, etc.)
177 //
178 // Configuring hardware to FTZ mode varies by platform.
179 // CAUTION: Some C implementations may also fail to behave properly in this mode.
180 //
181 //  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
182 //  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
183 //          is used for floating point computation! If your OS uses x87, you'll need to figure out how
184 //          to turn that off for the conversions code in basic_test_conversions.c so that they flush to
185 //          zero properly.  Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
//          in which case, these functions are at liberty to do nothing.
187 //
188 #if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
189     #include <xmmintrin.h>
190 #elif defined( __PPC__ )
191     #include <fpu_control.h>
192 #endif
// Put the host FPU into flush-to-zero mode.
//
// Returns an opaque token to hand to UnFlushToZero().  On x86 the token carries the MXCSR
// value that was read before the change; on ARM and PowerPC it is always NULL.
void *FlushToZero( void )
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
        // The incoming MXCSR is stored in the int member and handed back through the pointer member.
        union{ int i;  void *p; }saved = { _mm_getcsr() };
        // 0x8040 turns on the FZ and DAZ bits.
        _mm_setcsr( saved.i | 0x8040 );
        return saved.p;
    #elif defined( __arm__ ) || defined(__aarch64__)
        // Read-modify-write of the control register: set the FZ bit, keep everything else.
        int ctrl;
        _FPU_GETCW(ctrl);
        ctrl |= FPSCR_FZ;
        _FPU_SETCW(ctrl);
        return NULL;
    #elif defined( __PPC__ )
        // Set the non-IEEE mode bit in the FPSCR.
        fpu_control_t ctrl = 0;
        _FPU_GETCW(ctrl);
        ctrl |= _FPU_MASK_NI;
        _FPU_SETCW(ctrl);
        return NULL;
    #else
        #error Unknown arch
    #endif
#else
    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
218 
219 // Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
void UnFlushToZero( void *p)
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
        // p carries the MXCSR value captured by FlushToZero(); write it back verbatim.
        union{ void *p; int i;  }saved = { p };
        _mm_setcsr( saved.i );
    #elif defined( __arm__ ) || defined(__aarch64__)
        // p is not consulted on this path: the FZ bit is cleared unconditionally.
        int ctrl;
        _FPU_GETCW(ctrl);
        ctrl &= ~FPSCR_FZ;
        _FPU_SETCW(ctrl);
    #elif defined( __PPC__)
        // p is not consulted on this path: the non-IEEE mode bit is cleared unconditionally.
        fpu_control_t ctrl = 0;
        _FPU_GETCW(ctrl);
        ctrl &= ~_FPU_MASK_NI;
        _FPU_SETCW(ctrl);
    #else
        #error Unknown arch
    #endif
#else
    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
242