1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef _fpcontrol_h
17 #define _fpcontrol_h
18
// In order to get tests for correctly rounded operations (e.g. multiply) to
// work properly, we need to be able to set the reference hardware to FTZ mode
// if the device hardware is running in that mode. We have explored all other
// options short of writing correctly rounded operations in integer code, and
// have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implementation for
// these features. If the reference hardware and device are both running in the
// same state (either FTZ or IEEE compliant modes) then these functions may be
// empty. If the device is running in non-default rounding mode (e.g. round
// toward zero), then these functions should also set the reference device into
// that rounding mode.
31 #if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \
32 || defined(__MINGW32__)
// Holds a floating point control word captured by ForceFTZ() or DisableFTZ()
// until it is handed back to RestoreFPState().
typedef int FPU_mode_type;
34 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
35 || defined(__MINGW32__)
36 #include <xmmintrin.h>
37 #elif defined(__PPC__)
38 #include <fpu_control.h>
39 extern __thread fpu_control_t fpu_control;
40 #endif
41 // Set the reference hardware floating point unit to FTZ mode
ForceFTZ(FPU_mode_type * mode)42 static inline void ForceFTZ(FPU_mode_type *mode)
43 {
44 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
45 || defined(__MINGW32__)
46 *mode = _mm_getcsr();
47 _mm_setcsr(*mode | 0x8040);
48 #elif defined(__PPC__)
49 *mode = fpu_control;
50 fpu_control |= _FPU_MASK_NI;
51 #elif defined(__arm__)
52 unsigned fpscr;
53 __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
54 *mode = fpscr;
55 __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
56 // Add 64 bit support
57 #elif defined(__aarch64__)
58 unsigned fpscr;
59 __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
60 *mode = fpscr;
61 __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
62 #else
63 #error ForceFTZ needs an implentation
64 #endif
65 }
66
67 // Disable the denorm flush to zero
DisableFTZ(FPU_mode_type * mode)68 static inline void DisableFTZ(FPU_mode_type *mode)
69 {
70 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
71 || defined(__MINGW32__)
72 *mode = _mm_getcsr();
73 _mm_setcsr(*mode & ~0x8040);
74 #elif defined(__PPC__)
75 *mode = fpu_control;
76 fpu_control &= ~_FPU_MASK_NI;
77 #elif defined(__arm__)
78 unsigned fpscr;
79 __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
80 *mode = fpscr;
81 __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
82 // Add 64 bit support
83 #elif defined(__aarch64__)
84 unsigned fpscr;
85 __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
86 *mode = fpscr;
87 __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
88 #else
89 #error DisableFTZ needs an implentation
90 #endif
91 }
92
93 // Restore the reference hardware to floating point state indicated by *mode
RestoreFPState(FPU_mode_type * mode)94 static inline void RestoreFPState(FPU_mode_type *mode)
95 {
96 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
97 || defined(__MINGW32__)
98 _mm_setcsr(*mode);
99 #elif defined(__PPC__)
100 fpu_control = *mode;
101 #elif defined(__arm__)
102 __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
103 // Add 64 bit support
104 #elif defined(__aarch64__)
105 __asm__ volatile("msr fpcr, %0" ::"r"(*mode));
106 #else
107 #error RestoreFPState needs an implementation
108 #endif
109 }
110 #else
111 #error ForceFTZ and RestoreFPState need implentations
112 #endif
113
114 #endif
115