1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef _fpcontrol_h
17 #define _fpcontrol_h
18 
19 #include <cstdint>
20 
21 // In order to get tests for correctly rounded operations (e.g. multiply) to
22 // work properly we need to be able to set the reference hardware to FTZ mode if
23 // the device hardware is running in that mode.  We have explored all other
24 // options short of writing correctly rounded operations in integer code, and
25 // have found this is the only way to correctly verify operation.
26 //
// Non-Apple implementations will need to provide their own implementation for
28 // these features.  If the reference hardware and device are both running in the
29 // same state (either FTZ or IEEE compliant modes) then these functions may be
30 // empty.  If the device is running in non-default rounding mode (e.g. round
31 // toward zero), then these functions should also set the reference device into
32 // that rounding mode.
33 #if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__)              \
34     || defined(__MINGW32__)
// Integer type used to hold a saved floating-point control word.  ForceFTZ and
// DisableFTZ store the previous state in it; RestoreFPState reads it back.
// MSVC builds use a plain int, every other toolchain uses a 64-bit integer.
#if !defined(_MSC_VER)
typedef int64_t FPU_mode_type;
#else
typedef int FPU_mode_type;
#endif
40 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
41     || defined(__MINGW32__)
42 #include <xmmintrin.h>
43 #elif defined(__PPC__)
44 #include <fpu_control.h>
45 extern __thread fpu_control_t fpu_control;
46 #endif
47 // Set the reference hardware floating point unit to FTZ mode
ForceFTZ(FPU_mode_type * mode)48 inline void ForceFTZ(FPU_mode_type *mode)
49 {
50 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
51     || defined(__MINGW32__)
52     *mode = _mm_getcsr();
53     _mm_setcsr(*mode | 0x8040);
54 #elif defined(__PPC__)
55     *mode = fpu_control;
56     fpu_control |= _FPU_MASK_NI;
57 #elif defined(__arm__)
58     unsigned fpscr;
59     __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
60     *mode = fpscr;
61     __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
62     // Add 64 bit support
63 #elif defined(__aarch64__)
64     uint64_t fpscr;
65     __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
66     *mode = fpscr;
67     __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
68 #else
69 #error ForceFTZ needs an implentation
70 #endif
71 }
72 
73 // Disable the denorm flush to zero
DisableFTZ(FPU_mode_type * mode)74 inline void DisableFTZ(FPU_mode_type *mode)
75 {
76 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
77     || defined(__MINGW32__)
78     *mode = _mm_getcsr();
79     _mm_setcsr(*mode & ~0x8040);
80 #elif defined(__PPC__)
81     *mode = fpu_control;
82     fpu_control &= ~_FPU_MASK_NI;
83 #elif defined(__arm__)
84     unsigned fpscr;
85     __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
86     *mode = fpscr;
87     __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
88     // Add 64 bit support
89 #elif defined(__aarch64__)
90     uint64_t fpscr;
91     __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
92     *mode = fpscr;
93     __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
94 #else
95 #error DisableFTZ needs an implentation
96 #endif
97 }
98 
99 // Restore the reference hardware to floating point state indicated by *mode
RestoreFPState(FPU_mode_type * mode)100 inline void RestoreFPState(FPU_mode_type *mode)
101 {
102 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
103     || defined(__MINGW32__)
104     _mm_setcsr(*mode);
105 #elif defined(__PPC__)
106     fpu_control = *mode;
107 #elif defined(__arm__)
108     __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
109     // Add 64 bit support
110 #elif defined(__aarch64__)
111     __asm__ volatile("msr fpcr, %0" ::"r"(*mode));
112 #else
113 #error RestoreFPState needs an implementation
114 #endif
115 }
116 #else
117 #error ForceFTZ and RestoreFPState need implentations
118 #endif
119 
120 #endif
121