• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef UTILITY_H
17 #define UTILITY_H
18 
19 #include "harness/compat.h"
20 
21 #ifdef __APPLE__
22 #include <OpenCL/opencl.h>
23 #else
24 #include <CL/opencl.h>
25 #endif
26 #include <stdio.h>
27 #include "harness/rounding_mode.h"
28 #include "harness/fpcontrol.h"
29 #include "harness/testHarness.h"
30 #include "harness/ThreadPool.h"
// Size constant: 1024*1024*2 = 2,097,152.
// NOTE(review): the unit (bytes vs. elements) is not stated in this header --
// check the users of BUFFER_SIZE.
31 #define BUFFER_SIZE         (1024*1024*2)
32 
// UNUSED marks an entity as intentionally unused.  Where __GNUC__ is defined it
// expands to __attribute__ ((unused)); with any other compiler it expands to
// nothing.
33 #if defined( __GNUC__ )
34     #define UNUSED  __attribute__ ((unused))
35 #else
36     #define UNUSED
37 #endif
38 
// Declared here, defined in another translation unit.
// NOTE(review): judging by the names these tune a reduced ("wimpy") run --
// confirm against the definitions.
39 extern int gWimpyBufferSize;
40 extern int gWimpyReductionFactor;
41 
// Number of entries in the two tables declared below and in the
// per-vector-size arrays of this header.
42 #define VECTOR_SIZE_COUNT   6
// A string and an integer per entry; both tables are defined elsewhere.
// NOTE(review): presumably the two are indexed by the same vector-size index
// -- confirm against the definitions.
43 extern const char *sizeNames[VECTOR_SIZE_COUNT];
44 extern const int   sizeValues[VECTOR_SIZE_COUNT];
45 
// Shared state of the test program.  Everything below is a declaration only;
// the definitions live in another translation unit.

// OpenCL device, context and command queue handles.
46 extern cl_device_id     gDevice;
47 extern cl_context       gContext;
48 extern cl_command_queue gQueue;
// Untyped host pointers.  gOut and gOut2 hold one pointer per entry of the
// vector-size tables (VECTOR_SIZE_COUNT entries).
// NOTE(review): presumably inputs, reference outputs and device outputs of the
// function under test -- confirm against the code that allocates them.
49 extern void             *gIn;
50 extern void             *gIn2;
51 extern void             *gIn3;
52 extern void             *gOut_Ref;
53 extern void             *gOut_Ref2;
54 extern void             *gOut[VECTOR_SIZE_COUNT];
55 extern void             *gOut2[VECTOR_SIZE_COUNT];
// OpenCL memory objects; the output arrays again have VECTOR_SIZE_COUNT
// entries.
// NOTE(review): presumably the device-side counterparts of the host pointers
// above -- confirm.
56 extern cl_mem           gInBuffer;
57 extern cl_mem           gInBuffer2;
58 extern cl_mem           gInBuffer3;
59 extern cl_mem           gOutBuffer[VECTOR_SIZE_COUNT];
60 extern cl_mem           gOutBuffer2[VECTOR_SIZE_COUNT];
// NOTE(review): device properties and run-time options; their exact meaning
// and valid values are not visible from this header -- see where they are set.
61 extern uint32_t         gComputeDevices;
62 extern uint32_t         gSimdSize;
63 extern int              gSkipCorrectnessTesting;
64 extern int              gMeasureTimes;
65 extern int              gReportAverageTimes;
66 extern int              gForceFTZ;
67 extern volatile int     gTestFastRelaxed;
68 extern int              gFastRelaxedDerived;
69 extern int              gWimpyMode;
70 extern int              gHasDouble;
71 extern int              gIsInRTZMode;
72 extern int              gInfNanSupport;
73 extern int              gIsEmbedded;
74 extern int              gVerboseBruteForce;
75 extern uint32_t         gMaxVectorSizeIndex;
76 extern uint32_t         gMinVectorSizeIndex;
77 extern uint32_t         gDeviceFrequency;
// NOTE(review): presumably the device's single and double precision
// floating-point capability bit-fields as queried from OpenCL -- confirm.
78 extern cl_device_fp_config gFloatCapabilities;
79 extern cl_device_fp_config gDoubleCapabilities;
80 
// Two mutually exclusive selector values.
// NOTE(review): presumably passed to a reporting routine to say in which
// direction a measured value improves -- the consumer is not visible here.
81 #define LOWER_IS_BETTER     0
82 #define HIGHER_IS_BETTER    1
83 
84 #include "harness/errorHelpers.h"
85 
// MSVC only: provide the scalbn family in terms of the ldexp family.
// Each macro forwards both arguments unchanged and evaluates them once.
// NOTE(review): scalbn scales by a power of FLT_RADIX and ldexp by a power of
// two, so the substitution assumes FLT_RADIX == 2.
86 #if defined (_MSC_VER )
87     //Deal with missing scalbn on windows
88     #define scalbnf( _a, _i )       ldexpf( _a, _i )
89     #define scalbn( _a, _i )        ldexp( _a, _i )
90     #define scalbnl( _a, _i )       ldexpl( _a, _i )
91 #endif
92 
// Error metrics.  Each takes the value under test and a reference value held
// in a wider type, and returns the error as a float.
// NOTE(review): the exact definition of each metric (sign convention,
// treatment of NaN/infinity/zero) lives in the implementation -- not visible
// here.
93 float Abs_Error( float test, double reference );
94 float Ulp_Error( float test, double reference );
95 //float Ulp_Error_Half( float test, double reference );
96 float Bruteforce_Ulp_Error_Double( double test, long double reference );
97 
// Timing helpers.  GetTime returns a 64-bit time stamp; SubtractTime takes an
// end and a start stamp and returns a double.
// NOTE(review): the units of both are not visible here -- confirm.
98 uint64_t GetTime( void );
99 double SubtractTime( uint64_t endTime, uint64_t startTime );
// Kernel construction helpers.  c/count describe an array of strings, name is
// a string, and k and p are pointers through which kernel and program handles
// can be passed; MakeKernels additionally takes kernel_count.
// NOTE(review): presumably c is the program source, name the kernel name, and
// k/p receive the built objects; the meaning of the int result is not visible
// here -- confirm against the implementation.
100 int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p );
101 int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p );
102 
// Converts a bucket of 32 bits into a 64-bit pattern that is returned as a
// double, so that a sweep over 32-bit values becomes a search pattern through
// double.
//
// The upper 20 bits of `bits` become the upper 20 bits of the result and the
// lower 12 bits become its lower 12 bits, e.g. 0x89abcdef is first spread out
// to 0x89abc00000000def.  If bit 11 of `bits` is set, 0x1000 is then
// subtracted: the borrow runs through the empty middle bits and turns them all
// into ones (taking one off the upper field, with unsigned wrap-around when
// that field is zero).  The middle region is therefore either all zeros or all
// ones.
static inline double DoubleFromUInt32( uint32_t bits )
{
    const uint64_t upper20 = (uint64_t) (bits & ~0xfffU) << 32;
    const uint64_t lower12 = bits & 0xfffU;
    // 0x1000 when the leading bit of the lower segment is set, otherwise 0
    const uint64_t borrow = (bits & 0x800U) << 1;
    union{ uint64_t raw; double value; } pun;

    pun.raw = (upper20 | lower12) - borrow;
    return pun.value;
}
119 
// Takes a program handle plus a source line number and file name.
// NOTE(review): presumably reports the build log of p tagged with that
// location -- the implementation is not visible here.
// NOTE(review): identifiers that begin with an underscore followed by an
// uppercase letter are reserved for the implementation in C; renaming this
// would need a coordinated change where the function is defined.
120 void _LogBuildError( cl_program p, int line, const char *file );
// Convenience form that supplies the call site's __LINE__ and __FILE__.
121 #define LogBuildError( program )        _LogBuildError( program, __LINE__, __FILE__ )
122 
// NOTE(review): presumably the number of iterations used for a performance
// measurement -- the consumer is not visible here.
123 #define PERF_LOOP_COUNT 100
124 
// Tests whether x, once converted to float, is a subnormal single precision
// value.  The parameter is a double, but this predicate is meant for the
// single precision tests.
//
// Returns 1 for a non-zero subnormal of either sign and 0 otherwise.
static inline int IsFloatSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Classify by bit pattern rather than by floating-point comparison, so
    // that FTZ behavior cannot influence the answer.
    union{ float value; uint32_t raw; } pun;
    pun.value = fabsf((float)x);
    // With the sign cleared, the patterns 0x00000001 .. 0x007fffff are the
    // ones accepted; zero itself is excluded.
    return pun.raw != 0 && pun.raw < 0x00800000U;
#else
    // Non-radix-2, non-IEEE-754 hardware: fall back on floating-point
    // comparisons.  This fails if subnormals are flushed to zero.
    return fabs(x) < (double) FLT_MIN && x != 0.0;
#endif
}
138 
139 
// Tests whether x, once converted to double, is a subnormal double precision
// value.
//
// Returns 1 for a non-zero subnormal of either sign and 0 otherwise.
static inline int IsDoubleSubnormal( long double x )
{
#if 2 == FLT_RADIX
    // Classify by bit pattern rather than by floating-point comparison, so
    // that FTZ behavior cannot influence the answer.
    union{ double value; uint64_t raw; } pun;
    pun.value = fabs((double) x);
    // With the sign cleared, the patterns 0x0000000000000001 ..
    // 0x000fffffffffffff are the ones accepted; zero itself is excluded.
    return pun.raw != 0 && pun.raw < 0x0010000000000000ULL;
#else
    // Non-radix-2, non-IEEE-754 hardware: fall back on floating-point
    // comparisons.  This fails if subnormals are flushed to zero.
    return fabs(x) < (double) DBL_MIN && x != 0.0;
#endif
}
152 
// Reports whether a single precision result with reference value x may be
// treated as subnormal once a tolerance of `ulps` is taken into account.
//
// The spec is fairly clear that a hard cutoff may be enforced to prevent
// premature flushing to zero.  However, to avoid conflict for 1.0, results at
// TYPE_MIN + ulp_limit are still allowed to be flushed to zero: the tolerance
// (ulps times 2^-149) is taken off |x| before comparing against 2^-126.
//
// Returns non-zero when |x| - ulps * 2^-149 is below 2^-126.
static inline int IsFloatResultSubnormal( double x, float ulps )
{
    const double tolerance = MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149) * (double) ulps;
    const double lowest_magnitude = fabs(x) - tolerance;
    return lowest_magnitude < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
}
160 
// Absolute-error counterpart of IsFloatResultSubnormal: reports whether a
// single precision result with reference value x may be treated as subnormal
// when the tolerance is an absolute error instead of a ulp count.
//
//   x        reference value
//   abs_err  permitted absolute error
//
// Returns non-zero when |x| - abs_err is below 2^-126.
//
// The magnitude of x is used, as in IsFloatResultSubnormal.  Comparing the
// signed value instead would report every negative reference, however large
// its magnitude, as subnormal.
static inline int IsFloatResultSubnormalAbsError( double x , float abs_err)
{
  x = fabs(x) - abs_err;
  return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
}
166 
// Reports whether a double precision result with reference value x may be
// treated as subnormal once a tolerance of `ulps` is taken into account.
//
// The tolerance (ulps times 2^-1074) is taken off |x| before the comparison,
// so results slightly above the cutoff still count.
//
// Returns non-zero when |x| - ulps * 2^-1074 is below 2^-1022.
static inline int IsDoubleResultSubnormal( long double x, float ulps )
{
    const long double tolerance = MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074) * (long double) ulps;
    const long double lowest_magnitude = fabsl(x) - tolerance;
    return lowest_magnitude < MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 );
}
172 
IsFloatInfinity(double x)173 static inline int IsFloatInfinity(double x)
174 {
175   union { cl_float d; cl_uint u; } u;
176   u.d = (cl_float) x;
177   return ((u.u & 0x7fffffffU) == 0x7F800000U);
178 }
179 
IsFloatMaxFloat(double x)180 static inline int IsFloatMaxFloat(double x)
181 {
182   union { cl_float d; cl_uint u; } u;
183   u.d = (cl_float) x;
184   return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
185 }
186 
IsFloatNaN(double x)187 static inline int IsFloatNaN(double x)
188 {
189   union { cl_float d; cl_uint u; } u;
190   u.d = (cl_float) x;
191   return ((u.u & 0x7fffffffU) > 0x7F800000U);
192 }
193 
// Defined in another translation unit.
// NOTE(review): by its name this rounds x up to a power of two; what it
// returns for x == 0, for an exact power of two, or when the result would not
// fit in cl_uint is not visible here -- confirm against the implementation.
194 extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
195 
// Puts the x87 FPU into 64-bit (double extended) precision mode.
//
// Windows, since long double got deprecated there, leaves the x87 at 53-bit
// precision, which is the x87 default state.  That causes problems with the
// tests that convert long and ulong to float and double, or that otherwise
// deal with values needing more than 53 bits of precision.
//
// Only the two configurations handled below change anything; everywhere else
// this function is empty.
static inline void Force64BitFPUPrecision(void)
{
#if __MINGW32__
    // Normally one would write
    //     #include <float.h>
    //     _controlfp(_PC_64, _MCW_PC);
    // but _controlfp exists on MinGW32 and not on MinGW64.  Inline assembly
    // works for both, so it is used instead of keeping divergent code.
    unsigned short int saved_cw = 0;
    unsigned short int extended_cw = 0;
    __asm__ __volatile__ ("fstcw %0":"=m" (saved_cw));
    extended_cw = saved_cw | 0x0300;   // set precision to 64-bit
    __asm__ __volatile__ ("fldcw  %0"::"m" (extended_cw));
#elif defined( _WIN32 ) && defined( __INTEL_COMPILER )
    // The usual method (`_controlfp( _PC_64, _MCW_PC );') unfortunately does
    // *not* work on win.x64:
    // > On the x64 architecture, changing the floating point precision is not supported.
    // (Taken from http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
    int control_word;
    __asm { fnstcw control_word };    // Read the current FPU control word.
    // Clear bits 8-9, then set both: Precision Control = Double Extended Precision.
    control_word = ( control_word & 0xfffffcff ) | ( 3 << 8 );
    __asm { fldcw control_word };     // Load the updated FPU control word.
#else
    /* Implement for other platforms if needed */
#endif
}
226 
// Defined in another translation unit.  Takes a destination, a read-only
// source pattern and a byte count.
// NOTE(review): presumably mirrors the Darwin libc routine of the same name,
// which fills `bytes` bytes of dest with repeated copies of the 4-byte pattern
// at src_pattern -- confirm that the local implementation matches.
227 extern
228 void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
229 
// Overlay of a 32-bit signed integer (i) and a float (f) in the same storage,
// for moving between a float and an integer view of its bits.
// NOTE(review): the two views only line up if float is 32 bits wide --
// assumed, not checked here.
230 typedef union
231 {
232     int32_t i;
233     float   f;
234 }int32f_t;
235 
// Overlay of a 64-bit signed integer (l) and a double (d) in the same storage,
// for moving between a double and an integer view of its bits.
// NOTE(review): the two views only line up if double is 64 bits wide --
// assumed, not checked here.
236 typedef union
237 {
238     int64_t l;
239     double  d;
240 }int64d_t;
241 
// Arithmetic helpers that deliver their result through two double pointers
// (a "hi" and a "lo" part).  The *DD variants also take each operand as a
// hi/lo pair.
// NOTE(review): presumably double-double arithmetic in which a value is
// represented as hi + lo -- confirm against the implementation, which is not
// visible here.
242 void MulD(double *rhi, double *rlo, double u, double v);
243 void AddD(double *rhi, double *rlo, double a, double b);
244 void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
245 void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
246 void DivideDD(double *chi, double *clo, double a, double b);
// NOTE(review): the meaning of the int result (ordering vs. equality flag,
// NaN handling) is not visible here -- confirm before relying on it.
247 int compareFloats(float x, float y);
248 int compareDoubles(double x, double y);
249 
// NOTE(review): presumably logs which function is being tested together with
// the floating-point size and whether the fast-relaxed variant is in use --
// confirm against the implementation.
250 void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed);
251 
252 #endif /* UTILITY_H */
253 
254 
255