1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef CL_UTILS_H
17 #define CL_UTILS_H
18
19 #include "harness/testHarness.h"
20 #include "harness/compat.h"
21
22 #include <stdio.h>
23
24 #if !defined(_WIN32)
25 #include <sys/param.h>
26 #endif
27
28
29 #ifdef __MINGW32__
30 #define __mingw_printf printf
31 #endif
32 #include "harness/errorHelpers.h"
33
34 #include "harness/ThreadPool.h"
35
36
37
38 #include "test_config.h"
39
40 #ifdef __APPLE__
41 #include <OpenCL/opencl.h>
42 #else
43 #include <CL/opencl.h>
44 #endif
45
// Untyped data pointers and OpenCL memory objects shared by the test.
// They are only declared here; the definitions live elsewhere.
// NOTE(review): the names suggest input (gIn_*), output (gOut_*) and
// reference-result (*_reference) storage per precision (half, single,
// double) -- confirm against the definitions and the setup code.
extern void *gIn_half;
extern void *gOut_half;
extern void *gOut_half_reference;
extern void *gOut_half_reference_double;
extern void *gIn_single;
extern void *gOut_single;
extern void *gOut_single_reference;
extern void *gIn_double;
// The double-precision output declarations are disabled in this header.
// extern void *gOut_double;
// extern void *gOut_double_reference;
// cl_mem handles; presumably the device-side counterparts of the pointers
// above -- TODO confirm where they are created and released.
extern cl_mem gInBuffer_half;
extern cl_mem gOutBuffer_half;
extern cl_mem gInBuffer_single;
extern cl_mem gOutBuffer_single;
extern cl_mem gInBuffer_double;
// extern cl_mem gOutBuffer_double;
62
// Shared OpenCL context and command queue; declared here, defined elsewhere.
extern cl_context gContext;
extern cl_command_queue gQueue;
// NOTE(review): presumably device properties queried during setup -- confirm
// the units and where these are populated.
extern uint32_t gDeviceFrequency;
extern uint32_t gComputeDevices;
extern size_t gMaxThreadGroupSize;
extern size_t gWorkGroupSize;
// NOTE(review): int flags; the names suggest they enable double-precision
// testing and timing reports respectively -- confirm against the option parser.
extern int gTestDouble;
extern int gReportTimes;

// gWimpyMode indicates whether the test runs in wimpy mode, in which the
// size of 32-bit ranges is limited to a much smaller set. It is meant to be
// used as a smoke test.
extern bool gWimpyMode;
// NOTE(review): presumably the factor by which wimpy mode shrinks the ranges
// -- confirm against its users.
extern int gWimpyReductionFactor;
77
// Timing helpers. ReadTime() returns a 64-bit timestamp and SubtractTime()
// turns an (end, start) pair of such timestamps into a double.
// NOTE(review): the timestamp units and the unit of the returned difference
// are not visible from this header -- confirm in the implementation.
uint64_t ReadTime( void );
double SubtractTime( uint64_t endTime, uint64_t startTime );

// Vector-count helpers keyed by an index into the vector-size tables.
// NOTE(review): presumably numVecs() gives how many vectors cover `count`
// elements and runsOverBy() how many elements the last vector overshoots,
// with `aligned` selecting aligned vs. packed layout -- confirm.
cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned);
cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned);

// Takes an array of `len` source strings; presumably prints them -- confirm.
void printSource(const char * src[], int len);

// String tables with one entry per vector size: kVectorSizeCount regular
// sizes followed by kStrangeVectorSizeCount additional ones.
// NOTE(review): the entries are presumably fragments spliced into generated
// kernel source -- confirm against the definitions.
extern const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount];
extern const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount];
extern const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount];
extern const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount];

// OpenCL setup/teardown and kernel helpers.
// NOTE(review): InitCL()/ReleaseCL() presumably create and destroy the shared
// globals declared in this header; the meaning of RunKernel()'s int result
// and of `extraArg` is not visible here -- confirm in the implementation.
test_status InitCL( cl_device_id device );
void ReleaseCL( void );
int RunKernel( cl_device_id device, cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg);
// Builds a program for `device` from `count` source strings.
// NOTE(review): error behaviour (e.g. a NULL return) is not visible here -- confirm.
cl_program MakeProgram( cl_device_id device, const char *source[], int count );
95
// Reinterprets the bit pattern of `u` as a float. The value is written to one
// union member and read back through the other, so no numeric conversion
// takes place.
static inline float as_float(cl_uint u)
{
    union {
        cl_uint bits;
        float value;
    } pun;

    pun.bits = u;
    return pun.value;
}
// Reinterprets the bit pattern of `u` as a double. The value is written to one
// union member and read back through the other, so no numeric conversion
// takes place.
static inline double as_double(cl_ulong u)
{
    union {
        cl_ulong bits;
        double value;
    } pun;

    pun.bits = u;
    return pun.value;
}
98
99 // used to convert a bucket of bits into a search pattern through double
100 static inline cl_ulong DoubleFromUInt( cl_uint bits );
DoubleFromUInt(cl_uint bits)101 static inline cl_ulong DoubleFromUInt( cl_uint bits )
102 {
103 // split 0x89abcdef to 0x89abcd00000000ef
104 cl_ulong u = ((cl_ulong)(bits & ~0xffU) << 32) | ((cl_ulong)(bits & 0xffU));
105
106 // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s
107 u -= (cl_ulong)((bits & 0x80U) << 1);
108
109 return u;
110 }
111
// Returns 1 when the half-precision bit pattern `x` is subnormal, else 0.
// The sign bit is ignored; the remaining 15 bits must lie in [0x0001, 0x03ff],
// i.e. a zero exponent field with a nonzero mantissa. Zero is not subnormal.
static inline int IsHalfSubnormal( uint16_t x )
{
    const unsigned int magnitude = x & 0x7fffU;

    return magnitude != 0U && magnitude <= 0x03ffU;
}
117
// Prevent silent failures due to a missing FLT_RADIX: an undefined macro
// evaluates to 0 inside #if, so the `#if 2 == FLT_RADIX` tests below would
// quietly select the floating-point fallback path instead of failing.
#ifndef FLT_RADIX
#error FLT_RADIX is not defined by float.h
#endif
122
// Returns 1 when `x`, converted to float, is a subnormal single-precision
// value, else 0. Zero is not reported as subnormal.
static inline int IsFloatSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Inspect the bit pattern as an integer to avoid problems with FTZ behavior.
    union {
        float value;
        uint32_t bits;
    } pun;

    // fabsf() clears the sign, so only exponent and mantissa bits remain.
    pun.value = fabsf( (float) x );

    // Subnormal: zero exponent field with a nonzero mantissa.
    return pun.bits != 0U && pun.bits <= 0x007fffffU;
#else
    // Rely on floating point hardware for non-radix2 non-IEEE-754 hardware --
    // will fail if subnormals are flushed to zero.
    return x != 0.0 && fabs(x) < (double) FLT_MIN;
#endif
}
135
// Returns 1 when `x`, converted to double, is a subnormal double-precision
// value, else 0. Zero is not reported as subnormal.
static inline int IsDoubleSubnormal( long double x )
{
#if 2 == FLT_RADIX
    // Inspect the bit pattern as an integer to avoid problems with FTZ behavior.
    union {
        double value;
        uint64_t bits;
    } pun;

    // fabs() clears the sign, so only exponent and mantissa bits remain.
    pun.value = fabs( (double) x );

    // Subnormal: zero exponent field with a nonzero mantissa.
    return pun.bits != 0U && pun.bits <= 0x000fffffffffffffULL;
#else
    // Rely on floating point hardware for non-radix2 non-IEEE-754 hardware --
    // will fail if subnormals are flushed to zero.
    return x != 0.0 && fabs(x) < (double) DBL_MIN;
#endif
}
148
149 #endif /* CL_UTILS_H */
150
151
152
153