1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 #include "harness/conversions.h"
18 #include "harness/stringHelpers.h"
19 #include "harness/typeWrappers.h"
20
21 #include <limits.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <vector>
27
28 #include "procs.h"
29
30 // clang-format off
31
32 static char extension[128] = { 0 };
33 static char strLoad[128] = { 0 };
34 static char strStore[128] = { 0 };
35 static const char *regLoad = "as_%s%s(src[tid]);\n";
36 static const char *v3Load = "as_%s%s(vload3(tid,(__global %s*)src));\n";
37 static const char *regStore = "dst[tid] = tmp;\n";
38 static const char *v3Store = "vstore3(tmp, tid, (__global %s*)dst);\n";
39
40 static const char* astype_kernel_pattern[] = {
41 extension,
42 "__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
43 "{\n"
44 " int tid = get_global_id( 0 );\n",
45 " %s%s tmp = ", strLoad,
46 " ", strStore,
47 "}\n"};
48
49 // clang-format on
50
test_astype_set(cl_device_id device,cl_context context,cl_command_queue queue,ExplicitType inVecType,ExplicitType outVecType,unsigned int vecSize,unsigned int outVecSize,int numElements)51 int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
52 unsigned int vecSize, unsigned int outVecSize,
53 int numElements )
54 {
55 int error;
56
57 clProgramWrapper program;
58 clKernelWrapper kernel;
59 clMemWrapper streams[ 2 ];
60
61 size_t threads[ 1 ], localThreads[ 1 ];
62 size_t typeSize = get_explicit_type_size( inVecType );
63 size_t outTypeSize = get_explicit_type_size(outVecType);
64 char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
65 MTdataHolder d(gRandomSeed);
66
67 std::ostringstream sstr;
68 if (outVecType == kDouble || inVecType == kDouble)
69 sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
70
71 if (outVecType == kHalf || inVecType == kHalf)
72 sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
73
74 strcpy(extension, sstr.str().c_str());
75
76 if (vecSize == 3)
77 std::snprintf(strLoad, sizeof(strLoad), v3Load,
78 get_explicit_type_name(outVecType), sizeNames[outVecSize],
79 get_explicit_type_name(inVecType));
80 else
81 std::snprintf(strLoad, sizeof(strLoad), regLoad,
82 get_explicit_type_name(outVecType),
83 sizeNames[outVecSize]);
84
85 if (outVecSize == 3)
86 std::snprintf(strStore, sizeof(strStore), v3Store,
87 get_explicit_type_name(outVecType));
88 else
89 std::snprintf(strStore, sizeof(strStore), "%s", regStore);
90
91 auto str =
92 concat_kernel(astype_kernel_pattern,
93 sizeof(astype_kernel_pattern) / sizeof(const char *));
94 std::string kernelSource =
95 str_sprintf(str, get_explicit_type_name(inVecType), sizeNames[vecSize],
96 get_explicit_type_name(outVecType), sizeNames[outVecSize],
97 get_explicit_type_name(outVecType), sizeNames[outVecSize]);
98
99 const char *ptr = kernelSource.c_str();
100 error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
101 test_error( error, "Unable to create testing kernel" );
102
103 // Create some input values
104 size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
105 std::vector<char> inBuffer(inBufferSize);
106 size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
107 std::vector<char> outBuffer(outBufferSize);
108
109 generate_random_data(inVecType, numElements * vecSize, d,
110 &inBuffer.front());
111
112 // Create I/O streams and set arguments
113 streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
114 &inBuffer.front(), &error);
115 test_error( error, "Unable to create I/O stream" );
116 streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
117 test_error( error, "Unable to create I/O stream" );
118
119 error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
120 test_error( error, "Unable to set kernel argument" );
121 error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
122 test_error( error, "Unable to set kernel argument" );
123
124
125 // Run the kernel
126 threads[ 0 ] = numElements;
127 error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
128 test_error( error, "Unable to get group size to run with" );
129
130 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
131 test_error( error, "Unable to run kernel" );
132
133 // Get the results and compare
134 // The beauty is that astype is supposed to return the bit pattern as a different type, which means
135 // the output should have the exact same bit pattern as the input. No interpretation necessary!
136 error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
137 &outBuffer.front(), 0, NULL, NULL);
138 test_error( error, "Unable to read results" );
139
140 char *expected = &inBuffer.front();
141 char *actual = &outBuffer.front();
142 size_t compSize = typeSize*vecSize;
143 if(outTypeSize*outVecSize < compSize) {
144 compSize = outTypeSize*outVecSize;
145 }
146
147 if(outVecSize == 4 && vecSize == 3)
148 {
149 // as_type4(vec3) should compile but produce undefined results??
150 return 0;
151 }
152
153 if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
154 {
155 // as_typen(vecm) should compile and run but produce
156 // implementation-defined results for m != n
157 // and n*sizeof(type) = sizeof(vecm)
158 return 0;
159 }
160
161 for( int i = 0; i < numElements; i++ )
162 {
163 if( memcmp( expected, actual, compSize ) != 0 )
164 {
165 char expectedString[ 1024 ], actualString[ 1024 ];
166 log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
167 (int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize,
168 GetDataVectorString( expected, typeSize, vecSize, expectedString ),
169 GetDataVectorString( actual, typeSize, vecSize, actualString ) );
170 log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
171 kernelSource.c_str(), (int)threads[0],
172 (int)localThreads[0]);
173 return 1;
174 }
175 expected += typeSize * vecSize;
176 actual += outTypeSize * outVecSize;
177 }
178
179 return 0;
180 }
181
test_astype(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)182 int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
183 {
184 // Note: although casting to different vector element sizes that match the same size (i.e. short2 -> char4) is
185 // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
186 // for us to verify what is "valid". So the only thing we can test are types that match in size independent
187 // of the element count (char -> uchar, etc)
188 const std::vector<ExplicitType> vecTypes = { kChar, kUChar, kShort,
189 kUShort, kInt, kUInt,
190 kLong, kULong, kFloat,
191 kHalf, kDouble };
192 const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
193 unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx;
194 size_t inTypeSize, outTypeSize;
195 int error = 0;
196
197 bool fp16Support = is_extension_available(device, "cl_khr_fp16");
198 bool fp64Support = is_extension_available(device, "cl_khr_fp64");
199
200 auto skip_type = [&](ExplicitType et) {
201 if ((et == kLong || et == kULong) && !gHasLong)
202 return true;
203 else if (et == kDouble && !fp64Support)
204 return true;
205 else if (et == kHalf && !fp16Support)
206 return true;
207 return false;
208 };
209
210 for (inTypeIdx = 0; inTypeIdx < vecTypes.size(); inTypeIdx++)
211 {
212 inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]);
213
214 if (skip_type(vecTypes[inTypeIdx])) continue;
215
216 for (outTypeIdx = 0; outTypeIdx < vecTypes.size(); outTypeIdx++)
217 {
218 outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]);
219
220 if (skip_type(vecTypes[outTypeIdx])) continue;
221
222 // change this check
223 if( inTypeIdx == outTypeIdx ) {
224 continue;
225 }
226
227 log_info( " (%s->%s)\n", get_explicit_type_name( vecTypes[ inTypeIdx ] ), get_explicit_type_name( vecTypes[ outTypeIdx ] ) );
228 fflush( stdout );
229
230 for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
231 {
232 for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++)
233 {
234 if(vecSizes[sizeIdx]*inTypeSize !=
235 vecSizes[outSizeIdx]*outTypeSize )
236 {
237 continue;
238 }
239 error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems );
240 }
241 }
242 if(get_explicit_type_size(vecTypes[inTypeIdx]) ==
243 get_explicit_type_size(vecTypes[outTypeIdx])) {
244 // as_type3(vec4) allowed, as_type4(vec3) not allowed
245 error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 3, 4, n_elems );
246 error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 4, 3, n_elems );
247 }
248
249 }
250 }
251 return error;
252 }
253
254
255