• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 #include "harness/conversions.h"
18 #include "harness/stringHelpers.h"
19 #include "harness/typeWrappers.h"
20 
21 #include <limits.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <vector>
27 
28 #include "procs.h"
29 
30 // clang-format off
31 
32 static char extension[128] = { 0 };
33 static char strLoad[128] = { 0 };
34 static char strStore[128] = { 0 };
35 static const char *regLoad = "as_%s%s(src[tid]);\n";
36 static const char *v3Load = "as_%s%s(vload3(tid,(__global %s*)src));\n";
37 static const char *regStore = "dst[tid] = tmp;\n";
38 static const char *v3Store = "vstore3(tmp, tid, (__global %s*)dst);\n";
39 
40 static const char* astype_kernel_pattern[] = {
41 extension,
42 "__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
43 "{\n"
44 "    int tid = get_global_id( 0 );\n",
45 "    %s%s tmp = ", strLoad,
46 "    ", strStore,
47 "}\n"};
48 
49 // clang-format on
50 
test_astype_set(cl_device_id device,cl_context context,cl_command_queue queue,ExplicitType inVecType,ExplicitType outVecType,unsigned int vecSize,unsigned int outVecSize,int numElements)51 int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
52                     unsigned int vecSize, unsigned int outVecSize,
53                     int numElements )
54 {
55     int error;
56 
57     clProgramWrapper program;
58     clKernelWrapper kernel;
59     clMemWrapper streams[ 2 ];
60 
61     size_t threads[ 1 ], localThreads[ 1 ];
62     size_t typeSize = get_explicit_type_size( inVecType );
63     size_t outTypeSize = get_explicit_type_size(outVecType);
64     char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
65     MTdataHolder d(gRandomSeed);
66 
67     std::ostringstream sstr;
68     if (outVecType == kDouble || inVecType == kDouble)
69         sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
70 
71     if (outVecType == kHalf || inVecType == kHalf)
72         sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
73 
74     strcpy(extension, sstr.str().c_str());
75 
76     if (vecSize == 3)
77         std::snprintf(strLoad, sizeof(strLoad), v3Load,
78                       get_explicit_type_name(outVecType), sizeNames[outVecSize],
79                       get_explicit_type_name(inVecType));
80     else
81         std::snprintf(strLoad, sizeof(strLoad), regLoad,
82                       get_explicit_type_name(outVecType),
83                       sizeNames[outVecSize]);
84 
85     if (outVecSize == 3)
86         std::snprintf(strStore, sizeof(strStore), v3Store,
87                       get_explicit_type_name(outVecType));
88     else
89         std::snprintf(strStore, sizeof(strStore), "%s", regStore);
90 
91     auto str =
92         concat_kernel(astype_kernel_pattern,
93                       sizeof(astype_kernel_pattern) / sizeof(const char *));
94     std::string kernelSource =
95         str_sprintf(str, get_explicit_type_name(inVecType), sizeNames[vecSize],
96                     get_explicit_type_name(outVecType), sizeNames[outVecSize],
97                     get_explicit_type_name(outVecType), sizeNames[outVecSize]);
98 
99     const char *ptr = kernelSource.c_str();
100     error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
101     test_error( error, "Unable to create testing kernel" );
102 
103     // Create some input values
104     size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
105     std::vector<char> inBuffer(inBufferSize);
106     size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
107     std::vector<char> outBuffer(outBufferSize);
108 
109     generate_random_data(inVecType, numElements * vecSize, d,
110                          &inBuffer.front());
111 
112     // Create I/O streams and set arguments
113     streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
114                                 &inBuffer.front(), &error);
115     test_error( error, "Unable to create I/O stream" );
116     streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
117     test_error( error, "Unable to create I/O stream" );
118 
119     error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
120     test_error( error, "Unable to set kernel argument" );
121     error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
122     test_error( error, "Unable to set kernel argument" );
123 
124 
125     // Run the kernel
126     threads[ 0 ] = numElements;
127     error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
128     test_error( error, "Unable to get group size to run with" );
129 
130     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
131     test_error( error, "Unable to run kernel" );
132 
133     // Get the results and compare
134     // The beauty is that astype is supposed to return the bit pattern as a different type, which means
135     // the output should have the exact same bit pattern as the input. No interpretation necessary!
136     error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
137                                 &outBuffer.front(), 0, NULL, NULL);
138     test_error( error, "Unable to read results" );
139 
140     char *expected = &inBuffer.front();
141     char *actual = &outBuffer.front();
142     size_t compSize = typeSize*vecSize;
143     if(outTypeSize*outVecSize < compSize) {
144         compSize = outTypeSize*outVecSize;
145     }
146 
147     if(outVecSize == 4 && vecSize == 3)
148     {
149         // as_type4(vec3) should compile but produce undefined results??
150         return 0;
151     }
152 
153     if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
154     {
155         // as_typen(vecm) should compile and run but produce
156         // implementation-defined results for m != n
157         // and n*sizeof(type) = sizeof(vecm)
158         return 0;
159     }
160 
161     for( int i = 0; i < numElements; i++ )
162     {
163         if( memcmp( expected, actual, compSize ) != 0 )
164         {
165             char expectedString[ 1024 ], actualString[ 1024 ];
166             log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
167                       (int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize,
168                       GetDataVectorString( expected, typeSize, vecSize, expectedString ),
169                       GetDataVectorString( actual, typeSize, vecSize, actualString ) );
170             log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
171                       kernelSource.c_str(), (int)threads[0],
172                       (int)localThreads[0]);
173             return 1;
174         }
175         expected += typeSize * vecSize;
176         actual += outTypeSize * outVecSize;
177     }
178 
179     return 0;
180 }
181 
test_astype(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)182 int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
183 {
184     // Note: although casting to different vector element sizes that match the same size (i.e. short2 -> char4) is
185     // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
186     // for us to verify what is "valid". So the only thing we can test are types that match in size independent
187     // of the element count (char -> uchar, etc)
188     const std::vector<ExplicitType> vecTypes = { kChar,   kUChar, kShort,
189                                                  kUShort, kInt,   kUInt,
190                                                  kLong,   kULong, kFloat,
191                                                  kHalf,   kDouble };
192     const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
193     unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx;
194     size_t inTypeSize, outTypeSize;
195     int error = 0;
196 
197     bool fp16Support = is_extension_available(device, "cl_khr_fp16");
198     bool fp64Support = is_extension_available(device, "cl_khr_fp64");
199 
200     auto skip_type = [&](ExplicitType et) {
201         if ((et == kLong || et == kULong) && !gHasLong)
202             return true;
203         else if (et == kDouble && !fp64Support)
204             return true;
205         else if (et == kHalf && !fp16Support)
206             return true;
207         return false;
208     };
209 
210     for (inTypeIdx = 0; inTypeIdx < vecTypes.size(); inTypeIdx++)
211     {
212         inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]);
213 
214         if (skip_type(vecTypes[inTypeIdx])) continue;
215 
216         for (outTypeIdx = 0; outTypeIdx < vecTypes.size(); outTypeIdx++)
217         {
218             outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]);
219 
220             if (skip_type(vecTypes[outTypeIdx])) continue;
221 
222             // change this check
223             if( inTypeIdx == outTypeIdx ) {
224                 continue;
225             }
226 
227             log_info( " (%s->%s)\n", get_explicit_type_name( vecTypes[ inTypeIdx ] ), get_explicit_type_name( vecTypes[ outTypeIdx ] ) );
228             fflush( stdout );
229 
230             for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
231             {
232                 for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++)
233                 {
234                     if(vecSizes[sizeIdx]*inTypeSize !=
235                        vecSizes[outSizeIdx]*outTypeSize )
236                     {
237                         continue;
238                     }
239                     error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems );
240                 }
241             }
242             if(get_explicit_type_size(vecTypes[inTypeIdx]) ==
243                get_explicit_type_size(vecTypes[outTypeIdx])) {
244                 // as_type3(vec4) allowed, as_type4(vec3) not allowed
245                 error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 3, 4, n_elems );
246                 error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 4, 3, n_elems );
247             }
248 
249         }
250     }
251     return error;
252 }
253 
254 
255