• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
// Stringifies its argument exactly as written (single-level: no macro expansion
// is applied to the argument by this macro itself).
#define str(s) #s

// Reference population count, used to check device results.
//
//   x   - value whose set bits are counted (evaluated once)
//   __T - integer type x is interpreted as before counting
//   __n - number of low-order bits to examine (at most 64)
//   __r - lvalue that receives the number of set bits among those __n bits
//
// The value is widened to unsigned long long before the loop so that the right
// shift never operates on a negative signed value. Converting a negative value
// to an unsigned type is defined modulo 2^64, so the low __n bits match the
// two's complement bit pattern of the original.
//
// Wrapped in do { } while (0) so the macro behaves as a single statement
// (safe in an unbraced if/else); internal names carry a suffix to avoid
// clashing with identifiers used in the arguments.
#define __popcnt(x, __T, __n, __r) \
    do { \
        unsigned long long popcnt_bits_ = (unsigned long long)(__T)(x); \
        int popcnt_k_; \
        (__r) = 0; \
        for(popcnt_k_ = 0; popcnt_k_ < (__n); popcnt_k_++) \
        { \
            if(popcnt_bits_ & 1ULL) (__r)++; \
            popcnt_bits_ >>= 1; \
        } \
    } while (0)
38 
// Generates `static int verify_popcount_<T>(p, r, n, sizeName, vecSize)`.
//
// The generated function treats p (kernel input) and r (kernel output) as
// arrays of n elements of type __T and compares every output element with a
// host-computed population count of the matching input element.
//
//   sizeName - vector-size suffix, used only in the failure message
//   vecSize  - accepted for signature compatibility with verifyFunc; unused
//
// Returns 0 when all n elements match. On the first mismatch it logs the
// element index, the type, the input value in hex, and "*expected vs actual",
// then returns -1.
//
// The failure message passes explicitly cast arguments: the index as
// unsigned long long for %llu, and the input widened to unsigned long long and
// masked to the width of __T for %llx, so 64-bit inputs are printed in full and
// negative narrow inputs are not sign-extended in the log.
#define __verify_popcount_func(__T) \
    static int verify_popcount_##__T( const void *p, const void *r, size_t n, const char *sizeName, size_t vecSize ) \
    { \
        const __T *src = (const __T *) p; \
        const __T *dst = (const __T *) r; \
        const int bits = (int)( sizeof(__T) * 8 ); \
        size_t i; \
        (void)vecSize; \
        for(i = 0; i < n; i++) \
        { \
            __T x = src[i]; \
            __T res = dst[i]; \
            __T ref; \
            __popcnt(x, __T, bits, ref); \
            if(res != ref) \
            { \
                log_info( "%llu) Failure for popcount( (%s%s) 0x%llx ) = *%d vs %d\n", \
                          (unsigned long long)i, str(__T), sizeName, \
                          (unsigned long long)x & ( ~0ULL >> ( 64 - bits ) ), \
                          (int)ref, (int)res ); \
                return -1; \
            } \
        } \
        return 0; \
    }
60 
61 __verify_popcount_func(cl_char);
62 __verify_popcount_func(cl_uchar);
63 __verify_popcount_func(cl_short);
64 __verify_popcount_func(cl_ushort);
65 __verify_popcount_func(cl_int);
66 __verify_popcount_func(cl_uint);
67 __verify_popcount_func(cl_long);
68 __verify_popcount_func(cl_ulong);
69 
70 typedef int (*verifyFunc)( const void *, const void *, size_t n, const char *sizeName, size_t vecSize);
71 static const verifyFunc verify[] = {   verify_popcount_cl_char, verify_popcount_cl_uchar,
72     verify_popcount_cl_short, verify_popcount_cl_ushort,
73     verify_popcount_cl_int, verify_popcount_cl_uint,
74     verify_popcount_cl_long, verify_popcount_cl_ulong };
75 
76 static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
77 
78 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
79 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
80 static const char *vector_param_size_names[] = { "", "2", "", "4", "8", "16" };
81 static const size_t  kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
82 
// Logs the kernel source: each of the nSrcStrings fragments in src is written
// with log_info, in order. A non-positive count logs nothing.
static void printSrc(const char *src[], int nSrcStrings)
{
    const char **fragment = src;
    int remaining = nSrcStrings;

    while (remaining > 0)
    {
        log_info("%s", *fragment);
        ++fragment;
        --remaining;
    }
}
89 
test_popcount(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)90 int test_popcount(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
91 {
92     cl_int *input_ptr[1], *output_ptr, *p;
93     int err;
94     int i;
95     cl_uint vectorSize;
96     cl_uint type;
97     MTdata d;
98     int fail_count = 0;
99 
100     size_t length = sizeof(cl_int) * 8 * n_elems;
101 
102     input_ptr[0] = (cl_int*)malloc(length);
103     output_ptr   = (cl_int*)malloc(length);
104 
105     d = init_genrand( gRandomSeed );
106     p = input_ptr[0];
107     for (i=0; i<8 * n_elems; i++)
108         p[i] = genrand_int32(d);
109     free_mtdata(d);  d = NULL;
110 
111     for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
112     {
113         //embedded devices don't support long/ulong so skip over
114         if (! gHasLong && strstr(test_str_names[type],"long"))
115         {
116            log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
117            continue;
118         }
119 
120         verifyFunc f = verify[ type ];
121         // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
122         size_t elementCount = length / kSizes[type];
123         cl_mem streams[2];
124 
125         log_info( "%s", test_str_names[type] );
126         fflush( stdout );
127 
128         // Set up data streams for the type
129         streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
130         if (!streams[0])
131         {
132             log_error("clCreateBuffer failed\n");
133             return -1;
134         }
135         streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
136         if (!streams[1])
137         {
138             log_error("clCreateBuffer failed\n");
139             return -1;
140         }
141 
142         err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
143         if (err != CL_SUCCESS)
144         {
145             log_error("clEnqueueWriteBuffer failed\n");
146             return -1;
147         }
148 
149         for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
150         {
151             cl_program program = NULL;
152             cl_kernel kernel = NULL;
153 
154             const char *source[] = {
155                 "__kernel void test_popcount_", test_str_names[type], vector_size_names[vectorSize],
156                 "(__global ", test_str_names[type], vector_param_size_names[vectorSize],
157                 " *srcA, __global ", test_str_names[type], vector_param_size_names[vectorSize],
158                 " *dst)\n"
159                 "{\n"
160                 "    int  tid = get_global_id(0);\n"
161                 "\n"
162                 "    ", test_str_names[type], vector_size_names[vectorSize], " sA;\n",
163                 "    sA = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcA )" : "srcA[tid]", ";\n",
164                 "    ", test_str_names[type], vector_size_names[vectorSize], " dstVal = popcount(sA);\n"
165                 "     ", ( vector_sizes[ vectorSize ] == 3 ) ? "vstore3( dstVal, tid, dst )" : "dst[ tid ] = dstVal", ";\n",
166                 "}\n" };
167 
168 
169             char kernelName[128];
170             snprintf( kernelName, sizeof( kernelName ), "test_popcount_%s%s", test_str_names[type], vector_size_names[vectorSize] );
171 
172             err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
173 
174             if (err) {
175                 return -1;
176             }
177 
178             err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
179             err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
180             if (err != CL_SUCCESS)
181             {
182                 log_error("clSetKernelArgs failed\n");
183                 return -1;
184             }
185 
186             //Wipe the output buffer clean
187             uint32_t pattern = 0xdeadbeef;
188             memset_pattern4( output_ptr, &pattern, length );
189             err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
190             if (err != CL_SUCCESS)
191             {
192                 log_error("clEnqueueWriteBuffer failed\n");
193                 return -1;
194             }
195 
196             size_t size = elementCount / (vector_sizes[vectorSize]);
197             err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
198             if (err != CL_SUCCESS)
199             {
200                 log_error("clEnqueueNDRangeKernel failed\n");
201                 return -1;
202             }
203 
204             err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
205             if (err != CL_SUCCESS)
206             {
207                 log_error("clEnqueueReadBuffer failed\n");
208                 return -1;
209             }
210 
211             char *inP = (char *)input_ptr[0];
212             char *outP = (char *)output_ptr;
213 
214             for( size_t e = 0; e < size; e++ )
215             {
216                 if( f( inP, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
217                     printSrc(source, sizeof(source)/sizeof(source[0]));
218                     ++fail_count; break; // return -1;
219                 }
220                 inP += kSizes[type] * ( (vector_sizes[vectorSize]) );
221                 outP += kSizes[type] * ( (vector_sizes[vectorSize]) );
222             }
223 
224             clReleaseKernel( kernel );
225             clReleaseProgram( program );
226             log_info( "." );
227             fflush( stdout );
228         }
229 
230         clReleaseMemObject( streams[0] );
231         clReleaseMemObject( streams[1] );
232         log_info( "done\n" );
233     }
234 
235 
236     if(fail_count) {
237         log_info("Failed on %d types\n", fail_count);
238         return -1;
239     }
240 
241     free(input_ptr[0]);
242     free(output_ptr);
243 
244     return err;
245 }
246 
247 
248