• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "harness/conversions.h"
18 #include "harness/typeWrappers.h"
19 #include "harness/errorHelpers.h"
20 
21 
22 
23 
24 #define DEBUG 0
25 #define DEPTH 16
26 // Limit the maximum code size for any given kernel.
27 #define MAX_CODE_SIZE (1024*32)
28 
29 const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1};
30 const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"};
31 
32 // Creates a kernel by enumerating all possible ways of building the vector out of vloads
33 // skip_to_results will skip results up to a given number. If the amount of code generated
34 // is greater than MAX_CODE_SIZE, this function will return the number of results used,
35 // which can then be used as the skip_to_result value to continue where it left off.
create_kernel(ExplicitType type,int output_size,char * program,int * number_of_results,int skip_to_result)36 int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) {
37 
38     int number_of_sizes;
39 
40     switch (output_size) {
41         case 1:
42             number_of_sizes = 1;
43             break;
44         case 2:
45             number_of_sizes = 2;
46             break;
47         case 3:
48             number_of_sizes = 3;
49             break;
50         case 4:
51             number_of_sizes = 4;
52             break;
53         case 8:
54             number_of_sizes = 5;
55             break;
56         case 16:
57             number_of_sizes = 6;
58             break;
59         default:
60             log_error("Invalid size: %d\n", output_size);
61             return -1;
62     }
63 
64     int total_results = 0;
65     int current_result = 0;
66     int total_vloads = 0;
67     int total_program_length = 0;
68     int aborted_due_to_size = 0;
69 
70     if (skip_to_result < 0)
71         skip_to_result = 0;
72 
73     // The line of code for the vector creation
74     char line[1024];
75     // Keep track of what size vector we are using in each position so we can iterate through all fo them
76     int pos[DEPTH];
77     int max_size = output_size;
78     if (DEBUG > 1) log_info("max_size: %d\n", max_size);
79 
80     program[0] = '\0';
81     sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n",
82             type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
83             get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]);
84     total_program_length += (int)strlen(program);
85 
86     char storePrefix[ 128 ], storeSuffix[ 128 ];
87 
88     // Start out trying sizes 1,1,1,1,1...
89     for (int i=0; i<DEPTH; i++)
90         pos[i] = 0;
91 
92     int done = 0;
93     while (!done) {
94         if (DEBUG > 1) {
95             log_info("pos size[] = [");
96             for (int k=0; k<DEPTH; k++)
97                 log_info(" %d ", pos[k]);
98             log_info("]\n");
99         }
100 
101         // Go through the selected vector sizes and see if the first n of them fit the
102         //  required size exactly.
103         int size_so_far = 0;
104         int vloads;
105         for ( vloads=0; vloads<DEPTH; vloads++) {
106             if (size_so_far + sizes[pos[vloads]] <= max_size) {
107                 size_so_far += sizes[pos[vloads]];
108             } else {
109                 break;
110             }
111         }
112         if (DEBUG > 1)  log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
113 
114         // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations
115         //  of the sizes to the right. Prune them from the search.
116         if (size_so_far != max_size) {
117             // Zero all the sizes to the right
118             for (int k=vloads+1; k<DEPTH; k++) {
119                 pos[k] = 0;
120             }
121             // Increment this current size and propagate the values up if needed
122             for (int d=vloads; d>=0; d--) {
123                 pos[d]++;
124                 if (pos[d] >= number_of_sizes) {
125                     pos[d] = 0;
126                     if (d == 0) {
127                         // If we rolled over then we are done
128                         done = 1;
129                         break;
130                     }
131                 } else {
132                     break;
133                 }
134             }
135             // Go on to the next size since this one (and all others "under" it) didn't fit
136             continue;
137         }
138 
139 
140         // Generate the actual load line if we are building this part
141         line[0]= '\0';
142         if (skip_to_result == 0 || total_results >= skip_to_result) {
143             if( number_of_sizes == 3 )
144             {
145                 sprintf( storePrefix, "vstore3( " );
146                 sprintf( storeSuffix, ", %d, result )", current_result );
147             }
148             else
149             {
150                 sprintf( storePrefix, "result[%d] = ", current_result );
151                 storeSuffix[ 0 ] = 0;
152             }
153 
154             sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size);
155             current_result++;
156 
157             int offset = 0;
158             for (int i=0; i<vloads; i++) {
159                 if (pos[i] == 0)
160                     sprintf(line + strlen(line), "src[%d]", offset);
161                 else
162                     sprintf(line + strlen(line), "vload%s(0,src+%d)", size_names[pos[i]], offset);
163                 offset += sizes[pos[i]];
164                 if (i<(vloads-1))
165                     sprintf(line + strlen(line), ",");
166             }
167             sprintf(line + strlen(line), ")%s;\n", storeSuffix);
168 
169             strcat(program, line);
170             total_vloads += vloads;
171         }
172         total_results++;
173         total_program_length += (int)strlen(line);
174         if (total_program_length > MAX_CODE_SIZE) {
175             aborted_due_to_size = 1;
176             done = 1;
177         }
178 
179 
180         if (DEBUG) log_info("line is: %s", line);
181 
182         // If we did not use all of them, then we ignore any changes further to the right.
183         // We do this by causing those loops to skip on the next iteration.
184         if (vloads < DEPTH) {
185             if (DEBUG > 1) log_info("done with this depth\n");
186             for (int k=vloads; k<DEPTH; k++)
187                 pos[k] = number_of_sizes;
188         }
189 
190         // Increment the far right size by 1, rolling over as needed
191         for (int d=DEPTH-1; d>=0; d--) {
192             pos[d]++;
193             if (pos[d] >= number_of_sizes) {
194                 pos[d] = 0;
195                 if (d == 0) {
196                     // If we rolled over at the far-left then we are done
197                     done = 1;
198                     break;
199                 }
200             } else {
201                 break;
202             }
203         }
204         if (done)
205             break;
206 
207         // Continue until we are done.
208     }
209     strcat(program, "}\n\n"); //log_info("%s\n", program);
210     total_program_length += 3;
211     if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n",
212                         get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads);
213     *number_of_results = current_result;
214     if (aborted_due_to_size)
215         return total_results;
216     return 0;
217 }
218 
219 
220 
221 
test_vector_creation(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)222 int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
223 {
224     ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
225     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16};
226 
227     char *program_source;
228     int error;
229     int total_errors = 0;
230 
231     cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
232     cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
233     void *input_data_converted;
234     void *output_data;
235 
236     int number_of_results;;
237 
238     input_data_converted = malloc(sizeof(cl_double)*16);
239     program_source = (char*)malloc(sizeof(char)*1024*1024*4);
240 
241     // Iterate over all the types
242     for (int type_index=0; type_index<10; type_index++) {
243     if(!gHasLong && ((vecType[type_index] == kLong)  || (vecType[type_index] == kULong)))
244     {
245       log_info("Long/ULong data type not supported on this device\n");
246       continue;
247     }
248 
249         clMemWrapper input;
250 
251         if (vecType[type_index] == kDouble) {
252             if (!is_extension_available(deviceID, "cl_khr_fp64")) {
253                 log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
254                 continue;
255             }
256             log_info("Testing doubles.\n");
257         }
258 
259         // Convert the data to the right format for the test.
260         memset(input_data_converted, 0xff, sizeof(cl_double)*16);
261         if (vecType[type_index] != kDouble) {
262             for (int j=0; j<16; j++) {
263                 convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j,
264                                        kInt, 0, kRoundToEven, vecType[type_index]);
265             }
266         } else {
267             memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16);
268         }
269 
270         input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16,
271                                (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error);
272         if (error) {
273             print_error(error, "clCreateBuffer failed");
274             total_errors++;
275             continue;
276         }
277 
278         // Iterate over all the vector sizes.
279         for (int size_index=1; size_index< 5; size_index++) {
280             size_t global[] = {1,1,1};
281             int number_generated = -1;
282             int previous_number_generated = 0;
283 
284             log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]);
285             while (number_generated != 0) {
286                 clMemWrapper output;
287                 clKernelWrapper kernel;
288                 clProgramWrapper program;
289 
290                 number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated);
291                 if (number_generated != 0) {
292                     if (previous_number_generated == 0)
293                         log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0);
294                     log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1);
295                 }
296 
297                 error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation");
298                 if (error) {
299                     log_error("create_single_kernel_helper failed.\n");
300                     total_errors++;
301                     break;
302                 }
303 
304                 output = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
305                                         number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index],
306                                         NULL, &error);
307                 if (error) {
308                     print_error(error, "clCreateBuffer failed");
309                     total_errors++;
310                     break;
311                 }
312 
313                 error = clSetKernelArg(kernel, 0, sizeof(input), &input);
314                 error |= clSetKernelArg(kernel, 1, sizeof(output), &output);
315                 if (error) {
316                     print_error(error, "clSetKernelArg failed");
317                     total_errors++;
318                     break;
319                 }
320 
321                 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
322                 if (error) {
323                     print_error(error, "clEnqueueNDRangeKernel failed");
324                     total_errors++;
325                     break;
326                 }
327 
328                 error = clFinish(queue);
329                 if (error) {
330                     print_error(error, "clFinish failed");
331                     total_errors++;
332                     break;
333                 }
334 
335                 output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]);
336                 if (output_data == NULL) {
337                     log_error("Failed to allocate memory for output data.\n");
338                     total_errors++;
339                     break;
340                 }
341                 memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]);
342                 error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0,
343                                             number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index],
344                                             output_data, 0, NULL, NULL);
345                 if (error) {
346                     print_error(error, "clEnqueueReadBuffer failed");
347                     total_errors++;
348                     free(output_data);
349                     break;
350                 }
351 
352                 // Check the results
353                 char *res = (char *)output_data;
354                 char *exp = (char *)input_data_converted;
355                 for (int i=0; i<number_of_results; i++) {
356                     // If they do not match, then print out why
357                     if (memcmp(input_data_converted,
358                                res + i*(get_explicit_type_size(vecType[type_index])*vecSizes[size_index]),
359                                get_explicit_type_size(vecType[type_index])*vecSizes[size_index])
360                         ) {
361                         log_error("Data failed to validate for result %d\n", i);
362 
363                         // Find the line in the program that failed. This is ugly.
364                         char search[32];
365                         char found_line[1024];
366                         found_line[0]='\0';
367                         search[0]='\0';
368                         sprintf(search, "result[%d] = (", i);
369                         char *start_loc = strstr(program_source, search);
370                         if (start_loc == NULL)
371                             log_error("Failed to find program source for failure for %s in \n%s", search, program_source);
372                         else {
373                           char *end_loc = strstr(start_loc, "\n");
374                           memcpy(&found_line, start_loc, (end_loc-start_loc));
375                           found_line[end_loc-start_loc]='\0';
376                           log_error("Failed vector line: %s\n", found_line);
377                         }
378 
379                         for (int j=0; j<(int)vecSizes[size_index]; j++) {
380                             char expected_value[64];
381                             char returned_value[64];
382                             expected_value[0]='\0';
383                             returned_value[0]='\0';
384                             print_type_to_string(vecType[type_index], (void*)(res+get_explicit_type_size(vecType[type_index])*(i*vecSizes[size_index]+j)), returned_value);
385                             print_type_to_string(vecType[type_index], (void*)(exp+get_explicit_type_size(vecType[type_index])*j), expected_value);
386                             log_error("index [%d, component %d]: got: %s expected: %s\n", i, j,
387                                       returned_value, expected_value);
388                         }
389 
390                         total_errors++;
391                     }
392                 }
393                 free(output_data);
394                 previous_number_generated = number_generated;
395             } // number_generated != 0
396 
397         } // vector sizes
398     } // vector types
399 
400     free(input_data_converted);
401     free(program_source);
402 
403     return total_errors;
404 }
405 
406 
407