1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "allocation_execute.h"
17 #include "allocation_functions.h"
18
19
20 const char *buffer_kernel_pattern = {
21 "__kernel void sample_test(%s __global uint *result, __global %s *array_sizes, uint per_item)\n"
22 "{\n"
23 "\tint tid = get_global_id(0);\n"
24 "\tuint r = 0;\n"
25 "\t%s i;\n"
26 "\tfor(i=(%s)tid*(%s)per_item; i<(%s)(1+tid)*(%s)per_item; i++) {\n"
27 "%s"
28 "\t}\n"
29 "\tresult[tid] = r;\n"
30 "}\n" };
31
32 const char *image_kernel_pattern = {
33 "__kernel void sample_test(%s __global uint *result)\n"
34 "{\n"
35 "\tuint4 color;\n"
36 "\tcolor = (uint4)(0);\n"
37 "%s"
38 "\tint x, y;\n"
39 "%s"
40 "\tresult[get_global_id(0)] += color.x + color.y + color.z + color.w;\n"
41 "}\n" };
42
43 const char *read_pattern = {
44 "\tfor(y=0; y<get_image_height(image%d); y++)\n"
45 "\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
46 "\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
47 "\t\t\t\tcolor += read_imageui(image%d, sampler, (int2)(x,y));\n"
48 "\t\t\t}\n"
49 };
50
51 const char *offset_pattern =
52 "\tconst uint4 offset = (uint4)(0,1,2,3);\n";
53
54 const char *sampler_pattern =
55 "\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n";
56
57
58 const char *write_pattern = {
59 "\tfor(y=0; y<get_image_height(image%d); y++)\n"
60 "\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
61 "\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
62 "\t\t\t\tcolor = (uint4)x*(uint4)y+offset;\n"
63 "\t\t\t\twrite_imageui(image%d, (int2)(x,y), color);\n"
64 "\t\t\t}\n"
65 "\tbarrier(CLK_LOCAL_MEM_FENCE);\n"
66 };
67
68
check_image(cl_command_queue queue,cl_mem mem)69 int check_image(cl_command_queue queue, cl_mem mem) {
70 int error;
71 cl_mem_object_type type;
72 size_t width, height;
73 size_t origin[3], region[3], x, j;
74 cl_uint *data;
75
76 error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
77 if (error) {
78 print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
79 return -1;
80 }
81
82 if (type == CL_MEM_OBJECT_BUFFER) {
83 log_error("Expected image object, not buffer.\n");
84 return -1;
85 } else if (type == CL_MEM_OBJECT_IMAGE2D) {
86 error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
87 if (error) {
88 print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_WIDTH.");
89 return -1;
90 }
91 error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
92 if (error) {
93 print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT.");
94 return -1;
95 }
96 }
97
98
99 data = (cl_uint*)malloc(width*4*sizeof(cl_uint));
100 if (data == NULL) {
101 log_error("Failed to malloc host buffer for writing into image.\n");
102 return FAILED_ABORT;
103 }
104 origin[0] = 0;
105 origin[1] = 0;
106 origin[2] = 0;
107 region[0] = width;
108 region[1] = 1;
109 region[2] = 1;
110 for (origin[1] = 0; origin[1] < height; origin[1]++) {
111 error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
112 if (error) {
113 print_error(error, "clEnqueueReadImage failed");
114 free(data);
115 return error;
116 }
117
118 for (x=0; x<width; x++) {
119 for (j=0; j<4; j++) {
120 if (data[x*4+j] != (cl_uint)(x*origin[1]+j)) {
121 log_error("Pixel %d, %d, component %d, expected %u, got %u.\n",
122 (int)x, (int)origin[1], (int)j, (cl_uint)(x*origin[1]+j), data[x*4+j]);
123 return -1;
124 }
125 }
126 }
127 }
128 free(data);
129 return 0;
130 }
131
132
133 #define NUM_OF_WORK_ITEMS 8192*2
134
execute_kernel(cl_context context,cl_command_queue * queue,cl_device_id device_id,int test,cl_mem mems[],int number_of_mems_used,int verify_checksum)135 int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum) {
136
137 char *argument_string;
138 char *access_string;
139 char *kernel_string;
140 int i, error, result;
141 clKernelWrapper kernel;
142 clProgramWrapper program;
143 clMemWrapper result_mem;
144 char *ptr;
145 size_t global_dims[3];
146 cl_uint per_item;
147 cl_uint per_item_uint;
148 cl_uint returned_results[NUM_OF_WORK_ITEMS], final_result;
149 clEventWrapper event;
150 cl_int event_status;
151
152 // Allocate memory for the kernel source
153 argument_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*64);
154 access_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10));
155 kernel_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10+64)+1024);
156 argument_string[0] = '\0';
157 access_string[0] = '\0';
158 kernel_string[0] = '\0';
159
160 // Zero the results.
161 for (i=0; i<NUM_OF_WORK_ITEMS; i++)
162 returned_results[i] = 0;
163
164 // detect if device supports ulong/int64
165 //detect whether profile of the device is embedded
166 bool support64 = true;
167 char profile[1024] = "";
168 error = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
169 test_error(error, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
170 if ((NULL != strstr(profile, "EMBEDDED_PROFILE")) &&
171 (!is_extension_available(device_id, "cles_khr_int64"))) {
172 support64 = false;
173 }
174
175 // Build the kernel source
176 if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
177 for(i=0; i<number_of_mems_used; i++) {
178 sprintf(argument_string + strlen(argument_string), " __global uint *buffer%d, ", i);
179 sprintf(access_string + strlen( access_string), "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
180 }
181 char type[10];
182 if (support64) {
183 sprintf(type, "ulong");
184 }
185 else {
186 sprintf(type, "uint");
187 }
188 sprintf(kernel_string, buffer_kernel_pattern, argument_string, type, type, type, type, type, type, access_string);
189 }
190 else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING) {
191 for(i=0; i<number_of_mems_used; i++) {
192 sprintf(argument_string + strlen(argument_string), " read_only image2d_t image%d, ", i);
193 sprintf(access_string + strlen(access_string), read_pattern, i, "%", i, i);
194 }
195 sprintf(kernel_string, image_kernel_pattern, argument_string, sampler_pattern, access_string);
196 }
197 else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING) {
198 for(i=0; i<number_of_mems_used; i++) {
199 sprintf(argument_string + strlen(argument_string), " write_only image2d_t image%d, ", i);
200 sprintf(access_string + strlen( access_string), write_pattern, i, "%", i, i);
201 }
202 sprintf(kernel_string, image_kernel_pattern, argument_string, offset_pattern, access_string);
203 }
204 ptr = kernel_string;
205
206 // Create the kernel
207 error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" );
208
209 free(argument_string);
210 free(access_string);
211 free(kernel_string);
212
213 result = check_allocation_error(context, device_id, error, queue);
214 if (result != SUCCEEDED) {
215 if (result == FAILED_TOO_BIG)
216 log_info("\t\tCreate kernel failed: %s.\n", IGetErrorString(error));
217 else
218 print_error(error, "Create kernel and program failed");
219 return result;
220 }
221
222 // Set the arguments
223 for (i=0; i<number_of_mems_used; i++) {
224 error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mems[i]);
225 test_error(error, "clSetKernelArg failed");
226 }
227
228 // Set the result
229 result_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, &error);
230 test_error(error, "clCreateBuffer failed");
231 error = clSetKernelArg(kernel, i, sizeof(result_mem), &result_mem);
232 test_error(error, "clSetKernelArg failed");
233
234 // Thread dimensions for execution
235 global_dims[0] = NUM_OF_WORK_ITEMS; global_dims[1] = 1; global_dims[2] = 1;
236
237 // We have extra arguments for the buffer kernel because we need to pass in the buffer sizes
238 cl_ulong *ulSizes = NULL;
239 cl_uint *uiSizes = NULL;
240 if (support64) {
241 ulSizes = (cl_ulong*)malloc(sizeof(cl_ulong)*number_of_mems_used);
242 }
243 else {
244 uiSizes = (cl_uint*)malloc(sizeof(cl_uint)*number_of_mems_used);
245 }
246 cl_ulong max_size = 0;
247 clMemWrapper buffer_sizes;
248 if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
249 for (i=0; i<number_of_mems_used; i++) {
250 size_t size;
251 error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size), &size, NULL);
252 test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
253 if (support64) {
254 ulSizes[i] = size/sizeof(cl_uint);
255 }
256 else {
257 uiSizes[i] = (cl_uint)size/sizeof(cl_uint);
258 }
259 if (size/sizeof(cl_uint) > max_size)
260 max_size = size/sizeof(cl_uint);
261 }
262 if (support64) {
263 buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_ulong)*number_of_mems_used, ulSizes, &error);
264 }
265 else {
266 buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*number_of_mems_used, uiSizes, &error);
267 }
268 test_error_abort(error, "clCreateBuffer failed");
269 error = clSetKernelArg(kernel, number_of_mems_used+1, sizeof(cl_mem), &buffer_sizes);
270 test_error(error, "clSetKernelArg failed");
271 per_item = (cl_uint)ceil((double)max_size/global_dims[0]);
272 if (per_item > CL_UINT_MAX)
273 log_error("Size is too large for a uint parameter to the kernel. Expect invalid results.\n");
274 per_item_uint = (cl_uint)per_item;
275 error = clSetKernelArg(kernel, number_of_mems_used+2, sizeof(per_item_uint), &per_item_uint);
276 test_error(error, "clSetKernelArg failed");
277 }
278 if (ulSizes) {
279 free(ulSizes);
280 }
281 if (uiSizes) {
282 free(uiSizes);
283 }
284
285 size_t local_dims[3] = {1,1,1};
286 error = get_max_common_work_group_size(context, kernel, global_dims[0], &local_dims[0]);
287 test_error(error, "get_max_common_work_group_size failed");
288
289 // Execute the kernel
290 error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims, local_dims, 0, NULL, &event);
291 result = check_allocation_error(context, device_id, error, queue);
292 if (result != SUCCEEDED) {
293 if (result == FAILED_TOO_BIG)
294 log_info("\t\tExecute kernel failed: %s (global dim: %ld, local dim: %ld)\n", IGetErrorString(error), global_dims[0], local_dims[0]);
295 else
296 print_error(error, "clEnqueueNDRangeKernel failed");
297 return result;
298 }
299
300 // Finish the test
301 error = clFinish(*queue);
302
303 result = check_allocation_error(context, device_id, error, queue);
304
305 if (result != SUCCEEDED) {
306 if (result == FAILED_TOO_BIG)
307 log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
308 else
309 print_error(error, "clFinish failed");
310 return result;
311 }
312
313 // Verify that the event from the execution did not have an error
314 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
315 test_error_abort(error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
316 if (event_status < 0) {
317 result = check_allocation_error(context, device_id, event_status, queue);
318 if (result != SUCCEEDED) {
319 if (result == FAILED_TOO_BIG)
320 log_info("\t\tEvent returned from kernel execution indicates failure: %s.\n", IGetErrorString(event_status));
321 else
322 print_error(event_status, "clEnqueueNDRangeKernel failed");
323 return result;
324 }
325 }
326
327 // If we are not verifying the checksum return here
328 if (!verify_checksum) {
329 log_info("Note: Allocations were not initialized so kernel execution can not verify correct results.\n");
330 return SUCCEEDED;
331 }
332
333 // Verify the checksum.
334 // Read back the result
335 error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, 0, NULL, NULL);
336 test_error_abort(error, "clEnqueueReadBuffer failed");
337 final_result = 0;
338 if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING || test == IMAGE_READ_NON_BLOCKING) {
339 // For buffers or read images we are just looking at the sum of what each thread summed up
340 for (i=0; i<NUM_OF_WORK_ITEMS; i++) {
341 final_result += returned_results[i];
342 }
343 if (final_result != checksum) {
344 log_error("\t\tChecksum failed to verify. Expected %u got %u.\n", checksum, final_result);
345 return FAILED_ABORT;
346 }
347 log_info("\t\tChecksum verified (%u == %u).\n", checksum, final_result);
348 } else {
349 // For write images we need to verify the values
350 for (i=0; i<number_of_mems_used; i++) {
351 if (check_image(*queue, mems[i])) {
352 log_error("\t\tImage contents failed to verify for image %d.\n", (int)i);
353 return FAILED_ABORT;
354 }
355 }
356 log_info("\t\tImage contents verified.\n");
357 }
358
359 // Finish the test
360 error = clFinish(*queue);
361 result = check_allocation_error(context, device_id, error, queue);
362 if (result != SUCCEEDED) {
363 if (result == FAILED_TOO_BIG)
364 log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
365 else
366 print_error(error, "clFinish failed");
367 return result;
368 }
369
370 return SUCCEEDED;
371 }
372
373
374