1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24
25 #include "procs.h"
26
27 static const char *enqueued_local_size_2d_code =
28 "__kernel void test_enqueued_local_size_2d(global int *dst)\n"
29 "{\n"
30 " if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n"
31 " {\n"
32 " dst[0] = (int)get_enqueued_local_size(0)\n;"
33 " dst[1] = (int)get_enqueued_local_size(1)\n;"
34 " }\n"
35 "}\n";
36
37 static const char *enqueued_local_size_1d_code =
38 "__kernel void test_enqueued_local_size_1d(global int *dst)\n"
39 "{\n"
40 " int tid_x = get_global_id(0);\n"
41 " if (get_global_id(0) == 0)\n"
42 " {\n"
43 " dst[tid_x] = (int)get_enqueued_local_size(0)\n;"
44 " }\n"
45 "}\n";
46
47
48 static int
verify_enqueued_local_size(int * result,size_t * expected,int n)49 verify_enqueued_local_size(int *result, size_t *expected, int n)
50 {
51 int i;
52 for (i=0; i<n; i++)
53 {
54 if (result[i] != (int)expected[i])
55 {
56 log_error("get_enqueued_local_size failed\n");
57 return -1;
58 }
59 }
60 log_info("get_enqueued_local_size passed\n");
61 return 0;
62 }
63
64
65 int
test_enqueued_local_size(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)66 test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
67 {
68 cl_mem streams;
69 cl_program program[2];
70 cl_kernel kernel[2];
71
72 int *output_ptr;
73 size_t globalsize[2];
74 size_t localsize[2];
75 int err;
76
77 // For an OpenCL-3.0 device that does not support non-uniform work-groups
78 // we cannot enqueue local sizes which do not divide the global dimensions
79 // but we can still run the test checking that get_enqueued_local_size ==
80 // get_local_size.
81 bool use_uniform_work_groups{ false };
82 if (get_device_cl_version(device) >= Version(3, 0))
83 {
84 cl_bool areNonUniformWorkGroupsSupported = false;
85 err = clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
86 sizeof(areNonUniformWorkGroupsSupported),
87 &areNonUniformWorkGroupsSupported, nullptr);
88 test_error_ret(err, "clGetDeviceInfo failed.", TEST_FAIL);
89
90 if (CL_FALSE == areNonUniformWorkGroupsSupported)
91 {
92 log_info("Non-uniform work group sizes are not supported, "
93 "enqueuing with uniform workgroups\n");
94 use_uniform_work_groups = true;
95 }
96 }
97
98 output_ptr = (int*)malloc(2 * sizeof(int));
99
100 streams =
101 clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(int), NULL, &err);
102 test_error( err, "clCreateBuffer failed.");
103
104 std::string cl_std = "-cl-std=CL";
105 cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
106 err = create_single_kernel_helper_with_build_options(
107 context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
108 "test_enqueued_local_size_1d", cl_std.c_str());
109 test_error( err, "create_single_kernel_helper failed");
110 err = create_single_kernel_helper_with_build_options(
111 context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
112 "test_enqueued_local_size_2d", cl_std.c_str());
113 test_error( err, "create_single_kernel_helper failed");
114
115 err = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
116 test_error( err, "clSetKernelArgs failed.");
117 err = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
118 test_error( err, "clSetKernelArgs failed.");
119
120 globalsize[0] = (size_t)num_elements;
121 globalsize[1] = (size_t)num_elements;
122
123 size_t max_wgs;
124 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL);
125 test_error( err, "clGetDeviceInfo failed.");
126
127 localsize[0] = MIN(16, max_wgs);
128 localsize[1] = MIN(11, max_wgs / localsize[0]);
129 // If we need to use uniform workgroups because non-uniform workgroups are
130 // not supported, round up to the next global size that is divisible by the
131 // local size.
132 if (use_uniform_work_groups)
133 {
134 if (globalsize[0] % localsize[0])
135 {
136 globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
137 }
138 if (globalsize[1] % localsize[1])
139 {
140 globalsize[1] += (localsize[1] - (globalsize[1] % localsize[1]));
141 }
142 }
143
144 err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
145 test_error( err, "clEnqueueNDRangeKernel failed.");
146
147 err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
148 test_error( err, "clEnqueueReadBuffer failed.");
149
150 err = verify_enqueued_local_size(output_ptr, localsize, 2);
151
152 globalsize[0] = (size_t)num_elements;
153 localsize[0] = 9;
154 if (use_uniform_work_groups && (globalsize[0] % localsize[0]))
155 {
156 globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
157 }
158 err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL);
159 test_error( err, "clEnqueueNDRangeKernel failed.");
160
161 err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
162 test_error( err, "clEnqueueReadBuffer failed.");
163
164 err = verify_enqueued_local_size(output_ptr, localsize, 1);
165
166 // cleanup
167 clReleaseMemObject(streams);
168 clReleaseKernel(kernel[0]);
169 clReleaseKernel(kernel[1]);
170 clReleaseProgram(program[0]);
171 clReleaseProgram(program[1]);
172 free(output_ptr);
173
174 return err;
175 }
176