1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/typeWrappers.h"
18 #include "harness/conversions.h"
19
20 const char *subgroup_dispatch_kernel[] = {
21 "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
22 "__kernel void subgroup_dispatch_kernel(__global int *output)\n"
23 "{\n"
24 " size_t size = get_num_sub_groups ();\n"
25 "\n"
26 " output[0] = size;\n"
27 "\n"
28 "}\n" };
29
flatten_ndrange(size_t * ndrange,size_t dim)30 size_t flatten_ndrange(size_t* ndrange, size_t dim)
31 {
32 switch(dim)
33 {
34 case 1:
35 return *ndrange;
36 case 2:
37 return ndrange[0] * ndrange[1];
38 case 3:
39 return ndrange[0] * ndrange[1] * ndrange[2];
40 default:
41 log_error("ERROR: bad ndrange value");
42 return 0;
43 }
44 }
45
get_sub_group_num(cl_command_queue queue,cl_kernel kernel,clMemWrapper & out,size_t & size,size_t local_size,size_t dim)46 cl_int get_sub_group_num(cl_command_queue queue, cl_kernel kernel, clMemWrapper& out, size_t& size, size_t local_size, size_t dim)
47 {
48 size_t ndrange[3] = {local_size, 1, 1};
49 cl_int error = CL_SUCCESS;
50 size = 0;
51 error = clSetKernelArg(kernel, 0, sizeof(out), &out);
52 error += clEnqueueNDRangeKernel(queue, kernel, dim, NULL, ndrange, ndrange, 0, NULL, NULL);
53 error += clEnqueueReadBuffer(queue, out, CL_TRUE, 0, 4, &size, 0, NULL, NULL);
54 return error;
55 }
56
test_sub_group_dispatch(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)57 int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
58 {
59 static const size_t gsize0 = 80;
60 int i, error;
61 size_t realSize;
62 size_t kernel_max_subgroup_size, kernel_subgroup_count;
63 size_t global[] = {1,1,1};
64 size_t max_local;
65
66 cl_platform_id platform;
67 clProgramWrapper program;
68 clKernelWrapper kernel;
69 clMemWrapper out;
70
71 size_t ret_ndrange1d;
72 size_t ret_ndrange2d[2];
73 size_t ret_ndrange3d[3];
74
75 size_t ret_ndrange2d_flattened;
76 size_t ret_ndrange3d_flattened;
77
78 error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, subgroup_dispatch_kernel, "subgroup_dispatch_kernel", "-cl-std=CL2.0");
79 if (error != 0)
80 return error;
81
82 out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(size_t), NULL, &error);
83 test_error(error, "clCreateBuffer failed");
84
85 error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_local, NULL);
86 test_error(error, "clGetDeviceInfo failed");
87
88
89 error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL);
90 test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM");
91
92 // Get the max subgroup size
93 error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
94 sizeof(max_local), &max_local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize);
95 test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE");
96 log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_max_subgroup_size);
97
98 if (realSize != sizeof(kernel_max_subgroup_size)) {
99 log_error( "ERROR: Returned size of max sub group size not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize );
100 return -1;
101 }
102
103 // Get the number of subgroup for max local size
104 error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
105 sizeof(max_local), &max_local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize);
106 test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE");
107 log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_subgroup_count);
108
109 if (realSize != sizeof(kernel_subgroup_count)) {
110 log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
111 return -1;
112 }
113
114 // test CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT
115 for (size_t i = kernel_subgroup_count; i > 0; --i)
116 {
117 // test all 3 different dimention of requested local size
118 size_t expect_size = kernel_max_subgroup_size * i;
119 size_t kernel_ret_size = 0;
120 error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize);
121 test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
122 if (realSize != sizeof(ret_ndrange1d)) {
123 log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
124 return -1;
125 }
126
127 if (ret_ndrange1d != expect_size)
128 {
129 log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange1d );
130 return -1;
131 }
132
133 error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange1d, 1);
134 test_error(error, "Failed to query number of subgroups from kernel");
135 if (i != kernel_ret_size)
136 {
137 log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
138 return -1;
139 }
140
141 error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange2d), ret_ndrange2d, &realSize);
142 test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
143 if (realSize != sizeof(ret_ndrange2d)) {
144 log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
145 return -1;
146 }
147
148 ret_ndrange2d_flattened = flatten_ndrange(ret_ndrange2d, 2);
149 if (ret_ndrange2d_flattened != expect_size ||
150 ret_ndrange2d[1] != 1)
151 {
152 log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange2d_flattened );
153 return -1;
154 }
155
156 error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange2d_flattened, 2);
157 test_error(error, "Failed to query number of subgroups from kernel");
158 if (i != kernel_ret_size)
159 {
160 log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
161 return -1;
162 }
163
164 error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange3d), ret_ndrange3d, &realSize);
165 test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
166 if (realSize != sizeof(ret_ndrange3d)) {
167 log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
168 return -1;
169 }
170
171 ret_ndrange3d_flattened = flatten_ndrange(ret_ndrange3d, 3);
172 if (ret_ndrange3d_flattened != expect_size ||
173 ret_ndrange3d[1] != 1 ||
174 ret_ndrange3d[2] != 1)
175 {
176 log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange3d_flattened );
177 return -1;
178 }
179
180 error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange3d_flattened, 3);
181 test_error(error, "Failed to query number of subgroups from kernel");
182 if (i != kernel_ret_size)
183 {
184 log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
185 return -1;
186 }
187 }
188
189 // test when input subgroup count exceeds max wg size
190 size_t large_sg_size = kernel_subgroup_count + 1;
191 error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize);
192 test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
193 if (ret_ndrange1d != 0)
194 {
195 log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", 0, (int)ret_ndrange1d );
196 return -1;
197 }
198
199 error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange2d), ret_ndrange2d, &realSize);
200 test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
201 if (ret_ndrange2d[0] != 0 ||
202 ret_ndrange2d[1] != 0)
203 {
204 log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT!" );
205 return -1;
206 }
207
208 error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange3d), ret_ndrange3d, &realSize);
209 test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
210 if (ret_ndrange3d[0] != 0 ||
211 ret_ndrange3d[1] != 0 ||
212 ret_ndrange3d[2] != 0)
213 {
214 log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT!" );
215 return -1;
216 }
217
218 return 0;
219 }
220