1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18
19 typedef struct
20 {
21 cl_uint maxSubGroupSize;
22 cl_uint numSubGroups;
23 } result_data;
24
25
test_sub_group_info(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)26 int test_sub_group_info(cl_device_id device, cl_context context,
27 cl_command_queue queue, int num_elements,
28 bool useCoreSubgroups)
29 {
30 static const size_t gsize0 = 80;
31 int i, error;
32 size_t realSize;
33 size_t kernel_max_subgroup_size, kernel_subgroup_count;
34 size_t global[] = { gsize0, 14, 10 };
35 size_t local[] = { 0, 0, 0 };
36 result_data result[gsize0];
37
38 cl_uint max_dimensions;
39
40 error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
41 sizeof(max_dimensions), &max_dimensions, NULL);
42 test_error(error,
43 "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
44
45 cl_platform_id platform;
46 clProgramWrapper program;
47 clKernelWrapper kernel;
48 clMemWrapper out;
49 std::stringstream kernel_sstr;
50 if (useCoreSubgroups)
51 {
52 kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
53 }
54 kernel_sstr
55 << "\n"
56 "typedef struct {\n"
57 " uint maxSubGroupSize;\n"
58 " uint numSubGroups;\n"
59 "} result_data;\n"
60 "\n"
61 "__kernel void query_kernel( __global result_data *outData )\n"
62 "{\n"
63 " int gid = get_global_id( 0 );\n"
64 " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
65 " outData[gid].numSubGroups = get_num_sub_groups();\n"
66 "}";
67
68 const std::string &kernel_str = kernel_sstr.str();
69 const char *kernel_src = kernel_str.c_str();
70 error = create_single_kernel_helper_with_build_options(
71 context, &program, &kernel, 1, &kernel_src, "query_kernel",
72 "-cl-std=CL2.0");
73 if (error != 0) return error;
74
75 // Determine some local dimensions to use for the test.
76 if (max_dimensions == 1)
77 {
78 error = get_max_common_work_group_size(context, kernel, global[0],
79 &local[0]);
80 test_error(error, "get_max_common_work_group_size failed");
81 }
82 else if (max_dimensions == 2)
83 {
84 error =
85 get_max_common_2D_work_group_size(context, kernel, global, local);
86 test_error(error, "get_max_common_2D_work_group_size failed");
87 }
88 else
89 {
90 error =
91 get_max_common_3D_work_group_size(context, kernel, global, local);
92 test_error(error, "get_max_common_3D_work_group_size failed");
93 }
94
95 error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
96 (void *)&platform, NULL);
97 test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM");
98
99 subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
100 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
101 subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
102 if (clGetKernelSubGroupInfo_ptr == NULL)
103 {
104 log_error("ERROR: %s function not available",
105 subgroupsApiSet.clGetKernelSubGroupInfo_name);
106 return TEST_FAIL;
107 }
108
109 error = clGetKernelSubGroupInfo_ptr(
110 kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(local),
111 (void *)&local, sizeof(kernel_max_subgroup_size),
112 (void *)&kernel_max_subgroup_size, &realSize);
113 if (error != CL_SUCCESS)
114 {
115 log_error("ERROR: %s function error for "
116 "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
117 subgroupsApiSet.clGetKernelSubGroupInfo_name);
118 return TEST_FAIL;
119 }
120 log_info(
121 "The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n",
122 (int)kernel_max_subgroup_size);
123 if (realSize != sizeof(kernel_max_subgroup_size))
124 {
125 log_error("ERROR: Returned size of max sub group size not valid! "
126 "(Expected %d, got %d)\n",
127 (int)sizeof(kernel_max_subgroup_size), (int)realSize);
128 return TEST_FAIL;
129 }
130 error = clGetKernelSubGroupInfo_ptr(
131 kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(local),
132 (void *)&local, sizeof(kernel_subgroup_count),
133 (void *)&kernel_subgroup_count, &realSize);
134 if (error != CL_SUCCESS)
135 {
136 log_error("ERROR: %s function error "
137 "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
138 subgroupsApiSet.clGetKernelSubGroupInfo_name);
139 return TEST_FAIL;
140 }
141 log_info(
142 "The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n",
143 (int)kernel_subgroup_count);
144
145 if (realSize != sizeof(kernel_subgroup_count))
146 {
147 log_error("ERROR: Returned size of sub group count not valid! "
148 "(Expected %d, got %d)\n",
149 (int)sizeof(kernel_subgroup_count), (int)realSize);
150 return TEST_FAIL;
151 }
152
153 // Verify that the kernel gets the same max_subgroup_size and subgroup_count
154 out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL,
155 &error);
156 test_error(error, "clCreateBuffer failed");
157
158 error = clSetKernelArg(kernel, 0, sizeof(out), &out);
159 test_error(error, "clSetKernelArg failed");
160
161 error = clEnqueueNDRangeKernel(queue, kernel, max_dimensions, NULL, global,
162 local, 0, NULL, NULL);
163 test_error(error, "clEnqueueNDRangeKernel failed");
164
165 error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
166 &result, 0, NULL, NULL);
167 test_error(error, "clEnqueueReadBuffer failed");
168
169 error = clFinish(queue);
170 test_error(error, "clFinish failed");
171
172 for (i = 0; i < (int)gsize0; ++i)
173 {
174 if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size)
175 {
176 log_error("ERROR: get_max_subgroup_size() doesn't match result "
177 "from clGetKernelSubGroupInfoKHR, %u vs %u\n",
178 result[i].maxSubGroupSize,
179 (cl_uint)kernel_max_subgroup_size);
180 return -1;
181 }
182 if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count)
183 {
184 log_error("ERROR: get_num_sub_groups() doesn't match result from "
185 "clGetKernelSubGroupInfoKHR, %u vs %u\n",
186 result[i].numSubGroups, (cl_uint)kernel_subgroup_count);
187 return -1;
188 }
189 }
190
191 return 0;
192 }
193
test_sub_group_info_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)194 int test_sub_group_info_core(cl_device_id device, cl_context context,
195 cl_command_queue queue, int num_elements)
196 {
197 return test_sub_group_info(device, context, queue, num_elements, true);
198 }
199
test_sub_group_info_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)200 int test_sub_group_info_ext(cl_device_id device, cl_context context,
201 cl_command_queue queue, int num_elements)
202 {
203 bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
204
205 if (!hasExtension)
206 {
207 log_info(
208 "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
209 return TEST_SKIPPED_ITSELF;
210 }
211
212 return test_sub_group_info(device, context, queue, num_elements, false);
213 }