• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
17 #define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
18 
19 #include <vector>
20 #include <algorithm>
21 
22 // Common for all OpenCL C++ tests
23 #include "../common.hpp"
24 // Common for tests of sub-group functions
25 #include "common.hpp"
26 
27 template <class CL_INT_TYPE, work_group_op op>
generate_sg_scan_inclusive_kernel_code()28 std::string generate_sg_scan_inclusive_kernel_code()
29 {
30     return "#include <opencl_memory>\n"
31            "#include <opencl_work_item>\n"
32            "#include <opencl_work_group>\n"
33            "using namespace cl;\n"
34            "__kernel void test_sg_scan_inclusive(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
35                                                 "global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
36            "{\n"
37            "    ulong tid = get_global_id(0);\n"
38            "    " + type_name<CL_INT_TYPE>() + " result = sub_group_scan_inclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n"
39            "    output[tid] = result;\n"
40            "}\n";
41 }
42 
43 template <class CL_INT_TYPE>
verify_sg_scan_inclusive_add(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size,size_t sg_size)44 int verify_sg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
45 {
46     size_t i, j, k;
47     for (i = 0; i < in.size(); i += wg_size)
48     {
49         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
50         {
51             CL_INT_TYPE sum = 0;
52             // Check if all work-items in sub-group stored correct value
53             for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
54             {
55                 sum += in[i + j + k];
56                 if (sum != out[i + j + k])
57                 {
58                     log_info(
59                         "sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
60                         type_name<cl_uint>().c_str(),
61                         i + j,
62                         static_cast<size_t>(sum),
63                         static_cast<size_t>(out[i + j + k]));
64                     return -1;
65                 }
66             }
67         }
68     }
69     return CL_SUCCESS;
70 }
71 
72 template <class CL_INT_TYPE>
verify_sg_scan_inclusive_min(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size,size_t sg_size)73 int verify_sg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
74 {
75     size_t i, j, k;
76     for (i = 0; i < in.size(); i += wg_size)
77     {
78         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
79         {
80             CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
81             // Check if all work-items in sub-group stored correct value
82             for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
83             {
84                 min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
85                 if (min != out[i + j + k])
86                 {
87                     log_info(
88                         "sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
89                         type_name<cl_uint>().c_str(),
90                         i + j,
91                         static_cast<size_t>(min),
92                         static_cast<size_t>(out[i + j + k]));
93                     return -1;
94                 }
95             }
96         }
97     }
98     return CL_SUCCESS;
99 }
100 
101 template <class CL_INT_TYPE>
verify_sg_scan_inclusive_max(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size,size_t sg_size)102 int verify_sg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
103 {
104     size_t i, j, k;
105     for (i = 0; i < in.size(); i += wg_size)
106     {
107         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
108         {
109             CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
110             // Check if all work-items in sub-group stored correct value
111             for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
112             {
113                 max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
114                 if (max != out[i + j + k])
115                 {
116                     log_info(
117                         "sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
118                         type_name<cl_uint>().c_str(),
119                         i + j,
120                         static_cast<size_t>(max),
121                         static_cast<size_t>(out[i + j + k]));
122                     return -1;
123                 }
124             }
125         }
126     }
127     return CL_SUCCESS;
128 }
129 
130 template <class CL_INT_TYPE, work_group_op op>
verify_sg_scan_inclusive(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size,size_t sg_size)131 int verify_sg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
132 {
133     switch (op)
134     {
135         case work_group_op::add:
136             return verify_sg_scan_inclusive_add(in, out, wg_size, sg_size);
137         case work_group_op::min:
138             return verify_sg_scan_inclusive_min(in, out, wg_size, sg_size);
139         case work_group_op::max:
140             return verify_sg_scan_inclusive_max(in, out, wg_size, sg_size);
141     }
142     return -1;
143 }
144 
145 template <class CL_INT_TYPE, work_group_op op>
sub_group_scan_inclusive(cl_device_id device,cl_context context,cl_command_queue queue,size_t count)146 int sub_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
147 {
148     // don't run test for unsupported types
149     if(!type_supported<CL_INT_TYPE>(device))
150     {
151         return CL_SUCCESS;
152     }
153 
154     cl_mem buffers[2];
155     cl_program program;
156     cl_kernel kernel;
157     size_t wg_size;
158     size_t sg_max_size;
159     size_t work_size[1];
160     int err;
161 
162     std::string code_str = generate_sg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
163 // -----------------------------------------------------------------------------------
164 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
165 // -----------------------------------------------------------------------------------
166 // Only OpenCL C++ to SPIR-V compilation
167 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
168     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
169     RETURN_ON_ERROR(err)
170     return err;
171 // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
172 #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
173     log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
174     return CL_SUCCESS;
175 #else
176     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
177     RETURN_ON_ERROR(err)
178 #endif
179 
180     err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
181     RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
182 
183     size_t param_value_size = 0;
184     err = clGetKernelSubGroupInfo(
185         kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
186         sizeof(size_t), static_cast<void*>(&wg_size),
187         sizeof(size_t), static_cast<void*>(&sg_max_size),
188         &param_value_size
189     );
190     RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
191 
192     // Verify size of returned param
193     if(param_value_size != sizeof(size_t))
194     {
195         RETURN_ON_ERROR_MSG(-1,
196             "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
197             sizeof(size_t),
198             param_value_size
199         )
200     }
201 
202     // Calculate global work size
203     size_t flat_work_size;
204     size_t wg_number = static_cast<size_t>(
205         std::ceil(static_cast<double>(count) / wg_size)
206     );
207     flat_work_size = wg_number * wg_size;
208     work_size[0] = flat_work_size;
209 
210     std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
211     std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
212 
213     buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
214     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
215 
216     buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
217     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
218 
219     err = clEnqueueWriteBuffer(
220         queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
221         static_cast<void *>(input.data()), 0, NULL, NULL
222     );
223     RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
224 
225     err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
226     err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
227     RETURN_ON_CL_ERROR(err, "clSetKernelArg");
228 
229     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
230     RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
231 
232     err = clEnqueueReadBuffer(
233         queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
234         static_cast<void *>(output.data()), 0, NULL, NULL
235     );
236     RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
237 
238     if (verify_sg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS)
239     {
240         RETURN_ON_ERROR_MSG(-1, "sub_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
241     }
242     log_info("sub_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
243 
244     clReleaseMemObject(buffers[0]);
245     clReleaseMemObject(buffers[1]);
246     clReleaseKernel(kernel);
247     clReleaseProgram(program);
248     return err;
249 }
250 
// Test case: sub_group_scan_inclusive with work_group_op::add, run in turn
// for cl_int, cl_uint, cl_long and cl_ulong with n_elems elements each.
//
// Each run's status is passed to CHECK_ERROR (defined outside this file;
// presumably it reports a non-success status) and OR-ed into an aggregate.
// Returns CL_SUCCESS when the aggregate is CL_SUCCESS, otherwise -1.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    // 32-bit signed
    int status = sub_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 32-bit unsigned
    status = sub_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit signed
    status = sub_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit unsigned
    status = sub_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
277 
// Test case: sub_group_scan_inclusive with work_group_op::min, run in turn
// for cl_int, cl_uint, cl_long and cl_ulong with n_elems elements each.
//
// Each run's status is passed to CHECK_ERROR (defined outside this file;
// presumably it reports a non-success status) and OR-ed into an aggregate.
// Returns CL_SUCCESS when the aggregate is CL_SUCCESS, otherwise -1.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    // 32-bit signed
    int status = sub_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 32-bit unsigned
    status = sub_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit signed
    status = sub_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit unsigned
    status = sub_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
304 
// Test case: sub_group_scan_inclusive with work_group_op::max, run in turn
// for cl_int, cl_uint, cl_long and cl_ulong with n_elems elements each.
//
// Each run's status is passed to CHECK_ERROR (defined outside this file;
// presumably it reports a non-success status) and OR-ed into an aggregate.
// Returns CL_SUCCESS when the aggregate is CL_SUCCESS, otherwise -1.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    // 32-bit signed
    int status = sub_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 32-bit unsigned
    status = sub_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit signed
    status = sub_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit unsigned
    status = sub_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
331 
332 #endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
333