• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP
17 #define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP
18 
#include <algorithm>
#include <limits>
#include <string>
#include <vector>
21 
22 // Common for all OpenCL C++ tests
23 #include "../common.hpp"
24 // Common for tests of work-group functions
25 #include "common.hpp"
26 
27 // -----------------------------------------------------------------------------------
28 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
29 // -----------------------------------------------------------------------------------
30 #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
31 template <class CL_INT_TYPE, work_group_op op>
generate_wg_scan_inclusive_kernel_code()32 std::string generate_wg_scan_inclusive_kernel_code()
33 {
34     return
35         "__kernel void test_wg_scan_inclusive(global " + type_name<CL_INT_TYPE>() + " *input, global " + type_name<CL_INT_TYPE>() + " *output)\n"
36         "{\n"
37         "    ulong tid = get_global_id(0);\n"
38         "\n"
39         "    " + type_name<CL_INT_TYPE>() + " result = work_group_scan_inclusive_" + to_string(op) + "(input[tid]);\n"
40         "    output[tid] = result;\n"
41         "}\n";
42 }
43 #else
44 template <class CL_INT_TYPE, work_group_op op>
generate_wg_scan_inclusive_kernel_code()45 std::string generate_wg_scan_inclusive_kernel_code()
46 {
47     return "#include <opencl_memory>\n"
48            "#include <opencl_work_item>\n"
49            "#include <opencl_work_group>\n"
50            "using namespace cl;\n"
51            "__kernel void test_wg_scan_inclusive(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
52                                                 "global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
53            "{\n"
54            "    ulong tid = get_global_id(0);\n"
55            "    " + type_name<CL_INT_TYPE>() + " result = work_group_scan_inclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n"
56            "    output[tid] = result;\n"
57            "}\n";
58 }
59 #endif
60 
61 template <class CL_INT_TYPE>
verify_wg_scan_inclusive_add(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size)62 int verify_wg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
63 {
64     size_t i, j;
65     for (i = 0; i < in.size(); i += wg_size)
66     {
67         CL_INT_TYPE sum = 0;
68 
69         // Check if all work-items in work-group wrote correct value
70         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
71         {
72             sum += in[i + j];
73             if (sum != out[i + j])
74             {
75                 log_info(
76                     "work_group_scan_inclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
77                     type_name<CL_INT_TYPE>().c_str(),
78                     i + j,
79                     static_cast<size_t>(sum),
80                     static_cast<size_t>(out[i + j]));
81                 return -1;
82             }
83         }
84     }
85     return CL_SUCCESS;
86 }
87 
88 template <class CL_INT_TYPE>
verify_wg_scan_inclusive_min(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size)89 int verify_wg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
90 {
91     size_t i, j;
92     for (i = 0; i < in.size(); i += wg_size)
93     {
94         CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
95 
96         // Check if all work-items in work-group wrote correct value
97         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
98         {
99             min = (std::min)(min, in[i + j]);
100             if (min != out[i + j])
101             {
102                 log_info(
103                     "work_group_scan_inclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
104                     type_name<CL_INT_TYPE>().c_str(),
105                     i + j,
106                     static_cast<size_t>(min),
107                     static_cast<size_t>(out[i + j]));
108                 return -1;
109             }
110         }
111     }
112     return CL_SUCCESS;
113 }
114 
115 template <class CL_INT_TYPE>
verify_wg_scan_inclusive_max(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size)116 int verify_wg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
117 {
118     size_t i, j;
119     for (i = 0; i < in.size(); i += wg_size)
120     {
121         CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
122 
123         // Check if all work-items in work-group wrote correct value
124         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
125         {
126             max = (std::max)(max, in[i + j]);
127             if (max != out[i + j])
128             {
129                 log_info(
130                     "work_group_scan_inclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
131                     type_name<CL_INT_TYPE>().c_str(),
132                     i + j,
133                     static_cast<size_t>(max),
134                     static_cast<size_t>(out[i + j]));
135                 return -1;
136             }
137         }
138     }
139     return CL_SUCCESS;
140 }
141 
142 template <class CL_INT_TYPE, work_group_op op>
verify_wg_scan_inclusive(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size)143 int verify_wg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
144 {
145     switch (op)
146     {
147         case work_group_op::add:
148             return verify_wg_scan_inclusive_add(in, out, wg_size);
149         case work_group_op::min:
150             return verify_wg_scan_inclusive_min(in, out, wg_size);
151         case work_group_op::max:
152             return verify_wg_scan_inclusive_max(in, out, wg_size);
153     }
154     return -1;
155 }
156 
157 template <class CL_INT_TYPE, work_group_op op>
work_group_scan_inclusive(cl_device_id device,cl_context context,cl_command_queue queue,size_t count)158 int work_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
159 {
160     // don't run test for unsupported types
161     if(!type_supported<CL_INT_TYPE>(device))
162     {
163         return CL_SUCCESS;
164     }
165 
166     cl_mem buffers[2];
167     cl_program program;
168     cl_kernel kernel;
169     size_t wg_size;
170     size_t work_size[1];
171     int err;
172 
173     std::string code_str = generate_wg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
174 // -----------------------------------------------------------------------------------
175 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
176 // -----------------------------------------------------------------------------------
177 // Only OpenCL C++ to SPIR-V compilation
178 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
179     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive");
180     RETURN_ON_ERROR(err)
181     return err;
182 // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
183 #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
184     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive", "-cl-std=CL2.0", false);
185     RETURN_ON_ERROR(err)
186 #else
187     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive");
188     RETURN_ON_ERROR(err)
189 #endif
190 
191     err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
192     RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
193 
194     // Calculate global work size
195     size_t flat_work_size;
196     size_t wg_number = static_cast<size_t>(
197         std::ceil(static_cast<double>(count) / wg_size)
198     );
199     flat_work_size = wg_number * wg_size;
200     work_size[0] = flat_work_size;
201 
202     std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
203     std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
204 
205     buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
206     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
207 
208     buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
209     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
210 
211     err = clEnqueueWriteBuffer(
212         queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
213         static_cast<void *>(input.data()), 0, NULL, NULL
214     );
215     RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
216 
217     err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
218     err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
219     RETURN_ON_CL_ERROR(err, "clSetKernelArg");
220 
221     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
222     RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
223 
224     err = clEnqueueReadBuffer(
225         queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
226         static_cast<void *>(output.data()), 0, NULL, NULL
227     );
228     RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
229 
230     if (verify_wg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
231     {
232         RETURN_ON_ERROR_MSG(-1, "work_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
233     }
234     log_info("work_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
235 
236     clReleaseMemObject(buffers[0]);
237     clReleaseMemObject(buffers[1]);
238     clReleaseKernel(kernel);
239     clReleaseProgram(program);
240     return err;
241 }
242 
// Test case: inclusive work-group scan with the `add` operation, run for
// cl_int, cl_uint, cl_long and cl_ulong.
//
// Every type is always run; each result is passed to CHECK_ERROR (defined
// outside this file) and OR-ed into a combined status.
//
// Returns CL_SUCCESS when all runs returned CL_SUCCESS, -1 otherwise.
AUTO_TEST_CASE(test_work_group_scan_inclusive_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined_error = CL_SUCCESS;
    int local_error;

    // 32-bit signed
    local_error = work_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 32-bit unsigned
    local_error = work_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 64-bit signed
    local_error = work_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 64-bit unsigned
    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    return (combined_error != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
269 
// Test case: inclusive work-group scan with the `min` operation, run for
// cl_int, cl_uint, cl_long and cl_ulong.
//
// Every type is always run; each result is passed to CHECK_ERROR (defined
// outside this file) and OR-ed into a combined status.
//
// Returns CL_SUCCESS when all runs returned CL_SUCCESS, -1 otherwise.
AUTO_TEST_CASE(test_work_group_scan_inclusive_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined_error = CL_SUCCESS;
    int local_error;

    // 32-bit signed
    local_error = work_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 32-bit unsigned
    local_error = work_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 64-bit signed
    local_error = work_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 64-bit unsigned
    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    return (combined_error != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
296 
// Test case: inclusive work-group scan with the `max` operation, run for
// cl_int, cl_uint, cl_long and cl_ulong.
//
// Every type is always run; each result is passed to CHECK_ERROR (defined
// outside this file) and OR-ed into a combined status.
//
// Returns CL_SUCCESS when all runs returned CL_SUCCESS, -1 otherwise.
AUTO_TEST_CASE(test_work_group_scan_inclusive_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined_error = CL_SUCCESS;
    int local_error;

    // 32-bit signed
    local_error = work_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 32-bit unsigned
    local_error = work_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 64-bit signed
    local_error = work_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    // 64-bit unsigned
    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    combined_error |= local_error;

    return (combined_error != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
323 
324 #endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP
325