• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP
17 #define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP
18 
19 #include <vector>
20 #include <limits>
21 #include <algorithm>
22 
23 // Common for all OpenCL C++ tests
24 #include "../common.hpp"
25 // Common for tests of work-group functions
26 #include "common.hpp"
27 
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
// -----------------------------------------------------------------------------------
31 #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
32 template <class CL_INT_TYPE, work_group_op op>
generate_wg_reduce_kernel_code()33 std::string generate_wg_reduce_kernel_code()
34 {
35     return
36         "__kernel void test_wg_reduce(global " + type_name<CL_INT_TYPE>() + " *input, global " + type_name<CL_INT_TYPE>() + " *output)\n"
37         "{\n"
38         "    ulong tid = get_global_id(0);\n"
39         "\n"
40         "    " + type_name<CL_INT_TYPE>() + " result = work_group_reduce_" + to_string(op) + "(input[tid]);\n"
41         "    output[tid] = result;\n"
42         "}\n";
43 }
44 #else
45 template <class CL_INT_TYPE, work_group_op op>
generate_wg_reduce_kernel_code()46 std::string generate_wg_reduce_kernel_code()
47 {
48     return "#include <opencl_memory>\n"
49            "#include <opencl_work_item>\n"
50            "#include <opencl_work_group>\n"
51            "using namespace cl;\n"
52            "__kernel void test_wg_reduce(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
53                                         "global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
54            "{\n"
55            "    ulong tid = get_global_id(0);\n"
56            "    " + type_name<CL_INT_TYPE>() + " result = work_group_reduce<work_group_op::" + to_string(op) + ">(input[tid]);\n"
57            "    output[tid] = result;\n"
58            "}\n";
59 }
60 #endif
61 
62 template <class CL_INT_TYPE>
verify_wg_reduce_add(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size)63 int verify_wg_reduce_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
64 {
65     size_t i, j;
66     for (i = 0; i < in.size(); i += wg_size)
67     {
68         CL_INT_TYPE sum = 0;
69         // Work-group sum
70         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
71             sum += in[i + j];
72 
73         // Check if all work-items in work-group stored correct value
74         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
75         {
76             if (sum != out[i + j])
77             {
78                 log_info(
79                     "work_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n",
80                     type_name<CL_INT_TYPE>().c_str(),
81                     i + j,
82                     static_cast<size_t>(sum),
83                     static_cast<size_t>(out[i + j]));
84                 return -1;
85             }
86         }
87     }
88     return 0;
89 }
90 
91 template <class CL_INT_TYPE>
verify_wg_reduce_min(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size)92 int verify_wg_reduce_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
93 {
94     size_t i, j;
95     for (i = 0; i < in.size(); i += wg_size)
96     {
97         CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
98         // Work-group min
99         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
100             min = std::min<CL_INT_TYPE>(min, in[i + j]);
101 
102         // Check if all work-items in work-group stored correct value
103         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
104         {
105             if (min != out[i + j])
106             {
107                 log_info(
108                     "work_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n",
109                     type_name<CL_INT_TYPE>().c_str(),
110                     i + j,
111                     static_cast<size_t>(min),
112                     static_cast<size_t>(out[i + j]));
113                 return -1;
114             }
115         }
116     }
117     return 0;
118 }
119 
120 template <class CL_INT_TYPE>
verify_wg_reduce_max(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size)121 int verify_wg_reduce_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
122 {
123     size_t i, j;
124     for (i = 0; i < in.size(); i += wg_size)
125     {
126         CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
127         // Work-group max
128         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
129             max = std::max<CL_INT_TYPE>(max, in[i + j]);
130 
131         // Check if all work-items in work-group stored correct value
132         for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
133         {
134             if (max != out[i + j])
135             {
136                 log_info(
137                     "work_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n",
138                     type_name<CL_INT_TYPE>().c_str(),
139                     i + j,
140                     static_cast<size_t>(max),
141                     static_cast<size_t>(out[i + j]));
142                 return -1;
143             }
144         }
145     }
146     return 0;
147 }
148 
149 template <class CL_INT_TYPE, work_group_op op>
verify_wg_reduce(const std::vector<CL_INT_TYPE> & in,const std::vector<CL_INT_TYPE> & out,size_t wg_size)150 int verify_wg_reduce(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
151 {
152     switch (op)
153     {
154         case work_group_op::add:
155             return verify_wg_reduce_add(in, out, wg_size);
156         case work_group_op::min:
157             return verify_wg_reduce_min(in, out, wg_size);
158         case work_group_op::max:
159             return verify_wg_reduce_max(in, out, wg_size);
160     }
161     return -1;
162 }
163 
164 template <class CL_INT_TYPE, work_group_op op>
work_group_reduce(cl_device_id device,cl_context context,cl_command_queue queue,size_t count)165 int work_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
166 {
167     // don't run test for unsupported types
168     if(!type_supported<CL_INT_TYPE>(device))
169     {
170         return CL_SUCCESS;
171     }
172 
173     cl_mem buffers[2];
174     cl_program program;
175     cl_kernel kernel;
176     size_t wg_size;
177     size_t work_size[1];
178     int err;
179 
180     std::string code_str = generate_wg_reduce_kernel_code<CL_INT_TYPE, op>();
181 // -----------------------------------------------------------------------------------
182 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
183 // -----------------------------------------------------------------------------------
184 // Only OpenCL C++ to SPIR-V compilation
185 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
186     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce");
187     RETURN_ON_ERROR(err)
188     return err;
189 // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
190 #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
191     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce", "-cl-std=CL2.0", false);
192     RETURN_ON_ERROR(err)
193 #else
194     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce");
195     RETURN_ON_ERROR(err)
196 #endif
197 
198     err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
199     RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
200 
201     // Calculate global work size
202     size_t flat_work_size;
203     size_t wg_number = static_cast<size_t>(
204         std::ceil(static_cast<double>(count) / wg_size)
205     );
206     flat_work_size = wg_number * wg_size;
207     work_size[0] = flat_work_size;
208 
209     std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
210     std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
211 
212     buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
213     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
214 
215     buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
216     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
217 
218     err = clEnqueueWriteBuffer(
219         queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
220         static_cast<void *>(input.data()), 0, NULL, NULL
221     );
222     RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
223 
224     err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
225     err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
226     RETURN_ON_CL_ERROR(err, "clSetKernelArg");
227 
228     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
229     RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
230 
231     err = clEnqueueReadBuffer(
232         queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
233         static_cast<void *>(output.data()), 0, NULL, NULL
234     );
235     RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
236 
237     if (verify_wg_reduce<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
238     {
239         RETURN_ON_ERROR_MSG(-1, "work_group_reduce_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
240     }
241     log_info("work_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
242 
243     clReleaseMemObject(buffers[0]);
244     clReleaseMemObject(buffers[1]);
245     clReleaseKernel(kernel);
246     clReleaseProgram(program);
247     return err;
248 }
249 
// Test case: work_group_reduce with work_group_op::add for cl_int, cl_uint,
// cl_long and cl_ulong. Each result is passed to CHECK_ERROR and OR-ed into a
// combined status; returns -1 if the combined status is not CL_SUCCESS,
// CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_work_group_reduce_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    // 32-bit signed
    int status = work_group_reduce<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 32-bit unsigned
    status = work_group_reduce<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit signed
    status = work_group_reduce<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit unsigned
    status = work_group_reduce<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
276 
// Test case: work_group_reduce with work_group_op::min for cl_int, cl_uint,
// cl_long and cl_ulong. Each result is passed to CHECK_ERROR and OR-ed into a
// combined status; returns -1 if the combined status is not CL_SUCCESS,
// CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_work_group_reduce_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    // 32-bit signed
    int status = work_group_reduce<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 32-bit unsigned
    status = work_group_reduce<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit signed
    status = work_group_reduce<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit unsigned
    status = work_group_reduce<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
303 
// Test case: work_group_reduce with work_group_op::max for cl_int, cl_uint,
// cl_long and cl_ulong. Each result is passed to CHECK_ERROR and OR-ed into a
// combined status; returns -1 if the combined status is not CL_SUCCESS,
// CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_work_group_reduce_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    // 32-bit signed
    int status = work_group_reduce<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 32-bit unsigned
    status = work_group_reduce<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit signed
    status = work_group_reduce<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // 64-bit unsigned
    status = work_group_reduce<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
330 
331 #endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP
332