1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP
17 #define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP
18
19 #include <vector>
20 #include <limits>
21 #include <algorithm>
22
23 // Common for all OpenCL C++ tests
24 #include "../common.hpp"
25 // Common for tests of work-group functions
26 #include "common.hpp"
27
28 // -----------------------------------------------------------------------------------
29 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
30 // -----------------------------------------------------------------------------------
31 #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
generate_wg_all_kernel_code()32 std::string generate_wg_all_kernel_code()
33 {
34 return
35 "__kernel void test_wg_all(global uint *input, global uint *output)\n"
36 "{\n"
37 " ulong tid = get_global_id(0);\n"
38 "\n"
39 " int result = work_group_all(input[tid] < input[tid+1]);\n"
40 " if(result == 0) {\n output[tid] = 0;\n return;\n }\n"
41 " output[tid] = 1;\n"
42 "}\n";
43 }
44 #else
generate_wg_all_kernel_code()45 std::string generate_wg_all_kernel_code()
46 {
47 return "#include <opencl_memory>\n"
48 "#include <opencl_work_item>\n"
49 "#include <opencl_work_group>\n"
50 "using namespace cl;\n"
51 "__kernel void test_wg_all(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
52 "{\n"
53 " ulong tid = get_global_id(0);\n"
54 " bool result = work_group_all(input[tid] < input[tid+1]);\n"
55 " if(!result) {\n output[tid] = 0;\n return;\n }\n"
56 " output[tid] = 1;\n"
57 "}\n";
58 }
59 #endif
60
verify_wg_all(const std::vector<cl_uint> & in,const std::vector<cl_uint> & out,size_t count,size_t wg_size)61 int verify_wg_all(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size)
62 {
63 size_t i, j;
64 for (i = 0; i < count; i += wg_size)
65 {
66 // Work-group all
67 bool all = true;
68 for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j++)
69 {
70 if(!(in[i+j] < in[i+j+1]))
71 {
72 all = false;
73 break;
74 }
75 }
76
77 // Convert bool to uint
78 cl_uint all_uint = all ? 1 : 0;
79 // Check if all work-items in work-group stored correct value
80 for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j++)
81 {
82 if (all_uint != out[i + j])
83 {
84 log_info(
85 "work_group_all %s: Error at %lu: expected = %lu, got = %lu\n",
86 type_name<cl_uint>().c_str(),
87 i + j,
88 static_cast<size_t>(all_uint),
89 static_cast<size_t>(out[i + j]));
90 return -1;
91 }
92 }
93 }
94 return CL_SUCCESS;
95 }
96
generate_input_wg_all(size_t count,size_t wg_size)97 std::vector<cl_uint> generate_input_wg_all(size_t count, size_t wg_size)
98 {
99 std::vector<cl_uint> input(count, cl_uint(0));
100 size_t j = wg_size;
101 for(size_t i = 0; i < count; i++)
102 {
103 input[i] = static_cast<cl_uint>(i);
104 // In one place in ~half of workgroups input[tid] < input[tid+1] will
105 // generate false, that means for that workgroups work_group_all()
106 // should return false
107 if((j == wg_size/2) && (i > count/2))
108 {
109 input[i] = input[i - 1];
110 }
111 j--;
112 if(j == 0)
113 {
114 j = wg_size;
115 }
116 }
117 return input;
118 }
119
generate_output_wg_all(size_t count,size_t wg_size)120 std::vector<cl_uint> generate_output_wg_all(size_t count, size_t wg_size)
121 {
122 (void) wg_size;
123 return std::vector<cl_uint>(count, cl_uint(1));
124 }
125
work_group_all(cl_device_id device,cl_context context,cl_command_queue queue,size_t count)126 int work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
127 {
128 cl_mem buffers[2];
129 cl_program program;
130 cl_kernel kernel;
131 size_t wg_size;
132 size_t work_size[1];
133 int err;
134
135 std::string code_str = generate_wg_all_kernel_code();
136 // -----------------------------------------------------------------------------------
137 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
138 // -----------------------------------------------------------------------------------
139 // Only OpenCL C++ to SPIR-V compilation
140 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
141 err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all");
142 RETURN_ON_ERROR(err)
143 return err;
144 // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
145 #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
146 err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all", "-cl-std=CL2.0", false);
147 RETURN_ON_ERROR(err)
148 #else
149 err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all");
150 RETURN_ON_ERROR(err)
151 #endif
152
153 err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
154 RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
155
156 // Calculate global work size
157 size_t flat_work_size;
158 size_t wg_number = static_cast<size_t>(
159 std::ceil(static_cast<double>(count) / wg_size)
160 );
161 flat_work_size = wg_number * wg_size;
162 work_size[0] = flat_work_size;
163
164 std::vector<cl_uint> input = generate_input_wg_all(flat_work_size + 1, wg_size);
165 std::vector<cl_uint> output = generate_output_wg_all(flat_work_size, wg_size);
166
167 buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
168 RETURN_ON_CL_ERROR(err, "clCreateBuffer");
169
170 buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
171 RETURN_ON_CL_ERROR(err, "clCreateBuffer");
172
173 err = clEnqueueWriteBuffer(
174 queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
175 static_cast<void *>(input.data()), 0, NULL, NULL
176 );
177 RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
178
179 err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
180 err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
181 RETURN_ON_CL_ERROR(err, "clSetKernelArg");
182
183 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
184 RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
185
186 err = clEnqueueReadBuffer(
187 queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
188 static_cast<void *>(output.data()), 0, NULL, NULL
189 );
190 RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
191
192 if (verify_wg_all(input, output, flat_work_size, wg_size) != CL_SUCCESS)
193 {
194 RETURN_ON_ERROR_MSG(-1, "work_group_all failed");
195 }
196 log_info("work_group_all passed\n");
197
198 clReleaseMemObject(buffers[0]);
199 clReleaseMemObject(buffers[1]);
200 clReleaseKernel(kernel);
201 clReleaseProgram(program);
202 return err;
203 }
204
AUTO_TEST_CASE(test_work_group_all)205 AUTO_TEST_CASE(test_work_group_all)
206 (cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
207 {
208 int err = CL_SUCCESS;
209
210 err = work_group_all(device, context, queue, n_elems);
211 CHECK_ERROR(err)
212
213 if(err != CL_SUCCESS)
214 return -1;
215 return CL_SUCCESS;
216 }
217
218 #endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP
219