• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
17 #define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
18 
19 #include <vector>
20 #include <limits>
21 #include <algorithm>
22 
23 // Common for all OpenCL C++ tests
24 #include "../common.hpp"
25 // Common for tests of sub-group functions
26 #include "common.hpp"
27 
// Builds the OpenCL C++ source of the test_sg_broadcast kernel.
//
// The kernel takes two global uint arrays. Each work-item passes
// input[global id] to sub_group_broadcast with index 0 and stores the value
// it gets back into output[global id].
//
// Returns the kernel source; every source line is terminated by '\n'.
std::string generate_sg_broadcast_kernel_code()
{
    static const char * const source_lines[] = {
        "#include <opencl_memory>",
        "#include <opencl_work_item>",
        "#include <opencl_work_group>",
        "using namespace cl;",
        "__kernel void test_sg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)",
        "{",
        "    ulong tid = get_global_id(0);",
        "    uint result = sub_group_broadcast(input[tid], 0);",
        "    output[tid] = result;",
        "}"
    };
    const size_t line_count = sizeof(source_lines) / sizeof(source_lines[0]);

    // Join the lines, re-adding the newline that ends each of them.
    std::string source;
    for (size_t line = 0; line < line_count; ++line)
    {
        source += source_lines[line];
        source += '\n';
    }
    return source;
}
42 
43 int
verify_sg_broadcast(const std::vector<cl_uint> & in,const std::vector<cl_uint> & out,size_t count,size_t wg_size,size_t sg_size)44 verify_sg_broadcast(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
45 {
46     size_t i, j, k;
47     for (i = 0; i < count; i += wg_size)
48     {
49         for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size)
50         {
51             // sub-group broadcast
52             cl_uint broadcast_result = in[i+j];
53 
54             // Check if all work-items in sub-group stored correct value
55             for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
56             {
57                 if (broadcast_result != out[i + j + k])
58                 {
59                     log_info(
60                         "sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
61                         type_name<cl_uint>().c_str(),
62                         i + j,
63                         static_cast<size_t>(broadcast_result),
64                         static_cast<size_t>(out[i + j + k]));
65                     return -1;
66                 }
67             }
68         }
69     }
70     return CL_SUCCESS;
71 }
72 
generate_input_sg_broadcast(size_t count,size_t wg_size)73 std::vector<cl_uint> generate_input_sg_broadcast(size_t count, size_t wg_size)
74 {
75     std::vector<cl_uint> input(count, cl_uint(0));
76     size_t j = wg_size;
77     for(size_t i = 0; i < count; i++)
78     {
79         input[i] = static_cast<cl_uint>(j);
80         j--;
81         if(j == 0)
82         {
83             j = wg_size;
84         }
85     }
86     return input;
87 }
88 
generate_output_sg_broadcast(size_t count,size_t wg_size)89 std::vector<cl_uint> generate_output_sg_broadcast(size_t count, size_t wg_size)
90 {
91     (void) wg_size;
92     return std::vector<cl_uint>(count, cl_uint(1));
93 }
94 
sub_group_broadcast(cl_device_id device,cl_context context,cl_command_queue queue,size_t count)95 int sub_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
96 {
97     cl_mem buffers[2];
98     cl_program program;
99     cl_kernel kernel;
100     size_t wg_size;
101     size_t sg_max_size;
102     size_t work_size[] = { 1 };
103     int err;
104 
105     // Get kernel source code
106     std::string code_str = generate_sg_broadcast_kernel_code();
107 
108 // -----------------------------------------------------------------------------------
109 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
110 // -----------------------------------------------------------------------------------
111 // Only OpenCL C++ to SPIR-V compilation
112 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
113     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast");
114     RETURN_ON_ERROR(err)
115     return err;
116 // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
117 #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
118     log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
119     return CL_SUCCESS;
120 #else
121     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast");
122     RETURN_ON_ERROR(err)
123 #endif
124 
125     // Get max flat workgroup size
126     err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
127     RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
128 
129     size_t param_value_size = 0;
130     err = clGetKernelSubGroupInfo(
131         kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
132         sizeof(size_t), static_cast<void*>(&wg_size),
133         sizeof(size_t), static_cast<void*>(&sg_max_size),
134         &param_value_size
135     );
136     RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
137 
138     // Verify size of returned param
139     if(param_value_size != sizeof(size_t))
140     {
141         RETURN_ON_ERROR_MSG(-1,
142             "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
143             sizeof(size_t),
144             param_value_size
145         )
146     }
147 
148     // Calculate global work size
149     size_t flat_work_size = count;
150     size_t wg_number = static_cast<size_t>(
151         std::ceil(static_cast<double>(count) / wg_size)
152     );
153     flat_work_size = wg_number * wg_size;
154     work_size[0] = flat_work_size;
155 
156     std::vector<cl_uint> input = generate_input_sg_broadcast(flat_work_size, wg_size);
157     std::vector<cl_uint> output = generate_output_sg_broadcast(flat_work_size, wg_size);
158 
159     buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL,&err);
160     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
161 
162     buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
163     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
164 
165     err = clEnqueueWriteBuffer(
166         queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
167         static_cast<void *>(input.data()), 0, NULL, NULL
168     );
169     RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
170 
171     err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
172     err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
173     RETURN_ON_CL_ERROR(err, "clSetKernelArg");
174 
175     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
176     RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
177 
178     err = clEnqueueReadBuffer(
179         queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
180         static_cast<void *>(output.data()), 0, NULL, NULL
181     );
182     RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
183 
184     int result = verify_sg_broadcast( input, output, work_size[0], wg_size, sg_max_size);
185     RETURN_ON_ERROR_MSG(result, "sub_group_broadcast failed")
186     log_info("sub_group_broadcast passed\n");
187 
188     clReleaseMemObject(buffers[0]);
189     clReleaseMemObject(buffers[1]);
190     clReleaseKernel(kernel);
191     clReleaseProgram(program);
192     return err;
193 }
194 
// Test entry point: runs the sub_group_broadcast test on n_elems work-items
// and returns its status code.
AUTO_TEST_CASE(test_sub_group_broadcast)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int status = sub_group_broadcast(device, context, queue, static_cast<size_t>(n_elems));
    CHECK_ERROR(status)
    return status;
}
203 
204 #endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
205