• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <assert.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 #include "harness/errorHelpers.h"
26 #define TEST_INT_VALUE 100
27 
// OpenCL C source for the two kernels used by the sub-group divergence test.
// Both kernels are built from this single program string.
const char *pipe_subgroups_kernel_code =
    "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"

    // Producer: only work-items whose sub-group id is even take part. Each
    // one flags itself in active_work_item_buffer before the sub-group tries
    // to reserve get_sub_group_size() packets; on a valid reservation every
    // work-item writes src[gid] at its sub-group-local index and the
    // reservation is committed.
    "__kernel void test_pipe_subgroups_divergence_write(__global int *src, __write_only pipe int out_pipe, __global int *active_work_item_buffer)\n"
    "{\n"
    "    int gid = get_global_id(0);\n"
    "    reserve_id_t res_id; \n"
    "\n"
    "    if(get_sub_group_id() % 2 == 0)\n"
    "    {\n"
    "        active_work_item_buffer[gid] = 1;\n"
    "        res_id = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());\n"
    "        if(is_valid_reserve_id(res_id))\n"
    "        {\n"
    "            write_pipe(out_pipe, res_id, get_sub_group_local_id(), &src[gid]);\n"
    "            sub_group_commit_write_pipe(out_pipe, res_id);\n"
    "        }\n"
    "    }\n"
    "}\n"
    "\n"

    // Consumer: mirrors the producer. Even-numbered sub-groups reserve
    // get_sub_group_size() packets for reading and, on a valid reservation,
    // each work-item reads one packet into dst[gid] before the commit.
    "__kernel void test_pipe_subgroups_divergence_read(__read_only pipe int in_pipe, __global int *dst)\n"
    "{\n"
    "    int gid = get_global_id(0);\n"
    "    reserve_id_t res_id; \n"
    "\n"
    "    if(get_sub_group_id() % 2 == 0)\n"
    "    {\n"
    "        res_id = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());\n"
    "        if(is_valid_reserve_id(res_id))\n"
    "        {\n"
    "            read_pipe(in_pipe, res_id, get_sub_group_local_id(), &dst[gid]);\n"
    "            sub_group_commit_read_pipe(in_pipe, res_id);\n"
    "        }\n"
    "    }\n"
    "}\n";
63 
verify_result(void * ptr1,void * ptr2,int n)64 static int verify_result(void *ptr1, void *ptr2, int n)
65 {
66     int     i;
67     int        sum_input = 0, sum_output = 0;
68     cl_int    *inptr = (cl_int *)ptr1;
69     cl_int    *outptr = (cl_int *)ptr2;
70 
71     for(i = 0; i < n; i++)
72     {
73         sum_input += inptr[i];
74     }
75     sum_input *= TEST_INT_VALUE;
76     for(i = 0; i < n; i++)
77     {
78         if(outptr[i] == TEST_INT_VALUE){
79             sum_output += outptr[i];
80         }
81     }
82 
83     if(sum_input != sum_output){
84         return -1;
85     }
86     return 0;
87 }
88 
test_pipe_subgroups_divergence(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)89 int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
90 {
91     clMemWrapper pipe;
92     clMemWrapper buffers[3];
93     cl_int *outptr;
94     cl_int *inptr;
95     cl_int *active_work_item_buffer;
96     clProgramWrapper program;
97     clKernelWrapper kernel[2];
98     size_t global_work_size[3];
99     size_t local_work_size[3];
100     cl_int err;
101     cl_int size;
102     int i;
103     size_t subgroup_count;
104     clEventWrapper producer_sync_event = NULL;
105     clEventWrapper consumer_sync_event = NULL;
106     BufferOwningPtr<cl_int> BufferInPtr;
107     BufferOwningPtr<cl_int> BufferOutPtr;
108     const char *kernelName[] = { "test_pipe_subgroups_divergence_write",
109                                  "test_pipe_subgroups_divergence_read" };
110 
111     size_t min_alignment = get_min_alignment(context);
112 
113     global_work_size[0] = (cl_uint)num_elements;
114 
115     if (!is_extension_available(deviceID, "cl_khr_subgroups"))
116     {
117         log_info(
118             "cl_khr_subgroups is not supported on this platoform. Skipping "
119             "test.\n");
120         return CL_SUCCESS;
121     }
122 
123     size = sizeof(int) * num_elements;
124     inptr = (cl_int *)align_malloc(size, min_alignment);
125     outptr = (cl_int *)align_malloc(size, min_alignment);
126     active_work_item_buffer = (cl_int *)align_malloc(size, min_alignment);
127 
128     for(i = 0; i < num_elements; i++){
129         inptr[i] = TEST_INT_VALUE;
130         outptr[i] = 0;
131         active_work_item_buffer[i] = 0;
132     }
133     BufferInPtr.reset(inptr, nullptr, 0, size, true);
134     BufferOutPtr.reset(outptr, nullptr, 0, size, true);
135 
136     buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err);
137     test_error_ret(err, " clCreateBuffer failed", -1);
138 
139     buffers[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,  size, outptr, &err);
140     test_error_ret(err, " clCreateBuffer failed", -1);
141 
142     buffers[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,  size, active_work_item_buffer, &err);
143     test_error_ret(err, " clCreateBuffer failed", -1);
144 
145     pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), num_elements, NULL, &err);
146     test_error_ret(err, " clCreatePipe failed", -1);
147 
148     // Create producer kernel
149     err = create_single_kernel_helper(
150         context, &program, &kernel[0], 1,
151         (const char **)&pipe_subgroups_kernel_code, kernelName[0]);
152     test_error_ret(err, " Error creating program", -1);
153 
154     //Create consumer kernel
155     kernel[1] = clCreateKernel(program, kernelName[1], &err);
156     test_error_ret(err, " Error creating kernel", -1);
157 
158     err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]);
159     err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe);
160     err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), (void*)&buffers[2]);
161     err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe);
162     err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]);
163     test_error_ret(err, " clSetKernelArg failed", -1);
164 
165     err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
166     test_error_ret(err, " Unable to get work group size to use", -1);
167 
168     cl_platform_id platform;
169     err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform),
170                           &platform, NULL);
171     test_error_ret(err, " clGetDeviceInfo failed", -1);
172 
173     clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR =
174         (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(
175             platform, "clGetKernelSubGroupInfoKHR");
176 
177     err = clGetKernelSubGroupInfoKHR(kernel[0], deviceID, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, sizeof(local_work_size[0]), &local_work_size[0], sizeof(subgroup_count), &subgroup_count, NULL);
178     test_error_ret(err, " clGetKernelSubGroupInfoKHR failed", -1);
179     if(subgroup_count <= 1)
180     {
181         log_info("Only 1 subgroup per workgroup for the kernel. Hence no divergence among subgroups possible. Skipping test.\n");
182         return CL_SUCCESS;
183     }
184 
185     // Launch Producer kernel
186     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, &producer_sync_event );
187     test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
188 
189     err = clEnqueueReadBuffer(queue, buffers[2], true, 0, size, active_work_item_buffer, 1, &producer_sync_event, NULL);
190     test_error_ret(err, " clEnqueueReadBuffer failed", -1);
191 
192     // Launch Consumer kernel
193     err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, local_work_size, 1, &producer_sync_event, &consumer_sync_event );
194     test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
195 
196     err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size, outptr, 1, &consumer_sync_event, NULL);
197     test_error_ret(err, " clEnqueueReadBuffer failed", -1);
198 
199     if( verify_result( active_work_item_buffer, outptr, num_elements)){
200         log_error("test_pipe_subgroups_divergence failed\n");
201         return -1;
202     }
203     else {
204         log_info("test_pipe_subgroups_divergence passed\n");
205     }
206 
207     return 0;
208 }
209