• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24 
25 #include "procs.h"
26 
27 static const char *enqueued_local_size_2d_code =
28 "__kernel void test_enqueued_local_size_2d(global int *dst)\n"
29 "{\n"
30 "    if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n"
31 "    {\n"
32 "        dst[0] = (int)get_enqueued_local_size(0)\n;"
33 "        dst[1] = (int)get_enqueued_local_size(1)\n;"
34 "    }\n"
35 "}\n";
36 
37 static const char *enqueued_local_size_1d_code =
38 "__kernel void test_enqueued_local_size_1d(global int *dst)\n"
39 "{\n"
40 "    int  tid_x = get_global_id(0);\n"
41 "    if (get_global_id(0) == 0)\n"
42 "    {\n"
43 "        dst[tid_x] = (int)get_enqueued_local_size(0)\n;"
44 "    }\n"
45 "}\n";
46 
47 
48 static int
verify_enqueued_local_size(int * result,size_t * expected,int n)49 verify_enqueued_local_size(int *result, size_t *expected, int n)
50 {
51     int i;
52     for (i=0; i<n; i++)
53     {
54         if (result[i] != (int)expected[i])
55         {
56             log_error("get_enqueued_local_size failed\n");
57             return -1;
58         }
59     }
60     log_info("get_enqueued_local_size passed\n");
61     return 0;
62 }
63 
64 
65 int
test_enqueued_local_size(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)66 test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
67 {
68     cl_mem streams;
69     cl_program program[2];
70     cl_kernel kernel[2];
71 
72     int *output_ptr;
73     size_t globalsize[2];
74     size_t localsize[2];
75     int err;
76 
77     // For an OpenCL-3.0 device that does not support non-uniform work-groups
78     // we cannot enqueue local sizes which do not divide the global dimensions
79     // but we can still run the test checking that get_enqueued_local_size ==
80     // get_local_size.
81     bool use_uniform_work_groups{ false };
82     if (get_device_cl_version(device) >= Version(3, 0))
83     {
84         cl_bool areNonUniformWorkGroupsSupported = false;
85         err = clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
86                               sizeof(areNonUniformWorkGroupsSupported),
87                               &areNonUniformWorkGroupsSupported, nullptr);
88         test_error_ret(err, "clGetDeviceInfo failed.", TEST_FAIL);
89 
90         if (CL_FALSE == areNonUniformWorkGroupsSupported)
91         {
92             log_info("Non-uniform work group sizes are not supported, "
93                      "enqueuing with uniform workgroups\n");
94             use_uniform_work_groups = true;
95         }
96     }
97 
98     output_ptr   = (int*)malloc(2 * sizeof(int));
99 
100     streams =
101         clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(int), NULL, &err);
102     test_error( err, "clCreateBuffer failed.");
103 
104     std::string cl_std = "-cl-std=CL";
105     cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
106     err = create_single_kernel_helper_with_build_options(
107         context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
108         "test_enqueued_local_size_1d", cl_std.c_str());
109     test_error( err, "create_single_kernel_helper failed");
110     err = create_single_kernel_helper_with_build_options(
111         context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
112         "test_enqueued_local_size_2d", cl_std.c_str());
113     test_error( err, "create_single_kernel_helper failed");
114 
115     err  = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
116     test_error( err, "clSetKernelArgs failed.");
117     err  = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
118     test_error( err, "clSetKernelArgs failed.");
119 
120     globalsize[0] = (size_t)num_elements;
121     globalsize[1] = (size_t)num_elements;
122 
123     size_t max_wgs;
124     err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL);
125     test_error( err, "clGetDeviceInfo failed.");
126 
127     localsize[0] = MIN(16, max_wgs);
128     localsize[1] = MIN(11, max_wgs / localsize[0]);
129     // If we need to use uniform workgroups because non-uniform workgroups are
130     // not supported, round up to the next global size that is divisible by the
131     // local size.
132     if (use_uniform_work_groups)
133     {
134         if (globalsize[0] % localsize[0])
135         {
136             globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
137         }
138         if (globalsize[1] % localsize[1])
139         {
140             globalsize[1] += (localsize[1] - (globalsize[1] % localsize[1]));
141         }
142     }
143 
144     err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
145     test_error( err, "clEnqueueNDRangeKernel failed.");
146 
147     err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
148     test_error( err, "clEnqueueReadBuffer failed.");
149 
150     err = verify_enqueued_local_size(output_ptr, localsize, 2);
151 
152     globalsize[0] = (size_t)num_elements;
153     localsize[0] = 9;
154     if (use_uniform_work_groups && (globalsize[0] % localsize[0]))
155     {
156         globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
157     }
158     err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL);
159     test_error( err, "clEnqueueNDRangeKernel failed.");
160 
161     err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
162     test_error( err, "clEnqueueReadBuffer failed.");
163 
164     err = verify_enqueued_local_size(output_ptr, localsize, 1);
165 
166     // cleanup
167     clReleaseMemObject(streams);
168     clReleaseKernel(kernel[0]);
169     clReleaseKernel(kernel[1]);
170     clReleaseProgram(program[0]);
171     clReleaseProgram(program[1]);
172     free(output_ptr);
173 
174     return err;
175 }
176