• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017-2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <algorithm>
19 #include <limits>
20 #include <vector>
21 
22 #include "procs.h"
23 
make_kernel_string(const std::string & type,const std::string & kernelName,const std::string & func)24 static std::string make_kernel_string(const std::string &type,
25                                       const std::string &kernelName,
26                                       const std::string &func)
27 {
28     // Build a kernel string of the form:
29     // __kernel void KERNEL_NAME(global TYPE *input, global TYPE *output) {
30     //     int  tid = get_global_id(0);
31     //     output[tid] = FUNC(input[tid]);
32     // }
33 
34     std::ostringstream os;
35     os << "__kernel void " << kernelName << "(global " << type
36        << " *input, global " << type << " *output) {\n";
37     os << "    int tid = get_global_id(0);\n";
38     os << "    output[tid] = " << func << "(input[tid]);\n";
39     os << "}\n";
40     return os.str();
41 }
42 
43 template <typename T> struct TestTypeInfo
44 {
45 };
46 
47 template <> struct TestTypeInfo<cl_int>
48 {
49     static constexpr const char *deviceName = "int";
50 };
51 
52 template <> struct TestTypeInfo<cl_uint>
53 {
54     static constexpr const char *deviceName = "uint";
55 };
56 
57 template <> struct TestTypeInfo<cl_long>
58 {
59     static constexpr const char *deviceName = "long";
60 };
61 
62 template <> struct TestTypeInfo<cl_ulong>
63 {
64     static constexpr const char *deviceName = "ulong";
65 };
66 
67 template <typename T> struct Add
68 {
69     using Type = T;
70     static constexpr const char *opName = "add";
71     static constexpr T identityValue = 0;
combineAdd72     static T combine(T a, T b) { return a + b; }
73 };
74 
75 template <typename T> struct Max
76 {
77     using Type = T;
78     static constexpr const char *opName = "max";
79     static constexpr T identityValue = std::numeric_limits<T>::min();
combineMax80     static T combine(T a, T b) { return std::max(a, b); }
81 };
82 
83 template <typename T> struct Min
84 {
85     using Type = T;
86     static constexpr const char *opName = "min";
87     static constexpr T identityValue = std::numeric_limits<T>::max();
combineMin88     static T combine(T a, T b) { return std::min(a, b); }
89 };
90 
91 template <typename C> struct Reduce
92 {
93     using Type = typename C::Type;
94 
95     static constexpr const char *testName = "work_group_reduce";
96     static constexpr const char *testOpName = C::opName;
97     static constexpr const char *deviceTypeName =
98         TestTypeInfo<Type>::deviceName;
99     static constexpr const char *kernelName = "test_wg_reduce";
verifyReduce100     static int verify(Type *inptr, Type *outptr, size_t n_elems,
101                       size_t max_wg_size)
102     {
103         for (size_t i = 0; i < n_elems; i += max_wg_size)
104         {
105             size_t wg_size = std::min(max_wg_size, n_elems - i);
106 
107             Type result = C::identityValue;
108             for (size_t j = 0; j < wg_size; j++)
109             {
110                 result = C::combine(result, inptr[i + j]);
111             }
112 
113             for (size_t j = 0; j < wg_size; j++)
114             {
115                 if (result != outptr[i + j])
116                 {
117                     log_info("%s_%s: Error at %zu\n", testName, testOpName,
118                              i + j);
119                     return -1;
120                 }
121             }
122         }
123         return 0;
124     }
125 };
126 
127 template <typename C> struct ScanInclusive
128 {
129     using Type = typename C::Type;
130 
131     static constexpr const char *testName = "work_group_scan_inclusive";
132     static constexpr const char *testOpName = C::opName;
133     static constexpr const char *deviceTypeName =
134         TestTypeInfo<Type>::deviceName;
135     static constexpr const char *kernelName = "test_wg_scan_inclusive";
verifyScanInclusive136     static int verify(Type *inptr, Type *outptr, size_t n_elems,
137                       size_t max_wg_size)
138     {
139         for (size_t i = 0; i < n_elems; i += max_wg_size)
140         {
141             size_t wg_size = std::min(max_wg_size, n_elems - i);
142 
143             Type result = C::identityValue;
144             for (size_t j = 0; j < wg_size; ++j)
145             {
146                 result = C::combine(result, inptr[i + j]);
147                 if (result != outptr[i + j])
148                 {
149                     log_info("%s_%s: Error at %zu\n", testName, testOpName,
150                              i + j);
151                     return -1;
152                 }
153             }
154         }
155         return 0;
156     }
157 };
158 
159 template <typename C> struct ScanExclusive
160 {
161     using Type = typename C::Type;
162 
163     static constexpr const char *testName = "work_group_scan_exclusive";
164     static constexpr const char *testOpName = C::opName;
165     static constexpr const char *deviceTypeName =
166         TestTypeInfo<Type>::deviceName;
167     static constexpr const char *kernelName = "test_wg_scan_exclusive";
verifyScanExclusive168     static int verify(Type *inptr, Type *outptr, size_t n_elems,
169                       size_t max_wg_size)
170     {
171         for (size_t i = 0; i < n_elems; i += max_wg_size)
172         {
173             size_t wg_size = std::min(max_wg_size, n_elems - i);
174 
175             Type result = C::identityValue;
176             for (size_t j = 0; j < wg_size; ++j)
177             {
178                 if (result != outptr[i + j])
179                 {
180                     log_info("%s_%s: Error at %zu\n", testName, testOpName,
181                              i + j);
182                     return -1;
183                 }
184                 result = C::combine(result, inptr[i + j]);
185             }
186         }
187         return 0;
188     }
189 };
190 
191 template <typename TestInfo>
run_test(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)192 static int run_test(cl_device_id device, cl_context context,
193                     cl_command_queue queue, int n_elems)
194 {
195     using T = typename TestInfo::Type;
196 
197     cl_int err = CL_SUCCESS;
198 
199     clProgramWrapper program;
200     clKernelWrapper kernel;
201 
202     std::string funcName = TestInfo::testName;
203     funcName += "_";
204     funcName += TestInfo::testOpName;
205 
206     std::string kernelName = TestInfo::kernelName;
207     kernelName += "_";
208     kernelName += TestInfo::testOpName;
209     kernelName += "_";
210     kernelName += TestInfo::deviceTypeName;
211 
212     std::string kernelString =
213         make_kernel_string(TestInfo::deviceTypeName, kernelName, funcName);
214 
215     const char *kernel_source = kernelString.c_str();
216     err = create_single_kernel_helper(context, &program, &kernel, 1,
217                                       &kernel_source, kernelName.c_str());
218     test_error(err, "Unable to create test kernel");
219 
220     size_t wg_size[1];
221     err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
222     test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
223 
224     clMemWrapper src = clCreateBuffer(context, CL_MEM_READ_WRITE,
225                                       sizeof(T) * n_elems, NULL, &err);
226     test_error(err, "Unable to create source buffer");
227 
228     clMemWrapper dst = clCreateBuffer(context, CL_MEM_READ_WRITE,
229                                       sizeof(T) * n_elems, NULL, &err);
230     test_error(err, "Unable to create destination buffer");
231 
232     std::vector<T> input_ptr(n_elems);
233 
234     MTdataHolder d(gRandomSeed);
235     for (int i = 0; i < n_elems; i++)
236     {
237         input_ptr[i] = (T)genrand_int64(d);
238     }
239 
240     err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems,
241                                input_ptr.data(), 0, NULL, NULL);
242     test_error(err, "clWriteBuffer to initialize src buffer failed");
243 
244     err = clSetKernelArg(kernel, 0, sizeof(src), &src);
245     test_error(err, "Unable to set src buffer kernel arg");
246     err |= clSetKernelArg(kernel, 1, sizeof(dst), &dst);
247     test_error(err, "Unable to set dst buffer kernel arg");
248 
249     size_t global_work_size[] = { (size_t)n_elems };
250     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
251                                  wg_size, 0, NULL, NULL);
252     test_error(err, "Unable to enqueue test kernel");
253 
254     std::vector<T> output_ptr(n_elems);
255 
256     cl_uint dead = 0xdeaddead;
257     memset_pattern4(output_ptr.data(), &dead, sizeof(T) * n_elems);
258     err = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(T) * n_elems,
259                               output_ptr.data(), 0, NULL, NULL);
260     test_error(err, "clEnqueueReadBuffer to read read dst buffer failed");
261 
262     if (TestInfo::verify(input_ptr.data(), output_ptr.data(), n_elems,
263                          wg_size[0]))
264     {
265         log_error("%s_%s %s failed\n", TestInfo::testName, TestInfo::testOpName,
266                   TestInfo::deviceTypeName);
267         return TEST_FAIL;
268     }
269 
270     log_info("%s_%s %s passed\n", TestInfo::testName, TestInfo::testOpName,
271              TestInfo::deviceTypeName);
272     return TEST_PASS;
273 }
274 
test_work_group_reduce_add(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)275 int test_work_group_reduce_add(cl_device_id device, cl_context context,
276                                cl_command_queue queue, int n_elems)
277 {
278     int result = TEST_PASS;
279 
280     result |= run_test<Reduce<Add<cl_int>>>(device, context, queue, n_elems);
281     result |= run_test<Reduce<Add<cl_uint>>>(device, context, queue, n_elems);
282 
283     if (gHasLong)
284     {
285         result |=
286             run_test<Reduce<Add<cl_long>>>(device, context, queue, n_elems);
287         result |=
288             run_test<Reduce<Add<cl_ulong>>>(device, context, queue, n_elems);
289     }
290 
291     return result;
292 }
293 
test_work_group_reduce_max(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)294 int test_work_group_reduce_max(cl_device_id device, cl_context context,
295                                cl_command_queue queue, int n_elems)
296 {
297     int result = TEST_PASS;
298 
299     result |= run_test<Reduce<Max<cl_int>>>(device, context, queue, n_elems);
300     result |= run_test<Reduce<Max<cl_uint>>>(device, context, queue, n_elems);
301 
302     if (gHasLong)
303     {
304         result |=
305             run_test<Reduce<Max<cl_long>>>(device, context, queue, n_elems);
306         result |=
307             run_test<Reduce<Max<cl_ulong>>>(device, context, queue, n_elems);
308     }
309 
310     return result;
311 }
312 
test_work_group_reduce_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)313 int test_work_group_reduce_min(cl_device_id device, cl_context context,
314                                cl_command_queue queue, int n_elems)
315 {
316     int result = TEST_PASS;
317 
318     result |= run_test<Reduce<Min<cl_int>>>(device, context, queue, n_elems);
319     result |= run_test<Reduce<Min<cl_uint>>>(device, context, queue, n_elems);
320 
321     if (gHasLong)
322     {
323         result |=
324             run_test<Reduce<Min<cl_long>>>(device, context, queue, n_elems);
325         result |=
326             run_test<Reduce<Min<cl_ulong>>>(device, context, queue, n_elems);
327     }
328 
329     return result;
330 }
331 
test_work_group_scan_inclusive_add(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)332 int test_work_group_scan_inclusive_add(cl_device_id device, cl_context context,
333                                        cl_command_queue queue, int n_elems)
334 {
335     int result = TEST_PASS;
336 
337     result |=
338         run_test<ScanInclusive<Add<cl_int>>>(device, context, queue, n_elems);
339     result |=
340         run_test<ScanInclusive<Add<cl_uint>>>(device, context, queue, n_elems);
341 
342     if (gHasLong)
343     {
344         result |= run_test<ScanInclusive<Add<cl_long>>>(device, context, queue,
345                                                         n_elems);
346         result |= run_test<ScanInclusive<Add<cl_ulong>>>(device, context, queue,
347                                                          n_elems);
348     }
349 
350     return result;
351 }
352 
test_work_group_scan_inclusive_max(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)353 int test_work_group_scan_inclusive_max(cl_device_id device, cl_context context,
354                                        cl_command_queue queue, int n_elems)
355 {
356     int result = TEST_PASS;
357 
358     result |=
359         run_test<ScanInclusive<Max<cl_int>>>(device, context, queue, n_elems);
360     result |=
361         run_test<ScanInclusive<Max<cl_uint>>>(device, context, queue, n_elems);
362 
363     if (gHasLong)
364     {
365         result |= run_test<ScanInclusive<Max<cl_long>>>(device, context, queue,
366                                                         n_elems);
367         result |= run_test<ScanInclusive<Max<cl_ulong>>>(device, context, queue,
368                                                          n_elems);
369     }
370 
371     return result;
372 }
373 
test_work_group_scan_inclusive_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)374 int test_work_group_scan_inclusive_min(cl_device_id device, cl_context context,
375                                        cl_command_queue queue, int n_elems)
376 {
377     int result = TEST_PASS;
378 
379     result |=
380         run_test<ScanInclusive<Min<cl_int>>>(device, context, queue, n_elems);
381     result |=
382         run_test<ScanInclusive<Min<cl_uint>>>(device, context, queue, n_elems);
383 
384     if (gHasLong)
385     {
386         result |= run_test<ScanInclusive<Min<cl_long>>>(device, context, queue,
387                                                         n_elems);
388         result |= run_test<ScanInclusive<Min<cl_ulong>>>(device, context, queue,
389                                                          n_elems);
390     }
391 
392     return result;
393 }
394 
test_work_group_scan_exclusive_add(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)395 int test_work_group_scan_exclusive_add(cl_device_id device, cl_context context,
396                                        cl_command_queue queue, int n_elems)
397 {
398     int result = TEST_PASS;
399 
400     result |=
401         run_test<ScanExclusive<Add<cl_int>>>(device, context, queue, n_elems);
402     result |=
403         run_test<ScanExclusive<Add<cl_uint>>>(device, context, queue, n_elems);
404 
405     if (gHasLong)
406     {
407         result |= run_test<ScanExclusive<Add<cl_long>>>(device, context, queue,
408                                                         n_elems);
409         result |= run_test<ScanExclusive<Add<cl_ulong>>>(device, context, queue,
410                                                          n_elems);
411     }
412 
413     return result;
414 }
415 
test_work_group_scan_exclusive_max(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)416 int test_work_group_scan_exclusive_max(cl_device_id device, cl_context context,
417                                        cl_command_queue queue, int n_elems)
418 {
419     int result = TEST_PASS;
420 
421     result |=
422         run_test<ScanExclusive<Max<cl_int>>>(device, context, queue, n_elems);
423     result |=
424         run_test<ScanExclusive<Max<cl_uint>>>(device, context, queue, n_elems);
425 
426     if (gHasLong)
427     {
428         result |= run_test<ScanExclusive<Max<cl_long>>>(device, context, queue,
429                                                         n_elems);
430         result |= run_test<ScanExclusive<Max<cl_ulong>>>(device, context, queue,
431                                                          n_elems);
432     }
433 
434     return result;
435 }
436 
test_work_group_scan_exclusive_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)437 int test_work_group_scan_exclusive_min(cl_device_id device, cl_context context,
438                                        cl_command_queue queue, int n_elems)
439 {
440     int result = TEST_PASS;
441 
442     result |=
443         run_test<ScanExclusive<Min<cl_int>>>(device, context, queue, n_elems);
444     result |=
445         run_test<ScanExclusive<Min<cl_uint>>>(device, context, queue, n_elems);
446 
447     if (gHasLong)
448     {
449         result |= run_test<ScanExclusive<Min<cl_long>>>(device, context, queue,
450                                                         n_elems);
451         result |= run_test<ScanExclusive<Min<cl_ulong>>>(device, context, queue,
452                                                          n_elems);
453     }
454 
455     return result;
456 }
457