1 //
2 // Copyright (c) 2017-2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <algorithm>
19 #include <limits>
20 #include <vector>
21
22 #include "procs.h"
23
make_kernel_string(const std::string & type,const std::string & kernelName,const std::string & func)24 static std::string make_kernel_string(const std::string &type,
25 const std::string &kernelName,
26 const std::string &func)
27 {
28 // Build a kernel string of the form:
29 // __kernel void KERNEL_NAME(global TYPE *input, global TYPE *output) {
30 // int tid = get_global_id(0);
31 // output[tid] = FUNC(input[tid]);
32 // }
33
34 std::ostringstream os;
35 os << "__kernel void " << kernelName << "(global " << type
36 << " *input, global " << type << " *output) {\n";
37 os << " int tid = get_global_id(0);\n";
38 os << " output[tid] = " << func << "(input[tid]);\n";
39 os << "}\n";
40 return os.str();
41 }
42
43 template <typename T> struct TestTypeInfo
44 {
45 };
46
47 template <> struct TestTypeInfo<cl_int>
48 {
49 static constexpr const char *deviceName = "int";
50 };
51
52 template <> struct TestTypeInfo<cl_uint>
53 {
54 static constexpr const char *deviceName = "uint";
55 };
56
57 template <> struct TestTypeInfo<cl_long>
58 {
59 static constexpr const char *deviceName = "long";
60 };
61
62 template <> struct TestTypeInfo<cl_ulong>
63 {
64 static constexpr const char *deviceName = "ulong";
65 };
66
67 template <typename T> struct Add
68 {
69 using Type = T;
70 static constexpr const char *opName = "add";
71 static constexpr T identityValue = 0;
combineAdd72 static T combine(T a, T b) { return a + b; }
73 };
74
75 template <typename T> struct Max
76 {
77 using Type = T;
78 static constexpr const char *opName = "max";
79 static constexpr T identityValue = std::numeric_limits<T>::min();
combineMax80 static T combine(T a, T b) { return std::max(a, b); }
81 };
82
83 template <typename T> struct Min
84 {
85 using Type = T;
86 static constexpr const char *opName = "min";
87 static constexpr T identityValue = std::numeric_limits<T>::max();
combineMin88 static T combine(T a, T b) { return std::min(a, b); }
89 };
90
91 template <typename C> struct Reduce
92 {
93 using Type = typename C::Type;
94
95 static constexpr const char *testName = "work_group_reduce";
96 static constexpr const char *testOpName = C::opName;
97 static constexpr const char *deviceTypeName =
98 TestTypeInfo<Type>::deviceName;
99 static constexpr const char *kernelName = "test_wg_reduce";
verifyReduce100 static int verify(Type *inptr, Type *outptr, size_t n_elems,
101 size_t max_wg_size)
102 {
103 for (size_t i = 0; i < n_elems; i += max_wg_size)
104 {
105 size_t wg_size = std::min(max_wg_size, n_elems - i);
106
107 Type result = C::identityValue;
108 for (size_t j = 0; j < wg_size; j++)
109 {
110 result = C::combine(result, inptr[i + j]);
111 }
112
113 for (size_t j = 0; j < wg_size; j++)
114 {
115 if (result != outptr[i + j])
116 {
117 log_info("%s_%s: Error at %zu\n", testName, testOpName,
118 i + j);
119 return -1;
120 }
121 }
122 }
123 return 0;
124 }
125 };
126
127 template <typename C> struct ScanInclusive
128 {
129 using Type = typename C::Type;
130
131 static constexpr const char *testName = "work_group_scan_inclusive";
132 static constexpr const char *testOpName = C::opName;
133 static constexpr const char *deviceTypeName =
134 TestTypeInfo<Type>::deviceName;
135 static constexpr const char *kernelName = "test_wg_scan_inclusive";
verifyScanInclusive136 static int verify(Type *inptr, Type *outptr, size_t n_elems,
137 size_t max_wg_size)
138 {
139 for (size_t i = 0; i < n_elems; i += max_wg_size)
140 {
141 size_t wg_size = std::min(max_wg_size, n_elems - i);
142
143 Type result = C::identityValue;
144 for (size_t j = 0; j < wg_size; ++j)
145 {
146 result = C::combine(result, inptr[i + j]);
147 if (result != outptr[i + j])
148 {
149 log_info("%s_%s: Error at %zu\n", testName, testOpName,
150 i + j);
151 return -1;
152 }
153 }
154 }
155 return 0;
156 }
157 };
158
159 template <typename C> struct ScanExclusive
160 {
161 using Type = typename C::Type;
162
163 static constexpr const char *testName = "work_group_scan_exclusive";
164 static constexpr const char *testOpName = C::opName;
165 static constexpr const char *deviceTypeName =
166 TestTypeInfo<Type>::deviceName;
167 static constexpr const char *kernelName = "test_wg_scan_exclusive";
verifyScanExclusive168 static int verify(Type *inptr, Type *outptr, size_t n_elems,
169 size_t max_wg_size)
170 {
171 for (size_t i = 0; i < n_elems; i += max_wg_size)
172 {
173 size_t wg_size = std::min(max_wg_size, n_elems - i);
174
175 Type result = C::identityValue;
176 for (size_t j = 0; j < wg_size; ++j)
177 {
178 if (result != outptr[i + j])
179 {
180 log_info("%s_%s: Error at %zu\n", testName, testOpName,
181 i + j);
182 return -1;
183 }
184 result = C::combine(result, inptr[i + j]);
185 }
186 }
187 return 0;
188 }
189 };
190
191 template <typename TestInfo>
run_test(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)192 static int run_test(cl_device_id device, cl_context context,
193 cl_command_queue queue, int n_elems)
194 {
195 using T = typename TestInfo::Type;
196
197 cl_int err = CL_SUCCESS;
198
199 clProgramWrapper program;
200 clKernelWrapper kernel;
201
202 std::string funcName = TestInfo::testName;
203 funcName += "_";
204 funcName += TestInfo::testOpName;
205
206 std::string kernelName = TestInfo::kernelName;
207 kernelName += "_";
208 kernelName += TestInfo::testOpName;
209 kernelName += "_";
210 kernelName += TestInfo::deviceTypeName;
211
212 std::string kernelString =
213 make_kernel_string(TestInfo::deviceTypeName, kernelName, funcName);
214
215 const char *kernel_source = kernelString.c_str();
216 err = create_single_kernel_helper(context, &program, &kernel, 1,
217 &kernel_source, kernelName.c_str());
218 test_error(err, "Unable to create test kernel");
219
220 size_t wg_size[1];
221 err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
222 test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
223
224 clMemWrapper src = clCreateBuffer(context, CL_MEM_READ_WRITE,
225 sizeof(T) * n_elems, NULL, &err);
226 test_error(err, "Unable to create source buffer");
227
228 clMemWrapper dst = clCreateBuffer(context, CL_MEM_READ_WRITE,
229 sizeof(T) * n_elems, NULL, &err);
230 test_error(err, "Unable to create destination buffer");
231
232 std::vector<T> input_ptr(n_elems);
233
234 MTdataHolder d(gRandomSeed);
235 for (int i = 0; i < n_elems; i++)
236 {
237 input_ptr[i] = (T)genrand_int64(d);
238 }
239
240 err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems,
241 input_ptr.data(), 0, NULL, NULL);
242 test_error(err, "clWriteBuffer to initialize src buffer failed");
243
244 err = clSetKernelArg(kernel, 0, sizeof(src), &src);
245 test_error(err, "Unable to set src buffer kernel arg");
246 err |= clSetKernelArg(kernel, 1, sizeof(dst), &dst);
247 test_error(err, "Unable to set dst buffer kernel arg");
248
249 size_t global_work_size[] = { (size_t)n_elems };
250 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
251 wg_size, 0, NULL, NULL);
252 test_error(err, "Unable to enqueue test kernel");
253
254 std::vector<T> output_ptr(n_elems);
255
256 cl_uint dead = 0xdeaddead;
257 memset_pattern4(output_ptr.data(), &dead, sizeof(T) * n_elems);
258 err = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(T) * n_elems,
259 output_ptr.data(), 0, NULL, NULL);
260 test_error(err, "clEnqueueReadBuffer to read read dst buffer failed");
261
262 if (TestInfo::verify(input_ptr.data(), output_ptr.data(), n_elems,
263 wg_size[0]))
264 {
265 log_error("%s_%s %s failed\n", TestInfo::testName, TestInfo::testOpName,
266 TestInfo::deviceTypeName);
267 return TEST_FAIL;
268 }
269
270 log_info("%s_%s %s passed\n", TestInfo::testName, TestInfo::testOpName,
271 TestInfo::deviceTypeName);
272 return TEST_PASS;
273 }
274
test_work_group_reduce_add(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)275 int test_work_group_reduce_add(cl_device_id device, cl_context context,
276 cl_command_queue queue, int n_elems)
277 {
278 int result = TEST_PASS;
279
280 result |= run_test<Reduce<Add<cl_int>>>(device, context, queue, n_elems);
281 result |= run_test<Reduce<Add<cl_uint>>>(device, context, queue, n_elems);
282
283 if (gHasLong)
284 {
285 result |=
286 run_test<Reduce<Add<cl_long>>>(device, context, queue, n_elems);
287 result |=
288 run_test<Reduce<Add<cl_ulong>>>(device, context, queue, n_elems);
289 }
290
291 return result;
292 }
293
test_work_group_reduce_max(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)294 int test_work_group_reduce_max(cl_device_id device, cl_context context,
295 cl_command_queue queue, int n_elems)
296 {
297 int result = TEST_PASS;
298
299 result |= run_test<Reduce<Max<cl_int>>>(device, context, queue, n_elems);
300 result |= run_test<Reduce<Max<cl_uint>>>(device, context, queue, n_elems);
301
302 if (gHasLong)
303 {
304 result |=
305 run_test<Reduce<Max<cl_long>>>(device, context, queue, n_elems);
306 result |=
307 run_test<Reduce<Max<cl_ulong>>>(device, context, queue, n_elems);
308 }
309
310 return result;
311 }
312
test_work_group_reduce_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)313 int test_work_group_reduce_min(cl_device_id device, cl_context context,
314 cl_command_queue queue, int n_elems)
315 {
316 int result = TEST_PASS;
317
318 result |= run_test<Reduce<Min<cl_int>>>(device, context, queue, n_elems);
319 result |= run_test<Reduce<Min<cl_uint>>>(device, context, queue, n_elems);
320
321 if (gHasLong)
322 {
323 result |=
324 run_test<Reduce<Min<cl_long>>>(device, context, queue, n_elems);
325 result |=
326 run_test<Reduce<Min<cl_ulong>>>(device, context, queue, n_elems);
327 }
328
329 return result;
330 }
331
test_work_group_scan_inclusive_add(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)332 int test_work_group_scan_inclusive_add(cl_device_id device, cl_context context,
333 cl_command_queue queue, int n_elems)
334 {
335 int result = TEST_PASS;
336
337 result |=
338 run_test<ScanInclusive<Add<cl_int>>>(device, context, queue, n_elems);
339 result |=
340 run_test<ScanInclusive<Add<cl_uint>>>(device, context, queue, n_elems);
341
342 if (gHasLong)
343 {
344 result |= run_test<ScanInclusive<Add<cl_long>>>(device, context, queue,
345 n_elems);
346 result |= run_test<ScanInclusive<Add<cl_ulong>>>(device, context, queue,
347 n_elems);
348 }
349
350 return result;
351 }
352
test_work_group_scan_inclusive_max(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)353 int test_work_group_scan_inclusive_max(cl_device_id device, cl_context context,
354 cl_command_queue queue, int n_elems)
355 {
356 int result = TEST_PASS;
357
358 result |=
359 run_test<ScanInclusive<Max<cl_int>>>(device, context, queue, n_elems);
360 result |=
361 run_test<ScanInclusive<Max<cl_uint>>>(device, context, queue, n_elems);
362
363 if (gHasLong)
364 {
365 result |= run_test<ScanInclusive<Max<cl_long>>>(device, context, queue,
366 n_elems);
367 result |= run_test<ScanInclusive<Max<cl_ulong>>>(device, context, queue,
368 n_elems);
369 }
370
371 return result;
372 }
373
test_work_group_scan_inclusive_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)374 int test_work_group_scan_inclusive_min(cl_device_id device, cl_context context,
375 cl_command_queue queue, int n_elems)
376 {
377 int result = TEST_PASS;
378
379 result |=
380 run_test<ScanInclusive<Min<cl_int>>>(device, context, queue, n_elems);
381 result |=
382 run_test<ScanInclusive<Min<cl_uint>>>(device, context, queue, n_elems);
383
384 if (gHasLong)
385 {
386 result |= run_test<ScanInclusive<Min<cl_long>>>(device, context, queue,
387 n_elems);
388 result |= run_test<ScanInclusive<Min<cl_ulong>>>(device, context, queue,
389 n_elems);
390 }
391
392 return result;
393 }
394
test_work_group_scan_exclusive_add(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)395 int test_work_group_scan_exclusive_add(cl_device_id device, cl_context context,
396 cl_command_queue queue, int n_elems)
397 {
398 int result = TEST_PASS;
399
400 result |=
401 run_test<ScanExclusive<Add<cl_int>>>(device, context, queue, n_elems);
402 result |=
403 run_test<ScanExclusive<Add<cl_uint>>>(device, context, queue, n_elems);
404
405 if (gHasLong)
406 {
407 result |= run_test<ScanExclusive<Add<cl_long>>>(device, context, queue,
408 n_elems);
409 result |= run_test<ScanExclusive<Add<cl_ulong>>>(device, context, queue,
410 n_elems);
411 }
412
413 return result;
414 }
415
test_work_group_scan_exclusive_max(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)416 int test_work_group_scan_exclusive_max(cl_device_id device, cl_context context,
417 cl_command_queue queue, int n_elems)
418 {
419 int result = TEST_PASS;
420
421 result |=
422 run_test<ScanExclusive<Max<cl_int>>>(device, context, queue, n_elems);
423 result |=
424 run_test<ScanExclusive<Max<cl_uint>>>(device, context, queue, n_elems);
425
426 if (gHasLong)
427 {
428 result |= run_test<ScanExclusive<Max<cl_long>>>(device, context, queue,
429 n_elems);
430 result |= run_test<ScanExclusive<Max<cl_ulong>>>(device, context, queue,
431 n_elems);
432 }
433
434 return result;
435 }
436
test_work_group_scan_exclusive_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)437 int test_work_group_scan_exclusive_min(cl_device_id device, cl_context context,
438 cl_command_queue queue, int n_elems)
439 {
440 int result = TEST_PASS;
441
442 result |=
443 run_test<ScanExclusive<Min<cl_int>>>(device, context, queue, n_elems);
444 result |=
445 run_test<ScanExclusive<Min<cl_uint>>>(device, context, queue, n_elems);
446
447 if (gHasLong)
448 {
449 result |= run_test<ScanExclusive<Min<cl_long>>>(device, context, queue,
450 n_elems);
451 result |= run_test<ScanExclusive<Min<cl_ulong>>>(device, context, queue,
452 n_elems);
453 }
454
455 return result;
456 }
457