1 //
2 // Copyright (c) 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <functional>
18 #include <string>
19 #include <vector>
20
21 #include "procs.h"
22
23 template <typename T> struct TestDef
24 {
25 const char *name;
26 const char *kernel_code;
27 std::function<T(T, T, T)> ref;
28 };
29
30 template <typename T, unsigned N>
test_intmath(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,std::string typestr)31 int test_intmath(cl_device_id device, cl_context context,
32 cl_command_queue queue, int num_elements, std::string typestr)
33 {
34 TestDef<T> tests[] = {
35 // Test addition
36 {
37 "test_add",
38 R"(
39 __kernel void test_add(__global TYPE *srcA,
40 __global TYPE *srcB,
41 __global TYPE *srcC,
42 __global TYPE *dst)
43 {
44 int tid = get_global_id(0);
45 dst[tid] = srcA[tid] + srcB[tid];
46 };
47 )",
48 [](T a, T b, T c) { return a + b; },
49 },
50
51 // Test subtraction
52 {
53 "test_sub",
54 R"(
55 __kernel void test_sub(__global TYPE *srcA,
56 __global TYPE *srcB,
57 __global TYPE *srcC,
58 __global TYPE *dst)
59 {
60 int tid = get_global_id(0);
61 dst[tid] = srcA[tid] - srcB[tid];
62 };
63 )",
64 [](T a, T b, T c) { return a - b; },
65 },
66
67 // Test multiplication
68 {
69 "test_mul",
70 R"(
71 __kernel void test_mul(__global TYPE *srcA,
72 __global TYPE *srcB,
73 __global TYPE *srcC,
74 __global TYPE *dst)
75 {
76 int tid = get_global_id(0);
77 dst[tid] = srcA[tid] * srcB[tid];
78 };
79 )",
80 [](T a, T b, T c) { return a * b; },
81 },
82
83 // Test multiply-accumulate
84 {
85 "test_mad",
86 R"(
87 __kernel void test_mad(__global TYPE *srcA,
88 __global TYPE *srcB,
89 __global TYPE *srcC,
90 __global TYPE *dst)
91 {
92 int tid = get_global_id(0);
93 dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];
94 };
95 )",
96 [](T a, T b, T c) { return a * b + c; },
97 },
98 };
99
100 clMemWrapper streams[4];
101 cl_int err;
102
103 if (std::is_same<T, cl_ulong>::value && !gHasLong)
104 {
105 log_info("64-bit integers are not supported on this device. Skipping "
106 "test.\n");
107 return TEST_SKIPPED_ITSELF;
108 }
109
110 // Create host buffers and fill with random data.
111 std::vector<T> inputA(num_elements * N);
112 std::vector<T> inputB(num_elements * N);
113 std::vector<T> inputC(num_elements * N);
114 std::vector<T> output(num_elements * N);
115 MTdataHolder d(gRandomSeed);
116 for (int i = 0; i < num_elements; i++)
117 {
118 inputA[i] = (T)genrand_int64(d);
119 inputB[i] = (T)genrand_int64(d);
120 inputC[i] = (T)genrand_int64(d);
121 }
122
123 size_t datasize = sizeof(T) * num_elements * N;
124
125 // Create device buffers.
126 for (int i = 0; i < ARRAY_SIZE(streams); i++)
127 {
128 streams[i] =
129 clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &err);
130 test_error(err, "clCreateBuffer failed");
131 }
132
133 // Copy input data to device.
134 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, datasize,
135 inputA.data(), 0, NULL, NULL);
136 test_error(err, "clEnqueueWriteBuffer failed\n");
137 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, datasize,
138 inputB.data(), 0, NULL, NULL);
139 test_error(err, "clEnqueueWriteBuffer failed\n");
140 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, datasize,
141 inputC.data(), 0, NULL, NULL);
142 test_error(err, "clEnqueueWriteBuffer failed\n");
143
144 std::string build_options = "-DTYPE=";
145 build_options += typestr;
146
147 // Run test for each operation
148 for (auto test : tests)
149 {
150 log_info("%s... ", test.name);
151
152 // Create kernel and set args
153 clProgramWrapper program;
154 clKernelWrapper kernel;
155 err = create_single_kernel_helper(context, &program, &kernel, 1,
156 &test.kernel_code, test.name,
157 build_options.c_str());
158 test_error(err, "create_single_kernel_helper failed\n");
159
160 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]);
161 err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &streams[1]);
162 err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &streams[2]);
163 err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &streams[3]);
164 test_error(err, "clSetKernelArgs failed\n");
165
166 // Run kernel
167 size_t threads[1] = { static_cast<size_t>(num_elements) };
168 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0,
169 NULL, NULL);
170 test_error(err, "clEnqueueNDRangeKernel failed\n");
171
172 // Read results
173 err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, datasize,
174 output.data(), 0, NULL, NULL);
175 test_error(err, "clEnqueueReadBuffer failed\n");
176
177 // Verify results
178 for (int i = 0; i < num_elements * N; i++)
179 {
180 T r = test.ref(inputA[i], inputB[i], inputC[i]);
181 if (r != output[i])
182 {
183 log_error("\n\nverification failed at index %d\n", i);
184 log_error("-> inputs: %llu, %llu, %llu\n",
185 static_cast<cl_uint>(inputA[i]),
186 static_cast<cl_uint>(inputB[i]),
187 static_cast<cl_uint>(inputC[i]));
188 log_error("-> expected %llu, got %llu\n\n",
189 static_cast<cl_uint>(r),
190 static_cast<cl_uint>(output[i]));
191 return TEST_FAIL;
192 }
193 }
194 log_info("passed\n");
195 }
196
197 return TEST_PASS;
198 }
199
test_intmath_int(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)200 int test_intmath_int(cl_device_id device, cl_context context,
201 cl_command_queue queue, int num_elements)
202 {
203 return test_intmath<cl_uint, 1>(device, context, queue, num_elements,
204 "uint");
205 }
206
test_intmath_int2(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)207 int test_intmath_int2(cl_device_id device, cl_context context,
208 cl_command_queue queue, int num_elements)
209 {
210 return test_intmath<cl_uint, 2>(device, context, queue, num_elements,
211 "uint2");
212 }
213
test_intmath_int4(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)214 int test_intmath_int4(cl_device_id device, cl_context context,
215 cl_command_queue queue, int num_elements)
216 {
217 return test_intmath<cl_uint, 4>(device, context, queue, num_elements,
218 "uint4");
219 }
220
test_intmath_long(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)221 int test_intmath_long(cl_device_id device, cl_context context,
222 cl_command_queue queue, int num_elements)
223 {
224 return test_intmath<cl_ulong, 1>(device, context, queue, num_elements,
225 "ulong");
226 }
227
test_intmath_long2(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)228 int test_intmath_long2(cl_device_id device, cl_context context,
229 cl_command_queue queue, int num_elements)
230 {
231 return test_intmath<cl_ulong, 2>(device, context, queue, num_elements,
232 "ulong2");
233 }
234
test_intmath_long4(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)235 int test_intmath_long4(cl_device_id device, cl_context context,
236 cl_command_queue queue, int num_elements)
237 {
238 return test_intmath<cl_ulong, 4>(device, context, queue, num_elements,
239 "ulong4");
240 }
241