1 //
2 // Copyright (c) 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <functional>
18 #include <string>
19 #include <vector>
20 
21 #include "procs.h"
22 
23 template <typename T> struct TestDef
24 {
25     const char *name;
26     const char *kernel_code;
27     std::function<T(T, T, T)> ref;
28 };
29 
30 template <typename T, unsigned N>
test_intmath(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,std::string typestr)31 int test_intmath(cl_device_id device, cl_context context,
32                  cl_command_queue queue, int num_elements, std::string typestr)
33 {
34     TestDef<T> tests[] = {
35         // Test addition
36         {
37             "test_add",
38             R"(
39   __kernel void test_add(__global TYPE *srcA,
40                          __global TYPE *srcB,
41                          __global TYPE *srcC,
42                          __global TYPE *dst)
43   {
44       int  tid = get_global_id(0);
45       dst[tid] = srcA[tid] + srcB[tid];
46   };
47 )",
48             [](T a, T b, T c) { return a + b; },
49         },
50 
51         // Test subtraction
52         {
53             "test_sub",
54             R"(
55   __kernel void test_sub(__global TYPE *srcA,
56                          __global TYPE *srcB,
57                          __global TYPE *srcC,
58                          __global TYPE *dst)
59   {
60       int  tid = get_global_id(0);
61       dst[tid] = srcA[tid] - srcB[tid];
62   };
63 )",
64             [](T a, T b, T c) { return a - b; },
65         },
66 
67         // Test multiplication
68         {
69             "test_mul",
70             R"(
71   __kernel void test_mul(__global TYPE *srcA,
72                          __global TYPE *srcB,
73                          __global TYPE *srcC,
74                          __global TYPE *dst)
75   {
76       int  tid = get_global_id(0);
77       dst[tid] = srcA[tid] * srcB[tid];
78   };
79 )",
80             [](T a, T b, T c) { return a * b; },
81         },
82 
83         // Test multiply-accumulate
84         {
85             "test_mad",
86             R"(
87   __kernel void test_mad(__global TYPE *srcA,
88                          __global TYPE *srcB,
89                          __global TYPE *srcC,
90                          __global TYPE *dst)
91   {
92       int  tid = get_global_id(0);
93       dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];
94   };
95 )",
96             [](T a, T b, T c) { return a * b + c; },
97         },
98     };
99 
100     clMemWrapper streams[4];
101     cl_int err;
102 
103     if (std::is_same<T, cl_ulong>::value && !gHasLong)
104     {
105         log_info("64-bit integers are not supported on this device. Skipping "
106                  "test.\n");
107         return TEST_SKIPPED_ITSELF;
108     }
109 
110     // Create host buffers and fill with random data.
111     std::vector<T> inputA(num_elements * N);
112     std::vector<T> inputB(num_elements * N);
113     std::vector<T> inputC(num_elements * N);
114     std::vector<T> output(num_elements * N);
115     MTdataHolder d(gRandomSeed);
116     for (int i = 0; i < num_elements; i++)
117     {
118         inputA[i] = (T)genrand_int64(d);
119         inputB[i] = (T)genrand_int64(d);
120         inputC[i] = (T)genrand_int64(d);
121     }
122 
123     size_t datasize = sizeof(T) * num_elements * N;
124 
125     // Create device buffers.
126     for (int i = 0; i < ARRAY_SIZE(streams); i++)
127     {
128         streams[i] =
129             clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &err);
130         test_error(err, "clCreateBuffer failed");
131     }
132 
133     // Copy input data to device.
134     err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, datasize,
135                                inputA.data(), 0, NULL, NULL);
136     test_error(err, "clEnqueueWriteBuffer failed\n");
137     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, datasize,
138                                inputB.data(), 0, NULL, NULL);
139     test_error(err, "clEnqueueWriteBuffer failed\n");
140     err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, datasize,
141                                inputC.data(), 0, NULL, NULL);
142     test_error(err, "clEnqueueWriteBuffer failed\n");
143 
144     std::string build_options = "-DTYPE=";
145     build_options += typestr;
146 
147     // Run test for each operation
148     for (auto test : tests)
149     {
150         log_info("%s... ", test.name);
151 
152         // Create kernel and set args
153         clProgramWrapper program;
154         clKernelWrapper kernel;
155         err = create_single_kernel_helper(context, &program, &kernel, 1,
156                                           &test.kernel_code, test.name,
157                                           build_options.c_str());
158         test_error(err, "create_single_kernel_helper failed\n");
159 
160         err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]);
161         err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &streams[1]);
162         err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &streams[2]);
163         err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &streams[3]);
164         test_error(err, "clSetKernelArgs failed\n");
165 
166         // Run kernel
167         size_t threads[1] = { static_cast<size_t>(num_elements) };
168         err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0,
169                                      NULL, NULL);
170         test_error(err, "clEnqueueNDRangeKernel failed\n");
171 
172         // Read results
173         err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, datasize,
174                                   output.data(), 0, NULL, NULL);
175         test_error(err, "clEnqueueReadBuffer failed\n");
176 
177         // Verify results
178         for (int i = 0; i < num_elements * N; i++)
179         {
180             T r = test.ref(inputA[i], inputB[i], inputC[i]);
181             if (r != output[i])
182             {
183                 log_error("\n\nverification failed at index %d\n", i);
184                 log_error("-> inputs: %llu, %llu, %llu\n",
185                           static_cast<cl_uint>(inputA[i]),
186                           static_cast<cl_uint>(inputB[i]),
187                           static_cast<cl_uint>(inputC[i]));
188                 log_error("-> expected %llu, got %llu\n\n",
189                           static_cast<cl_uint>(r),
190                           static_cast<cl_uint>(output[i]));
191                 return TEST_FAIL;
192             }
193         }
194         log_info("passed\n");
195     }
196 
197     return TEST_PASS;
198 }
199 
test_intmath_int(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)200 int test_intmath_int(cl_device_id device, cl_context context,
201                      cl_command_queue queue, int num_elements)
202 {
203     return test_intmath<cl_uint, 1>(device, context, queue, num_elements,
204                                     "uint");
205 }
206 
test_intmath_int2(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)207 int test_intmath_int2(cl_device_id device, cl_context context,
208                       cl_command_queue queue, int num_elements)
209 {
210     return test_intmath<cl_uint, 2>(device, context, queue, num_elements,
211                                     "uint2");
212 }
213 
test_intmath_int4(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)214 int test_intmath_int4(cl_device_id device, cl_context context,
215                       cl_command_queue queue, int num_elements)
216 {
217     return test_intmath<cl_uint, 4>(device, context, queue, num_elements,
218                                     "uint4");
219 }
220 
test_intmath_long(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)221 int test_intmath_long(cl_device_id device, cl_context context,
222                       cl_command_queue queue, int num_elements)
223 {
224     return test_intmath<cl_ulong, 1>(device, context, queue, num_elements,
225                                      "ulong");
226 }
227 
test_intmath_long2(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)228 int test_intmath_long2(cl_device_id device, cl_context context,
229                        cl_command_queue queue, int num_elements)
230 {
231     return test_intmath<cl_ulong, 2>(device, context, queue, num_elements,
232                                      "ulong2");
233 }
234 
test_intmath_long4(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)235 int test_intmath_long4(cl_device_id device, cl_context context,
236                        cl_command_queue queue, int num_elements)
237 {
238     return test_intmath<cl_ulong, 4>(device, context, queue, num_elements,
239                                      "ulong4");
240 }
241