1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 #define NUM_PROGRAMS 6
26
27 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
28
29
30 const char *int_mul24_kernel_code =
31 "__kernel void test_int_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n"
32 "{\n"
33 " int tid = get_global_id(0);\n"
34 "\n"
35 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
36 "}\n";
37
38 const char *int2_mul24_kernel_code =
39 "__kernel void test_int2_mul24(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
40 "{\n"
41 " int tid = get_global_id(0);\n"
42 "\n"
43 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
44 "}\n";
45
46 const char *int3_mul24_kernel_code =
47 "__kernel void test_int3_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n"
48 "{\n"
49 " int tid = get_global_id(0);\n"
50 " int3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n"
51 " vstore3(tmp, tid, dst);\n"
52 "}\n";
53
54 const char *int4_mul24_kernel_code =
55 "__kernel void test_int4_mul24(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
56 "{\n"
57 " int tid = get_global_id(0);\n"
58 "\n"
59 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
60 "}\n";
61
62 const char *int8_mul24_kernel_code =
63 "__kernel void test_int8_mul24(__global int8 *srcA, __global int8 *srcB, __global int8 *dst)\n"
64 "{\n"
65 " int tid = get_global_id(0);\n"
66 "\n"
67 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
68 "}\n";
69
70 const char *int16_mul24_kernel_code =
71 "__kernel void test_int16_mul24(__global int16 *srcA, __global int16 *srcB, __global int16 *dst)\n"
72 "{\n"
73 " int tid = get_global_id(0);\n"
74 "\n"
75 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
76 "}\n";
77
78 const char *uint_mul24_kernel_code =
79 "__kernel void test_int_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n"
80 "{\n"
81 " int tid = get_global_id(0);\n"
82 "\n"
83 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
84 "}\n";
85
86 const char *uint2_mul24_kernel_code =
87 "__kernel void test_int2_mul24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *dst)\n"
88 "{\n"
89 " int tid = get_global_id(0);\n"
90 "\n"
91 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
92 "}\n";
93
94 const char *uint3_mul24_kernel_code =
95 "__kernel void test_int3_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n"
96 "{\n"
97 " int tid = get_global_id(0);\n"
98 " uint3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n"
99 " vstore3(tmp, tid, dst);\n"
100 "}\n";
101
102 const char *uint4_mul24_kernel_code =
103 "__kernel void test_int4_mul24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *dst)\n"
104 "{\n"
105 " int tid = get_global_id(0);\n"
106 "\n"
107 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
108 "}\n";
109
110 const char *uint8_mul24_kernel_code =
111 "__kernel void test_int8_mul24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *dst)\n"
112 "{\n"
113 " int tid = get_global_id(0);\n"
114 "\n"
115 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
116 "}\n";
117
118 const char *uint16_mul24_kernel_code =
119 "__kernel void test_int16_mul24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *dst)\n"
120 "{\n"
121 " int tid = get_global_id(0);\n"
122 "\n"
123 " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
124 "}\n";
125
126
127 int
verify_int_mul24(int * inptrA,int * inptrB,int * outptr,size_t n,size_t vecSize)128 verify_int_mul24(int *inptrA, int *inptrB, int *outptr, size_t n, size_t vecSize)
129 {
130 int r;
131 size_t i;
132
133 for (i=0; i<n; i++)
134 {
135 int a = (inptrA[i] << 8 ) >> 8;
136 int b = (inptrB[i] << 8 ) >> 8;
137 r = a * b;
138 if (r != outptr[i])
139 return -1;
140 }
141
142 return 0;
143 }
144
145 int
verify_uint_mul24(cl_uint * inptrA,cl_uint * inptrB,cl_uint * outptr,size_t n,size_t vecSize)146 verify_uint_mul24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n, size_t vecSize)
147 {
148 cl_uint r;
149 size_t i;
150
151 for (i=0; i<n; i++)
152 {
153 r = (inptrA[i] & 0xffffffU) * (inptrB[i] & 0xffffffU);
154 if (r != outptr[i])
155 {
156 log_error( "failed at %ld: 0x%8.8x * 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", i, inptrA[i], inptrB[i], r, outptr[i] );
157 return -1;
158 }
159 }
160
161 return 0;
162 }
163
random_int24(MTdata d)164 static inline int random_int24( MTdata d )
165 {
166 int result = genrand_int32(d);
167
168 return (result << 8) >> 8;
169 }
170
171
172 static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" };
173
test_integer_mul24(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)174 int test_integer_mul24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
175 {
176 cl_mem streams[3];
177 cl_int *input_ptr[2], *output_ptr, *p;
178
179 cl_program program[NUM_PROGRAMS*2];
180 cl_kernel kernel[NUM_PROGRAMS*2];
181 size_t threads[1];
182
183 int num_elements;
184 int err;
185 int i;
186 MTdata d;
187
188 size_t length = sizeof(cl_int) * 16 * n_elems;
189 num_elements = n_elems * 16;
190
191 input_ptr[0] = (cl_int*)malloc(length);
192 input_ptr[1] = (cl_int*)malloc(length);
193 output_ptr = (cl_int*)malloc(length);
194
195 streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
196 if (!streams[0])
197 {
198 log_error("clCreateBuffer failed\n");
199 return -1;
200 }
201 streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
202 if (!streams[1])
203 {
204 log_error("clCreateBuffer failed\n");
205 return -1;
206 }
207 streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
208 if (!streams[2])
209 {
210 log_error("clCreateBuffer failed\n");
211 return -1;
212 }
213
214 d = init_genrand( gRandomSeed );
215 p = input_ptr[0];
216 for (i=0; i<num_elements; i++)
217 p[i] = random_int24(d);
218 p = input_ptr[1];
219 for (i=0; i<num_elements; i++)
220 p[i] = random_int24(d);
221 free_mtdata(d); d = NULL;
222
223 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
224 if (err != CL_SUCCESS)
225 {
226 log_error("clEnqueueWriteBuffer failed\n");
227 return -1;
228 }
229 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
230 if (err != CL_SUCCESS)
231 {
232 log_error("clEnqueueWriteBuffer failed\n");
233 return -1;
234 }
235 err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &int_mul24_kernel_code, "test_int_mul24");
236 if (err)
237 return -1;
238 err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &int2_mul24_kernel_code, "test_int2_mul24");
239 if (err)
240 return -1;
241 err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &int3_mul24_kernel_code, "test_int3_mul24");
242 if (err)
243 return -1;
244 err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, &int4_mul24_kernel_code, "test_int4_mul24");
245 if (err)
246 return -1;
247 err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, &int8_mul24_kernel_code, "test_int8_mul24");
248 if (err)
249 return -1;
250 err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, &int16_mul24_kernel_code, "test_int16_mul24");
251 if (err)
252 return -1;
253
254 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS], &kernel[NUM_PROGRAMS], 1, &uint_mul24_kernel_code, "test_int_mul24");
255 if (err)
256 return -1;
257 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+1], &kernel[NUM_PROGRAMS+1], 1, &uint2_mul24_kernel_code, "test_int2_mul24");
258 if (err)
259 return -1;
260 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+2], &kernel[NUM_PROGRAMS+2], 1, &uint3_mul24_kernel_code, "test_int3_mul24");
261 if (err)
262 return -1;
263 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+3], &kernel[NUM_PROGRAMS+3], 1, &uint4_mul24_kernel_code, "test_int4_mul24");
264 if (err)
265 return -1;
266 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+4], &kernel[NUM_PROGRAMS+4], 1, &uint8_mul24_kernel_code, "test_int8_mul24");
267 if (err)
268 return -1;
269 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+5], &kernel[NUM_PROGRAMS+5], 1, &uint16_mul24_kernel_code, "test_int16_mul24");
270 if (err)
271 return -1;
272
273 for (i=0; i<2*NUM_PROGRAMS; i++)
274 {
275 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
276 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
277 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
278 if (err != CL_SUCCESS)
279 {
280 log_error("clSetKernelArgs failed\n");
281 return -1;
282 }
283 }
284
285 // test signed
286 threads[0] = (unsigned int)n_elems;
287 for (i=0; i<NUM_PROGRAMS; i++)
288 {
289 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
290 if (err != CL_SUCCESS)
291 {
292 log_error("clEnqueueNDRangeKernel failed\n");
293 return -1;
294 }
295
296 err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
297 if (err != CL_SUCCESS)
298 {
299 log_error("clEnqueueReadBuffer failed\n");
300 return -1;
301 }
302
303 err = verify_int_mul24(input_ptr[0], input_ptr[1], output_ptr, vector_sizes[i], vector_sizes[i]);
304 if (err)
305 {
306 log_error("INT_MUL24 %s test failed\n", test_str_names[i]);
307 err = -1;
308 }
309 else
310 {
311 log_info("INT_MUL24 %s test passed\n", test_str_names[i]);
312 err = 0;
313 }
314
315 if (err)
316 break;
317 }
318
319 // clamp the set of input values to be in range
320 p = input_ptr[0];
321 for (i=0; i<num_elements; i++)
322 p[i] &= 0xffffffU;
323 p = input_ptr[1];
324 for (i=0; i<num_elements; i++)
325 p[i] &= 0xffffffU;
326
327 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
328 if (err != CL_SUCCESS)
329 {
330 log_error("clEnqueueWriteBuffer failed\n");
331 return -1;
332 }
333 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
334 if (err != CL_SUCCESS)
335 {
336 log_error("clEnqueueWriteBuffer failed\n");
337 return -1;
338 }
339
340 // test unsigned
341 for (i=NUM_PROGRAMS; i<2*NUM_PROGRAMS; i++)
342 {
343 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
344 if (err != CL_SUCCESS)
345 {
346 log_error("clEnqueueNDRangeKernel failed\n");
347 return -1;
348 }
349
350 err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
351 if (err != CL_SUCCESS)
352 {
353 log_error("clEnqueueReadBuffer failed\n");
354 return -1;
355 }
356
357 err = verify_uint_mul24((cl_uint*) input_ptr[0], (cl_uint*) input_ptr[1], (cl_uint*) output_ptr, n_elems * vector_sizes[i-NUM_PROGRAMS], vector_sizes[i-NUM_PROGRAMS]);
358 if (err)
359 {
360 log_error("UINT_MUL24 %s test failed\n", test_str_names[i]);
361 err = -1;
362 }
363 else
364 {
365 log_info("UINT_MUL24 %s test passed\n", test_str_names[i]);
366 err = 0;
367 }
368
369 if (err)
370 break;
371 }
372
373
374 // cleanup
375 clReleaseMemObject(streams[0]);
376 clReleaseMemObject(streams[1]);
377 clReleaseMemObject(streams[2]);
378 for (i=0; i<2*NUM_PROGRAMS; i++)
379 {
380 clReleaseKernel(kernel[i]);
381 clReleaseProgram(program[i]);
382 }
383 free(input_ptr[0]);
384 free(input_ptr[1]);
385 free(output_ptr);
386 return err;
387 }
388
389
390