1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20
21 #include <vector>
22
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26
27
28 #ifdef CL_VERSION_2_0
29 extern int gWimpyMode;
30 static const char* multi_queue_simple_block1[] =
31 {
32 NL, "void block_fn(size_t tid, int mul, __global int* res)"
33 NL, "{"
34 NL, " res[tid] = mul * 7 - 21;"
35 NL, "}"
36 NL, ""
37 NL, "kernel void multi_queue_simple_block1(__global int* res)"
38 NL, "{"
39 NL, " int multiplier = 3;"
40 NL, " size_t tid = get_global_id(0);"
41 NL, ""
42 NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
43 NL, ""
44 NL, " res[tid] = -1;"
45 NL, " queue_t def_q = get_default_queue();"
46 NL, " ndrange_t ndrange = ndrange_1D(1);"
47 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
48 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
49 NL, "}"
50 NL
51 };
52
53 static const char* multi_queue_simple_block2[] =
54 {
55 NL, "void block_fn(size_t tid, int mul, __global int* res)"
56 NL, "{"
57 NL, " res[tid] = mul * 7 - 21;"
58 NL, "}"
59 NL, ""
60 NL, "kernel void multi_queue_simple_block2(__global int* res)"
61 NL, "{"
62 NL, " int multiplier = 3;"
63 NL, " size_t tid = get_global_id(0);"
64 NL, ""
65 NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
66 NL, ""
67 NL, " res[tid] = -1;"
68 NL, " queue_t def_q = get_default_queue();"
69 NL, " ndrange_t ndrange = ndrange_1D(1);"
70 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
71 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
72 NL, "}"
73 NL
74 };
75
76 static const char* multi_queue_simple_block3[] =
77 {
78 NL, "void block_fn(size_t tid, int mul, __global int* res)"
79 NL, "{"
80 NL, " res[tid] = mul * 7 - 21;"
81 NL, "}"
82 NL, ""
83 NL, "kernel void multi_queue_simple_block3(__global int* res)"
84 NL, "{"
85 NL, " int multiplier = 3;"
86 NL, " size_t tid = get_global_id(0);"
87 NL, ""
88 NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
89 NL, ""
90 NL, " res[tid] = -1;"
91 NL, " queue_t def_q = get_default_queue();"
92 NL, " ndrange_t ndrange = ndrange_1D(1);"
93 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
94 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
95 NL, "}"
96 NL
97 };
98
99 static const char* multi_queue_simple_block4[] =
100 {
101 NL, "void block_fn(size_t tid, int mul, __global int* res)"
102 NL, "{"
103 NL, " res[tid] = mul * 7 - 21;"
104 NL, "}"
105 NL, ""
106 NL, "kernel void multi_queue_simple_block4(__global int* res)"
107 NL, "{"
108 NL, " int multiplier = 3;"
109 NL, " size_t tid = get_global_id(0);"
110 NL, ""
111 NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
112 NL, ""
113 NL, " res[tid] = -1;"
114 NL, " queue_t def_q = get_default_queue();"
115 NL, " ndrange_t ndrange = ndrange_1D(1);"
116 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
117 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
118 NL, "}"
119 NL
120 };
121
122 static const kernel_src sources_multi_queue_block[] =
123 {
124 KERNEL(multi_queue_simple_block1),
125 KERNEL(multi_queue_simple_block2),
126 KERNEL(multi_queue_simple_block3),
127 KERNEL(multi_queue_simple_block4),
128 };
129 static const size_t num_kernels_multi_queue_block = arr_size(sources_multi_queue_block);
130
131
test_host_multi_queue(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)132 int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
133 {
134 cl_uint i;
135 cl_int err_ret, res = 0;
136 clCommandQueueWrapper dev_queue;
137 cl_int kernel_results[MAX_GWS] = {0};
138
139 size_t ret_len;
140 cl_uint max_queues = 1;
141 cl_uint maxQueueSize = 0;
142 err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
143 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
144
145 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
146 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");
147
148 size_t max_local_size = 1;
149 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
150 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
151
152 cl_queue_properties queue_prop_def[] =
153 {
154 CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
155 CL_QUEUE_SIZE, maxQueueSize,
156 0
157 };
158
159 dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
160 test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
161
162 cl_uint n = num_kernels_multi_queue_block; // Number of host queues
163 std::vector<clCommandQueueWrapper> queues(n);
164 std::vector<cl_command_queue> q(n);
165 std::vector<clProgramWrapper> program(n);
166 std::vector<clKernelWrapper> kernel(n);
167 std::vector<clMemWrapper> mem(n);
168 std::vector<clEventWrapper> event(n);
169
170 for(i = 0; i < n; ++i)
171 {
172 queues[i] = clCreateCommandQueueWithProperties(context, device, NULL, &err_ret);
173 if(check_error(err_ret, "clCreateCommandQueueWithProperties() failed")) { res = -1; break; }
174 q[i] = queues[i];
175 }
176
177 if(err_ret == CL_SUCCESS)
178 {
179 for(i = 0; i < n; ++i)
180 {
181 size_t global = MAX_GWS;
182 if(gWimpyMode)
183 {
184 global = 16;
185 }
186
187 err_ret |= create_single_kernel_helper_with_build_options(context, &program[i], &kernel[i], sources_multi_queue_block[i].num_lines, sources_multi_queue_block[i].lines, sources_multi_queue_block[i].kernel_name, "-cl-std=CL2.0");
188 if(check_error(err_ret, "Create single kernel failed")) { res = -1; break; }
189
190 mem[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret);
191 if(check_error(err_ret, "clCreateBuffer() failed")) { res = -1; break; }
192
193 err_ret |= clSetKernelArg(kernel[i], 0, sizeof(cl_mem), &mem[i]);
194 if(check_error(err_ret, "clSetKernelArg(0) failed")) { res = -1; break; }
195
196 err_ret |= clEnqueueNDRangeKernel(q[i], kernel[i], 1, NULL, &global, 0, 0, NULL, &event[i]);
197 if(check_error(err_ret, "clEnqueueNDRangeKernel() failed")) { res = -1; break; }
198 }
199 }
200
201 if(err_ret == CL_SUCCESS)
202 {
203 for(i = 0; i < n; ++i)
204 {
205 cl_int status;
206 err_ret = clEnqueueReadBuffer(q[i], mem[i], CL_TRUE, 0, sizeof(kernel_results), kernel_results, 0, NULL, NULL);
207 if(check_error(err_ret, "clEnqueueReadBuffer() failed")) { res = -1; break; }
208
209 err_ret = clGetEventInfo(event[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, &ret_len);
210 if(check_error(err_ret, "clGetEventInfo() failed")) { res = -1; break; }
211
212 #if CL_COMPLETE != CL_SUCCESS
213 #error Fix me!
214 #endif
215 // This hack is possible because both CL_COMPLETE and CL_SUCCESS defined as 0x00
216 if(check_error(status, "Kernel execution status %d", status)) { err_ret = status; res = -1; break; }
217 else if(kernel_results[0] != 0 && check_error(-1, "'%s' kernel results validation failed = %d", sources_multi_queue_block[i].kernel_name, kernel_results[0])) { res = -1; break; }
218 }
219 }
220
221 return res;
222 }
223
224
225
226
227 #endif
228
229