1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "basic_command_buffer.h"
18 #include "procs.h"
19
20 #include <vector>
21
22 namespace {
23
24 ////////////////////////////////////////////////////////////////////////////////
25 // Command-buffer profiling test cases:
26 // -all commands are recorded to a single command-queue
27 // -profiling a command-buffer with simultaneous use
28
29 template <bool simultaneous_request>
30 struct CommandBufferProfiling : public BasicCommandBufferTest
31 {
CommandBufferProfiling__anon798c27cf0111::CommandBufferProfiling32 CommandBufferProfiling(cl_device_id device, cl_context context,
33 cl_command_queue queue)
34 : BasicCommandBufferTest(device, context, queue), wait_event(nullptr)
35 {
36 simultaneous_use_requested = simultaneous_request;
37 if (simultaneous_request) buffer_size_multiplier = 2;
38 }
39
40 //--------------------------------------------------------------------------
Skip__anon798c27cf0111::CommandBufferProfiling41 bool Skip() override
42 {
43 if (BasicCommandBufferTest::Skip()) return true;
44
45 Version version = get_device_cl_version(device);
46 const cl_device_info host_queue_query = version >= Version(2, 0)
47 ? CL_DEVICE_QUEUE_ON_HOST_PROPERTIES
48 : CL_DEVICE_QUEUE_PROPERTIES;
49
50 cl_command_queue_properties host_queue_props = 0;
51 int error =
52 clGetDeviceInfo(device, host_queue_query, sizeof(host_queue_props),
53 &host_queue_props, NULL);
54 if (error != CL_SUCCESS)
55 {
56 print_error(
57 error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
58 return true;
59 }
60
61 if ((host_queue_props & CL_QUEUE_PROFILING_ENABLE) == 0)
62 {
63 log_info(
64 "Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
65 return true;
66 }
67 return (simultaneous_use_requested && !simultaneous_use_support);
68 }
69
70 //--------------------------------------------------------------------------
SetUp__anon798c27cf0111::CommandBufferProfiling71 cl_int SetUp(int elements) override
72 {
73 cl_int error = CL_SUCCESS;
74 queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE,
75 &error);
76 test_error(error, "clCreateCommandQueue failed");
77
78 return BasicCommandBufferTest::SetUp(elements);
79 }
80
81 //--------------------------------------------------------------------------
Run__anon798c27cf0111::CommandBufferProfiling82 cl_int Run() override
83 {
84 cl_int error = CL_SUCCESS;
85
86 // record command buffer
87 error = RecordCommandBuffer();
88 test_error(error, "RecordCommandBuffer failed");
89
90 if (simultaneous_use_requested)
91 {
92 // enqueue simultaneous command-buffers with profiling command queue
93 error = RunSimultaneous();
94 test_error(error, "RunSimultaneous failed");
95 }
96 else
97 {
98 // enqueue single command-buffer with profiling command queue
99 error = RunSingle();
100 test_error(error, "RunSingle failed");
101 }
102
103 return CL_SUCCESS;
104 }
105
106 //--------------------------------------------------------------------------
RecordCommandBuffer__anon798c27cf0111::CommandBufferProfiling107 cl_int RecordCommandBuffer()
108 {
109 cl_int error = CL_SUCCESS;
110
111 error = clCommandNDRangeKernelKHR(
112 command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
113 nullptr, 0, nullptr, nullptr, nullptr);
114 test_error(error, "clCommandNDRangeKernelKHR failed");
115
116 error = clFinalizeCommandBufferKHR(command_buffer);
117 test_error(error, "clFinalizeCommandBufferKHR failed");
118 return CL_SUCCESS;
119 }
120
121 //--------------------------------------------------------------------------
122 #define ADD_PROF_PARAM(prop) \
123 { \
124 prop, #prop, 0 \
125 }
126 struct ProfilingParam
127 {
128 cl_profiling_info param;
129 std::string name;
130 cl_ulong value;
131 };
132
133 //--------------------------------------------------------------------------
VerifyResult__anon798c27cf0111::CommandBufferProfiling134 cl_int VerifyResult(const clEventWrapper& event)
135 {
136 cl_int error = CL_SUCCESS;
137 cl_int status;
138 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
139 sizeof(status), &status, NULL);
140 test_error(error, "clGetEventInfo() failed");
141
142 if (status != CL_SUCCESS)
143 test_fail("Kernel execution status %d! (%s:%d)\n", status, __FILE__,
144 __LINE__);
145
146 std::vector<ProfilingParam> prof_params = {
147 ADD_PROF_PARAM(CL_PROFILING_COMMAND_QUEUED),
148 ADD_PROF_PARAM(CL_PROFILING_COMMAND_SUBMIT),
149 ADD_PROF_PARAM(CL_PROFILING_COMMAND_START),
150 ADD_PROF_PARAM(CL_PROFILING_COMMAND_END),
151 };
152
153 // gather profiling timestamps
154 for (auto&& p : prof_params)
155 {
156 error = clGetEventProfilingInfo(event, p.param, sizeof(p.value),
157 &p.value, NULL);
158 test_error(error, "clGetEventProfilingInfo() failed");
159 }
160
161 // verify the results by comparing timestamps
162 bool all_vals_0 = prof_params.front().value != 0;
163 for (size_t i = 1; i < prof_params.size(); i++)
164 {
165 all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0;
166 if (prof_params[i - 1].value > prof_params[i].value)
167 {
168 log_error("Profiling %s=0x%x should be smaller than or equal "
169 "to %s=0x%x for "
170 "kernels that use the on-device queue",
171 prof_params[i - 1].name.c_str(),
172 prof_params[i - 1].param, prof_params[i].name.c_str(),
173 prof_params[i].param);
174 return TEST_FAIL;
175 }
176 }
177
178 if (all_vals_0)
179 {
180 log_error("All values are 0. This is exceedingly unlikely.\n");
181 return TEST_FAIL;
182 }
183
184 log_info("Profiling info for command-buffer kernel succeeded.\n");
185 return TEST_PASS;
186 }
187
188 //--------------------------------------------------------------------------
RunSingle__anon798c27cf0111::CommandBufferProfiling189 cl_int RunSingle()
190 {
191 cl_int error = CL_SUCCESS;
192 std::vector<cl_int> output_data(num_elements);
193
194 error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
195 data_size(), 0, nullptr, nullptr);
196 test_error(error, "clEnqueueFillBuffer failed");
197
198 clEventWrapper query_event;
199 error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
200 nullptr, &query_event);
201 test_error(error, "clEnqueueCommandBufferKHR failed");
202
203 error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
204 output_data.data(), 0, nullptr, nullptr);
205 test_error(error, "clEnqueueReadBuffer failed");
206
207 error = VerifyResult(query_event);
208 test_error(error, "VerifyResult failed");
209
210 return CL_SUCCESS;
211 }
212
213 //--------------------------------------------------------------------------
214 struct SimulPassData
215 {
216 cl_int offset;
217 std::vector<cl_int> output_buffer;
218 clEventWrapper query_event;
219 };
220
221 //--------------------------------------------------------------------------
EnqueueSimultaneousPass__anon798c27cf0111::CommandBufferProfiling222 cl_int EnqueueSimultaneousPass(SimulPassData& pd)
223 {
224 cl_int error = clEnqueueFillBuffer(
225 queue, out_mem, &pattern, sizeof(cl_int),
226 pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
227 test_error(error, "clEnqueueFillBuffer failed");
228
229 error = clEnqueueFillBuffer(queue, off_mem, &pd.offset, sizeof(cl_int),
230 0, sizeof(cl_int), 0, nullptr, nullptr);
231 test_error(error, "clEnqueueFillBuffer failed");
232
233 if (!wait_event)
234 {
235 wait_event = clCreateUserEvent(context, &error);
236 test_error(error, "clCreateUserEvent failed");
237 }
238
239 error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
240 &wait_event, &pd.query_event);
241 test_error(error, "clEnqueueCommandBufferKHR failed");
242
243 error = clEnqueueReadBuffer(
244 queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(),
245 pd.output_buffer.data(), 0, nullptr, nullptr);
246 test_error(error, "clEnqueueReadBuffer failed");
247
248 return CL_SUCCESS;
249 }
250
251 //--------------------------------------------------------------------------
RunSimultaneous__anon798c27cf0111::CommandBufferProfiling252 cl_int RunSimultaneous()
253 {
254 cl_int error = CL_SUCCESS;
255 cl_int offset = static_cast<cl_int>(num_elements);
256
257 std::vector<SimulPassData> simul_passes = {
258 { 0, std::vector<cl_int>(num_elements) },
259 { offset, std::vector<cl_int>(num_elements) }
260 };
261
262 for (auto&& pass : simul_passes)
263 {
264 error = EnqueueSimultaneousPass(pass);
265 test_error(error, "EnqueueSimultaneousPass failed");
266 }
267
268 error = clSetUserEventStatus(wait_event, CL_COMPLETE);
269 test_error(error, "clSetUserEventStatus failed");
270
271 error = clFinish(queue);
272 test_error(error, "clFinish failed");
273
274 for (auto&& pass : simul_passes)
275 {
276 error = VerifyResult(pass.query_event);
277 test_error(error, "VerifyResult failed");
278 }
279
280 return CL_SUCCESS;
281 }
282
283 //--------------------------------------------------------------------------
284 clEventWrapper wait_event;
285
286 const cl_int pattern = 0xA;
287 };
288
289 } // anonymous namespace
290
test_basic_profiling(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)291 int test_basic_profiling(cl_device_id device, cl_context context,
292 cl_command_queue queue, int num_elements)
293 {
294 return MakeAndRunTest<CommandBufferProfiling<false>>(device, context, queue,
295 num_elements);
296 }
297
test_simultaneous_profiling(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)298 int test_simultaneous_profiling(cl_device_id device, cl_context context,
299 cl_command_queue queue, int num_elements)
300 {
301 return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
302 num_elements);
303 }
304