• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "basic_command_buffer.h"
18 #include "procs.h"
19 
20 #include <vector>
21 
22 namespace {
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 // Command-buffer profiling test cases:
26 // -all commands are recorded to a single command-queue
27 // -profiling a command-buffer with simultaneous use
28 
29 template <bool simultaneous_request>
30 struct CommandBufferProfiling : public BasicCommandBufferTest
31 {
CommandBufferProfiling__anon798c27cf0111::CommandBufferProfiling32     CommandBufferProfiling(cl_device_id device, cl_context context,
33                            cl_command_queue queue)
34         : BasicCommandBufferTest(device, context, queue), wait_event(nullptr)
35     {
36         simultaneous_use_requested = simultaneous_request;
37         if (simultaneous_request) buffer_size_multiplier = 2;
38     }
39 
40     //--------------------------------------------------------------------------
Skip__anon798c27cf0111::CommandBufferProfiling41     bool Skip() override
42     {
43         if (BasicCommandBufferTest::Skip()) return true;
44 
45         Version version = get_device_cl_version(device);
46         const cl_device_info host_queue_query = version >= Version(2, 0)
47             ? CL_DEVICE_QUEUE_ON_HOST_PROPERTIES
48             : CL_DEVICE_QUEUE_PROPERTIES;
49 
50         cl_command_queue_properties host_queue_props = 0;
51         int error =
52             clGetDeviceInfo(device, host_queue_query, sizeof(host_queue_props),
53                             &host_queue_props, NULL);
54         if (error != CL_SUCCESS)
55         {
56             print_error(
57                 error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
58             return true;
59         }
60 
61         if ((host_queue_props & CL_QUEUE_PROFILING_ENABLE) == 0)
62         {
63             log_info(
64                 "Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
65             return true;
66         }
67         return (simultaneous_use_requested && !simultaneous_use_support);
68     }
69 
70     //--------------------------------------------------------------------------
SetUp__anon798c27cf0111::CommandBufferProfiling71     cl_int SetUp(int elements) override
72     {
73         cl_int error = CL_SUCCESS;
74         queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE,
75                                      &error);
76         test_error(error, "clCreateCommandQueue failed");
77 
78         return BasicCommandBufferTest::SetUp(elements);
79     }
80 
81     //--------------------------------------------------------------------------
Run__anon798c27cf0111::CommandBufferProfiling82     cl_int Run() override
83     {
84         cl_int error = CL_SUCCESS;
85 
86         // record command buffer
87         error = RecordCommandBuffer();
88         test_error(error, "RecordCommandBuffer failed");
89 
90         if (simultaneous_use_requested)
91         {
92             // enqueue simultaneous command-buffers with profiling command queue
93             error = RunSimultaneous();
94             test_error(error, "RunSimultaneous failed");
95         }
96         else
97         {
98             // enqueue single command-buffer with profiling command queue
99             error = RunSingle();
100             test_error(error, "RunSingle failed");
101         }
102 
103         return CL_SUCCESS;
104     }
105 
106     //--------------------------------------------------------------------------
RecordCommandBuffer__anon798c27cf0111::CommandBufferProfiling107     cl_int RecordCommandBuffer()
108     {
109         cl_int error = CL_SUCCESS;
110 
111         error = clCommandNDRangeKernelKHR(
112             command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
113             nullptr, 0, nullptr, nullptr, nullptr);
114         test_error(error, "clCommandNDRangeKernelKHR failed");
115 
116         error = clFinalizeCommandBufferKHR(command_buffer);
117         test_error(error, "clFinalizeCommandBufferKHR failed");
118         return CL_SUCCESS;
119     }
120 
121     //--------------------------------------------------------------------------
122 #define ADD_PROF_PARAM(prop)                                                   \
123     {                                                                          \
124         prop, #prop, 0                                                         \
125     }
126     struct ProfilingParam
127     {
128         cl_profiling_info param;
129         std::string name;
130         cl_ulong value;
131     };
132 
133     //--------------------------------------------------------------------------
VerifyResult__anon798c27cf0111::CommandBufferProfiling134     cl_int VerifyResult(const clEventWrapper& event)
135     {
136         cl_int error = CL_SUCCESS;
137         cl_int status;
138         error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
139                                sizeof(status), &status, NULL);
140         test_error(error, "clGetEventInfo() failed");
141 
142         if (status != CL_SUCCESS)
143             test_fail("Kernel execution status %d! (%s:%d)\n", status, __FILE__,
144                       __LINE__);
145 
146         std::vector<ProfilingParam> prof_params = {
147             ADD_PROF_PARAM(CL_PROFILING_COMMAND_QUEUED),
148             ADD_PROF_PARAM(CL_PROFILING_COMMAND_SUBMIT),
149             ADD_PROF_PARAM(CL_PROFILING_COMMAND_START),
150             ADD_PROF_PARAM(CL_PROFILING_COMMAND_END),
151         };
152 
153         // gather profiling timestamps
154         for (auto&& p : prof_params)
155         {
156             error = clGetEventProfilingInfo(event, p.param, sizeof(p.value),
157                                             &p.value, NULL);
158             test_error(error, "clGetEventProfilingInfo() failed");
159         }
160 
161         // verify the results by comparing timestamps
162         bool all_vals_0 = prof_params.front().value != 0;
163         for (size_t i = 1; i < prof_params.size(); i++)
164         {
165             all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0;
166             if (prof_params[i - 1].value > prof_params[i].value)
167             {
168                 log_error("Profiling %s=0x%x should be smaller than or equal "
169                           "to %s=0x%x for "
170                           "kernels that use the on-device queue",
171                           prof_params[i - 1].name.c_str(),
172                           prof_params[i - 1].param, prof_params[i].name.c_str(),
173                           prof_params[i].param);
174                 return TEST_FAIL;
175             }
176         }
177 
178         if (all_vals_0)
179         {
180             log_error("All values are 0. This is exceedingly unlikely.\n");
181             return TEST_FAIL;
182         }
183 
184         log_info("Profiling info for command-buffer kernel succeeded.\n");
185         return TEST_PASS;
186     }
187 
188     //--------------------------------------------------------------------------
RunSingle__anon798c27cf0111::CommandBufferProfiling189     cl_int RunSingle()
190     {
191         cl_int error = CL_SUCCESS;
192         std::vector<cl_int> output_data(num_elements);
193 
194         error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
195                                     data_size(), 0, nullptr, nullptr);
196         test_error(error, "clEnqueueFillBuffer failed");
197 
198         clEventWrapper query_event;
199         error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
200                                           nullptr, &query_event);
201         test_error(error, "clEnqueueCommandBufferKHR failed");
202 
203         error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
204                                     output_data.data(), 0, nullptr, nullptr);
205         test_error(error, "clEnqueueReadBuffer failed");
206 
207         error = VerifyResult(query_event);
208         test_error(error, "VerifyResult failed");
209 
210         return CL_SUCCESS;
211     }
212 
213     //--------------------------------------------------------------------------
214     struct SimulPassData
215     {
216         cl_int offset;
217         std::vector<cl_int> output_buffer;
218         clEventWrapper query_event;
219     };
220 
221     //--------------------------------------------------------------------------
EnqueueSimultaneousPass__anon798c27cf0111::CommandBufferProfiling222     cl_int EnqueueSimultaneousPass(SimulPassData& pd)
223     {
224         cl_int error = clEnqueueFillBuffer(
225             queue, out_mem, &pattern, sizeof(cl_int),
226             pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
227         test_error(error, "clEnqueueFillBuffer failed");
228 
229         error = clEnqueueFillBuffer(queue, off_mem, &pd.offset, sizeof(cl_int),
230                                     0, sizeof(cl_int), 0, nullptr, nullptr);
231         test_error(error, "clEnqueueFillBuffer failed");
232 
233         if (!wait_event)
234         {
235             wait_event = clCreateUserEvent(context, &error);
236             test_error(error, "clCreateUserEvent failed");
237         }
238 
239         error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
240                                           &wait_event, &pd.query_event);
241         test_error(error, "clEnqueueCommandBufferKHR failed");
242 
243         error = clEnqueueReadBuffer(
244             queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(),
245             pd.output_buffer.data(), 0, nullptr, nullptr);
246         test_error(error, "clEnqueueReadBuffer failed");
247 
248         return CL_SUCCESS;
249     }
250 
251     //--------------------------------------------------------------------------
RunSimultaneous__anon798c27cf0111::CommandBufferProfiling252     cl_int RunSimultaneous()
253     {
254         cl_int error = CL_SUCCESS;
255         cl_int offset = static_cast<cl_int>(num_elements);
256 
257         std::vector<SimulPassData> simul_passes = {
258             { 0, std::vector<cl_int>(num_elements) },
259             { offset, std::vector<cl_int>(num_elements) }
260         };
261 
262         for (auto&& pass : simul_passes)
263         {
264             error = EnqueueSimultaneousPass(pass);
265             test_error(error, "EnqueueSimultaneousPass failed");
266         }
267 
268         error = clSetUserEventStatus(wait_event, CL_COMPLETE);
269         test_error(error, "clSetUserEventStatus failed");
270 
271         error = clFinish(queue);
272         test_error(error, "clFinish failed");
273 
274         for (auto&& pass : simul_passes)
275         {
276             error = VerifyResult(pass.query_event);
277             test_error(error, "VerifyResult failed");
278         }
279 
280         return CL_SUCCESS;
281     }
282 
283     //--------------------------------------------------------------------------
284     clEventWrapper wait_event;
285 
286     const cl_int pattern = 0xA;
287 };
288 
289 } // anonymous namespace
290 
test_basic_profiling(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)291 int test_basic_profiling(cl_device_id device, cl_context context,
292                          cl_command_queue queue, int num_elements)
293 {
294     return MakeAndRunTest<CommandBufferProfiling<false>>(device, context, queue,
295                                                          num_elements);
296 }
297 
test_simultaneous_profiling(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)298 int test_simultaneous_profiling(cl_device_id device, cl_context context,
299                                 cl_command_queue queue, int num_elements)
300 {
301     return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
302                                                         num_elements);
303 }
304