• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2021 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // CLCommandQueueVk.cpp: Implements the class methods for CLCommandQueueVk.
7 
8 #include "libANGLE/renderer/vulkan/CLCommandQueueVk.h"
9 #include "libANGLE/renderer/vulkan/CLContextVk.h"
10 #include "libANGLE/renderer/vulkan/CLDeviceVk.h"
11 #include "libANGLE/renderer/vulkan/CLKernelVk.h"
12 #include "libANGLE/renderer/vulkan/CLMemoryVk.h"
13 #include "libANGLE/renderer/vulkan/CLProgramVk.h"
14 #include "libANGLE/renderer/vulkan/cl_types.h"
15 #include "libANGLE/renderer/vulkan/vk_renderer.h"
16 
17 #include "libANGLE/CLBuffer.h"
18 #include "libANGLE/CLCommandQueue.h"
19 #include "libANGLE/CLContext.h"
20 #include "libANGLE/CLEvent.h"
21 #include "libANGLE/CLKernel.h"
22 #include "libANGLE/cl_utils.h"
23 
24 #include "spirv/unified1/NonSemanticClspvReflection.h"
25 
26 namespace rx
27 {
28 
29 class CLAsyncFinishTask : public angle::Closure
30 {
31   public:
CLAsyncFinishTask(CLCommandQueueVk * queueVk)32     CLAsyncFinishTask(CLCommandQueueVk *queueVk) : mQueueVk(queueVk) {}
33 
operator ()()34     void operator()() override
35     {
36         ANGLE_TRACE_EVENT0("gpu.angle", "CLCommandQueueVk::finish (async)");
37         if (IsError(mQueueVk->finish()))
38         {
39             ERR() << "Async finish (clFlush) failed for queue (" << mQueueVk << ")!";
40         }
41     }
42 
43   private:
44     CLCommandQueueVk *mQueueVk;
45 };
46 
// Constructor: caches the Vulkan implementations of the owning context and
// device. Command-buffer and queue-serial state is deferred to init().
CLCommandQueueVk::CLCommandQueueVk(const cl::CommandQueue &commandQueue)
    : CLCommandQueueImpl(commandQueue),
      mContext(&commandQueue.getContext().getImpl<CLContextVk>()),
      mDevice(&commandQueue.getDevice().getImpl<CLDeviceVk>()),
      mComputePassCommands(nullptr),
      mCurrentQueueSerialIndex(kInvalidQueueSerialIndex),
      mHasAnyCommandsPendingSubmission(false)
{}
55 
// One-time setup of the queue's Vulkan state: creates the outside-render-pass
// command pool and command-buffer helper used to record compute work, then
// allocates a queue-serial index for tracking submissions. Failures are
// mapped to CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::init()
{
    ANGLE_CL_IMPL_TRY_ERROR(
        vk::OutsideRenderPassCommandBuffer::InitializeCommandPool(
            mContext, &mCommandPool.outsideRenderPassPool,
            mContext->getRenderer()->getQueueFamilyIndex(), getProtectionType()),
        CL_OUT_OF_RESOURCES);

    ANGLE_CL_IMPL_TRY_ERROR(mContext->getRenderer()->getOutsideRenderPassCommandBufferHelper(
                                mContext, &mCommandPool.outsideRenderPassPool,
                                &mOutsideRenderPassCommandsAllocator, &mComputePassCommands),
                            CL_OUT_OF_RESOURCES);

    // Generate initial QueueSerial for command buffer helper
    ANGLE_CL_IMPL_TRY_ERROR(
        mContext->getRenderer()->allocateQueueSerialIndex(&mCurrentQueueSerialIndex),
        CL_OUT_OF_RESOURCES);
    mComputePassCommands->setQueueSerial(
        mCurrentQueueSerialIndex,
        mContext->getRenderer()->generateQueueSerial(mCurrentQueueSerialIndex));

    // Initialize serials to be valid but appear submitted and finished.
    mLastFlushedQueueSerial   = QueueSerial(mCurrentQueueSerialIndex, Serial());
    mLastSubmittedQueueSerial = mLastFlushedQueueSerial;

    return angle::Result::Continue;
}
83 
CLCommandQueueVk::~CLCommandQueueVk()
{
    VkDevice vkDevice = mContext->getDevice();

    // Return the queue-serial index to the renderer so it can be reused.
    if (mCurrentQueueSerialIndex != kInvalidQueueSerialIndex)
    {
        mContext->getRenderer()->releaseQueueSerialIndex(mCurrentQueueSerialIndex);
        mCurrentQueueSerialIndex = kInvalidQueueSerialIndex;
    }

    // Recycle the current command buffers
    mContext->getRenderer()->recycleOutsideRenderPassCommandBufferHelper(&mComputePassCommands);
    mCommandPool.outsideRenderPassPool.destroy(vkDevice);
}
98 
// Deliberate no-op: "clSetCommandQueueProperty" has been deprecated as of
// OpenCL 1.1, so there is nothing to do here.
// http://man.opencl.org/deprecated.html
angle::Result CLCommandQueueVk::setProperty(cl::CommandQueueProperties properties, cl_bool enable)
{
    return angle::Result::Continue;
}
105 
// Implements clEnqueueReadBuffer.
// Blocking reads drain the queue and then copy straight to the user pointer.
// Non-blocking reads record a GPU copy into a staging buffer that wraps the
// user pointer (CL_MEM_USE_HOST_PTR); the data reaches `ptr` when the staged
// buffers are synced back (see syncHostBuffers).
angle::Result CLCommandQueueVk::enqueueReadBuffer(const cl::Buffer &buffer,
                                                  bool blocking,
                                                  size_t offset,
                                                  size_t size,
                                                  void *ptr,
                                                  const cl::EventPtrs &waitEvents,
                                                  CLEventImpl::CreateFunc *eventCreateFunc)
{
    std::scoped_lock<std::mutex> sl(mCommandQueueMutex);

    ANGLE_TRY(processWaitlist(waitEvents));

    if (blocking)
    {
        // Drain outstanding work so the buffer contents are final before copying.
        ANGLE_TRY(finishInternal());
        auto bufferVk = &buffer.getImpl<CLBufferVk>();
        ANGLE_TRY(bufferVk->copyTo(ptr, offset, size));
    }
    else
    {
        CLBufferVk &bufferVk = buffer.getImpl<CLBufferVk>();

        // Reached transfer buffer creation limit/heuristic, finish this current batch
        if (mHostBufferUpdateList.size() >= kMaxHostBufferUpdateListSize)
        {
            ANGLE_TRY(finishInternal());
        }

        // Create a transfer buffer and push it in update list
        mHostBufferUpdateList.emplace_back(
            cl::Buffer::Cast(this->mContext->getFrontendObject().createBuffer(
                nullptr, cl::MemFlags{buffer.getFlags().get() | CL_MEM_USE_HOST_PTR},
                buffer.getSize(), ptr)));
        if (mHostBufferUpdateList.back() == nullptr)
        {
            ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
        }
        CLBufferVk &transferBufferVk = mHostBufferUpdateList.back()->getImpl<CLBufferVk>();
        // Release initialization reference, lifetime controlled by RefPointer.
        mHostBufferUpdateList.back()->release();

        // NOTE(review): src and dst offsets are both `offset` while the staging
        // buffer is created with the full buffer size — this assumes the staging
        // buffer mirrors the source layout; confirm against how syncHostBuffers
        // uses getOffset()/getSize() when copying back to the host pointer.
        const VkBufferCopy copyRegion = {offset, offset, size};

        // We need an execution barrier if buffer can be written to by kernel
        if (!mComputePassCommands->getCommandBuffer().empty() && bufferVk.isWritable())
        {
            VkMemoryBarrier memoryBarrier = {
                VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_SHADER_WRITE_BIT,
                VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT};
            mComputePassCommands->getCommandBuffer().pipelineBarrier(
                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1,
                &memoryBarrier, 0, nullptr, 0, nullptr);
        }

        mComputePassCommands->getCommandBuffer().copyBuffer(
            bufferVk.getBuffer().getBuffer(), transferBufferVk.getBuffer().getBuffer(), 1,
            &copyRegion);
    }

    ANGLE_TRY(createEvent(eventCreateFunc));

    return angle::Result::Continue;
}
169 
enqueueWriteBuffer(const cl::Buffer & buffer,bool blocking,size_t offset,size_t size,const void * ptr,const cl::EventPtrs & waitEvents,CLEventImpl::CreateFunc * eventCreateFunc)170 angle::Result CLCommandQueueVk::enqueueWriteBuffer(const cl::Buffer &buffer,
171                                                    bool blocking,
172                                                    size_t offset,
173                                                    size_t size,
174                                                    const void *ptr,
175                                                    const cl::EventPtrs &waitEvents,
176                                                    CLEventImpl::CreateFunc *eventCreateFunc)
177 {
178     std::scoped_lock<std::mutex> sl(mCommandQueueMutex);
179 
180     ANGLE_TRY(processWaitlist(waitEvents));
181 
182     auto bufferVk = &buffer.getImpl<CLBufferVk>();
183     ANGLE_TRY(bufferVk->copyFrom(ptr, offset, size));
184     if (blocking)
185     {
186         ANGLE_TRY(finishInternal());
187     }
188 
189     ANGLE_TRY(createEvent(eventCreateFunc));
190 
191     return angle::Result::Continue;
192 }
193 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueReadBufferRect(const cl::Buffer &buffer,
                                                      bool blocking,
                                                      const cl::MemOffsets &bufferOrigin,
                                                      const cl::MemOffsets &hostOrigin,
                                                      const cl::Coordinate &region,
                                                      size_t bufferRowPitch,
                                                      size_t bufferSlicePitch,
                                                      size_t hostRowPitch,
                                                      size_t hostSlicePitch,
                                                      void *ptr,
                                                      const cl::EventPtrs &waitEvents,
                                                      CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
210 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueWriteBufferRect(const cl::Buffer &buffer,
                                                       bool blocking,
                                                       const cl::MemOffsets &bufferOrigin,
                                                       const cl::MemOffsets &hostOrigin,
                                                       const cl::Coordinate &region,
                                                       size_t bufferRowPitch,
                                                       size_t bufferSlicePitch,
                                                       size_t hostRowPitch,
                                                       size_t hostSlicePitch,
                                                       const void *ptr,
                                                       const cl::EventPtrs &waitEvents,
                                                       CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
227 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueCopyBuffer(const cl::Buffer &srcBuffer,
                                                  const cl::Buffer &dstBuffer,
                                                  size_t srcOffset,
                                                  size_t dstOffset,
                                                  size_t size,
                                                  const cl::EventPtrs &waitEvents,
                                                  CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
239 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueCopyBufferRect(const cl::Buffer &srcBuffer,
                                                      const cl::Buffer &dstBuffer,
                                                      const cl::MemOffsets &srcOrigin,
                                                      const cl::MemOffsets &dstOrigin,
                                                      const cl::Coordinate &region,
                                                      size_t srcRowPitch,
                                                      size_t srcSlicePitch,
                                                      size_t dstRowPitch,
                                                      size_t dstSlicePitch,
                                                      const cl::EventPtrs &waitEvents,
                                                      CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
255 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueFillBuffer(const cl::Buffer &buffer,
                                                  const void *pattern,
                                                  size_t patternSize,
                                                  size_t offset,
                                                  size_t size,
                                                  const cl::EventPtrs &waitEvents,
                                                  CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
267 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueMapBuffer(const cl::Buffer &buffer,
                                                 bool blocking,
                                                 cl::MapFlags mapFlags,
                                                 size_t offset,
                                                 size_t size,
                                                 const cl::EventPtrs &waitEvents,
                                                 CLEventImpl::CreateFunc *eventCreateFunc,
                                                 void *&mapPtr)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
280 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueReadImage(const cl::Image &image,
                                                 bool blocking,
                                                 const cl::MemOffsets &origin,
                                                 const cl::Coordinate &region,
                                                 size_t rowPitch,
                                                 size_t slicePitch,
                                                 void *ptr,
                                                 const cl::EventPtrs &waitEvents,
                                                 CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
294 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueWriteImage(const cl::Image &image,
                                                  bool blocking,
                                                  const cl::MemOffsets &origin,
                                                  const cl::Coordinate &region,
                                                  size_t inputRowPitch,
                                                  size_t inputSlicePitch,
                                                  const void *ptr,
                                                  const cl::EventPtrs &waitEvents,
                                                  CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
308 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueCopyImage(const cl::Image &srcImage,
                                                 const cl::Image &dstImage,
                                                 const cl::MemOffsets &srcOrigin,
                                                 const cl::MemOffsets &dstOrigin,
                                                 const cl::Coordinate &region,
                                                 const cl::EventPtrs &waitEvents,
                                                 CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
320 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueFillImage(const cl::Image &image,
                                                 const void *fillColor,
                                                 const cl::MemOffsets &origin,
                                                 const cl::Coordinate &region,
                                                 const cl::EventPtrs &waitEvents,
                                                 CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
331 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueCopyImageToBuffer(const cl::Image &srcImage,
                                                         const cl::Buffer &dstBuffer,
                                                         const cl::MemOffsets &srcOrigin,
                                                         const cl::Coordinate &region,
                                                         size_t dstOffset,
                                                         const cl::EventPtrs &waitEvents,
                                                         CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
343 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueCopyBufferToImage(const cl::Buffer &srcBuffer,
                                                         const cl::Image &dstImage,
                                                         size_t srcOffset,
                                                         const cl::MemOffsets &dstOrigin,
                                                         const cl::Coordinate &region,
                                                         const cl::EventPtrs &waitEvents,
                                                         CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
355 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueMapImage(const cl::Image &image,
                                                bool blocking,
                                                cl::MapFlags mapFlags,
                                                const cl::MemOffsets &origin,
                                                const cl::Coordinate &region,
                                                size_t *imageRowPitch,
                                                size_t *imageSlicePitch,
                                                const cl::EventPtrs &waitEvents,
                                                CLEventImpl::CreateFunc *eventCreateFunc,
                                                void *&mapPtr)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
370 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueUnmapMemObject(const cl::Memory &memory,
                                                      void *mappedPtr,
                                                      const cl::EventPtrs &waitEvents,
                                                      CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
379 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueMigrateMemObjects(const cl::MemoryPtrs &memObjects,
                                                         cl::MemMigrationFlags flags,
                                                         const cl::EventPtrs &waitEvents,
                                                         CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
388 
// Implements clEnqueueNDRangeKernel: resolves the kernel's resources, fetches
// or builds its compute pipeline, then records a bind + dispatch into the
// current compute-pass command buffer. Nothing is submitted to the Vulkan
// queue here; submission happens at flush/finish time.
angle::Result CLCommandQueueVk::enqueueNDRangeKernel(const cl::Kernel &kernel,
                                                     const cl::NDRange &ndrange,
                                                     const cl::EventPtrs &waitEvents,
                                                     CLEventImpl::CreateFunc *eventCreateFunc)
{
    std::scoped_lock<std::mutex> sl(mCommandQueueMutex);

    ANGLE_TRY(processWaitlist(waitEvents));

    cl::WorkgroupCount workgroupCount;
    vk::PipelineCacheAccess pipelineCache;
    vk::PipelineHelper *pipelineHelper = nullptr;
    CLKernelVk &kernelImpl             = kernel.getImpl<CLKernelVk>();

    // Set up descriptor sets and push constants for the kernel's arguments.
    ANGLE_TRY(processKernelResources(kernelImpl, ndrange));

    // Fetch or create compute pipeline (if we miss in cache)
    ANGLE_CL_IMPL_TRY_ERROR(mContext->getRenderer()->getPipelineCache(mContext, &pipelineCache),
                            CL_OUT_OF_RESOURCES);
    ANGLE_TRY(kernelImpl.getOrCreateComputePipeline(
        &pipelineCache, ndrange, mCommandQueue.getDevice(), &pipelineHelper, &workgroupCount));

    // Keep the pipeline alive until the recorded commands have executed.
    mComputePassCommands->retainResource(pipelineHelper);
    mComputePassCommands->getCommandBuffer().bindComputePipeline(pipelineHelper->getPipeline());
    mComputePassCommands->getCommandBuffer().dispatch(workgroupCount[0], workgroupCount[1],
                                                      workgroupCount[2]);

    ANGLE_TRY(createEvent(eventCreateFunc));

    return angle::Result::Continue;
}
420 
enqueueTask(const cl::Kernel & kernel,const cl::EventPtrs & waitEvents,CLEventImpl::CreateFunc * eventCreateFunc)421 angle::Result CLCommandQueueVk::enqueueTask(const cl::Kernel &kernel,
422                                             const cl::EventPtrs &waitEvents,
423                                             CLEventImpl::CreateFunc *eventCreateFunc)
424 {
425     constexpr size_t globalWorkSize[3] = {1, 0, 0};
426     constexpr size_t localWorkSize[3]  = {1, 0, 0};
427     cl::NDRange ndrange(1, nullptr, globalWorkSize, localWorkSize);
428     return enqueueNDRangeKernel(kernel, ndrange, waitEvents, eventCreateFunc);
429 }
430 
// Not yet implemented in the Vulkan back end; reports CL_OUT_OF_RESOURCES.
angle::Result CLCommandQueueVk::enqueueNativeKernel(cl::UserFunc userFunc,
                                                    void *args,
                                                    size_t cbArgs,
                                                    const cl::BufferPtrs &buffers,
                                                    const std::vector<size_t> bufferPtrOffsets,
                                                    const cl::EventPtrs &waitEvents,
                                                    CLEventImpl::CreateFunc *eventCreateFunc)
{
    UNIMPLEMENTED();
    ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
442 
enqueueMarkerWithWaitList(const cl::EventPtrs & waitEvents,CLEventImpl::CreateFunc * eventCreateFunc)443 angle::Result CLCommandQueueVk::enqueueMarkerWithWaitList(const cl::EventPtrs &waitEvents,
444                                                           CLEventImpl::CreateFunc *eventCreateFunc)
445 {
446     std::scoped_lock<std::mutex> sl(mCommandQueueMutex);
447 
448     ANGLE_TRY(processWaitlist(waitEvents));
449     ANGLE_TRY(createEvent(eventCreateFunc));
450 
451     return angle::Result::Continue;
452 }
453 
// Implements the deprecated clEnqueueMarker entry point.
angle::Result CLCommandQueueVk::enqueueMarker(CLEventImpl::CreateFunc &eventCreateFunc)
{
    std::scoped_lock<std::mutex> sl(mCommandQueueMutex);

    // This deprecated API is essentially a super-set of clEnqueueBarrier, where we also return an
    // event object (i.e. marker) since clEnqueueBarrier does not provide this
    VkMemoryBarrier memoryBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr,
                                     VK_ACCESS_SHADER_WRITE_BIT,
                                     VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT};
    mComputePassCommands->getCommandBuffer().pipelineBarrier(
        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1,
        &memoryBarrier, 0, nullptr, 0, nullptr);

    ANGLE_TRY(createEvent(&eventCreateFunc));

    return angle::Result::Continue;
}
471 
enqueueWaitForEvents(const cl::EventPtrs & events)472 angle::Result CLCommandQueueVk::enqueueWaitForEvents(const cl::EventPtrs &events)
473 {
474     std::scoped_lock<std::mutex> sl(mCommandQueueMutex);
475 
476     // Unlike clWaitForEvents, this routine is non-blocking
477     ANGLE_TRY(processWaitlist(events));
478 
479     return angle::Result::Continue;
480 }
481 
// Implements clEnqueueBarrierWithWaitList.
angle::Result CLCommandQueueVk::enqueueBarrierWithWaitList(const cl::EventPtrs &waitEvents,
                                                           CLEventImpl::CreateFunc *eventCreateFunc)
{
    std::scoped_lock<std::mutex> sl(mCommandQueueMutex);

    // The barrier command either waits for a list of events to complete, or if the list is empty it
    // waits for all commands previously enqueued in command_queue to complete before it completes
    if (waitEvents.empty())
    {
        // Empty wait list: record a full compute-stage memory barrier so later
        // commands observe all previously recorded work.
        VkMemoryBarrier memoryBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr,
                                         VK_ACCESS_SHADER_WRITE_BIT,
                                         VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT};
        mComputePassCommands->getCommandBuffer().pipelineBarrier(
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1,
            &memoryBarrier, 0, nullptr, 0, nullptr);
    }
    else
    {
        ANGLE_TRY(processWaitlist(waitEvents));
    }

    ANGLE_TRY(createEvent(eventCreateFunc));

    return angle::Result::Continue;
}
507 
// Implements the deprecated clEnqueueBarrier: records a compute-stage memory
// barrier covering shader writes so subsequent commands observe prior work.
angle::Result CLCommandQueueVk::enqueueBarrier()
{
    std::scoped_lock<std::mutex> sl(mCommandQueueMutex);

    VkMemoryBarrier memoryBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr,
                                     VK_ACCESS_SHADER_WRITE_BIT,
                                     VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT};
    mComputePassCommands->getCommandBuffer().pipelineBarrier(
        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1,
        &memoryBarrier, 0, nullptr, 0, nullptr);

    return angle::Result::Continue;
}
521 
// Implements clFlush. Realized as a non-blocking finish: a CLAsyncFinishTask
// is posted to a worker thread so this call returns without waiting.
angle::Result CLCommandQueueVk::flush()
{
    ANGLE_TRACE_EVENT0("gpu.angle", "CLCommandQueueVk::flush");

    // Non-blocking finish
    // TODO: Ideally we should try to find better impl. to avoid spawning a submit-thread/Task here
    // https://anglebug.com/8669
    std::shared_ptr<angle::WaitableEvent> asyncEvent =
        getPlatform()->postMultiThreadWorkerTask(std::make_shared<CLAsyncFinishTask>(this));
    ASSERT(asyncEvent != nullptr);

    return angle::Result::Continue;
}
535 
finish()536 angle::Result CLCommandQueueVk::finish()
537 {
538     std::scoped_lock<std::mutex> sl(mCommandQueueMutex);
539 
540     ANGLE_TRACE_EVENT0("gpu.angle", "CLCommandQueueVk::finish");
541 
542     // Blocking finish
543     return finishInternal();
544 }
545 
syncHostBuffers()546 angle::Result CLCommandQueueVk::syncHostBuffers()
547 {
548     for (const cl::MemoryPtr &memoryPtr : mHostBufferUpdateList)
549     {
550         ASSERT(memoryPtr->getHostPtr() != nullptr);
551         CLBufferVk &bufferVk = memoryPtr->getImpl<CLBufferVk>();
552         ANGLE_TRY(
553             bufferVk.copyTo(memoryPtr->getHostPtr(), memoryPtr->getOffset(), memoryPtr->getSize()));
554     }
555     mHostBufferUpdateList.clear();
556 
557     return angle::Result::Continue;
558 }
559 
// Records everything a kernel dispatch needs into the current compute pass:
// push constants (global offset/size plus POD push-constant args), a freshly
// allocated descriptor set describing the kernel's buffer arguments, and a
// memory barrier when a read-after-write hazard with an earlier dispatch is
// detected. Buffer arguments are retained until the dispatch completes.
angle::Result CLCommandQueueVk::processKernelResources(CLKernelVk &kernelVk,
                                                       const cl::NDRange &ndrange)
{
    bool needsBarrier = false;
    UpdateDescriptorSetsBuilder updateDescriptorSetsBuilder;
    const CLProgramVk::DeviceProgramData *devProgramData =
        kernelVk.getProgram()->getDeviceProgramData(mCommandQueue.getDevice().getNative());
    ASSERT(devProgramData != nullptr);

    // Allocate descriptor set
    // NOTE(review): allocated from the ShaderResource layout but bound below at
    // DescriptorSetIndex::Internal — confirm these two indices are meant to agree.
    VkDescriptorSet descriptorSet{VK_NULL_HANDLE};
    ANGLE_TRY(kernelVk.getProgram()->allocateDescriptorSet(
        kernelVk.getDescriptorSetLayouts()[DescriptorSetIndex::ShaderResource].get(),
        &descriptorSet));

    // Push global offset data (only present when clspv reflected a range for it)
    const VkPushConstantRange *globalOffsetRange = devProgramData->getGlobalOffsetRange();
    if (globalOffsetRange != nullptr)
    {
        mComputePassCommands->getCommandBuffer().pushConstants(
            kernelVk.getPipelineLayout().get(), VK_SHADER_STAGE_COMPUTE_BIT,
            globalOffsetRange->offset, globalOffsetRange->size, ndrange.globalWorkOffset.data());
    }

    // Push global size data (only present when clspv reflected a range for it)
    const VkPushConstantRange *globalSizeRange = devProgramData->getGlobalSizeRange();
    if (globalSizeRange != nullptr)
    {
        mComputePassCommands->getCommandBuffer().pushConstants(
            kernelVk.getPipelineLayout().get(), VK_SHADER_STAGE_COMPUTE_BIT,
            globalSizeRange->offset, globalSizeRange->size, ndrange.globalWorkSize.data());
    }

    // Process each kernel argument/resource
    for (const auto &arg : kernelVk.getArgs())
    {
        switch (arg.type)
        {
            case NonSemanticClspvReflectionArgumentUniform:
            case NonSemanticClspvReflectionArgumentStorageBuffer:
            {
                cl::Memory *clMem = cl::Buffer::Cast(*static_cast<const cl_mem *>(arg.handle));
                CLBufferVk &vkMem = clMem->getImpl<CLBufferVk>();

                // Retain this resource until its associated dispatch completes
                mMemoryCaptures.emplace_back(clMem);

                // Handle possible resource RAW hazard: a storage buffer seen twice
                // within the same pass may have been written by an earlier dispatch.
                // Uniform buffers are read-only to the kernel and are not tracked.
                if (arg.type != NonSemanticClspvReflectionArgumentUniform)
                {
                    // A full tracker is treated conservatively: barrier + restart
                    // tracking rather than letting the set grow without bound.
                    if (mDependencyTracker.contains(clMem) ||
                        mDependencyTracker.size() == kMaxDependencyTrackerSize)
                    {
                        needsBarrier = true;
                        mDependencyTracker.clear();
                    }
                    mDependencyTracker.insert(clMem);
                }

                // Update buffer/descriptor info. Sub-buffers alias their parent's
                // VkBuffer, offset by clMem->getOffset().
                VkDescriptorBufferInfo &bufferInfo =
                    updateDescriptorSetsBuilder.allocDescriptorBufferInfo();
                bufferInfo.range  = clMem->getSize();
                bufferInfo.offset = clMem->getOffset();
                bufferInfo.buffer = vkMem.isSubBuffer()
                                        ? vkMem.getParent()->getBuffer().getBuffer().getHandle()
                                        : vkMem.getBuffer().getBuffer().getHandle();
                VkWriteDescriptorSet &writeDescriptorSet =
                    updateDescriptorSetsBuilder.allocWriteDescriptorSet();
                writeDescriptorSet.descriptorCount = 1;
                writeDescriptorSet.descriptorType =
                    arg.type == NonSemanticClspvReflectionArgumentUniform
                        ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
                        : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
                writeDescriptorSet.pBufferInfo = &bufferInfo;
                writeDescriptorSet.sType       = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
                writeDescriptorSet.dstSet      = descriptorSet;
                writeDescriptorSet.dstBinding  = arg.descriptorBinding;
                break;
            }
            case NonSemanticClspvReflectionArgumentPodPushConstant:
            {
                // POD arguments reflected as push constants are recorded directly;
                // arg.handle points at the argument bytes.
                mComputePassCommands->getCommandBuffer().pushConstants(
                    kernelVk.getPipelineLayout().get(), VK_SHADER_STAGE_COMPUTE_BIT,
                    arg.pushConstOffset, arg.pushConstantSize, arg.handle);
                break;
            }
            case NonSemanticClspvReflectionArgumentSampler:
            case NonSemanticClspvReflectionArgumentPodUniform:
            case NonSemanticClspvReflectionArgumentStorageImage:
            case NonSemanticClspvReflectionArgumentSampledImage:
            case NonSemanticClspvReflectionArgumentPointerUniform:
            case NonSemanticClspvReflectionArgumentPodStorageBuffer:
            case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
            case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
            case NonSemanticClspvReflectionArgumentPointerPushConstant:
            default:
            {
                // Image/sampler/texel-buffer and remaining POD variants are not
                // supported yet by this backend.
                UNIMPLEMENTED();
                break;
            }
        }
    }

    // Resolve any RAW hazard detected above with a compute->compute barrier
    // before this dispatch is recorded.
    if (needsBarrier)
    {
        VkMemoryBarrier memoryBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr,
                                         VK_ACCESS_SHADER_WRITE_BIT,
                                         VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT};
        mComputePassCommands->getCommandBuffer().pipelineBarrier(
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1,
            &memoryBarrier, 0, nullptr, 0, nullptr);
    }

    // Flush all accumulated vkUpdateDescriptorSets work in one call.
    mContext->getPerfCounters().writeDescriptorSets =
        updateDescriptorSetsBuilder.flushDescriptorSetUpdates(mContext->getRenderer()->getDevice());

    mComputePassCommands->getCommandBuffer().bindDescriptorSets(
        kernelVk.getPipelineLayout().get(), VK_PIPELINE_BIND_POINT_COMPUTE,
        DescriptorSetIndex::Internal, 1, &descriptorSet, 0, nullptr);

    return angle::Result::Continue;
}
683 
// Hands the accumulated compute-pass commands to the renderer's primary
// command buffer and starts a fresh queue serial for subsequent recording.
// Note: this only flushes into the primary buffer; actual VkQueue submission
// happens later in submitCommands().
angle::Result CLCommandQueueVk::flushComputePassCommands()
{
    // Capture the serial of the batch being flushed BEFORE handing the
    // commands off, so submitCommands()/finishInternal() know what to wait on.
    mLastFlushedQueueSerial = mComputePassCommands->getQueueSerial();

    // Here, we flush our compute cmds to RendererVk's primary command buffer
    ANGLE_TRY(mContext->getRenderer()->flushOutsideRPCommands(
        mContext, getProtectionType(), egl::ContextPriority::Medium, &mComputePassCommands));

    // Flushed-but-not-submitted work now exists; finishInternal() uses this
    // flag to decide whether a submit + fence wait is required.
    mHasAnyCommandsPendingSubmission = true;

    mContext->getPerfCounters().flushedOutsideRenderPassCommandBuffers++;

    // Generate new serial for next batch of cmds
    mComputePassCommands->setQueueSerial(
        mCurrentQueueSerialIndex,
        mContext->getRenderer()->generateQueueSerial(mCurrentQueueSerialIndex));

    return angle::Result::Continue;
}
703 
processWaitlist(const cl::EventPtrs & waitEvents)704 angle::Result CLCommandQueueVk::processWaitlist(const cl::EventPtrs &waitEvents)
705 {
706     if (!waitEvents.empty())
707     {
708         bool insertedBarrier = false;
709         for (const cl::EventPtr &event : waitEvents)
710         {
711             if (event->getImpl<CLEventVk>().isUserEvent() ||
712                 event->getCommandQueue() != &mCommandQueue)
713             {
714                 // We cannot use a barrier in these cases, therefore defer the event
715                 // handling till submission time
716                 // TODO: Perhaps we could utilize VkEvents here instead and have GPU wait(s)
717                 // https://anglebug.com/8670
718                 mDependantEvents.push_back(event);
719             }
720             else if (event->getCommandQueue() == &mCommandQueue && !insertedBarrier)
721             {
722                 // As long as there is at least one dependant command in same queue,
723                 // we just need to insert one execution barrier
724                 VkMemoryBarrier memoryBarrier = {
725                     VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_SHADER_WRITE_BIT,
726                     VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT};
727                 mComputePassCommands->getCommandBuffer().pipelineBarrier(
728                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
729                     1, &memoryBarrier, 0, nullptr, 0, nullptr);
730 
731                 insertedBarrier = true;
732             }
733         }
734     }
735     return angle::Result::Continue;
736 }
737 
// Submits everything flushed so far (up to mLastFlushedQueueSerial) to the
// renderer's VkQueue. Does not wait for completion; callers that need a
// blocking wait follow this with finishQueueSerial (see finishInternal()).
angle::Result CLCommandQueueVk::submitCommands()
{
    ANGLE_TRACE_EVENT0("gpu.angle", "CLCommandQueueVk::submitCommands()");

    // Kick off renderer submit
    ANGLE_TRY(mContext->getRenderer()->submitCommands(mContext, getProtectionType(),
                                                      egl::ContextPriority::Medium, nullptr,
                                                      nullptr, mLastFlushedQueueSerial));

    // Everything flushed up to this serial is now on the queue.
    mLastSubmittedQueueSerial = mLastFlushedQueueSerial;

    // Now that we have submitted commands, some of pending garbage may no longer pending
    // and should be moved to garbage list.
    mContext->getRenderer()->cleanupPendingSubmissionGarbage();

    mHasAnyCommandsPendingSubmission = false;

    return angle::Result::Continue;
}
757 
createEvent(CLEventImpl::CreateFunc * createFunc)758 angle::Result CLCommandQueueVk::createEvent(CLEventImpl::CreateFunc *createFunc)
759 {
760     if (createFunc != nullptr)
761     {
762         *createFunc = [this](const cl::Event &event) {
763             auto eventVk = new (std::nothrow) CLEventVk(event);
764             if (eventVk == nullptr)
765             {
766                 ERR() << "Failed to create event obj!";
767                 ANGLE_CL_SET_ERROR(CL_OUT_OF_HOST_MEMORY);
768                 return CLEventImpl::Ptr(nullptr);
769             }
770             eventVk->setQueueSerial(mComputePassCommands->getQueueSerial());
771 
772             // Save a reference to this event
773             mAssociatedEvents.push_back(cl::EventPtr{&eventVk->getFrontendObject()});
774 
775             return CLEventImpl::Ptr(eventVk);
776         };
777     }
778     return angle::Result::Continue;
779 }
780 
// Core blocking-finish path (callers hold mCommandQueueMutex): resolves any
// deferred dependencies, flushes and submits recorded commands, waits for GPU
// completion, syncs host-visible buffers, and walks every associated event
// through CL_SUBMITTED -> CL_RUNNING -> CL_COMPLETE, firing callbacks.
// NOTE(review): status transitions are driven from the host around the fence
// wait rather than by actual GPU progress — coarse but conservative.
angle::Result CLCommandQueueVk::finishInternal()
{
    for (cl::EventPtr event : mAssociatedEvents)
    {
        ANGLE_TRY(event->getImpl<CLEventVk>().setStatusAndExecuteCallback(CL_SUBMITTED));
    }

    if (!mComputePassCommands->empty())
    {
        // If we still have dependant events, handle them now
        if (!mDependantEvents.empty())
        {
            for (const auto &depEvent : mDependantEvents)
            {
                if (depEvent->getImpl<CLEventVk>().isUserEvent())
                {
                    // We just wait here for user to set the event object
                    cl_int status = CL_QUEUED;
                    ANGLE_TRY(depEvent->getImpl<CLEventVk>().waitForUserEventStatus());
                    ANGLE_TRY(depEvent->getImpl<CLEventVk>().getCommandExecutionStatus(status));
                    // Negative status means the user terminated the event with an
                    // error; drop the recorded commands per the CL spec.
                    if (status < 0)
                    {
                        ERR() << "Invalid dependant user-event (" << depEvent.get()
                              << ") status encountered!";
                        mComputePassCommands->getCommandBuffer().reset();
                        ANGLE_CL_RETURN_ERROR(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
                    }
                }
                else
                {
                    // Otherwise, we just need to submit/finish for dependant event queues
                    // here that are not associated with this queue
                    ANGLE_TRY(depEvent->getCommandQueue()->finish());
                }
            }
            mDependantEvents.clear();
        }

        // Move the recorded compute pass into the renderer's primary buffer;
        // this also sets mHasAnyCommandsPendingSubmission.
        ANGLE_TRY(flushComputePassCommands());
    }

    for (cl::EventPtr event : mAssociatedEvents)
    {
        ANGLE_TRY(event->getImpl<CLEventVk>().setStatusAndExecuteCallback(CL_RUNNING));
    }

    if (mHasAnyCommandsPendingSubmission)
    {
        // Submit and wait for fence
        ANGLE_TRY(submitCommands());
        ANGLE_TRY(mContext->getRenderer()->finishQueueSerial(mContext, mLastSubmittedQueueSerial));

        // Ensure any resources are synced back to host on GPU completion
        ANGLE_TRY(syncHostBuffers());
    }

    for (cl::EventPtr event : mAssociatedEvents)
    {
        ANGLE_TRY(event->getImpl<CLEventVk>().setStatusAndExecuteCallback(CL_COMPLETE));
    }

    // All work is done: release retained kernel resources, event references,
    // and hazard-tracking state accumulated during recording.
    mMemoryCaptures.clear();
    mAssociatedEvents.clear();
    mDependencyTracker.clear();

    return angle::Result::Continue;
}
848 
849 }  // namespace rx
850