1 // 2 // Copyright 2021 The ANGLE Project Authors. All rights reserved. 3 // Use of this source code is governed by a BSD-style license that can be 4 // found in the LICENSE file. 5 // 6 // CLCommandQueueVk.h: Defines the class interface for CLCommandQueueVk, 7 // implementing CLCommandQueueImpl. 8 9 #ifndef LIBANGLE_RENDERER_VULKAN_CLCOMMANDQUEUEVK_H_ 10 #define LIBANGLE_RENDERER_VULKAN_CLCOMMANDQUEUEVK_H_ 11 12 #include <condition_variable> 13 #include <vector> 14 15 #include "common/PackedCLEnums_autogen.h" 16 #include "common/hash_containers.h" 17 18 #include "libANGLE/renderer/vulkan/CLContextVk.h" 19 #include "libANGLE/renderer/vulkan/CLEventVk.h" 20 #include "libANGLE/renderer/vulkan/CLKernelVk.h" 21 #include "libANGLE/renderer/vulkan/CLMemoryVk.h" 22 #include "libANGLE/renderer/vulkan/cl_types.h" 23 #include "libANGLE/renderer/vulkan/clspv_utils.h" 24 #include "libANGLE/renderer/vulkan/vk_command_buffer_utils.h" 25 #include "libANGLE/renderer/vulkan/vk_helpers.h" 26 #include "libANGLE/renderer/vulkan/vk_utils.h" 27 28 #include "libANGLE/renderer/CLCommandQueueImpl.h" 29 #include "libANGLE/renderer/serial_utils.h" 30 31 #include "libANGLE/CLKernel.h" 32 #include "libANGLE/CLMemory.h" 33 #include "libANGLE/cl_types.h" 34 35 namespace std 36 { 37 // Hash function for QueueSerial so that it can serve as a key for angle::HashMap 38 template <> 39 struct hash<rx::QueueSerial> 40 { 41 size_t operator()(const rx::QueueSerial &queueSerial) const 42 { 43 size_t hash = 0; 44 angle::HashCombine(hash, queueSerial.getSerial().getValue()); 45 angle::HashCombine(hash, queueSerial.getIndex()); 46 return hash; 47 } 48 }; 49 } // namespace std 50 51 namespace rx 52 { 53 54 static constexpr size_t kPrintfBufferSize = 1024 * 1024; 55 class CLCommandQueueVk; 56 57 namespace 58 { 59 60 struct HostTransferConfig 61 { 62 HostTransferConfig() 63 : srcRect(cl::Offset{}, cl::Extents{}, 0, 0, 0), dstRect(cl::Offset{}, cl::Extents{}, 0, 0) 64 {} 65 cl_command_type type{0}; 66 size_t size = 0; 67 size_t offset = 0; 68 void *dstHostPtr = nullptr; 69 70 // Source host pointer that can contain data/pattern/etc 71 const void *srcHostPtr = nullptr; 72 73 size_t patternSize = 0; 74 size_t rowPitch = 0; 75 size_t slicePitch = 0; 76 size_t elementSize = 0; 77 cl::MemOffsets origin; 78 cl::Coordinate region; 79 cl::BufferRect srcRect; 80 cl::BufferRect dstRect; 81 }; 82 struct HostTransferEntry 83 { 84 HostTransferConfig transferConfig; 85 cl::MemoryPtr transferBufferHandle; 86 }; 87 using HostTransferEntries = std::vector<HostTransferEntry>; 88 89 // DispatchWorkThread setups a background thread to wait on the work submitted to Vulkan renderer. 90 class DispatchWorkThread 91 { 92 public: 93 DispatchWorkThread(CLCommandQueueVk *commandQueue); 94 ~DispatchWorkThread(); 95 96 angle::Result init(); 97 void terminate(); 98 99 angle::Result notify(QueueSerial queueSerial); 100 101 private: 102 static constexpr size_t kFixedQueueLimit = 4u; 103 104 angle::Result finishLoop(); 105 106 CLCommandQueueVk *const mCommandQueue; 107 108 std::mutex mThreadMutex; 109 std::condition_variable mHasWorkSubmitted; 110 std::condition_variable mHasEmptySlot; 111 bool mIsTerminating; 112 std::thread mWorkerThread; 113 114 angle::FixedQueue<QueueSerial> mQueueSerials; 115 // Queue serial index associated with the CLCommandQueueVk 116 SerialIndex mQueueSerialIndex; 117 }; 118 119 struct CommandsState 120 { 121 cl::EventPtrs events; 122 cl::MemoryPtrs memories; 123 cl::KernelPtrs kernels; 124 cl::SamplerPtrs samplers; 125 HostTransferEntries hostTransferList; 126 }; 127 using CommandsStateMap = angle::HashMap<QueueSerial, CommandsState>; 128 129 } // namespace 130 131 class CLCommandQueueVk : public CLCommandQueueImpl 132 { 133 public: 134 CLCommandQueueVk(const cl::CommandQueue &commandQueue); 135 ~CLCommandQueueVk() override; 136 137 angle::Result init(); 138 139 angle::Result setProperty(cl::CommandQueueProperties properties, cl_bool enable) override; 140 141 angle::Result enqueueReadBuffer(const cl::Buffer &buffer, 142 bool blocking, 143 size_t offset, 144 size_t size, 145 void *ptr, 146 const cl::EventPtrs &waitEvents, 147 CLEventImpl::CreateFunc *eventCreateFunc) override; 148 149 angle::Result enqueueWriteBuffer(const cl::Buffer &buffer, 150 bool blocking, 151 size_t offset, 152 size_t size, 153 const void *ptr, 154 const cl::EventPtrs &waitEvents, 155 CLEventImpl::CreateFunc *eventCreateFunc) override; 156 157 angle::Result enqueueReadBufferRect(const cl::Buffer &buffer, 158 bool blocking, 159 const cl::MemOffsets &bufferOrigin, 160 const cl::MemOffsets &hostOrigin, 161 const cl::Coordinate ®ion, 162 size_t bufferRowPitch, 163 size_t bufferSlicePitch, 164 size_t hostRowPitch, 165 size_t hostSlicePitch, 166 void *ptr, 167 const cl::EventPtrs &waitEvents, 168 CLEventImpl::CreateFunc *eventCreateFunc) override; 169 170 angle::Result enqueueWriteBufferRect(const cl::Buffer &buffer, 171 bool blocking, 172 const cl::MemOffsets &bufferOrigin, 173 const cl::MemOffsets &hostOrigin, 174 const cl::Coordinate ®ion, 175 size_t bufferRowPitch, 176 size_t bufferSlicePitch, 177 size_t hostRowPitch, 178 size_t hostSlicePitch, 179 const void *ptr, 180 const cl::EventPtrs &waitEvents, 181 CLEventImpl::CreateFunc *eventCreateFunc) override; 182 183 angle::Result enqueueCopyBuffer(const cl::Buffer &srcBuffer, 184 const cl::Buffer &dstBuffer, 185 size_t srcOffset, 186 size_t dstOffset, 187 size_t size, 188 const cl::EventPtrs &waitEvents, 189 CLEventImpl::CreateFunc *eventCreateFunc) override; 190 191 angle::Result enqueueCopyBufferRect(const cl::Buffer &srcBuffer, 192 const cl::Buffer &dstBuffer, 193 const cl::MemOffsets &srcOrigin, 194 const cl::MemOffsets &dstOrigin, 195 const cl::Coordinate ®ion, 196 size_t srcRowPitch, 197 size_t srcSlicePitch, 198 size_t dstRowPitch, 199 size_t dstSlicePitch, 200 const cl::EventPtrs &waitEvents, 201 CLEventImpl::CreateFunc *eventCreateFunc) override; 202 203 angle::Result enqueueFillBuffer(const cl::Buffer &buffer, 204 const void *pattern, 205 size_t patternSize, 206 size_t offset, 207 size_t size, 208 const cl::EventPtrs &waitEvents, 209 CLEventImpl::CreateFunc *eventCreateFunc) override; 210 211 angle::Result enqueueMapBuffer(const cl::Buffer &buffer, 212 bool blocking, 213 cl::MapFlags mapFlags, 214 size_t offset, 215 size_t size, 216 const cl::EventPtrs &waitEvents, 217 CLEventImpl::CreateFunc *eventCreateFunc, 218 void *&mapPtr) override; 219 220 angle::Result enqueueReadImage(const cl::Image &image, 221 bool blocking, 222 const cl::MemOffsets &origin, 223 const cl::Coordinate ®ion, 224 size_t rowPitch, 225 size_t slicePitch, 226 void *ptr, 227 const cl::EventPtrs &waitEvents, 228 CLEventImpl::CreateFunc *eventCreateFunc) override; 229 230 angle::Result enqueueWriteImage(const cl::Image &image, 231 bool blocking, 232 const cl::MemOffsets &origin, 233 const cl::Coordinate ®ion, 234 size_t inputRowPitch, 235 size_t inputSlicePitch, 236 const void *ptr, 237 const cl::EventPtrs &waitEvents, 238 CLEventImpl::CreateFunc *eventCreateFunc) override; 239 240 angle::Result enqueueCopyImage(const cl::Image &srcImage, 241 const cl::Image &dstImage, 242 const cl::MemOffsets &srcOrigin, 243 const cl::MemOffsets &dstOrigin, 244 const cl::Coordinate ®ion, 245 const cl::EventPtrs &waitEvents, 246 CLEventImpl::CreateFunc *eventCreateFunc) override; 247 248 angle::Result enqueueFillImage(const cl::Image &image, 249 const void *fillColor, 250 const cl::MemOffsets &origin, 251 const cl::Coordinate ®ion, 252 const cl::EventPtrs &waitEvents, 253 CLEventImpl::CreateFunc *eventCreateFunc) override; 254 255 angle::Result enqueueCopyImageToBuffer(const cl::Image &srcImage, 256 const cl::Buffer &dstBuffer, 257 const cl::MemOffsets &srcOrigin, 258 const cl::Coordinate ®ion, 259 size_t dstOffset, 260 const cl::EventPtrs &waitEvents, 261 CLEventImpl::CreateFunc *eventCreateFunc) override; 262 263 angle::Result enqueueCopyBufferToImage(const cl::Buffer &srcBuffer, 264 const cl::Image &dstImage, 265 size_t srcOffset, 266 const cl::MemOffsets &dstOrigin, 267 const cl::Coordinate ®ion, 268 const cl::EventPtrs &waitEvents, 269 CLEventImpl::CreateFunc *eventCreateFunc) override; 270 271 angle::Result enqueueMapImage(const cl::Image &image, 272 bool blocking, 273 cl::MapFlags mapFlags, 274 const cl::MemOffsets &origin, 275 const cl::Coordinate ®ion, 276 size_t *imageRowPitch, 277 size_t *imageSlicePitch, 278 const cl::EventPtrs &waitEvents, 279 CLEventImpl::CreateFunc *eventCreateFunc, 280 void *&mapPtr) override; 281 282 angle::Result enqueueUnmapMemObject(const cl::Memory &memory, 283 void *mappedPtr, 284 const cl::EventPtrs &waitEvents, 285 CLEventImpl::CreateFunc *eventCreateFunc) override; 286 287 angle::Result enqueueMigrateMemObjects(const cl::MemoryPtrs &memObjects, 288 cl::MemMigrationFlags flags, 289 const cl::EventPtrs &waitEvents, 290 CLEventImpl::CreateFunc *eventCreateFunc) override; 291 292 angle::Result enqueueNDRangeKernel(const cl::Kernel &kernel, 293 const cl::NDRange &ndrange, 294 const cl::EventPtrs &waitEvents, 295 CLEventImpl::CreateFunc *eventCreateFunc) override; 296 297 angle::Result enqueueTask(const cl::Kernel &kernel, 298 const cl::EventPtrs &waitEvents, 299 CLEventImpl::CreateFunc *eventCreateFunc) override; 300 301 angle::Result enqueueNativeKernel(cl::UserFunc userFunc, 302 void *args, 303 size_t cbArgs, 304 const cl::BufferPtrs &buffers, 305 const std::vector<size_t> &bufferPtrOffsets, 306 const cl::EventPtrs &waitEvents, 307 CLEventImpl::CreateFunc *eventCreateFunc) override; 308 309 angle::Result enqueueMarkerWithWaitList(const cl::EventPtrs &waitEvents, 310 CLEventImpl::CreateFunc *eventCreateFunc) override; 311 312 angle::Result enqueueMarker(CLEventImpl::CreateFunc &eventCreateFunc) override; 313 314 angle::Result enqueueWaitForEvents(const cl::EventPtrs &events) override; 315 316 angle::Result enqueueBarrierWithWaitList(const cl::EventPtrs &waitEvents, 317 CLEventImpl::CreateFunc *eventCreateFunc) override; 318 319 angle::Result enqueueBarrier() override; 320 321 angle::Result flush() override; 322 323 angle::Result finish() override; 324 325 CLPlatformVk *getPlatform() { return mContext->getPlatform(); } 326 CLContextVk *getContext() { return mContext; } 327 328 cl::MemoryPtr getOrCreatePrintfBuffer(); 329 330 angle::Result finishQueueSerial(const QueueSerial queueSerial); 331 332 SerialIndex getQueueSerialIndex() const { return mQueueSerialIndex; } 333 334 bool hasCommandsPendingSubmission() const 335 { 336 return mLastFlushedQueueSerial != mLastSubmittedQueueSerial; 337 } 338 339 void addEventReference(CLEventVk &eventVk); 340 341 private: 342 static constexpr size_t kMaxDependencyTrackerSize = 64; 343 static constexpr size_t kMaxHostBufferUpdateListSize = 16; 344 345 angle::Result resetCommandBufferWithError(cl_int errorCode); 346 347 vk::ProtectionType getProtectionType() const { return vk::ProtectionType::Unprotected; } 348 349 // Create-update-bind the kernel's descriptor set, put push-constants in cmd buffer, capture 350 // kernel resources, and handle kernel execution dependencies 351 angle::Result processKernelResources(CLKernelVk &kernelVk); 352 // Updates global push constants for a given CL kernel 353 angle::Result processGlobalPushConstants(CLKernelVk &kernelVk, const cl::NDRange &ndrange); 354 355 angle::Result submitCommands(); 356 angle::Result finishInternal(); 357 angle::Result flushInternal(); 358 // Wait for the submitted work to the renderer to finish and perform post-processing such as 359 // event status updates etc. This is a blocking call. 360 angle::Result finishQueueSerialInternal(const QueueSerial queueSerial); 361 362 angle::Result syncHostBuffers(HostTransferEntries &hostTransferList); 363 angle::Result flushComputePassCommands(); 364 angle::Result processWaitlist(const cl::EventPtrs &waitEvents); 365 angle::Result createEvent(CLEventImpl::CreateFunc *createFunc, 366 cl::ExecutionStatus initialStatus); 367 368 angle::Result onResourceAccess(const vk::CommandBufferAccess &access); 369 angle::Result getCommandBuffer(const vk::CommandBufferAccess &access, 370 vk::OutsideRenderPassCommandBuffer **commandBufferOut) 371 { 372 ANGLE_TRY(onResourceAccess(access)); 373 *commandBufferOut = &mComputePassCommands->getCommandBuffer(); 374 return angle::Result::Continue; 375 } 376 377 angle::Result processPrintfBuffer(); 378 angle::Result copyImageToFromBuffer(CLImageVk &imageVk, 379 vk::BufferHelper &buffer, 380 const cl::MemOffsets &origin, 381 const cl::Coordinate ®ion, 382 size_t bufferOffset, 383 ImageBufferCopyDirection writeToBuffer); 384 385 bool hasUserEventDependency() const; 386 387 angle::Result insertBarrier(); 388 angle::Result addMemoryDependencies(cl::Memory *clMem); 389 390 angle::Result submitEmptyCommand(); 391 392 CLContextVk *mContext; 393 const CLDeviceVk *mDevice; 394 cl::Memory *mPrintfBuffer; 395 396 vk::SecondaryCommandPools mCommandPool; 397 vk::OutsideRenderPassCommandBufferHelper *mComputePassCommands; 398 vk::SecondaryCommandMemoryAllocator mOutsideRenderPassCommandsAllocator; 399 400 // Queue Serials for this command queue 401 SerialIndex mQueueSerialIndex; 402 QueueSerial mLastSubmittedQueueSerial; 403 QueueSerial mLastFlushedQueueSerial; 404 405 std::mutex mCommandQueueMutex; 406 407 // External dependent events that this queue has to wait on 408 cl::EventPtrs mExternalEvents; 409 410 // Keep track of kernel resources on prior kernel enqueues 411 angle::HashSet<cl::Object *> mDependencyTracker; 412 413 CommandsStateMap mCommandsStateMap; 414 415 // printf handling 416 bool mNeedPrintfHandling; 417 const angle::HashMap<uint32_t, ClspvPrintfInfo> *mPrintfInfos; 418 419 // Host buffer transferring routines 420 angle::Result addToHostTransferList(CLBufferVk *srcBuffer, HostTransferConfig transferEntry); 421 angle::Result addToHostTransferList(CLImageVk *srcImage, HostTransferConfig transferEntry); 422 423 DispatchWorkThread mFinishHandler; 424 }; 425 426 } // namespace rx 427 428 #endif // LIBANGLE_RENDERER_VULKAN_CLCOMMANDQUEUEVK_H_ 429