1 // Copyright 2017 The Dawn Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "dawn_native/vulkan/BufferVk.h" 16 17 #include "dawn_native/CommandBuffer.h" 18 #include "dawn_native/vulkan/DeviceVk.h" 19 #include "dawn_native/vulkan/FencedDeleter.h" 20 #include "dawn_native/vulkan/ResourceHeapVk.h" 21 #include "dawn_native/vulkan/ResourceMemoryAllocatorVk.h" 22 #include "dawn_native/vulkan/UtilsVulkan.h" 23 #include "dawn_native/vulkan/VulkanError.h" 24 25 #include <cstring> 26 27 namespace dawn_native { namespace vulkan { 28 29 namespace { 30 VulkanBufferUsage(wgpu::BufferUsage usage)31 VkBufferUsageFlags VulkanBufferUsage(wgpu::BufferUsage usage) { 32 VkBufferUsageFlags flags = 0; 33 34 if (usage & wgpu::BufferUsage::CopySrc) { 35 flags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; 36 } 37 if (usage & wgpu::BufferUsage::CopyDst) { 38 flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; 39 } 40 if (usage & wgpu::BufferUsage::Index) { 41 flags |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; 42 } 43 if (usage & wgpu::BufferUsage::Vertex) { 44 flags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; 45 } 46 if (usage & wgpu::BufferUsage::Uniform) { 47 flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; 48 } 49 if (usage & 50 (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer)) { 51 flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; 52 } 53 if (usage & wgpu::BufferUsage::Indirect) { 54 flags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; 55 } 56 if (usage & wgpu::BufferUsage::QueryResolve) { 57 flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; 58 } 59 60 return flags; 61 } 62 VulkanPipelineStage(wgpu::BufferUsage usage)63 VkPipelineStageFlags VulkanPipelineStage(wgpu::BufferUsage usage) { 64 VkPipelineStageFlags flags = 0; 65 66 if (usage & kMappableBufferUsages) { 67 flags |= VK_PIPELINE_STAGE_HOST_BIT; 68 } 69 if (usage & (wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst)) { 70 flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; 71 } 72 if (usage & (wgpu::BufferUsage::Index | wgpu::BufferUsage::Vertex)) { 73 flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; 74 } 75 if (usage & (wgpu::BufferUsage::Uniform | wgpu::BufferUsage::Storage | 76 kInternalStorageBuffer | kReadOnlyStorageBuffer)) { 77 flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | 78 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 79 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; 80 } 81 if (usage & wgpu::BufferUsage::Indirect) { 82 flags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; 83 } 84 if (usage & wgpu::BufferUsage::QueryResolve) { 85 flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; 86 } 87 88 return flags; 89 } 90 VulkanAccessFlags(wgpu::BufferUsage usage)91 VkAccessFlags VulkanAccessFlags(wgpu::BufferUsage usage) { 92 VkAccessFlags flags = 0; 93 94 if (usage & wgpu::BufferUsage::MapRead) { 95 flags |= VK_ACCESS_HOST_READ_BIT; 96 } 97 if (usage & wgpu::BufferUsage::MapWrite) { 98 flags |= VK_ACCESS_HOST_WRITE_BIT; 99 } 100 if (usage & wgpu::BufferUsage::CopySrc) { 101 flags |= VK_ACCESS_TRANSFER_READ_BIT; 102 } 103 if (usage & wgpu::BufferUsage::CopyDst) { 104 flags |= VK_ACCESS_TRANSFER_WRITE_BIT; 105 } 106 if (usage & wgpu::BufferUsage::Index) { 107 flags |= VK_ACCESS_INDEX_READ_BIT; 108 } 109 if (usage & wgpu::BufferUsage::Vertex) { 110 flags |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; 111 } 112 if (usage & wgpu::BufferUsage::Uniform) { 113 flags |= VK_ACCESS_UNIFORM_READ_BIT; 114 } 115 if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer)) { 116 flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; 117 } 118 if (usage & kReadOnlyStorageBuffer) { 119 flags |= VK_ACCESS_SHADER_READ_BIT; 120 } 121 if (usage & wgpu::BufferUsage::Indirect) { 122 flags |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; 123 } 124 if (usage & wgpu::BufferUsage::QueryResolve) { 125 flags |= VK_ACCESS_TRANSFER_WRITE_BIT; 126 } 127 128 return flags; 129 } 130 131 } // namespace 132 133 // static Create(Device * device,const BufferDescriptor * descriptor)134 ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) { 135 Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor)); 136 DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation)); 137 return std::move(buffer); 138 } 139 Initialize(bool mappedAtCreation)140 MaybeError Buffer::Initialize(bool mappedAtCreation) { 141 // vkCmdFillBuffer requires the size to be a multiple of 4. 142 constexpr size_t kAlignment = 4u; 143 144 uint32_t extraBytes = 0u; 145 if (GetUsage() & (wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Index)) { 146 // vkCmdSetIndexBuffer and vkCmdSetVertexBuffer are invalid if the offset 147 // is equal to the whole buffer size. Allocate at least one more byte so it 148 // is valid to setVertex/IndexBuffer with a zero-sized range at the end 149 // of the buffer with (offset=buffer.size, size=0). 150 extraBytes = 1u; 151 } 152 153 uint64_t size = GetSize(); 154 if (size > std::numeric_limits<uint64_t>::max() - extraBytes) { 155 return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large"); 156 } 157 158 size += extraBytes; 159 160 // Allocate at least 4 bytes so clamped accesses are always in bounds. 161 // Also, Vulkan requires the size to be non-zero. 162 size = std::max(size, uint64_t(4u)); 163 164 if (size > std::numeric_limits<uint64_t>::max() - kAlignment) { 165 // Alignment would overlow. 166 return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large"); 167 } 168 mAllocatedSize = Align(size, kAlignment); 169 170 // Avoid passing ludicrously large sizes to drivers because it causes issues: drivers add 171 // some constants to the size passed and align it, but for values close to the maximum 172 // VkDeviceSize this can cause overflows and makes drivers crash or return bad sizes in the 173 // VkmemoryRequirements. See https://gitlab.khronos.org/vulkan/vulkan/issues/1904 174 // Any size with one of two top bits of VkDeviceSize set is a HUGE allocation and we can 175 // safely return an OOM error. 176 if (mAllocatedSize & (uint64_t(3) << uint64_t(62))) { 177 return DAWN_OUT_OF_MEMORY_ERROR("Buffer size is HUGE and could cause overflows"); 178 } 179 180 VkBufferCreateInfo createInfo; 181 createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 182 createInfo.pNext = nullptr; 183 createInfo.flags = 0; 184 createInfo.size = mAllocatedSize; 185 // Add CopyDst for non-mappable buffer initialization with mappedAtCreation 186 // and robust resource initialization. 187 createInfo.usage = VulkanBufferUsage(GetUsage() | wgpu::BufferUsage::CopyDst); 188 createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 189 createInfo.queueFamilyIndexCount = 0; 190 createInfo.pQueueFamilyIndices = 0; 191 192 Device* device = ToBackend(GetDevice()); 193 DAWN_TRY(CheckVkOOMThenSuccess( 194 device->fn.CreateBuffer(device->GetVkDevice(), &createInfo, nullptr, &*mHandle), 195 "vkCreateBuffer")); 196 197 // Gather requirements for the buffer's memory and allocate it. 198 VkMemoryRequirements requirements; 199 device->fn.GetBufferMemoryRequirements(device->GetVkDevice(), mHandle, &requirements); 200 201 MemoryKind requestKind = MemoryKind::Linear; 202 if (GetUsage() & kMappableBufferUsages) { 203 requestKind = MemoryKind::LinearMappable; 204 } 205 DAWN_TRY_ASSIGN(mMemoryAllocation, 206 device->GetResourceMemoryAllocator()->Allocate(requirements, requestKind)); 207 208 // Finally associate it with the buffer. 209 DAWN_TRY(CheckVkSuccess( 210 device->fn.BindBufferMemory(device->GetVkDevice(), mHandle, 211 ToBackend(mMemoryAllocation.GetResourceHeap())->GetMemory(), 212 mMemoryAllocation.GetOffset()), 213 "vkBindBufferMemory")); 214 215 // The buffers with mappedAtCreation == true will be initialized in 216 // BufferBase::MapAtCreation(). 217 if (device->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting) && 218 !mappedAtCreation) { 219 ClearBuffer(device->GetPendingRecordingContext(), 0x01010101); 220 } 221 222 // Initialize the padding bytes to zero. 223 if (device->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) && !mappedAtCreation) { 224 uint32_t paddingBytes = GetAllocatedSize() - GetSize(); 225 if (paddingBytes > 0) { 226 uint32_t clearSize = Align(paddingBytes, 4); 227 uint64_t clearOffset = GetAllocatedSize() - clearSize; 228 229 CommandRecordingContext* recordingContext = device->GetPendingRecordingContext(); 230 ClearBuffer(recordingContext, 0, clearOffset, clearSize); 231 } 232 } 233 234 SetLabelImpl(); 235 236 return {}; 237 } 238 239 Buffer::~Buffer() = default; 240 GetHandle() const241 VkBuffer Buffer::GetHandle() const { 242 return mHandle; 243 } 244 TransitionUsageNow(CommandRecordingContext * recordingContext,wgpu::BufferUsage usage)245 void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext, 246 wgpu::BufferUsage usage) { 247 VkBufferMemoryBarrier barrier; 248 VkPipelineStageFlags srcStages = 0; 249 VkPipelineStageFlags dstStages = 0; 250 251 if (TransitionUsageAndGetResourceBarrier(usage, &barrier, &srcStages, &dstStages)) { 252 ASSERT(srcStages != 0 && dstStages != 0); 253 ToBackend(GetDevice()) 254 ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0, 255 nullptr, 1u, &barrier, 0, nullptr); 256 } 257 } 258 TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage,VkBufferMemoryBarrier * barrier,VkPipelineStageFlags * srcStages,VkPipelineStageFlags * dstStages)259 bool Buffer::TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage, 260 VkBufferMemoryBarrier* barrier, 261 VkPipelineStageFlags* srcStages, 262 VkPipelineStageFlags* dstStages) { 263 bool lastIncludesTarget = IsSubset(usage, mLastUsage); 264 bool lastReadOnly = IsSubset(mLastUsage, kReadOnlyBufferUsages); 265 266 // We can skip transitions to already current read-only usages. 267 if (lastIncludesTarget && lastReadOnly) { 268 return false; 269 } 270 271 // Special-case for the initial transition: Vulkan doesn't allow access flags to be 0. 272 if (mLastUsage == wgpu::BufferUsage::None) { 273 mLastUsage = usage; 274 return false; 275 } 276 277 *srcStages |= VulkanPipelineStage(mLastUsage); 278 *dstStages |= VulkanPipelineStage(usage); 279 280 barrier->sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 281 barrier->pNext = nullptr; 282 barrier->srcAccessMask = VulkanAccessFlags(mLastUsage); 283 barrier->dstAccessMask = VulkanAccessFlags(usage); 284 barrier->srcQueueFamilyIndex = 0; 285 barrier->dstQueueFamilyIndex = 0; 286 barrier->buffer = mHandle; 287 barrier->offset = 0; 288 // VK_WHOLE_SIZE doesn't work on old Windows Intel Vulkan drivers, so we don't use it. 289 barrier->size = GetAllocatedSize(); 290 291 mLastUsage = usage; 292 293 return true; 294 } 295 IsCPUWritableAtCreation() const296 bool Buffer::IsCPUWritableAtCreation() const { 297 // TODO(enga): Handle CPU-visible memory on UMA 298 return mMemoryAllocation.GetMappedPointer() != nullptr; 299 } 300 MapAtCreationImpl()301 MaybeError Buffer::MapAtCreationImpl() { 302 return {}; 303 } 304 MapAsyncImpl(wgpu::MapMode mode,size_t offset,size_t size)305 MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) { 306 Device* device = ToBackend(GetDevice()); 307 308 CommandRecordingContext* recordingContext = device->GetPendingRecordingContext(); 309 310 // TODO(crbug.com/dawn/852): initialize mapped buffer in CPU side. 311 EnsureDataInitialized(recordingContext); 312 313 if (mode & wgpu::MapMode::Read) { 314 TransitionUsageNow(recordingContext, wgpu::BufferUsage::MapRead); 315 } else { 316 ASSERT(mode & wgpu::MapMode::Write); 317 TransitionUsageNow(recordingContext, wgpu::BufferUsage::MapWrite); 318 } 319 return {}; 320 } 321 UnmapImpl()322 void Buffer::UnmapImpl() { 323 // No need to do anything, we keep CPU-visible memory mapped at all time. 324 } 325 GetMappedPointerImpl()326 void* Buffer::GetMappedPointerImpl() { 327 uint8_t* memory = mMemoryAllocation.GetMappedPointer(); 328 ASSERT(memory != nullptr); 329 return memory; 330 } 331 DestroyImpl()332 void Buffer::DestroyImpl() { 333 BufferBase::DestroyImpl(); 334 335 ToBackend(GetDevice())->GetResourceMemoryAllocator()->Deallocate(&mMemoryAllocation); 336 337 if (mHandle != VK_NULL_HANDLE) { 338 ToBackend(GetDevice())->GetFencedDeleter()->DeleteWhenUnused(mHandle); 339 mHandle = VK_NULL_HANDLE; 340 } 341 } 342 EnsureDataInitialized(CommandRecordingContext * recordingContext)343 bool Buffer::EnsureDataInitialized(CommandRecordingContext* recordingContext) { 344 if (!NeedsInitialization()) { 345 return false; 346 } 347 348 InitializeToZero(recordingContext); 349 return true; 350 } 351 EnsureDataInitializedAsDestination(CommandRecordingContext * recordingContext,uint64_t offset,uint64_t size)352 bool Buffer::EnsureDataInitializedAsDestination(CommandRecordingContext* recordingContext, 353 uint64_t offset, 354 uint64_t size) { 355 if (!NeedsInitialization()) { 356 return false; 357 } 358 359 if (IsFullBufferRange(offset, size)) { 360 SetIsDataInitialized(); 361 return false; 362 } 363 364 InitializeToZero(recordingContext); 365 return true; 366 } 367 EnsureDataInitializedAsDestination(CommandRecordingContext * recordingContext,const CopyTextureToBufferCmd * copy)368 bool Buffer::EnsureDataInitializedAsDestination(CommandRecordingContext* recordingContext, 369 const CopyTextureToBufferCmd* copy) { 370 if (!NeedsInitialization()) { 371 return false; 372 } 373 374 if (IsFullBufferOverwrittenInTextureToBufferCopy(copy)) { 375 SetIsDataInitialized(); 376 return false; 377 } 378 379 InitializeToZero(recordingContext); 380 return true; 381 } 382 SetLabelImpl()383 void Buffer::SetLabelImpl() { 384 SetDebugName(ToBackend(GetDevice()), VK_OBJECT_TYPE_BUFFER, 385 reinterpret_cast<uint64_t&>(mHandle), "Dawn_Buffer", GetLabel()); 386 } 387 InitializeToZero(CommandRecordingContext * recordingContext)388 void Buffer::InitializeToZero(CommandRecordingContext* recordingContext) { 389 ASSERT(NeedsInitialization()); 390 391 ClearBuffer(recordingContext, 0u); 392 GetDevice()->IncrementLazyClearCountForTesting(); 393 SetIsDataInitialized(); 394 } 395 ClearBuffer(CommandRecordingContext * recordingContext,uint32_t clearValue,uint64_t offset,uint64_t size)396 void Buffer::ClearBuffer(CommandRecordingContext* recordingContext, 397 uint32_t clearValue, 398 uint64_t offset, 399 uint64_t size) { 400 ASSERT(recordingContext != nullptr); 401 size = size > 0 ? size : GetAllocatedSize(); 402 ASSERT(size > 0); 403 404 TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst); 405 406 Device* device = ToBackend(GetDevice()); 407 // VK_WHOLE_SIZE doesn't work on old Windows Intel Vulkan drivers, so we don't use it. 408 // Note: Allocated size must be a multiple of 4. 409 ASSERT(size % 4 == 0); 410 device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, offset, size, 411 clearValue); 412 } 413 }} // namespace dawn_native::vulkan 414