1 // Copyright 2017 The Dawn Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "dawn_native/d3d12/DeviceD3D12.h" 16 17 #include "common/GPUInfo.h" 18 #include "dawn_native/DynamicUploader.h" 19 #include "dawn_native/Instance.h" 20 #include "dawn_native/d3d12/AdapterD3D12.h" 21 #include "dawn_native/d3d12/BackendD3D12.h" 22 #include "dawn_native/d3d12/BindGroupD3D12.h" 23 #include "dawn_native/d3d12/BindGroupLayoutD3D12.h" 24 #include "dawn_native/d3d12/CommandAllocatorManager.h" 25 #include "dawn_native/d3d12/CommandBufferD3D12.h" 26 #include "dawn_native/d3d12/ComputePipelineD3D12.h" 27 #include "dawn_native/d3d12/D3D11on12Util.h" 28 #include "dawn_native/d3d12/D3D12Error.h" 29 #include "dawn_native/d3d12/PipelineLayoutD3D12.h" 30 #include "dawn_native/d3d12/PlatformFunctions.h" 31 #include "dawn_native/d3d12/QuerySetD3D12.h" 32 #include "dawn_native/d3d12/QueueD3D12.h" 33 #include "dawn_native/d3d12/RenderPipelineD3D12.h" 34 #include "dawn_native/d3d12/ResidencyManagerD3D12.h" 35 #include "dawn_native/d3d12/ResourceAllocatorManagerD3D12.h" 36 #include "dawn_native/d3d12/SamplerD3D12.h" 37 #include "dawn_native/d3d12/SamplerHeapCacheD3D12.h" 38 #include "dawn_native/d3d12/ShaderModuleD3D12.h" 39 #include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h" 40 #include "dawn_native/d3d12/StagingBufferD3D12.h" 41 #include "dawn_native/d3d12/StagingDescriptorAllocatorD3D12.h" 42 #include "dawn_native/d3d12/SwapChainD3D12.h" 43 #include "dawn_native/d3d12/UtilsD3D12.h" 44 45 #include <sstream> 46 47 namespace dawn_native { namespace d3d12 { 48 49 // TODO(dawn:155): Figure out these values. 50 static constexpr uint16_t kShaderVisibleDescriptorHeapSize = 1024; 51 static constexpr uint8_t kAttachmentDescriptorHeapSize = 64; 52 53 // Value may change in the future to better accomodate large clears. 54 static constexpr uint64_t kZeroBufferSize = 1024 * 1024 * 4; // 4 Mb 55 56 static constexpr uint64_t kMaxDebugMessagesToPrint = 5; 57 58 // static Create(Adapter * adapter,const DawnDeviceDescriptor * descriptor)59 ResultOrError<Device*> Device::Create(Adapter* adapter, 60 const DawnDeviceDescriptor* descriptor) { 61 Ref<Device> device = AcquireRef(new Device(adapter, descriptor)); 62 DAWN_TRY(device->Initialize()); 63 return device.Detach(); 64 } 65 Initialize()66 MaybeError Device::Initialize() { 67 InitTogglesFromDriver(); 68 69 mD3d12Device = ToBackend(GetAdapter())->GetDevice(); 70 71 ASSERT(mD3d12Device != nullptr); 72 73 // Create device-global objects 74 D3D12_COMMAND_QUEUE_DESC queueDesc = {}; 75 queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; 76 queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; 77 DAWN_TRY( 78 CheckHRESULT(mD3d12Device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&mCommandQueue)), 79 "D3D12 create command queue")); 80 81 if (IsFeatureEnabled(Feature::TimestampQuery)) { 82 // Get GPU timestamp counter frequency (in ticks/second). This fails if the specified 83 // command queue doesn't support timestamps. D3D12_COMMAND_LIST_TYPE_DIRECT queues 84 // always support timestamps except where there are bugs in Windows container and vGPU 85 // implementations. 86 uint64_t frequency; 87 DAWN_TRY(CheckHRESULT(mCommandQueue->GetTimestampFrequency(&frequency), 88 "D3D12 get timestamp frequency")); 89 // Calculate the period in nanoseconds by the frequency. 90 mTimestampPeriod = static_cast<float>(1e9) / frequency; 91 } 92 93 // If PIX is not attached, the QueryInterface fails. Hence, no need to check the return 94 // value. 95 mCommandQueue.As(&mD3d12SharingContract); 96 97 DAWN_TRY( 98 CheckHRESULT(mD3d12Device->CreateFence(uint64_t(GetLastSubmittedCommandSerial()), 99 D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&mFence)), 100 "D3D12 create fence")); 101 102 mFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); 103 ASSERT(mFenceEvent != nullptr); 104 105 // Initialize backend services 106 mCommandAllocatorManager = std::make_unique<CommandAllocatorManager>(this); 107 108 // Zero sized allocator is never requested and does not need to exist. 109 for (uint32_t countIndex = 0; countIndex < kNumViewDescriptorAllocators; countIndex++) { 110 mViewAllocators[countIndex + 1] = std::make_unique<StagingDescriptorAllocator>( 111 this, 1u << countIndex, kShaderVisibleDescriptorHeapSize, 112 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); 113 } 114 115 for (uint32_t countIndex = 0; countIndex < kNumSamplerDescriptorAllocators; countIndex++) { 116 mSamplerAllocators[countIndex + 1] = std::make_unique<StagingDescriptorAllocator>( 117 this, 1u << countIndex, kShaderVisibleDescriptorHeapSize, 118 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); 119 } 120 121 mRenderTargetViewAllocator = std::make_unique<StagingDescriptorAllocator>( 122 this, 1, kAttachmentDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); 123 124 mDepthStencilViewAllocator = std::make_unique<StagingDescriptorAllocator>( 125 this, 1, kAttachmentDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); 126 127 mSamplerHeapCache = std::make_unique<SamplerHeapCache>(this); 128 129 mResidencyManager = std::make_unique<ResidencyManager>(this); 130 mResourceAllocatorManager = std::make_unique<ResourceAllocatorManager>(this); 131 132 // ShaderVisibleDescriptorAllocators use the ResidencyManager and must be initialized after. 133 DAWN_TRY_ASSIGN( 134 mSamplerShaderVisibleDescriptorAllocator, 135 ShaderVisibleDescriptorAllocator::Create(this, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); 136 137 DAWN_TRY_ASSIGN( 138 mViewShaderVisibleDescriptorAllocator, 139 ShaderVisibleDescriptorAllocator::Create(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); 140 141 // Initialize indirect commands 142 D3D12_INDIRECT_ARGUMENT_DESC argumentDesc = {}; 143 argumentDesc.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; 144 145 D3D12_COMMAND_SIGNATURE_DESC programDesc = {}; 146 programDesc.ByteStride = 3 * sizeof(uint32_t); 147 programDesc.NumArgumentDescs = 1; 148 programDesc.pArgumentDescs = &argumentDesc; 149 150 GetD3D12Device()->CreateCommandSignature(&programDesc, NULL, 151 IID_PPV_ARGS(&mDispatchIndirectSignature)); 152 153 argumentDesc.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW; 154 programDesc.ByteStride = 4 * sizeof(uint32_t); 155 156 GetD3D12Device()->CreateCommandSignature(&programDesc, NULL, 157 IID_PPV_ARGS(&mDrawIndirectSignature)); 158 159 argumentDesc.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED; 160 programDesc.ByteStride = 5 * sizeof(uint32_t); 161 162 GetD3D12Device()->CreateCommandSignature(&programDesc, NULL, 163 IID_PPV_ARGS(&mDrawIndexedIndirectSignature)); 164 165 DAWN_TRY(DeviceBase::Initialize(new Queue(this))); 166 // Device shouldn't be used until after DeviceBase::Initialize so we must wait until after 167 // device initialization to call NextSerial 168 DAWN_TRY(NextSerial()); 169 170 // The environment can only use DXC when it's available. Override the decision if it is not 171 // applicable. 172 DAWN_TRY(ApplyUseDxcToggle()); 173 174 DAWN_TRY(CreateZeroBuffer()); 175 176 return {}; 177 } 178 ~Device()179 Device::~Device() { 180 Destroy(); 181 } 182 GetD3D12Device() const183 ID3D12Device* Device::GetD3D12Device() const { 184 return mD3d12Device.Get(); 185 } 186 GetCommandQueue() const187 ComPtr<ID3D12CommandQueue> Device::GetCommandQueue() const { 188 return mCommandQueue; 189 } 190 GetSharingContract() const191 ID3D12SharingContract* Device::GetSharingContract() const { 192 return mD3d12SharingContract.Get(); 193 } 194 GetDispatchIndirectSignature() const195 ComPtr<ID3D12CommandSignature> Device::GetDispatchIndirectSignature() const { 196 return mDispatchIndirectSignature; 197 } 198 GetDrawIndirectSignature() const199 ComPtr<ID3D12CommandSignature> Device::GetDrawIndirectSignature() const { 200 return mDrawIndirectSignature; 201 } 202 GetDrawIndexedIndirectSignature() const203 ComPtr<ID3D12CommandSignature> Device::GetDrawIndexedIndirectSignature() const { 204 return mDrawIndexedIndirectSignature; 205 } 206 GetFactory() const207 ComPtr<IDXGIFactory4> Device::GetFactory() const { 208 return ToBackend(GetAdapter())->GetBackend()->GetFactory(); 209 } 210 ApplyUseDxcToggle()211 MaybeError Device::ApplyUseDxcToggle() { 212 if (!ToBackend(GetAdapter())->GetBackend()->GetFunctions()->IsDXCAvailable()) { 213 ForceSetToggle(Toggle::UseDXC, false); 214 } else if (IsFeatureEnabled(Feature::ShaderFloat16)) { 215 // Currently we can only use DXC to compile HLSL shaders using float16. 216 ForceSetToggle(Toggle::UseDXC, true); 217 } 218 219 if (IsToggleEnabled(Toggle::UseDXC)) { 220 DAWN_TRY(ToBackend(GetAdapter())->GetBackend()->EnsureDxcCompiler()); 221 DAWN_TRY(ToBackend(GetAdapter())->GetBackend()->EnsureDxcLibrary()); 222 DAWN_TRY(ToBackend(GetAdapter())->GetBackend()->EnsureDxcValidator()); 223 } 224 225 return {}; 226 } 227 GetDxcLibrary() const228 ComPtr<IDxcLibrary> Device::GetDxcLibrary() const { 229 return ToBackend(GetAdapter())->GetBackend()->GetDxcLibrary(); 230 } 231 GetDxcCompiler() const232 ComPtr<IDxcCompiler> Device::GetDxcCompiler() const { 233 return ToBackend(GetAdapter())->GetBackend()->GetDxcCompiler(); 234 } 235 GetDxcValidator() const236 ComPtr<IDxcValidator> Device::GetDxcValidator() const { 237 return ToBackend(GetAdapter())->GetBackend()->GetDxcValidator(); 238 } 239 GetFunctions() const240 const PlatformFunctions* Device::GetFunctions() const { 241 return ToBackend(GetAdapter())->GetBackend()->GetFunctions(); 242 } 243 GetCommandAllocatorManager() const244 CommandAllocatorManager* Device::GetCommandAllocatorManager() const { 245 return mCommandAllocatorManager.get(); 246 } 247 GetResidencyManager() const248 ResidencyManager* Device::GetResidencyManager() const { 249 return mResidencyManager.get(); 250 } 251 GetPendingCommandContext()252 ResultOrError<CommandRecordingContext*> Device::GetPendingCommandContext() { 253 // Callers of GetPendingCommandList do so to record commands. Only reserve a command 254 // allocator when it is needed so we don't submit empty command lists 255 if (!mPendingCommands.IsOpen()) { 256 DAWN_TRY(mPendingCommands.Open(mD3d12Device.Get(), mCommandAllocatorManager.get())); 257 } 258 return &mPendingCommands; 259 } 260 CreateZeroBuffer()261 MaybeError Device::CreateZeroBuffer() { 262 BufferDescriptor zeroBufferDescriptor; 263 zeroBufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; 264 zeroBufferDescriptor.size = kZeroBufferSize; 265 zeroBufferDescriptor.label = "ZeroBuffer_Internal"; 266 DAWN_TRY_ASSIGN(mZeroBuffer, Buffer::Create(this, &zeroBufferDescriptor)); 267 268 return {}; 269 } 270 ClearBufferToZero(CommandRecordingContext * commandContext,BufferBase * destination,uint64_t offset,uint64_t size)271 MaybeError Device::ClearBufferToZero(CommandRecordingContext* commandContext, 272 BufferBase* destination, 273 uint64_t offset, 274 uint64_t size) { 275 // TODO(crbug.com/dawn/852): It would be ideal to clear the buffer in CreateZeroBuffer, but 276 // the allocation of the staging buffer causes various end2end tests that monitor heap usage 277 // to fail if it's done during device creation. Perhaps ClearUnorderedAccessView*() can be 278 // used to avoid that. 279 if (!mZeroBuffer->IsDataInitialized()) { 280 DynamicUploader* uploader = GetDynamicUploader(); 281 UploadHandle uploadHandle; 282 DAWN_TRY_ASSIGN(uploadHandle, 283 uploader->Allocate(kZeroBufferSize, GetPendingCommandSerial(), 284 kCopyBufferToBufferOffsetAlignment)); 285 286 memset(uploadHandle.mappedBuffer, 0u, kZeroBufferSize); 287 288 CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer, 289 uploadHandle.startOffset, mZeroBuffer.Get(), 0, 290 kZeroBufferSize); 291 292 mZeroBuffer->SetIsDataInitialized(); 293 } 294 295 Buffer* dstBuffer = ToBackend(destination); 296 297 // Necessary to ensure residency of the zero buffer. 298 mZeroBuffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopySrc); 299 dstBuffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopyDst); 300 301 while (size > 0) { 302 uint64_t copySize = std::min(kZeroBufferSize, size); 303 commandContext->GetCommandList()->CopyBufferRegion( 304 dstBuffer->GetD3D12Resource(), offset, mZeroBuffer->GetD3D12Resource(), 0, 305 copySize); 306 307 offset += copySize; 308 size -= copySize; 309 } 310 311 return {}; 312 } 313 TickImpl()314 MaybeError Device::TickImpl() { 315 // Perform cleanup operations to free unused objects 316 ExecutionSerial completedSerial = GetCompletedCommandSerial(); 317 318 mResourceAllocatorManager->Tick(completedSerial); 319 DAWN_TRY(mCommandAllocatorManager->Tick(completedSerial)); 320 mViewShaderVisibleDescriptorAllocator->Tick(completedSerial); 321 mSamplerShaderVisibleDescriptorAllocator->Tick(completedSerial); 322 mRenderTargetViewAllocator->Tick(completedSerial); 323 mDepthStencilViewAllocator->Tick(completedSerial); 324 mUsedComObjectRefs.ClearUpTo(completedSerial); 325 326 if (mPendingCommands.IsOpen()) { 327 DAWN_TRY(ExecutePendingCommandContext()); 328 DAWN_TRY(NextSerial()); 329 } 330 331 DAWN_TRY(CheckDebugLayerAndGenerateErrors()); 332 333 return {}; 334 } 335 NextSerial()336 MaybeError Device::NextSerial() { 337 IncrementLastSubmittedCommandSerial(); 338 339 return CheckHRESULT( 340 mCommandQueue->Signal(mFence.Get(), uint64_t(GetLastSubmittedCommandSerial())), 341 "D3D12 command queue signal fence"); 342 } 343 WaitForSerial(ExecutionSerial serial)344 MaybeError Device::WaitForSerial(ExecutionSerial serial) { 345 DAWN_TRY(CheckPassedSerials()); 346 if (GetCompletedCommandSerial() < serial) { 347 DAWN_TRY(CheckHRESULT(mFence->SetEventOnCompletion(uint64_t(serial), mFenceEvent), 348 "D3D12 set event on completion")); 349 WaitForSingleObject(mFenceEvent, INFINITE); 350 DAWN_TRY(CheckPassedSerials()); 351 } 352 return {}; 353 } 354 CheckAndUpdateCompletedSerials()355 ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() { 356 ExecutionSerial completedSerial = ExecutionSerial(mFence->GetCompletedValue()); 357 if (DAWN_UNLIKELY(completedSerial == ExecutionSerial(UINT64_MAX))) { 358 // GetCompletedValue returns UINT64_MAX if the device was removed. 359 // Try to query the failure reason. 360 DAWN_TRY(CheckHRESULT(mD3d12Device->GetDeviceRemovedReason(), 361 "ID3D12Device::GetDeviceRemovedReason")); 362 // Otherwise, return a generic device lost error. 363 return DAWN_DEVICE_LOST_ERROR("Device lost"); 364 } 365 366 if (completedSerial <= GetCompletedCommandSerial()) { 367 return ExecutionSerial(0); 368 } 369 370 return completedSerial; 371 } 372 ReferenceUntilUnused(ComPtr<IUnknown> object)373 void Device::ReferenceUntilUnused(ComPtr<IUnknown> object) { 374 mUsedComObjectRefs.Enqueue(object, GetPendingCommandSerial()); 375 } 376 ExecutePendingCommandContext()377 MaybeError Device::ExecutePendingCommandContext() { 378 return mPendingCommands.ExecuteCommandList(this); 379 } 380 CreateBindGroupImpl(const BindGroupDescriptor * descriptor)381 ResultOrError<Ref<BindGroupBase>> Device::CreateBindGroupImpl( 382 const BindGroupDescriptor* descriptor) { 383 return BindGroup::Create(this, descriptor); 384 } CreateBindGroupLayoutImpl(const BindGroupLayoutDescriptor * descriptor,PipelineCompatibilityToken pipelineCompatibilityToken)385 ResultOrError<Ref<BindGroupLayoutBase>> Device::CreateBindGroupLayoutImpl( 386 const BindGroupLayoutDescriptor* descriptor, 387 PipelineCompatibilityToken pipelineCompatibilityToken) { 388 return BindGroupLayout::Create(this, descriptor, pipelineCompatibilityToken); 389 } CreateBufferImpl(const BufferDescriptor * descriptor)390 ResultOrError<Ref<BufferBase>> Device::CreateBufferImpl(const BufferDescriptor* descriptor) { 391 return Buffer::Create(this, descriptor); 392 } CreateCommandBuffer(CommandEncoder * encoder,const CommandBufferDescriptor * descriptor)393 ResultOrError<Ref<CommandBufferBase>> Device::CreateCommandBuffer( 394 CommandEncoder* encoder, 395 const CommandBufferDescriptor* descriptor) { 396 return CommandBuffer::Create(encoder, descriptor); 397 } CreateUninitializedComputePipelineImpl(const ComputePipelineDescriptor * descriptor)398 Ref<ComputePipelineBase> Device::CreateUninitializedComputePipelineImpl( 399 const ComputePipelineDescriptor* descriptor) { 400 return ComputePipeline::CreateUninitialized(this, descriptor); 401 } CreatePipelineLayoutImpl(const PipelineLayoutDescriptor * descriptor)402 ResultOrError<Ref<PipelineLayoutBase>> Device::CreatePipelineLayoutImpl( 403 const PipelineLayoutDescriptor* descriptor) { 404 return PipelineLayout::Create(this, descriptor); 405 } CreateQuerySetImpl(const QuerySetDescriptor * descriptor)406 ResultOrError<Ref<QuerySetBase>> Device::CreateQuerySetImpl( 407 const QuerySetDescriptor* descriptor) { 408 return QuerySet::Create(this, descriptor); 409 } CreateUninitializedRenderPipelineImpl(const RenderPipelineDescriptor * descriptor)410 Ref<RenderPipelineBase> Device::CreateUninitializedRenderPipelineImpl( 411 const RenderPipelineDescriptor* descriptor) { 412 return RenderPipeline::CreateUninitialized(this, descriptor); 413 } CreateSamplerImpl(const SamplerDescriptor * descriptor)414 ResultOrError<Ref<SamplerBase>> Device::CreateSamplerImpl(const SamplerDescriptor* descriptor) { 415 return Sampler::Create(this, descriptor); 416 } CreateShaderModuleImpl(const ShaderModuleDescriptor * descriptor,ShaderModuleParseResult * parseResult)417 ResultOrError<Ref<ShaderModuleBase>> Device::CreateShaderModuleImpl( 418 const ShaderModuleDescriptor* descriptor, 419 ShaderModuleParseResult* parseResult) { 420 return ShaderModule::Create(this, descriptor, parseResult); 421 } CreateSwapChainImpl(const SwapChainDescriptor * descriptor)422 ResultOrError<Ref<SwapChainBase>> Device::CreateSwapChainImpl( 423 const SwapChainDescriptor* descriptor) { 424 return OldSwapChain::Create(this, descriptor); 425 } CreateSwapChainImpl(Surface * surface,NewSwapChainBase * previousSwapChain,const SwapChainDescriptor * descriptor)426 ResultOrError<Ref<NewSwapChainBase>> Device::CreateSwapChainImpl( 427 Surface* surface, 428 NewSwapChainBase* previousSwapChain, 429 const SwapChainDescriptor* descriptor) { 430 return SwapChain::Create(this, surface, previousSwapChain, descriptor); 431 } CreateTextureImpl(const TextureDescriptor * descriptor)432 ResultOrError<Ref<TextureBase>> Device::CreateTextureImpl(const TextureDescriptor* descriptor) { 433 return Texture::Create(this, descriptor); 434 } CreateTextureViewImpl(TextureBase * texture,const TextureViewDescriptor * descriptor)435 ResultOrError<Ref<TextureViewBase>> Device::CreateTextureViewImpl( 436 TextureBase* texture, 437 const TextureViewDescriptor* descriptor) { 438 return TextureView::Create(texture, descriptor); 439 } InitializeComputePipelineAsyncImpl(Ref<ComputePipelineBase> computePipeline,WGPUCreateComputePipelineAsyncCallback callback,void * userdata)440 void Device::InitializeComputePipelineAsyncImpl(Ref<ComputePipelineBase> computePipeline, 441 WGPUCreateComputePipelineAsyncCallback callback, 442 void* userdata) { 443 ComputePipeline::InitializeAsync(std::move(computePipeline), callback, userdata); 444 } InitializeRenderPipelineAsyncImpl(Ref<RenderPipelineBase> renderPipeline,WGPUCreateRenderPipelineAsyncCallback callback,void * userdata)445 void Device::InitializeRenderPipelineAsyncImpl(Ref<RenderPipelineBase> renderPipeline, 446 WGPUCreateRenderPipelineAsyncCallback callback, 447 void* userdata) { 448 RenderPipeline::InitializeAsync(std::move(renderPipeline), callback, userdata); 449 } 450 CreateStagingBuffer(size_t size)451 ResultOrError<std::unique_ptr<StagingBufferBase>> Device::CreateStagingBuffer(size_t size) { 452 std::unique_ptr<StagingBufferBase> stagingBuffer = 453 std::make_unique<StagingBuffer>(size, this); 454 DAWN_TRY(stagingBuffer->Initialize()); 455 return std::move(stagingBuffer); 456 } 457 CopyFromStagingToBuffer(StagingBufferBase * source,uint64_t sourceOffset,BufferBase * destination,uint64_t destinationOffset,uint64_t size)458 MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source, 459 uint64_t sourceOffset, 460 BufferBase* destination, 461 uint64_t destinationOffset, 462 uint64_t size) { 463 CommandRecordingContext* commandRecordingContext; 464 DAWN_TRY_ASSIGN(commandRecordingContext, GetPendingCommandContext()); 465 466 Buffer* dstBuffer = ToBackend(destination); 467 468 bool cleared; 469 DAWN_TRY_ASSIGN(cleared, dstBuffer->EnsureDataInitializedAsDestination( 470 commandRecordingContext, destinationOffset, size)); 471 DAWN_UNUSED(cleared); 472 473 CopyFromStagingToBufferImpl(commandRecordingContext, source, sourceOffset, destination, 474 destinationOffset, size); 475 476 return {}; 477 } 478 CopyFromStagingToBufferImpl(CommandRecordingContext * commandContext,StagingBufferBase * source,uint64_t sourceOffset,BufferBase * destination,uint64_t destinationOffset,uint64_t size)479 void Device::CopyFromStagingToBufferImpl(CommandRecordingContext* commandContext, 480 StagingBufferBase* source, 481 uint64_t sourceOffset, 482 BufferBase* destination, 483 uint64_t destinationOffset, 484 uint64_t size) { 485 ASSERT(commandContext != nullptr); 486 Buffer* dstBuffer = ToBackend(destination); 487 StagingBuffer* srcBuffer = ToBackend(source); 488 dstBuffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopyDst); 489 490 commandContext->GetCommandList()->CopyBufferRegion( 491 dstBuffer->GetD3D12Resource(), destinationOffset, srcBuffer->GetResource(), 492 sourceOffset, size); 493 } 494 CopyFromStagingToTexture(const StagingBufferBase * source,const TextureDataLayout & src,TextureCopy * dst,const Extent3D & copySizePixels)495 MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source, 496 const TextureDataLayout& src, 497 TextureCopy* dst, 498 const Extent3D& copySizePixels) { 499 CommandRecordingContext* commandContext; 500 DAWN_TRY_ASSIGN(commandContext, GetPendingCommandContext()); 501 Texture* texture = ToBackend(dst->texture.Get()); 502 ASSERT(texture->GetDimension() != wgpu::TextureDimension::e1D); 503 504 SubresourceRange range = GetSubresourcesAffectedByCopy(*dst, copySizePixels); 505 506 if (IsCompleteSubresourceCopiedTo(texture, copySizePixels, dst->mipLevel)) { 507 texture->SetIsSubresourceContentInitialized(true, range); 508 } else { 509 texture->EnsureSubresourceContentInitialized(commandContext, range); 510 } 511 512 texture->TrackUsageAndTransitionNow(commandContext, wgpu::TextureUsage::CopyDst, range); 513 514 RecordCopyBufferToTexture(commandContext, *dst, ToBackend(source)->GetResource(), 515 src.offset, src.bytesPerRow, src.rowsPerImage, copySizePixels, 516 texture, range.aspects); 517 518 return {}; 519 } 520 DeallocateMemory(ResourceHeapAllocation & allocation)521 void Device::DeallocateMemory(ResourceHeapAllocation& allocation) { 522 mResourceAllocatorManager->DeallocateMemory(allocation); 523 } 524 AllocateMemory(D3D12_HEAP_TYPE heapType,const D3D12_RESOURCE_DESC & resourceDescriptor,D3D12_RESOURCE_STATES initialUsage)525 ResultOrError<ResourceHeapAllocation> Device::AllocateMemory( 526 D3D12_HEAP_TYPE heapType, 527 const D3D12_RESOURCE_DESC& resourceDescriptor, 528 D3D12_RESOURCE_STATES initialUsage) { 529 return mResourceAllocatorManager->AllocateMemory(heapType, resourceDescriptor, 530 initialUsage); 531 } 532 CreateExternalTexture(const TextureDescriptor * descriptor,ComPtr<ID3D12Resource> d3d12Texture,Ref<D3D11on12ResourceCacheEntry> d3d11on12Resource,ExternalMutexSerial acquireMutexKey,ExternalMutexSerial releaseMutexKey,bool isSwapChainTexture,bool isInitialized)533 Ref<TextureBase> Device::CreateExternalTexture( 534 const TextureDescriptor* descriptor, 535 ComPtr<ID3D12Resource> d3d12Texture, 536 Ref<D3D11on12ResourceCacheEntry> d3d11on12Resource, 537 ExternalMutexSerial acquireMutexKey, 538 ExternalMutexSerial releaseMutexKey, 539 bool isSwapChainTexture, 540 bool isInitialized) { 541 Ref<Texture> dawnTexture; 542 if (ConsumedError( 543 Texture::CreateExternalImage(this, descriptor, std::move(d3d12Texture), 544 std::move(d3d11on12Resource), acquireMutexKey, 545 releaseMutexKey, isSwapChainTexture, isInitialized), 546 &dawnTexture)) { 547 return nullptr; 548 } 549 return {dawnTexture}; 550 } 551 GetOrCreateD3D11on12Device()552 ComPtr<ID3D11On12Device> Device::GetOrCreateD3D11on12Device() { 553 if (mD3d11On12Device == nullptr) { 554 ComPtr<ID3D11Device> d3d11Device; 555 D3D_FEATURE_LEVEL d3dFeatureLevel; 556 IUnknown* const iUnknownQueue = mCommandQueue.Get(); 557 if (FAILED(GetFunctions()->d3d11on12CreateDevice(mD3d12Device.Get(), 0, nullptr, 0, 558 &iUnknownQueue, 1, 1, &d3d11Device, 559 nullptr, &d3dFeatureLevel))) { 560 return nullptr; 561 } 562 563 ComPtr<ID3D11On12Device> d3d11on12Device; 564 HRESULT hr = d3d11Device.As(&d3d11on12Device); 565 ASSERT(SUCCEEDED(hr)); 566 567 mD3d11On12Device = std::move(d3d11on12Device); 568 } 569 return mD3d11On12Device; 570 } 571 GetDeviceInfo() const572 const D3D12DeviceInfo& Device::GetDeviceInfo() const { 573 return ToBackend(GetAdapter())->GetDeviceInfo(); 574 } 575 InitTogglesFromDriver()576 void Device::InitTogglesFromDriver() { 577 const bool useResourceHeapTier2 = (GetDeviceInfo().resourceHeapTier >= 2); 578 SetToggle(Toggle::UseD3D12ResourceHeapTier2, useResourceHeapTier2); 579 SetToggle(Toggle::UseD3D12RenderPass, GetDeviceInfo().supportsRenderPass); 580 SetToggle(Toggle::UseD3D12ResidencyManagement, true); 581 SetToggle(Toggle::UseDXC, false); 582 583 // Disable optimizations when using FXC 584 // See https://crbug.com/dawn/1203 585 SetToggle(Toggle::FxcOptimizations, false); 586 587 // By default use the maximum shader-visible heap size allowed. 588 SetToggle(Toggle::UseD3D12SmallShaderVisibleHeapForTesting, false); 589 590 PCIInfo pciInfo = GetAdapter()->GetPCIInfo(); 591 592 // Currently this workaround is only needed on Intel Gen9 and Gen9.5 GPUs. 593 // See http://crbug.com/1161355 for more information. 594 if (gpu_info::IsIntel(pciInfo.vendorId) && 595 (gpu_info::IsSkylake(pciInfo.deviceId) || gpu_info::IsKabylake(pciInfo.deviceId) || 596 gpu_info::IsCoffeelake(pciInfo.deviceId))) { 597 constexpr gpu_info::D3DDriverVersion kFirstDriverVersionWithFix = {30, 0, 100, 9864}; 598 if (gpu_info::CompareD3DDriverVersion(pciInfo.vendorId, 599 ToBackend(GetAdapter())->GetDriverVersion(), 600 kFirstDriverVersionWithFix) < 0) { 601 SetToggle( 602 Toggle::UseTempBufferInSmallFormatTextureToTextureCopyFromGreaterToLessMipLevel, 603 true); 604 } 605 } 606 } 607 WaitForIdleForDestruction()608 MaybeError Device::WaitForIdleForDestruction() { 609 // Immediately forget about all pending commands 610 mPendingCommands.Release(); 611 612 DAWN_TRY(NextSerial()); 613 // Wait for all in-flight commands to finish executing 614 DAWN_TRY(WaitForSerial(GetLastSubmittedCommandSerial())); 615 616 return {}; 617 } 618 CheckDebugLayerAndGenerateErrors()619 MaybeError Device::CheckDebugLayerAndGenerateErrors() { 620 if (!GetAdapter()->GetInstance()->IsBackendValidationEnabled()) { 621 return {}; 622 } 623 624 ComPtr<ID3D12InfoQueue> infoQueue; 625 DAWN_TRY(CheckHRESULT(mD3d12Device.As(&infoQueue), 626 "D3D12 QueryInterface ID3D12Device to ID3D12InfoQueue")); 627 uint64_t totalErrors = infoQueue->GetNumStoredMessagesAllowedByRetrievalFilter(); 628 629 // Check if any errors have occurred otherwise we would be creating an empty error. Note 630 // that we use GetNumStoredMessagesAllowedByRetrievalFilter instead of GetNumStoredMessages 631 // because we only convert WARNINGS or higher messages to dawn errors. 632 if (totalErrors == 0) { 633 return {}; 634 } 635 636 std::ostringstream messages; 637 uint64_t errorsToPrint = std::min(kMaxDebugMessagesToPrint, totalErrors); 638 for (uint64_t i = 0; i < errorsToPrint; ++i) { 639 SIZE_T messageLength = 0; 640 HRESULT hr = infoQueue->GetMessage(i, nullptr, &messageLength); 641 if (FAILED(hr)) { 642 messages << " ID3D12InfoQueue::GetMessage failed with " << hr << '\n'; 643 continue; 644 } 645 646 std::unique_ptr<uint8_t[]> messageData(new uint8_t[messageLength]); 647 D3D12_MESSAGE* message = reinterpret_cast<D3D12_MESSAGE*>(messageData.get()); 648 hr = infoQueue->GetMessage(i, message, &messageLength); 649 if (FAILED(hr)) { 650 messages << " ID3D12InfoQueue::GetMessage failed with " << hr << '\n'; 651 continue; 652 } 653 654 messages << message->pDescription << " (" << message->ID << ")\n"; 655 } 656 if (errorsToPrint < totalErrors) { 657 messages << (totalErrors - errorsToPrint) << " messages silenced\n"; 658 } 659 // We only print up to the first kMaxDebugMessagesToPrint errors 660 infoQueue->ClearStoredMessages(); 661 662 return DAWN_INTERNAL_ERROR(messages.str()); 663 } 664 DestroyImpl()665 void Device::DestroyImpl() { 666 ASSERT(GetState() == State::Disconnected); 667 668 // Immediately forget about all pending commands for the case where device is lost on its 669 // own and WaitForIdleForDestruction isn't called. 670 mPendingCommands.Release(); 671 672 if (mFenceEvent != nullptr) { 673 ::CloseHandle(mFenceEvent); 674 } 675 676 // Release recycled resource heaps. 677 if (mResourceAllocatorManager != nullptr) { 678 mResourceAllocatorManager->DestroyPool(); 679 } 680 681 // We need to handle clearing up com object refs that were enqeued after TickImpl 682 mUsedComObjectRefs.ClearUpTo(std::numeric_limits<ExecutionSerial>::max()); 683 684 ASSERT(mUsedComObjectRefs.Empty()); 685 ASSERT(!mPendingCommands.IsOpen()); 686 } 687 GetViewShaderVisibleDescriptorAllocator() const688 ShaderVisibleDescriptorAllocator* Device::GetViewShaderVisibleDescriptorAllocator() const { 689 return mViewShaderVisibleDescriptorAllocator.get(); 690 } 691 GetSamplerShaderVisibleDescriptorAllocator() const692 ShaderVisibleDescriptorAllocator* Device::GetSamplerShaderVisibleDescriptorAllocator() const { 693 return mSamplerShaderVisibleDescriptorAllocator.get(); 694 } 695 GetViewStagingDescriptorAllocator(uint32_t descriptorCount) const696 StagingDescriptorAllocator* Device::GetViewStagingDescriptorAllocator( 697 uint32_t descriptorCount) const { 698 ASSERT(descriptorCount <= kMaxViewDescriptorsPerBindGroup); 699 // This is Log2 of the next power of two, plus 1. 700 uint32_t allocatorIndex = descriptorCount == 0 ? 0 : Log2Ceil(descriptorCount) + 1; 701 return mViewAllocators[allocatorIndex].get(); 702 } 703 GetSamplerStagingDescriptorAllocator(uint32_t descriptorCount) const704 StagingDescriptorAllocator* Device::GetSamplerStagingDescriptorAllocator( 705 uint32_t descriptorCount) const { 706 ASSERT(descriptorCount <= kMaxSamplerDescriptorsPerBindGroup); 707 // This is Log2 of the next power of two, plus 1. 708 uint32_t allocatorIndex = descriptorCount == 0 ? 0 : Log2Ceil(descriptorCount) + 1; 709 return mSamplerAllocators[allocatorIndex].get(); 710 } 711 GetRenderTargetViewAllocator() const712 StagingDescriptorAllocator* Device::GetRenderTargetViewAllocator() const { 713 return mRenderTargetViewAllocator.get(); 714 } 715 GetDepthStencilViewAllocator() const716 StagingDescriptorAllocator* Device::GetDepthStencilViewAllocator() const { 717 return mDepthStencilViewAllocator.get(); 718 } 719 GetSamplerHeapCache()720 SamplerHeapCache* Device::GetSamplerHeapCache() { 721 return mSamplerHeapCache.get(); 722 } 723 GetOptimalBytesPerRowAlignment() const724 uint32_t Device::GetOptimalBytesPerRowAlignment() const { 725 return D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; 726 } 727 728 // TODO(dawn:512): Once we optimize DynamicUploader allocation with offsets we 729 // should make this return D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT = 512. 730 // Current implementations would try to allocate additional 511 bytes, 731 // so we return 1 and let ComputeTextureCopySplits take care of the alignment. GetOptimalBufferToTextureCopyOffsetAlignment() const732 uint64_t Device::GetOptimalBufferToTextureCopyOffsetAlignment() const { 733 return 1; 734 } 735 GetTimestampPeriodInNS() const736 float Device::GetTimestampPeriodInNS() const { 737 return mTimestampPeriod; 738 } 739 ShouldDuplicateNumWorkgroupsForDispatchIndirect(ComputePipelineBase * computePipeline) const740 bool Device::ShouldDuplicateNumWorkgroupsForDispatchIndirect( 741 ComputePipelineBase* computePipeline) const { 742 return ToBackend(computePipeline)->UsesNumWorkgroups(); 743 } 744 745 }} // namespace dawn_native::d3d12 746