1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "device_vk.h"
17
18 #include <cinttypes>
19 #include <cstdint>
20 #include <vulkan/vulkan_core.h>
21
22 #include <base/containers/vector.h>
23 #include <base/math/mathf.h>
24 #include <render/intf_render_context.h>
25 #include <render/namespace.h>
26
27 #include "device/device.h"
28 #include "device/gpu_program_util.h"
29 #include "device/gpu_resource_manager.h"
30 #include "device/shader_manager.h"
31 #include "device/shader_module.h"
32 #include "perf/cpu_perf_scope.h"
33 #include "platform_vk.h"
34 #include "util/log.h"
35 #include "vulkan/create_functions_vk.h"
36 #include "vulkan/gpu_buffer_vk.h"
37 #include "vulkan/gpu_image_vk.h"
38 #include "vulkan/gpu_memory_allocator_vk.h"
39 #include "vulkan/gpu_program_vk.h"
40 #include "vulkan/gpu_sampler_vk.h"
41 #include "vulkan/gpu_semaphore_vk.h"
42 #include "vulkan/node_context_descriptor_set_manager_vk.h"
43 #include "vulkan/node_context_pool_manager_vk.h"
44 #include "vulkan/pipeline_state_object_vk.h"
45 #include "vulkan/render_backend_vk.h"
46 #include "vulkan/render_frame_sync_vk.h"
47 #include "vulkan/shader_module_vk.h"
48 #include "vulkan/swapchain_vk.h"
49 #include "vulkan/validate_vk.h"
50
51 using namespace BASE_NS;
52
53 RENDER_BEGIN_NAMESPACE()
54 namespace {
55 constexpr string_view DEVICE_EXTENSION_SWAPCHAIN { VK_KHR_SWAPCHAIN_EXTENSION_NAME };
56
57 // promoted to 1.2, requires VK_KHR_create_renderpass2
58 constexpr string_view DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE { VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME };
59 constexpr string_view DEVICE_EXTENSION_CREATE_RENDERPASS2 { VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME };
60
61 constexpr string_view DEVICE_EXTENSION_EXTERNAL_MEMORY { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME };
62 constexpr string_view DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2 { VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME };
63 constexpr string_view DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME };
64 constexpr string_view DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN { VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME };
65 constexpr string_view DEVICE_EXTENSION_MULTIVIEW { VK_KHR_MULTIVIEW_EXTENSION_NAME };
66 constexpr string_view DEVICE_EXTENSION_MAINTENANCE4 = VK_KHR_MAINTENANCE_4_EXTENSION_NAME;
67 constexpr string_view DEVICE_EXTENSION_DESCRIPTOR_INDEXING = VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME;
68
69 struct ChainWrapper {
70 void** ppNextFeatures { nullptr };
71 void** ppNextProperties { nullptr };
72 };
73
74 struct PhysicalDeviceYcbcrStructsVk {
75 VkPhysicalDeviceSamplerYcbcrConversionFeatures ycbcrConversionFeatures {};
76 };
77
78 #if (RENDER_VULKAN_FSR_ENABLED == 1)
79 struct PhysicalDeviceFragmentShadingRateStructsVk {
80 VkPhysicalDeviceFragmentShadingRateFeaturesKHR physicalDeviceFragmentShadingRateFeatures;
81 VkPhysicalDeviceFragmentShadingRatePropertiesKHR physicalDeviceFragmentShadingRateProperties;
82 };
83 #endif
84
85 #if (RENDER_VULKAN_RT_ENABLED == 1)
86 struct PhysicalDeviceRayTracingStructsVk {
87 VkPhysicalDeviceBufferDeviceAddressFeatures physicalDeviceBufferDeviceAddressFeatures;
88 VkPhysicalDeviceRayTracingPipelineFeaturesKHR physicalDeviceRayTracingPipelineFeatures;
89 VkPhysicalDeviceAccelerationStructureFeaturesKHR physicalDeviceAccelerationStructureFeatures;
90 VkPhysicalDeviceRayQueryFeaturesKHR physicalDeviceRayQueryFeatures;
91 };
92 #endif
93
94 struct PhysicalDeviceMultiviewStructsVk {
95 VkPhysicalDeviceMultiviewFeaturesKHR physicalDeviceMultiviewFeatures;
96 VkPhysicalDeviceMultiviewPropertiesKHR physicalDeviceMultiviewProperties;
97 };
98
99 struct PhysicalDeviceDesciptorIndexingStructsVk {
100 VkPhysicalDeviceDescriptorIndexingFeatures physicalDeviceDescriptorIndexingFeatures;
101 VkPhysicalDeviceDescriptorIndexingProperties physicalDeviceDescriptorIndexingProperties;
102 };
103
104 struct PhysicalDeviceMaintenance4Vk {
105 VkPhysicalDeviceMaintenance4Features maintenance4Features {};
106 };
107
108 struct ChainObjects {
109 unique_ptr<PhysicalDeviceYcbcrStructsVk> ycbcr;
110 #if (RENDER_VULKAN_RT_ENABLED == 1)
111 unique_ptr<PhysicalDeviceRayTracingStructsVk> rt;
112 #endif
113 #if (RENDER_VULKAN_FSR_ENABLED == 1)
114 unique_ptr<PhysicalDeviceFragmentShadingRateStructsVk> fsr;
115 #endif
116 unique_ptr<PhysicalDeviceMultiviewStructsVk> mv;
117 unique_ptr<PhysicalDeviceDesciptorIndexingStructsVk> di;
118 unique_ptr<PhysicalDeviceMaintenance4Vk> maintenance4;
119 };
120
121 // fragment shading rate
122 #if (RENDER_VULKAN_FSR_ENABLED == 1)
123 // VK_KHR_fragment_shading_rate, requires VK_KHR_create_renderpass2, requires VK_KHR_get_physical_device_properties2
124 static constexpr string_view DEVICE_EXTENSION_FRAGMENT_SHADING_RATE { VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME };
125
126 void GetPhysicalDeviceFragmentShadingRateStructs(ChainObjects& co, ChainWrapper& cw)
127 {
128 co.fsr = make_unique<PhysicalDeviceFragmentShadingRateStructsVk>();
129 auto& fsr = co.fsr;
130 fsr->physicalDeviceFragmentShadingRateFeatures = {
131 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, // sType
132 nullptr, // pNext
133 VK_FALSE, // pipelineFragmentShadingRate
134 VK_FALSE, // primitiveFragmentShadingRate
135 VK_FALSE, // attachmentFragmentShadingRate
136 };
137 *cw.ppNextFeatures = &fsr->physicalDeviceFragmentShadingRateFeatures;
138 cw.ppNextFeatures = &fsr->physicalDeviceFragmentShadingRateFeatures.pNext;
139
140 fsr->physicalDeviceFragmentShadingRateProperties = {
141 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, // sType
142 nullptr, // pNext
143 };
144 *cw.ppNextProperties = &fsr->physicalDeviceFragmentShadingRateProperties;
145 cw.ppNextProperties = &fsr->physicalDeviceFragmentShadingRateProperties.pNext;
146 }
147 #endif
148
149 void GetPhysicalDeviceMultiviewFeaturesStructs(ChainObjects& co, ChainWrapper& cw)
150 {
151 co.mv = make_unique<PhysicalDeviceMultiviewStructsVk>();
152 auto& mv = co.mv;
153 mv->physicalDeviceMultiviewFeatures = {
154 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR, // sType
155 nullptr, // pNext
156 VK_FALSE, // multiview
157 VK_FALSE, // multiviewGeometryShader
158 VK_FALSE, // multiviewTessellationShader
159 };
160 *cw.ppNextFeatures = &mv->physicalDeviceMultiviewFeatures;
161 cw.ppNextFeatures = &mv->physicalDeviceMultiviewFeatures.pNext;
162
163 mv->physicalDeviceMultiviewProperties = {
164 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR, // sType
165 nullptr, // pNext
166 0, // maxMultiviewViewCount
167 0, // maxMultiviewInstanceIndex
168 };
169 *cw.ppNextProperties = &mv->physicalDeviceMultiviewProperties;
170 cw.ppNextProperties = &mv->physicalDeviceMultiviewProperties.pNext;
171 }
172
173 void GetPhysicalDeviceDescriptorIndexingFeaturesStructs(ChainObjects& co, ChainWrapper& cw)
174 {
175 co.di = make_unique<PhysicalDeviceDesciptorIndexingStructsVk>();
176 auto& di = co.di;
177 di->physicalDeviceDescriptorIndexingFeatures = {
178 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES, // sType
179 nullptr, // pNext
180 VK_FALSE, // shaderInputAttachmentArrayDynamicIndexing
181 VK_FALSE, // shaderUniformTexelBufferArrayDynamicIndexing
182 VK_FALSE, // shaderStorageTexelBufferArrayDynamicIndexing
183 VK_FALSE, // shaderUniformBufferArrayNonUniformIndexing
184 VK_FALSE, // shaderSampledImageArrayNonUniformIndexing
185 VK_FALSE, // shaderStorageBufferArrayNonUniformIndexing
186 VK_FALSE, // shaderStorageImageArrayNonUniformIndexing
187 VK_FALSE, // shaderInputAttachmentArrayNonUniformIndexing
188 VK_FALSE, // shaderUniformTexelBufferArrayNonUniformIndexing
189 VK_FALSE, // shaderStorageTexelBufferArrayNonUniformIndexing
190 VK_FALSE, // descriptorBindingUniformBufferUpdateAfterBind
191 VK_FALSE, // descriptorBindingSampledImageUpdateAfterBind
192 VK_FALSE, // descriptorBindingStorageImageUpdateAfterBind
193 VK_FALSE, // descriptorBindingStorageBufferUpdateAfterBind
194 VK_FALSE, // descriptorBindingUniformTexelBufferUpdateAfterBind
195 VK_FALSE, // descriptorBindingStorageTexelBufferUpdateAfterBind
196 VK_FALSE, // descriptorBindingUpdateUnusedWhilePending
197 VK_FALSE, // descriptorBindingPartiallyBound
198 VK_FALSE, // descriptorBindingVariableDescriptorCount
199 VK_FALSE, // runtimeDescriptorArray
200 };
201 *cw.ppNextFeatures = &di->physicalDeviceDescriptorIndexingFeatures;
202 cw.ppNextFeatures = &di->physicalDeviceDescriptorIndexingFeatures.pNext;
203
204 di->physicalDeviceDescriptorIndexingProperties = {
205 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES, // sType
206 nullptr, // pNext
207 0U, // maxUpdateAfterBindDescriptorsInAllPools
208 VK_FALSE, // shaderUniformBufferArrayNonUniformIndexingNative
209 VK_FALSE, // shaderSampledImageArrayNonUniformIndexingNative
210 VK_FALSE, // shaderStorageBufferArrayNonUniformIndexingNative
211 VK_FALSE, // shaderStorageImageArrayNonUniformIndexingNative
212 VK_FALSE, // shaderInputAttachmentArrayNonUniformIndexingNative
213 VK_FALSE, // robustBufferAccessUpdateAfterBind
214 VK_FALSE, // quadDivergentImplicitLod
215 0U, // maxPerStageDescriptorUpdateAfterBindSamplers
216 0U, // maxPerStageDescriptorUpdateAfterBindUniformBuffers
217 0U, // maxPerStageDescriptorUpdateAfterBindStorageBuffers
218 0U, // maxPerStageDescriptorUpdateAfterBindSampledImages
219 0U, // maxPerStageDescriptorUpdateAfterBindStorageImages
220 0U, // maxPerStageDescriptorUpdateAfterBindInputAttachments
221 0U, // maxPerStageUpdateAfterBindResources
222 0U, // maxDescriptorSetUpdateAfterBindSamplers
223 0U, // maxDescriptorSetUpdateAfterBindUniformBuffers
224 0U, // maxDescriptorSetUpdateAfterBindUniformBuffersDynamic
225 0U, // maxDescriptorSetUpdateAfterBindStorageBuffers
226 0U, // maxDescriptorSetUpdateAfterBindStorageBuffersDynamic
227 0U, // maxDescriptorSetUpdateAfterBindSampledImages
228 0U, // maxDescriptorSetUpdateAfterBindStorageImages
229 0U, // maxDescriptorSetUpdateAfterBindInputAttachments
230 };
231 *cw.ppNextProperties = &di->physicalDeviceDescriptorIndexingProperties;
232 cw.ppNextProperties = &di->physicalDeviceDescriptorIndexingProperties.pNext;
233 }
234
235 // ray-tracing
236 #if (RENDER_VULKAN_RT_ENABLED == 1)
237 static constexpr string_view DEVICE_EXTENSION_ACCELERATION_STRUCTURE { "VK_KHR_acceleration_structure" };
238 static constexpr string_view DEVICE_EXTENSION_RAY_QUERY { "VK_KHR_ray_query" };
239 static constexpr string_view DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS { "VK_KHR_deferred_host_operations" };
240 static constexpr string_view DEVICE_EXTENSION_RAY_TRACING_PIPELINE { "VK_KHR_ray_tracing_pipeline" };
241 static constexpr string_view DEVICE_EXTENSION_PIPELINE_LIBRARY { "VK_KHR_pipeline_library" };
242
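// Chains the ray-tracing feature structs together (ray query -> acceleration structure ->
// ray tracing pipeline -> buffer device address) and appends that chain to the feature query list.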
243 void GetPhysicalDeviceRayTracingStructs(ChainObjects& co, ChainWrapper& cw)
244 {
245 co.rt = make_unique<PhysicalDeviceRayTracingStructsVk>();
246 auto& rt = co.rt;
247 rt->physicalDeviceBufferDeviceAddressFeatures = {
248 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, // sType
249 nullptr, // pNext
250 VK_FALSE, // bufferDeviceAddress;
251 VK_FALSE, // bufferDeviceAddressCaptureReplay
252 VK_FALSE, // bufferDeviceAddressMultiDevice
253 };
254 rt->physicalDeviceRayTracingPipelineFeatures = {
255 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, // sType
256 &rt->physicalDeviceBufferDeviceAddressFeatures, // pNext
257 VK_FALSE, // rayTracingPipeline;
258 VK_FALSE, // rayTracingPipelineShaderGroupHandleCaptureReplay;
259 VK_FALSE, // rayTracingPipelineShaderGroupHandleCaptureReplayMixed;
260 VK_FALSE, // rayTracingPipelineTraceRaysIndirect;
261 VK_FALSE, // rayTraversalPrimitiveCulling;
262 };
263 rt->physicalDeviceAccelerationStructureFeatures = {
264 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, // sType
265 &rt->physicalDeviceRayTracingPipelineFeatures, // pNext
266 VK_FALSE, // accelerationStructure;
267 VK_FALSE, // accelerationStructureCaptureReplay
268 VK_FALSE, // accelerationStructureIndirectBuild
269 VK_FALSE, // accelerationStructureHostCommands
270 VK_FALSE, // descriptorBindingAccelerationStructureUpdateAfterBind
271 };
272 rt->physicalDeviceRayQueryFeatures = {
273 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, // sType
274 &rt->physicalDeviceAccelerationStructureFeatures, // pNext
275 VK_TRUE, // rayQuery
276 };
277
278 *cw.ppNextFeatures = &rt->physicalDeviceRayQueryFeatures;
279 cw.ppNextFeatures = &rt->physicalDeviceBufferDeviceAddressFeatures.pNext;
280 }
281 #endif
282
283 void GetPhysicalDeviceYcbcrStructs(ChainObjects& co, ChainWrapper& cw)
284 {
285 co.ycbcr = make_unique<PhysicalDeviceYcbcrStructsVk>();
286 auto& ycbcr = co.ycbcr;
287 ycbcr->ycbcrConversionFeatures = {
288 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES, // sType
289 nullptr, // pNext
290 VK_FALSE, // samplerYcbcrConversion
291 };
292
293 *cw.ppNextFeatures = &ycbcr->ycbcrConversionFeatures;
294 cw.ppNextFeatures = &ycbcr->ycbcrConversionFeatures.pNext;
295 }
296
297 void GetYcbcrExtFunctions(const VkInstance instance, DeviceVk::ExtFunctions& extFunctions)
298 {
299 extFunctions.vkCreateSamplerYcbcrConversion =
300 (PFN_vkCreateSamplerYcbcrConversion)(void*)vkGetInstanceProcAddr(instance, "vkCreateSamplerYcbcrConversion");
301 if (!extFunctions.vkCreateSamplerYcbcrConversion) {
302 PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateSamplerYcbcrConversion");
303 }
304 extFunctions.vkDestroySamplerYcbcrConversion =
305 (PFN_vkDestroySamplerYcbcrConversion)vkGetInstanceProcAddr(instance, "vkDestroySamplerYcbcrConversion");
306 if (!extFunctions.vkDestroySamplerYcbcrConversion) {
307 PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroySamplerYcbcrConversion");
308 }
309 }
310
311 void GetPhysicalDeviceMaintenance4Structs(ChainObjects& co, ChainWrapper& cw)
312 {
313 co.maintenance4 = make_unique<PhysicalDeviceMaintenance4Vk>();
314 auto& m4 = co.maintenance4;
315 m4->maintenance4Features = {
316 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES, // sType
317 nullptr, // pNext
318 VK_TRUE, // maintenance4
319 };
320
321 *cw.ppNextFeatures = &m4->maintenance4Features;
322 cw.ppNextFeatures = &m4->maintenance4Features.pNext;
323 }
324
325 constexpr uint32_t MIN_ALLOCATION_BLOCK_SIZE { 4u * 1024u * 1024u };
326 constexpr uint32_t MAX_ALLOCATION_BLOCK_SIZE { 1024u * 1024u * 1024u };
327 constexpr const QueueProperties DEFAULT_QUEUE {
328 VK_QUEUE_GRAPHICS_BIT, // requiredFlags
329 1, // count
330 1.0f, // priority
331 false, // explicitFlags
332 true, // canPresent
333 };
334
335 PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo GetAllocatorCreateInfo(const BackendExtraVk* backendExtra)
336 {
337 // create default pools
338 PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo createInfo;
339 uint32_t dynamicUboByteSize = 16u * 1024u * 1024u;
340 if (backendExtra) {
341 const auto& sizes = backendExtra->gpuMemoryAllocatorSizes;
342 if (sizes.defaultAllocationBlockSize != ~0u) {
343 createInfo.preferredLargeHeapBlockSize = Math::min(
344 MAX_ALLOCATION_BLOCK_SIZE, Math::max(sizes.defaultAllocationBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
345 }
346 if (sizes.customAllocationDynamicUboBlockSize != ~0u) {
347 dynamicUboByteSize = Math::min(MAX_ALLOCATION_BLOCK_SIZE,
348 Math::max(sizes.customAllocationDynamicUboBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
349 }
350 }
351
352 // staging
353 {
354 GpuBufferDesc desc;
355 desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_SINGLE_SHOT_STAGING;
356 desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
357 MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
358 desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_SRC_BIT;
359 createInfo.customPools.push_back({
360 "STAGING_GPU_BUFFER",
361 PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
362 0u,
363 // if a linear allocator is used, memory can easily be wasted depending on the client's usage pattern.
364 false,
365 { move(desc) },
366 });
367 }
368 // dynamic uniform ring buffers
369 {
370 GpuBufferDesc desc;
371 desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER;
372 desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
373 MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
374 desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
375 createInfo.customPools.push_back({
376 "DYNAMIC_UNIFORM_GPU_BUFFER",
377 PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
378 dynamicUboByteSize,
379 false,
380 { move(desc) },
381 });
382 }
383
384 return createInfo;
385 }
386
387 VkBool32 VKAPI_PTR DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
388 VkDebugUtilsMessageTypeFlagsEXT /* messageTypes */, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
389 void* /* pUserData */)
390 {
391 if (pCallbackData && pCallbackData->pMessageIdName && pCallbackData->pMessage) {
392 if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
393 PLUGIN_LOG_E("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
394 } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
395 (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) {
396 PLUGIN_LOG_W("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
397 } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
398 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
399 PLUGIN_LOG_I("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
400 } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
401 VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
402 PLUGIN_LOG_V("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
403 }
404 }
405
406 // The application should always return VK_FALSE.
407 return VK_FALSE;
408 }
409
410 VkBool32 VKAPI_PTR DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT, uint64_t, size_t,
411 int32_t, const char*, const char* pMessage, void*)
412 {
413 if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) {
414 PLUGIN_LOG_E("%s", pMessage);
415 } else if (flags & (VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)) {
416 PLUGIN_LOG_W("%s", pMessage);
417 } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) {
418 PLUGIN_LOG_I("%s", pMessage);
419 } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) {
420 PLUGIN_LOG_D("%s", pMessage);
421 }
422 return VK_FALSE;
423 }
424
425 void EmplaceDeviceQueue(
426 const VkDevice device, const LowLevelQueueInfo& aQueueInfo, vector<LowLevelGpuQueueVk>& aLowLevelQueues)
427 {
428 if (!device) {
429 return;
430 }
431
432 for (uint32_t idx = 0; idx < aQueueInfo.queueCount; ++idx) {
433 VkQueue queue = VK_NULL_HANDLE;
434 vkGetDeviceQueue(device, // device
435 aQueueInfo.queueFamilyIndex, // queueFamilyIndex
436 idx, // queueIndex
437 &queue); // pQueue
438 aLowLevelQueues.push_back(LowLevelGpuQueueVk { queue, aQueueInfo });
439 }
440 }
441
442 void CheckValidDepthFormats(const DevicePlatformDataVk& devicePlat, DevicePlatformInternalDataVk& dataInternal)
443 {
444 constexpr uint32_t DEPTH_FORMAT_COUNT { 4 };
445 constexpr Format DEPTH_FORMATS[DEPTH_FORMAT_COUNT] = { BASE_FORMAT_D24_UNORM_S8_UINT, BASE_FORMAT_D32_SFLOAT,
446 BASE_FORMAT_D16_UNORM, BASE_FORMAT_X8_D24_UNORM_PACK32 };
447 for (const Format& format : DEPTH_FORMATS) {
448 VkFormatProperties formatProperties;
449 vkGetPhysicalDeviceFormatProperties(devicePlat.physicalDevice, // physicalDevice
450 (VkFormat)format, // format
451 &formatProperties); // pFormatProperties
452 const VkFormatFeatureFlags optimalTilingFeatureFlags = formatProperties.optimalTilingFeatures;
453 if (optimalTilingFeatureFlags & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
454 dataInternal.supportedDepthFormats.push_back(format);
455 }
456 }
457 }
458
459 vector<string_view> GetPreferredDeviceExtensions(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
460 {
461 vector<string_view> extensions { DEVICE_EXTENSION_SWAPCHAIN };
462 extensions.push_back(DEVICE_EXTENSION_CREATE_RENDERPASS2);
463 extensions.push_back(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE);
464 extensions.push_back(DEVICE_EXTENSION_MAINTENANCE4);
465 GetPlatformDeviceExtensions(extensions);
466 #if (RENDER_VULKAN_FSR_ENABLED == 1)
467 extensions.push_back(DEVICE_EXTENSION_FRAGMENT_SHADING_RATE);
468 #endif
469 #if (RENDER_VULKAN_RT_ENABLED == 1)
470 extensions.push_back(DEVICE_EXTENSION_ACCELERATION_STRUCTURE);
471 extensions.push_back(DEVICE_EXTENSION_RAY_TRACING_PIPELINE);
472 extensions.push_back(DEVICE_EXTENSION_RAY_QUERY);
473 extensions.push_back(DEVICE_EXTENSION_PIPELINE_LIBRARY);
474 extensions.push_back(DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS);
475 #endif
476 if (plat.deviceApiMinor >= 1) { // enable only for 1.1+
477 extensions.push_back(DEVICE_EXTENSION_MULTIVIEW);
478 }
479 if (plat.deviceApiMinor >= 2) { // enable only for 1.2+
480 extensions.push_back(DEVICE_EXTENSION_DESCRIPTOR_INDEXING);
481 }
482 if (backendExtra) {
483 for (const auto str : backendExtra->extensions.extensionNames) {
484 extensions.push_back(str);
485 }
486 }
487 return extensions;
488 }
489
490 DeviceVk::CommonDeviceExtensions GetEnabledCommonDeviceExtensions(
491 const unordered_map<string, uint32_t>& enabledDeviceExtensions)
492 {
493 DeviceVk::CommonDeviceExtensions extensions;
494 extensions.swapchain = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SWAPCHAIN);
495 // create_renderpass2 is core in 1.2; render pass 2 is only used when depth stencil resolve is needed
496 extensions.renderPass2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE) &&
497 enabledDeviceExtensions.contains(DEVICE_EXTENSION_CREATE_RENDERPASS2);
498 extensions.externalMemory = enabledDeviceExtensions.contains(DEVICE_EXTENSION_EXTERNAL_MEMORY);
499 extensions.getMemoryRequirements2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2);
500 extensions.queueFamilyForeign = enabledDeviceExtensions.contains(DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN);
501 extensions.samplerYcbcrConversion = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION);
502 extensions.multiView = enabledDeviceExtensions.contains(DEVICE_EXTENSION_MULTIVIEW);
503 extensions.descriptorIndexing = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DESCRIPTOR_INDEXING);
504 #if (RENDER_VULKAN_FSR_ENABLED == 1)
505 extensions.fragmentShadingRate = enabledDeviceExtensions.contains(DEVICE_EXTENSION_FRAGMENT_SHADING_RATE);
506 #endif
507
508 return extensions;
509 }
510
511 CommonDeviceProperties GetCommonDevicePropertiesFunc(const ChainObjects& co)
512 {
513 CommonDeviceProperties cdp;
514 #if (RENDER_VULKAN_FSR_ENABLED == 1)
515 if (co.fsr) {
516 const auto& fsrVk = co.fsr->physicalDeviceFragmentShadingRateProperties;
517 cdp.fragmentShadingRateProperties.minFragmentShadingRateAttachmentTexelSize = {
518 fsrVk.minFragmentShadingRateAttachmentTexelSize.width,
519 fsrVk.minFragmentShadingRateAttachmentTexelSize.height
520 };
521 cdp.fragmentShadingRateProperties.maxFragmentShadingRateAttachmentTexelSize = {
522 fsrVk.maxFragmentShadingRateAttachmentTexelSize.width,
523 fsrVk.maxFragmentShadingRateAttachmentTexelSize.height
524 };
525 cdp.fragmentShadingRateProperties.maxFragmentSize = { fsrVk.maxFragmentSize.width,
526 fsrVk.maxFragmentSize.height };
527 }
528 #endif
529 return cdp;
530 }
531
532 void PreparePhysicalDeviceFeaturesForEnabling(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
533 {
534 // enable all supported features by default and then disable a few
535 plat.enabledPhysicalDeviceFeatures = plat.physicalDeviceProperties.physicalDeviceFeatures;
536 // prepare feature disable for core engine
537 plat.enabledPhysicalDeviceFeatures.geometryShader = VK_FALSE;
538 plat.enabledPhysicalDeviceFeatures.tessellationShader = VK_FALSE;
539 plat.enabledPhysicalDeviceFeatures.sampleRateShading = VK_FALSE;
540 plat.enabledPhysicalDeviceFeatures.occlusionQueryPrecise = VK_FALSE;
541 plat.enabledPhysicalDeviceFeatures.pipelineStatisticsQuery = VK_FALSE;
542 plat.enabledPhysicalDeviceFeatures.shaderTessellationAndGeometryPointSize = VK_FALSE;
543 plat.enabledPhysicalDeviceFeatures.inheritedQueries = VK_FALSE;
544 if (backendExtra) {
545 // check for support and prepare enabling
546 if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
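// VkPhysicalDeviceFeatures contains only VkBool32 members, so it is viewed below as a
// flat array of VkBool32 and compared element-wise against the supported features.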
547 const size_t valueCount = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
548 const array_view<const VkBool32> supported(
549 reinterpret_cast<VkBool32*>(&plat.physicalDeviceProperties.physicalDeviceFeatures), valueCount);
550 VkPhysicalDeviceFeatures* wantedFeatures =
551 (&backendExtra->extensions.physicalDeviceFeaturesToEnable->features);
552 const array_view<const VkBool32> wanted(reinterpret_cast<VkBool32*>(wantedFeatures), valueCount);
553
554 array_view<VkBool32> enabledPhysicalDeviceFeatures(
555 reinterpret_cast<VkBool32*>(&plat.enabledPhysicalDeviceFeatures), valueCount);
556 for (size_t idx = 0; idx < valueCount; ++idx) {
557 if (supported[idx] && wanted[idx]) {
558 enabledPhysicalDeviceFeatures[idx] = VK_TRUE;
559 } else if (wanted[idx]) {
560 PLUGIN_LOG_W(
561 "physical device feature not supported/enabled from idx: %u", static_cast<uint32_t>(idx));
562 }
563 }
564 }
565 }
566 }
567
568 FormatProperties FillDeviceFormatSupport(VkPhysicalDevice physicalDevice, const Format format)
569 {
570 VkFormatProperties formatProperties;
571 vkGetPhysicalDeviceFormatProperties(physicalDevice, // physicalDevice
572 (VkFormat)format, // format
573 &formatProperties); // pFormatProperties
574 return FormatProperties {
575 (FormatFeatureFlags)formatProperties.linearTilingFeatures,
576 (FormatFeatureFlags)formatProperties.optimalTilingFeatures,
577 (FormatFeatureFlags)formatProperties.bufferFeatures,
578 GpuProgramUtil::FormatByteSize(format),
579 };
580 }
581
582 void FillFormatSupport(VkPhysicalDevice physicalDevice, vector<FormatProperties>& formats)
583 {
584 const uint32_t fullSize = DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT +
585 DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT;
586 formats.resize(fullSize);
587 for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT; ++idx) {
588 formats[idx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(idx));
589 }
590 // pre-build additional formats
591 for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT; ++idx) {
592 const uint32_t currIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
593 PLUGIN_ASSERT(currIdx < static_cast<uint32_t>(formats.size()));
594 const uint32_t formatIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER;
595 formats[currIdx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(formatIdx));
596 }
597 }
598
599 void CreateDefaultVulkanObjects(VkDevice device, DeviceVk::DefaultVulkanObjects& dvo)
600 {
601 constexpr VkDescriptorSetLayoutCreateInfo EMPTY_LAYOUT_INFO {
602 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // sType
603 nullptr, // pNext
604 0U, // flags
605 0U, // bindingCount
606 nullptr, // pBindings
607 };
608
609 PLUGIN_ASSERT(!dvo.emptyDescriptorSetLayout);
610 VALIDATE_VK_RESULT(vkCreateDescriptorSetLayout(device, // device
611 &EMPTY_LAYOUT_INFO, // pCreateInfo
612 nullptr, // pAllocator
613 &dvo.emptyDescriptorSetLayout)); // pSetLayout
614 }
615 void DestroyDefaultVulkanObjects(VkDevice vkDevice, DeviceVk::DefaultVulkanObjects& dvo)
616 {
617 PLUGIN_ASSERT(dvo.emptyDescriptorSetLayout);
618 vkDestroyDescriptorSetLayout(vkDevice, // device
619 dvo.emptyDescriptorSetLayout, // descriptorSetLayout
620 nullptr); // pAllocator
621 }
622 } // namespace
623
624 DeviceVk::DeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo) : Device(renderContext, createInfo)
625 {
626 // assume instance and device will be created internally
627 ownInstanceAndDevice_ = true;
628
629 const auto* backendExtra = static_cast<const BackendExtraVk*>(createInfo.backendConfiguration);
630 // update internal state based on the optional backend configuration given by the client. the size of queueProperties
631 // will depend on the enableMultiQueue setting.
632 const auto queueProperties = CheckExternalConfig(backendExtra);
633
634 // these check internally ownInstanceAndDevice_ and skip creation if provided by user
635 CreateInstance();
636 CreatePhysicalDevice();
637
638 if ((!plat_.instance) || (!plat_.physicalDevice)) {
639 PLUGIN_LOG_E("Invalid device.");
640 SetDeviceStatus(false);
641 return;
642 }
643
644 const auto availableQueues = CreateFunctionsVk::GetAvailableQueues(plat_.physicalDevice, queueProperties);
645
646 // our own device creation does a lot of work to figure out what to create, but for an external device
647 // CheckExternalConfig stored the enabled extensions and features, and we just need to check what is available.
648 if (ownInstanceAndDevice_) {
649 CreateDevice(backendExtra, availableQueues);
650 } else {
651 commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
652 platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
653 // filling commonDeviceProperties_ isn't done, but at the moment it only contains fragment shading rate.
654 // should walk through BackendExtraVk::extensions::physicalDeviceFeaturesToEnable::pNext and see what's
655 // available.
656 }
657
658 if (!plat_.device) {
659 PLUGIN_LOG_E("Invalid device.");
660 SetDeviceStatus(false);
661 return;
662 }
663
664 CreateDebugFunctions();
665 CreateExtFunctions();
666 CreatePlatformExtFunctions();
667 SortAvailableQueues(availableQueues);
668
669 CheckValidDepthFormats(plat_, platInternal_);
670 FillFormatSupport(plat_.physicalDevice, formatProperties_);
671
672 PLUGIN_ASSERT_MSG(!lowLevelGpuQueues_.graphicsQueues.empty(), "default queue not initialized");
673 if (!lowLevelGpuQueues_.graphicsQueues.empty()) {
674 lowLevelGpuQueues_.defaultQueue = lowLevelGpuQueues_.graphicsQueues[0];
675 } else {
676 PLUGIN_LOG_E("default vulkan queue not initialized");
677 }
678
679 gpuQueueCount_ =
680 static_cast<uint32_t>(lowLevelGpuQueues_.computeQueues.size() + lowLevelGpuQueues_.graphicsQueues.size() +
681 lowLevelGpuQueues_.transferQueues.size());
682
683 const PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo allocatorCreateInfo =
684 GetAllocatorCreateInfo(backendExtra);
685 platformGpuMemoryAllocator_ = make_unique<PlatformGpuMemoryAllocator>(
686 plat_.instance, plat_.physicalDevice, plat_.device, allocatorCreateInfo);
687
688 if (queueProperties.size() > 1) {
689 PLUGIN_LOG_I("gpu queue count: %u", gpuQueueCount_);
690 }
691
692 SetDeviceStatus(true);
693
694 const GpuResourceManager::CreateInfo grmCreateInfo {
695 GpuResourceManager::GPU_RESOURCE_MANAGER_OPTIMIZE_STAGING_MEMORY,
696 };
697 gpuResourceMgr_ = make_unique<GpuResourceManager>(*this, grmCreateInfo);
698 shaderMgr_ = make_unique<ShaderManager>(*this);
699 globalDescriptorSetMgr_ = make_unique<DescriptorSetManagerVk>(*this);
700
701 lowLevelDevice_ = make_unique<LowLevelDeviceVk>(*this);
702
703 CreateDefaultVulkanObjects(plat_.device, defaultVulkanObjects_);
704 }
705
706 DeviceVk::~DeviceVk()
707 {
708 WaitForIdle();
709
710 DestroyDefaultVulkanObjects(plat_.device, defaultVulkanObjects_);
711
712 globalDescriptorSetMgr_.reset();
713 // must release handles before taking down gpu resource manager.
714 swapchains_.clear();
715
716 gpuResourceMgr_.reset();
717 shaderMgr_.reset();
718
719 platformGpuMemoryAllocator_.reset();
720
721 if (plat_.pipelineCache) {
722 CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
723 }
724
725 if (ownInstanceAndDevice_) {
726 CreateFunctionsVk::DestroyDevice(plat_.device);
727 CreateFunctionsVk::DestroyDebugMessenger(plat_.instance, debugFunctionUtilities_.debugMessenger);
728 CreateFunctionsVk::DestroyDebugCallback(plat_.instance, debugFunctionUtilities_.debugCallback);
729 CreateFunctionsVk::DestroyInstance(plat_.instance);
730 }
731 }
732
733 void DeviceVk::CreateInstance()
734 {
735 RENDER_CPU_PERF_SCOPE("CreateInstance", "");
736 const auto instanceWrapper = (plat_.instance == VK_NULL_HANDLE)
737 ? CreateFunctionsVk::CreateInstance(VersionInfo { "core_renderer", 0, 1, 0 },
738 VersionInfo { "core_renderer_app", 0, 1, 0 })
739 : CreateFunctionsVk::GetWrapper(plat_.instance);
740
741 plat_.instance = instanceWrapper.instance;
742 // refined later during physical device creation (clamped to the physical device API version)
743 plat_.deviceApiMajor = instanceWrapper.apiMajor;
744 plat_.deviceApiMinor = instanceWrapper.apiMinor;
745 if (instanceWrapper.debugUtilsSupported) {
746 debugFunctionUtilities_.debugMessenger =
747 CreateFunctionsVk::CreateDebugMessenger(plat_.instance, DebugMessengerCallback);
748 }
749 if (!debugFunctionUtilities_.debugMessenger && instanceWrapper.debugReportSupported) {
750 debugFunctionUtilities_.debugCallback =
751 CreateFunctionsVk::CreateDebugCallback(plat_.instance, DebugReportCallback);
752 }
753
754 extFunctions_.vkAcquireNextImageKHR =
755 (PFN_vkAcquireNextImageKHR)(void*)vkGetInstanceProcAddr(plat_.instance, "vkAcquireNextImageKHR");
756 if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
757 extFunctions_.vkGetPhysicalDeviceFeatures2 =
758 (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceFeatures2");
759 extFunctions_.vkGetPhysicalDeviceProperties2 =
760 (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceProperties2");
761 }
762 }
763
764 void DeviceVk::CreatePhysicalDevice()
765 {
766 RENDER_CPU_PERF_SCOPE("CreatePhysicalDevice", "");
767 auto physicalDeviceWrapper = (plat_.physicalDevice == VK_NULL_HANDLE)
768 ? CreateFunctionsVk::CreatePhysicalDevice(plat_.instance, DEFAULT_QUEUE)
769 : CreateFunctionsVk::GetWrapper(plat_.physicalDevice);
770 const uint32_t physicalDeviceApiMajor =
771 VK_VERSION_MAJOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
772 const uint32_t physicalDeviceApiMinor =
773 VK_VERSION_MINOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
774 plat_.deviceApiMajor = Math::min(plat_.deviceApiMajor, physicalDeviceApiMajor);
775 plat_.deviceApiMinor = Math::min(plat_.deviceApiMinor, physicalDeviceApiMinor);
776 PLUGIN_LOG_D("device api version %u.%u", plat_.deviceApiMajor, plat_.deviceApiMinor);
777
778 plat_.physicalDevice = physicalDeviceWrapper.physicalDevice;
779 plat_.physicalDeviceProperties = move(physicalDeviceWrapper.physicalDeviceProperties);
780 plat_.physicalDeviceExtensions = move(physicalDeviceWrapper.physicalDeviceExtensions);
781 const auto& memoryProperties = plat_.physicalDeviceProperties.physicalDeviceMemoryProperties;
782 deviceSharedMemoryPropertyFlags_ =
783 (memoryProperties.memoryTypeCount > 0) ? (MemoryPropertyFlags)memoryProperties.memoryTypes[0].propertyFlags : 0;
784 for (uint32_t idx = 1; idx < memoryProperties.memoryTypeCount; ++idx) {
785 const auto memoryPropertyFlags = (MemoryPropertyFlags)memoryProperties.memoryTypes[idx].propertyFlags;
786 // do not compare lazily allocated or protected memory blocks
787 if ((memoryPropertyFlags & (CORE_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | CORE_MEMORY_PROPERTY_PROTECTED_BIT)) ==
788 0) {
789 deviceSharedMemoryPropertyFlags_ &= memoryPropertyFlags;
790 }
791 }
792 }
793
794 void DeviceVk::CreateDevice(const BackendExtraVk* backendExtra, const vector<LowLevelQueueInfo>& availableQueues)
795 {
796 RENDER_CPU_PERF_SCOPE("CreateDevice", "");
797 vector<string_view> preferredExtensions = GetPreferredDeviceExtensions(backendExtra, plat_);
798 PreparePhysicalDeviceFeaturesForEnabling(backendExtra, plat_);
799
800 ChainWrapper chainWrapper;
801 ChainObjects chainObjects;
802
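// build the pNext chains passed to vkGetPhysicalDeviceFeatures2/Properties2; each helper below
// appends its struct and advances the chain tail pointers held in ChainWrapper.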
803 VkPhysicalDeviceFeatures2* physicalDeviceFeatures2Ptr = nullptr;
804 VkPhysicalDeviceFeatures2 physicalDeviceFeatures2 {
805 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, // sType
806 nullptr, // pNext
807 {}, // features
808 };
809 chainWrapper.ppNextFeatures = &physicalDeviceFeatures2.pNext;
810
811 VkPhysicalDeviceProperties2 physicalDeviceProperties2 {
812 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, // sType
813 nullptr, // pNext
814 {}, // properties
815 };
816 chainWrapper.ppNextProperties = &physicalDeviceProperties2.pNext;
817
818 GetPhysicalDeviceYcbcrStructs(chainObjects, chainWrapper);
819 #if (RENDER_VULKAN_RT_ENABLED == 1)
820 GetPhysicalDeviceRayTracingStructs(chainObjects, chainWrapper);
821 #endif
822 #if (RENDER_VULKAN_FSR_ENABLED == 1)
823 if (CreateFunctionsVk::HasExtension(plat_.physicalDeviceExtensions, DEVICE_EXTENSION_FRAGMENT_SHADING_RATE)) {
824 GetPhysicalDeviceFragmentShadingRateStructs(chainObjects, chainWrapper);
825 }
826 #endif
827 if (plat_.deviceApiMinor >= 1) { // enable only for 1.1 + for now
828 GetPhysicalDeviceMultiviewFeaturesStructs(chainObjects, chainWrapper);
829 }
830 if (plat_.deviceApiMinor >= 2) { // enable only for 1.2 + for now
831 GetPhysicalDeviceDescriptorIndexingFeaturesStructs(chainObjects, chainWrapper);
832 }
833 if (CreateFunctionsVk::HasExtension(plat_.physicalDeviceExtensions, DEVICE_EXTENSION_MAINTENANCE4)) {
834 GetPhysicalDeviceMaintenance4Structs(chainObjects, chainWrapper);
835 }
836 if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
837 // pipe user extension physical device features
838 if (backendExtra) {
839 if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
840 *chainWrapper.ppNextFeatures = backendExtra->extensions.physicalDeviceFeaturesToEnable->pNext;
841 }
842 }
843 if (extFunctions_.vkGetPhysicalDeviceFeatures2) {
844 extFunctions_.vkGetPhysicalDeviceFeatures2(plat_.physicalDevice, &physicalDeviceFeatures2);
845 }
846 if (extFunctions_.vkGetPhysicalDeviceProperties2) {
847 extFunctions_.vkGetPhysicalDeviceProperties2(plat_.physicalDevice, &physicalDeviceProperties2);
848 }
849
850 // vkGetPhysicalDeviceFeatures has already filled this and PreparePhysicalDeviceFeaturesForEnabling
851 // disabled or enabled some features.
852 physicalDeviceFeatures2.features = plat_.enabledPhysicalDeviceFeatures;
853 physicalDeviceFeatures2Ptr = &physicalDeviceFeatures2;
854 }
855 const DeviceWrapper deviceWrapper =
856 CreateFunctionsVk::CreateDevice(plat_.instance, plat_.physicalDevice, plat_.physicalDeviceExtensions,
857 plat_.enabledPhysicalDeviceFeatures, physicalDeviceFeatures2Ptr, availableQueues, preferredExtensions);
858 plat_.device = deviceWrapper.device;
859 for (const auto& ref : deviceWrapper.extensions) {
860 extensions_[ref] = 1u;
861 }
862 commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
863 platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
864 commonDeviceProperties_ = GetCommonDevicePropertiesFunc(chainObjects);
865 }
866
867 vector<QueueProperties> DeviceVk::CheckExternalConfig(const BackendExtraVk* backendConfiguration)
868 {
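// the default graphics queue is always requested; a dedicated compute queue and an
// application-provided instance/physical device/device are added only when configured.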
869 vector<QueueProperties> queueProperties;
870 queueProperties.push_back(DEFAULT_QUEUE);
871
872 if (!backendConfiguration) {
873 return queueProperties;
874 }
875
876 const auto& extra = *backendConfiguration;
877 if (extra.enableMultiQueue) {
878 queueProperties.push_back(QueueProperties {
879 VK_QUEUE_COMPUTE_BIT, // requiredFlags
880 1, // count
881 1.0f, // priority
882 true, // explicitFlags
883 false, // canPresent
884 });
885 PLUGIN_LOG_I("trying to enable gpu multi-queue, with queue count: %u", (uint32_t)queueProperties.size());
886 }
887
888 if (extra.instance != VK_NULL_HANDLE) {
889 PLUGIN_LOG_D("trying to use application given vulkan instance, device, and physical device");
890 PLUGIN_ASSERT((extra.instance && extra.physicalDevice && extra.device));
891 plat_.instance = extra.instance;
892 plat_.physicalDevice = extra.physicalDevice;
893 plat_.device = extra.device;
894 if (extra.extensions.physicalDeviceFeaturesToEnable) {
895 plat_.enabledPhysicalDeviceFeatures = extra.extensions.physicalDeviceFeaturesToEnable->features;
896 }
897 ownInstanceAndDevice_ = false; // everything given from the application
898
899 const auto myDevice = plat_.physicalDevice;
900 auto& myProperties = plat_.physicalDeviceProperties;
901 vkGetPhysicalDeviceProperties(myDevice, &myProperties.physicalDeviceProperties);
902 vkGetPhysicalDeviceFeatures(myDevice, &myProperties.physicalDeviceFeatures);
903 vkGetPhysicalDeviceMemoryProperties(myDevice, &myProperties.physicalDeviceMemoryProperties);
904
905 for (const auto& extension : extra.extensions.extensionNames) {
906 extensions_[extension] = 1u;
907 }
908 }
909 return queueProperties;
910 }
911
912 void DeviceVk::SortAvailableQueues(const vector<LowLevelQueueInfo>& availableQueues)
913 {
914 for (const auto& ref : availableQueues) {
915 if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_COMPUTE_BIT) {
916 EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.computeQueues);
917 } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_GRAPHICS_BIT) {
918 EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.graphicsQueues);
919 } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_TRANSFER_BIT) {
920 EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.transferQueues);
921 }
922 }
923 }
924
925 DeviceBackendType DeviceVk::GetBackendType() const
926 {
927 return DeviceBackendType::VULKAN;
928 }
929
930 const DevicePlatformData& DeviceVk::GetPlatformData() const
931 {
932 return plat_;
933 }
934
935 const DevicePlatformDataVk& DeviceVk::GetPlatformDataVk() const
936 {
937 return plat_;
938 }
939
940 const DevicePlatformInternalDataVk& DeviceVk::GetPlatformInternalDataVk() const
941 {
942 return platInternal_;
943 }
944
945 ILowLevelDevice& DeviceVk::GetLowLevelDevice() const
946 {
947 return *lowLevelDevice_;
948 }
949
950 FormatProperties DeviceVk::GetFormatProperties(const Format format) const
951 {
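// linear formats index formatProperties_ directly; additional (extension) formats are remapped
// to the tail of the table starting at ADDITIONAL_FORMAT_BASE_IDX (see FillFormatSupport).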
952 const auto formatSupportSize = static_cast<uint32_t>(formatProperties_.size());
953 const auto formatIdx = static_cast<uint32_t>(format);
954 if (formatIdx < formatSupportSize) {
955 return formatProperties_[formatIdx];
956 } else if ((formatIdx >= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) &&
957 (formatIdx <= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_END_NUMBER)) {
958 const uint32_t currIdx = formatIdx - DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
959 PLUGIN_UNUSED(currIdx);
960 PLUGIN_ASSERT(currIdx < formatSupportSize);
961 return formatProperties_[currIdx];
962 }
963 return {};
964 }
965
966 AccelerationStructureBuildSizes DeviceVk::GetAccelerationStructureBuildSizes(
967 const AccelerationStructureBuildGeometryInfo& geometry,
968 BASE_NS::array_view<const AccelerationStructureGeometryTrianglesInfo> triangles,
969 BASE_NS::array_view<const AccelerationStructureGeometryAabbsInfo> aabbs,
970 BASE_NS::array_view<const AccelerationStructureGeometryInstancesInfo> instances) const
971 {
972 #if (RENDER_VULKAN_RT_ENABLED == 1)
973 const VkDevice device = plat_.device;
974
975 const size_t arraySize = triangles.size() + aabbs.size() + instances.size();
976 vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
977 vector<uint32_t> maxPrimitiveCounts(arraySize);
978 uint32_t arrayIndex = 0;
979 for (const auto& trianglesRef : triangles) {
980 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
981 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
982 nullptr, // pNext
983 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR, // geometryType
984 {}, // geometry;
985 VkGeometryFlagsKHR(trianglesRef.geometryFlags), // flags
986 };
987 geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
988 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
989 nullptr, // pNext
990 VkFormat(trianglesRef.vertexFormat), // vertexFormat
991 {}, // vertexData
992 VkDeviceSize(trianglesRef.vertexStride), // vertexStride
993 trianglesRef.maxVertex, // maxVertex
994 VkIndexType(trianglesRef.indexType), // indexType
995 {}, // indexData
996 {}, // transformData
997 };
998 maxPrimitiveCounts[arrayIndex] = trianglesRef.indexCount / 3u; // triangles;
999 arrayIndex++;
1000 }
1001 for (const auto& aabbsRef : aabbs) {
1002 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
1003 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
1004 nullptr, // pNext
1005 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR, // geometryType
1006 {}, // geometry;
1007 0, // flags
1008 };
1009 geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
1010 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
1011 nullptr, // pNext
1012 {}, // data
1013 aabbsRef.stride, // stride
1014 };
1015 maxPrimitiveCounts[arrayIndex] = 1u;
1016 arrayIndex++;
1017 }
1018 for (const auto& instancesRef : instances) {
1019 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
1020 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
1021 nullptr, // pNext
1022 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR, // geometryType
1023 {}, // geometry;
1024 0, // flags
1025 };
1026 geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
1027 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
1028 nullptr, // pNext
1029 instancesRef.arrayOfPointers, // arrayOfPointers
1030 {}, // data
1031 };
1032 maxPrimitiveCounts[arrayIndex] = 1u;
1033 arrayIndex++;
1034 }
1035
1036 const VkAccelerationStructureBuildGeometryInfoKHR geometryInfoVk {
1037 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
1038 nullptr, // pNext
1039 VkAccelerationStructureTypeKHR(geometry.type), // type
1040 VkBuildAccelerationStructureFlagsKHR(geometry.flags), // flags
1041 VkBuildAccelerationStructureModeKHR(geometry.mode), // mode
1042 VK_NULL_HANDLE, // srcAccelerationStructure
1043 VK_NULL_HANDLE, // dstAccelerationStructure
1044 arrayIndex, // geometryCount
1045 geometryData.data(), // pGeometries
1046 nullptr, // ppGeometries
1047 {}, // scratchData
1048 };
1049
1050 VkAccelerationStructureBuildSizesInfoKHR buildSizesInfo {
1051 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // sType
1052 nullptr, // pNext
1053 0, // accelerationStructureSize
1054 0, // updateScratchSize
1055 0, // buildScratchSize
1056 };
1057 if ((arrayIndex > 0) && extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
1058 extFunctions_.vkGetAccelerationStructureBuildSizesKHR(device, // device
1059 VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, // buildType,
1060 &geometryInfoVk, // pBuildInfo
1061 maxPrimitiveCounts.data(), // pMaxPrimitiveCounts
1062 &buildSizesInfo); // pSizeInfo
1063 }
1064
1065 return AccelerationStructureBuildSizes {
1066 static_cast<uint32_t>(buildSizesInfo.accelerationStructureSize),
1067 static_cast<uint32_t>(buildSizesInfo.updateScratchSize),
1068 static_cast<uint32_t>(buildSizesInfo.buildScratchSize),
1069 };
1070 #else
1071 return AccelerationStructureBuildSizes { 0, 0, 0 };
1072 #endif
1073 }
1074
1075 unique_ptr<Swapchain> DeviceVk::CreateDeviceSwapchain(const SwapchainCreateInfo& swapchainCreateInfo)
1076 {
1077 RENDER_CPU_PERF_SCOPE("CreateDeviceSwapchain", "");
1078 return make_unique<SwapchainVk>(*this, swapchainCreateInfo);
1079 }
1080
1081 void DeviceVk::DestroyDeviceSwapchain() {}
1082
1083 PlatformGpuMemoryAllocator* DeviceVk::GetPlatformGpuMemoryAllocator()
1084 {
1085 return platformGpuMemoryAllocator_.get();
1086 }
1087
1088 GpuQueue DeviceVk::GetValidGpuQueue(const GpuQueue& gpuQueue) const
1089 {
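// 1. tries to return the typed queue with the given index
// 2. tries to return the typed queue with index 0
// 3. returns the default (graphics) queue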
1090 const auto getSpecificQueue = [](const uint32_t queueIndex, const GpuQueue::QueueType queueType,
1091 const vector<LowLevelGpuQueueVk>& specificQueues, const GpuQueue& defaultQueue) {
1092 const auto queueCount = (uint32_t)specificQueues.size();
1093 if (queueIndex < queueCount) {
1094 return GpuQueue { queueType, queueIndex };
1095 } else if (queueCount > 0) {
1096 return GpuQueue { queueType, 0 };
1097 }
1098 return defaultQueue;
1099 };
1100
1101 static GpuQueue defaultQueue { GpuQueue::QueueType::GRAPHICS, 0 };
1102 if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
1103 return getSpecificQueue(
1104 gpuQueue.index, GpuQueue::QueueType::COMPUTE, lowLevelGpuQueues_.computeQueues, defaultQueue);
1105 } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
1106 return getSpecificQueue(
1107 gpuQueue.index, GpuQueue::QueueType::GRAPHICS, lowLevelGpuQueues_.graphicsQueues, defaultQueue);
1108 } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
1109 return getSpecificQueue(
1110 gpuQueue.index, GpuQueue::QueueType::TRANSFER, lowLevelGpuQueues_.transferQueues, defaultQueue);
1111 } else {
1112 return defaultQueue;
1113 }
1114 }
1115
1116 uint32_t DeviceVk::GetGpuQueueCount() const
1117 {
1118 return gpuQueueCount_;
1119 }
1120
1121 void DeviceVk::InitializePipelineCache(array_view<const uint8_t> initialData)
1122 {
1123 RENDER_CPU_PERF_SCOPE("InitializePipelineCache", "");
1124
1125 if (plat_.pipelineCache) {
1126 CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
1127 }
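// mirrors the layout of the Vulkan pipeline cache header; used to verify that the provided
// cache data was produced by this device and driver before reusing it.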
1128 struct CacheHeader {
1129 uint32_t bytes;
1130 uint32_t version;
1131 uint32_t vendorId;
1132 uint32_t deviceId;
1133 uint8_t pipelineCacheUUID[VK_UUID_SIZE];
1134 };
1135 if (initialData.data() && (initialData.size() > sizeof(CacheHeader))) {
1136 CacheHeader header;
1137 CloneData(&header, sizeof(header), initialData.data(), sizeof(header));
1138 const auto& props = plat_.physicalDeviceProperties.physicalDeviceProperties;
1139 if (header.version != VkPipelineCacheHeaderVersion::VK_PIPELINE_CACHE_HEADER_VERSION_ONE ||
1140 header.vendorId != props.vendorID || header.deviceId != props.deviceID ||
1141 memcmp(header.pipelineCacheUUID, props.pipelineCacheUUID, VK_UUID_SIZE) != 0) {
1142 initialData = {};
1143 }
1144 }
1145
1146 plat_.pipelineCache = CreateFunctionsVk::CreatePipelineCache(plat_.device, initialData);
1147 }
1148
1149 vector<uint8_t> DeviceVk::GetPipelineCache() const
1150 {
1151 vector<uint8_t> deviceData;
1152 if (plat_.pipelineCache) {
1153 size_t dataSize = 0u;
1154 if (auto result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, nullptr);
1155 result == VK_SUCCESS && dataSize) {
1156 deviceData.resize(dataSize);
1157 dataSize = deviceData.size();
1158 result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, deviceData.data());
1159 if (result == VK_SUCCESS) {
1160 deviceData.resize(dataSize);
1161 } else {
1162 deviceData.clear();
1163 }
1164 }
1165 }
1166 return deviceData;
1167 }
1168
1169 LowLevelGpuQueueVk DeviceVk::GetGpuQueue(const GpuQueue& gpuQueue) const
1170 {
1171 // 1. tries to return the typed queue with given index
1172 // 2. tries to return the typed queue with an index 0
1173 // 3. returns the default queue
1174 const auto getSpecificQueue = [](const uint32_t queueIndex, const vector<LowLevelGpuQueueVk>& specificQueues,
1175 const LowLevelGpuQueueVk& defaultQueue) {
1176 const auto queueCount = (uint32_t)specificQueues.size();
1177 if (queueIndex < queueCount) {
1178 return specificQueues[queueIndex];
1179 } else if (queueCount > 0) {
1180 return specificQueues[0];
1181 }
1182 return defaultQueue;
1183 };
1184
1185 if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
1186 return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.computeQueues, lowLevelGpuQueues_.defaultQueue);
1187 } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
1188 return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.graphicsQueues, lowLevelGpuQueues_.defaultQueue);
1189 } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
1190 return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.transferQueues, lowLevelGpuQueues_.defaultQueue);
1191 } else {
1192 return lowLevelGpuQueues_.defaultQueue;
1193 }
1194 }
1195
1196 LowLevelGpuQueueVk DeviceVk::GetPresentationGpuQueue() const
1197 {
1198 // NOTE: the default graphics queue is expected to support presentation
1199 return GetGpuQueue(GpuQueue { GpuQueue::QueueType::GRAPHICS, 0 });
1200 }
1201
1202 vector<LowLevelGpuQueueVk> DeviceVk::GetLowLevelGpuQueues() const
1203 {
1204 vector<LowLevelGpuQueueVk> gpuQueues;
1205 gpuQueues.reserve(gpuQueueCount_);
1206 gpuQueues.append(lowLevelGpuQueues_.computeQueues.begin(), lowLevelGpuQueues_.computeQueues.end());
1207 gpuQueues.append(lowLevelGpuQueues_.graphicsQueues.begin(), lowLevelGpuQueues_.graphicsQueues.end());
1208 gpuQueues.append(lowLevelGpuQueues_.transferQueues.begin(), lowLevelGpuQueues_.transferQueues.end());
1209 return gpuQueues;
1210 }
1211
1212 void DeviceVk::WaitForIdle()
1213 {
1214 RENDER_CPU_PERF_SCOPE("WaitForIdle", "");
1215 if (plat_.device) {
1216 if (!isRenderbackendRunning_) {
1217 PLUGIN_LOG_D("Device - WaitForIdle");
1218 vkDeviceWaitIdle(plat_.device); // device
1219 } else {
1220 PLUGIN_LOG_E("Device WaitForIdle can only be called when the render backend is not running");
1221 }
1222 }
1223 }
1224
1225 void DeviceVk::Activate() {}
1226
1227 void DeviceVk::Deactivate() {}
1228
1229 bool DeviceVk::AllowThreadedProcessing() const
1230 {
1231 return true;
1232 }
1233
GetFeatureConfigurations() const1234 const DeviceVk::FeatureConfigurations& DeviceVk::GetFeatureConfigurations() const
1235 {
1236 return featureConfigurations_;
1237 }
1238
GetCommonDeviceExtensions() const1239 const DeviceVk::CommonDeviceExtensions& DeviceVk::GetCommonDeviceExtensions() const
1240 {
1241 return commonDeviceExtensions_;
1242 }
1243
GetPlatformDeviceExtensions() const1244 const PlatformDeviceExtensions& DeviceVk::GetPlatformDeviceExtensions() const
1245 {
1246 return platformDeviceExtensions_;
1247 }
1248
HasDeviceExtension(const string_view extensionName) const1249 bool DeviceVk::HasDeviceExtension(const string_view extensionName) const
1250 {
1251 return extensions_.contains(extensionName);
1252 }
1253
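// Factory function for creating the Vulkan backed Device implementation.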
unique_ptr<Device> CreateDeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo)
{
    RENDER_CPU_PERF_SCOPE("CreateDeviceVk", "");
    return make_unique<DeviceVk>(renderContext, createInfo);
}

unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuBufferDesc& desc)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuBuffer", "");
    return make_unique<GpuBufferVk>(*this, desc);
}

unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuAccelerationStructureDesc& descAccel)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuBuffer", "");
    return make_unique<GpuBufferVk>(*this, descAccel);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImage(const GpuImageDesc& desc)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImage", "");
    return make_unique<GpuImageVk>(*this, desc);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const GpuImagePlatformData& platformData, const uintptr_t hwBuffer)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImageView", "");
    return make_unique<GpuImageVk>(*this, desc, platformData, hwBuffer);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(const GpuImageDesc& desc, const GpuImagePlatformData& platformData)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImageView", "");
    return CreateGpuImageView(desc, platformData, 0);
}

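// Wraps the images owned by the given swapchain into GpuImage views using the swapchain's image description.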
vector<unique_ptr<GpuImage>> DeviceVk::CreateGpuImageViews(const Swapchain& swapchain)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImageViews", "");
    const GpuImageDesc& desc = swapchain.GetDesc();
    const auto& swapchainPlat = static_cast<const SwapchainVk&>(swapchain).GetPlatformData();

    vector<unique_ptr<GpuImage>> gpuImages(swapchainPlat.swapchainImages.images.size());
    for (size_t idx = 0; idx < gpuImages.size(); ++idx) {
        GpuImagePlatformDataVk gpuImagePlat;
        gpuImagePlat.image = swapchainPlat.swapchainImages.images[idx];
        gpuImagePlat.imageView = swapchainPlat.swapchainImages.imageViews[idx];
        gpuImages[idx] = this->CreateGpuImageView(desc, gpuImagePlat);
    }
    return gpuImages;
}

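// Creates a GpuImage view around a backend specific image description (externally provided image, image view,
// and optional platform hardware buffer).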
unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const BackendSpecificImageDesc& platformData)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImageView", "");
    const auto& imageDesc = (const ImageDescVk&)platformData;
    GpuImagePlatformDataVk platData;
    platData.image = imageDesc.image;
    platData.imageView = imageDesc.imageView;
    return CreateGpuImageView(desc, platData, imageDesc.platformHwBuffer);
}

unique_ptr<GpuSampler> DeviceVk::CreateGpuSampler(const GpuSamplerDesc& desc)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuSampler", "");
    return make_unique<GpuSamplerVk>(*this, desc);
}

unique_ptr<RenderFrameSync> DeviceVk::CreateRenderFrameSync()
{
    RENDER_CPU_PERF_SCOPE("CreateRenderFrameSync", "");
    return make_unique<RenderFrameSyncVk>(*this);
}

unique_ptr<RenderBackend> DeviceVk::CreateRenderBackend(
    GpuResourceManager& gpuResourceMgr, CORE_NS::ITaskQueue* const queue)
{
    return make_unique<RenderBackendVk>(*this, gpuResourceMgr, queue);
}

unique_ptr<ShaderModule> DeviceVk::CreateShaderModule(const ShaderModuleCreateInfo& data)
{
    RENDER_CPU_PERF_SCOPE("CreateShaderModule", "");
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<ShaderModule> DeviceVk::CreateComputeShaderModule(const ShaderModuleCreateInfo& data)
{
    RENDER_CPU_PERF_SCOPE("CreateComputeShaderModule", "");
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<GpuShaderProgram> DeviceVk::CreateGpuShaderProgram(const GpuShaderProgramCreateData& data)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuShaderProgram", "");
    return make_unique<GpuShaderProgramVk>(data);
}

unique_ptr<GpuComputeProgram> DeviceVk::CreateGpuComputeProgram(const GpuComputeProgramCreateData& data)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuComputeProgram", "");
    return make_unique<GpuComputeProgramVk>(data);
}

unique_ptr<NodeContextDescriptorSetManager> DeviceVk::CreateNodeContextDescriptorSetManager()
{
    return make_unique<NodeContextDescriptorSetManagerVk>(*this);
}

unique_ptr<NodeContextPoolManager> DeviceVk::CreateNodeContextPoolManager(
    GpuResourceManager& gpuResourceMgr, const GpuQueue& gpuQueue)
{
    return make_unique<NodeContextPoolManagerVk>(*this, gpuResourceMgr, gpuQueue);
}

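// renderPassData and pipelineLayoutData are required; they are dereferenced when creating the Vulkan
// pipeline state object.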
unique_ptr<GraphicsPipelineStateObject> DeviceVk::CreateGraphicsPipelineStateObject(const GpuShaderProgram& gpuProgram,
    const GraphicsState& graphicsState, const PipelineLayout& pipelineLayout,
    const VertexInputDeclarationView& vertexInputDeclaration,
    const ShaderSpecializationConstantDataView& specializationConstants,
    const array_view<const DynamicStateEnum> dynamicStates, const RenderPassDesc& renderPassDesc,
    const array_view<const RenderPassSubpassDesc>& renderPassSubpassDescs, const uint32_t subpassIndex,
    const LowLevelRenderPassData* renderPassData, const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    RENDER_CPU_PERF_SCOPE("CreateGraphicsPipelineStateObject", "");
    PLUGIN_ASSERT(renderPassData);
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<GraphicsPipelineStateObjectVk>(*this, gpuProgram, graphicsState, pipelineLayout,
        vertexInputDeclaration, specializationConstants, dynamicStates, renderPassSubpassDescs, subpassIndex,
        *renderPassData, *pipelineLayoutData);
}

unique_ptr<ComputePipelineStateObject> DeviceVk::CreateComputePipelineStateObject(const GpuComputeProgram& gpuProgram,
    const PipelineLayout& pipelineLayout, const ShaderSpecializationConstantDataView& specializationConstants,
    const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    RENDER_CPU_PERF_SCOPE("CreateComputePipelineStateObject", "");
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<ComputePipelineStateObjectVk>(
        *this, gpuProgram, pipelineLayout, specializationConstants, *pipelineLayoutData);
}

unique_ptr<GpuSemaphore> DeviceVk::CreateGpuSemaphore()
{
    RENDER_CPU_PERF_SCOPE("CreateGpuSemaphore", "");
    return make_unique<GpuSemaphoreVk>(*this);
}

unique_ptr<GpuSemaphore> DeviceVk::CreateGpuSemaphoreView(const uint64_t handle)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuSemaphoreView", "");
    return make_unique<GpuSemaphoreVk>(*this, handle);
}

const DebugFunctionUtilitiesVk& DeviceVk::GetDebugFunctionUtilities() const
{
    return debugFunctionUtilities_;
}

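// Resolves the debug utils entry points when validation or debug markers are enabled at build time.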
void DeviceVk::CreateDebugFunctions()
{
    RENDER_CPU_PERF_SCOPE("CreateDebugFunctions", "");
    if (!plat_.device) {
        return;
    }

#if (RENDER_VULKAN_VALIDATION_ENABLED == 1)
    debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT =
        (PFN_vkSetDebugUtilsObjectNameEXT)(void*)vkGetDeviceProcAddr(plat_.device, "vkSetDebugUtilsObjectNameEXT");
#endif
#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
    debugFunctionUtilities_.vkCmdBeginDebugUtilsLabelEXT =
        (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdBeginDebugUtilsLabelEXT");
    debugFunctionUtilities_.vkCmdEndDebugUtilsLabelEXT =
        (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdEndDebugUtilsLabelEXT");
#endif
}

const DeviceVk::ExtFunctions& DeviceVk::GetExtFunctions() const
{
    return extFunctions_;
}

const PlatformExtFunctions& DeviceVk::GetPlatformExtFunctions() const
{
    return platformExtFunctions_;
}

const DeviceVk::DefaultVulkanObjects& DeviceVk::GetDefaultVulkanObjects() const
{
    return defaultVulkanObjects_;
}

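// Resolves extension entry points for the extensions that were found and enabled on the device.
// Entry points that cannot be resolved are logged and, where applicable, the extension is disabled.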
void DeviceVk::CreateExtFunctions()
{
    RENDER_CPU_PERF_SCOPE("CreateExtFunctions", "");
    if (commonDeviceExtensions_.renderPass2) {
        extFunctions_.vkCreateRenderPass2KHR =
            (PFN_vkCreateRenderPass2KHR)(void*)vkGetInstanceProcAddr(plat_.instance, "vkCreateRenderPass2KHR");
        if (!extFunctions_.vkCreateRenderPass2KHR) {
            commonDeviceExtensions_.renderPass2 = false;
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateRenderPass2KHR");
        }
    }
    if (commonDeviceExtensions_.getMemoryRequirements2) {
        extFunctions_.vkGetImageMemoryRequirements2 = (PFN_vkGetImageMemoryRequirements2)vkGetInstanceProcAddr(
            plat_.instance, "vkGetImageMemoryRequirements2KHR");
        if (!extFunctions_.vkGetImageMemoryRequirements2) {
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetImageMemoryRequirements2");
        }
    }
    if (commonDeviceExtensions_.samplerYcbcrConversion) {
        GetYcbcrExtFunctions(plat_.instance, extFunctions_);
    }
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (commonDeviceExtensions_.fragmentShadingRate) {
        extFunctions_.vkCmdSetFragmentShadingRateKHR =
            (PFN_vkCmdSetFragmentShadingRateKHR)vkGetInstanceProcAddr(plat_.instance, "vkCmdSetFragmentShadingRateKHR");
    }
#endif

#if (RENDER_VULKAN_RT_ENABLED == 1)
    extFunctions_.vkGetAccelerationStructureBuildSizesKHR =
        (PFN_vkGetAccelerationStructureBuildSizesKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureBuildSizesKHR");
    if (!extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureBuildSizesKHR");
    }
    extFunctions_.vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkCmdBuildAccelerationStructuresKHR");
    if (!extFunctions_.vkCmdBuildAccelerationStructuresKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCmdBuildAccelerationStructuresKHR");
    }
    extFunctions_.vkCreateAccelerationStructureKHR =
        (PFN_vkCreateAccelerationStructureKHR)vkGetInstanceProcAddr(plat_.instance, "vkCreateAccelerationStructureKHR");
    if (!extFunctions_.vkCreateAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateAccelerationStructureKHR");
    }
    extFunctions_.vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkDestroyAccelerationStructureKHR");
    if (!extFunctions_.vkDestroyAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroyAccelerationStructureKHR");
    }
    extFunctions_.vkGetAccelerationStructureDeviceAddressKHR =
        (PFN_vkGetAccelerationStructureDeviceAddressKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureDeviceAddressKHR");
    if (!extFunctions_.vkGetAccelerationStructureDeviceAddressKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureDeviceAddressKHR");
    }
#endif
}

LowLevelDeviceVk::LowLevelDeviceVk(DeviceVk& deviceVk)
    : deviceVk_(deviceVk), gpuResourceMgr_(static_cast<GpuResourceManager&>(deviceVk_.GetGpuResourceManager()))
{}

DeviceBackendType LowLevelDeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformDataVk& LowLevelDeviceVk::GetPlatformDataVk() const
{
    return deviceVk_.GetPlatformDataVk();
}

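// The following getters expose low level Vulkan platform data for engine resources. They may only be used
// while backend resource access is locked (deviceVk_.GetLockResourceBackendAccess()); otherwise an empty
// platform data struct is returned and an error is logged.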
GpuBufferPlatformDataVk LowLevelDeviceVk::GetBuffer(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        auto* buffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(handle);
        if (buffer) {
            return buffer->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuImagePlatformDataVk LowLevelDeviceVk::GetImage(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        auto* image = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
        if (image) {
            return image->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuSamplerPlatformDataVk LowLevelDeviceVk::GetSampler(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        auto* sampler = gpuResourceMgr_.GetSampler<GpuSamplerVk>(handle);
        if (sampler) {
            return sampler->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}
RENDER_END_NAMESPACE()