/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "device_vk.h"

#include <cinttypes>
#include <cstdint>
#include <vulkan/vulkan_core.h>

#include <base/containers/vector.h>
#include <base/math/mathf.h>
#include <render/intf_render_context.h>
#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_program_util.h"
#include "device/gpu_resource_manager.h"
#include "device/shader_manager.h"
#include "device/shader_module.h"
#include "perf/cpu_perf_scope.h"
#include "platform_vk.h"
#include "util/log.h"
#include "vulkan/create_functions_vk.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/gpu_program_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/gpu_semaphore_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_backend_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/shader_module_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
constexpr string_view DEVICE_EXTENSION_SWAPCHAIN { VK_KHR_SWAPCHAIN_EXTENSION_NAME };

// promoted to 1.2, requires VK_KHR_create_renderpass2
constexpr string_view DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE { VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME };
constexpr string_view DEVICE_EXTENSION_CREATE_RENDERPASS2 { VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME };

constexpr string_view DEVICE_EXTENSION_EXTERNAL_MEMORY { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME };
constexpr string_view DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2 { VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME };
constexpr string_view DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME };
constexpr string_view DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN { VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME };
constexpr string_view DEVICE_EXTENSION_MULTIVIEW { VK_KHR_MULTIVIEW_EXTENSION_NAME };
constexpr string_view DEVICE_EXTENSION_MAINTENANCE4 = VK_KHR_MAINTENANCE_4_EXTENSION_NAME;
constexpr string_view DEVICE_EXTENSION_DESCRIPTOR_INDEXING = VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME;

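// ChainWrapper tracks the tails of the VkPhysicalDeviceFeatures2 and VkPhysicalDeviceProperties2
// pNext chains: each Get*Structs() helper below writes its struct's address through the current
// tail pointer and then advances the tail to that struct's own pNext member.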
struct ChainWrapper {
    void** ppNextFeatures { nullptr };
    void** ppNextProperties { nullptr };
};

struct PhysicalDeviceYcbcrStructsVk {
    VkPhysicalDeviceSamplerYcbcrConversionFeatures ycbcrConversionFeatures {};
};

#if (RENDER_VULKAN_FSR_ENABLED == 1)
struct PhysicalDeviceFragmentShadingRateStructsVk {
    VkPhysicalDeviceFragmentShadingRateFeaturesKHR physicalDeviceFragmentShadingRateFeatures;
    VkPhysicalDeviceFragmentShadingRatePropertiesKHR physicalDeviceFragmentShadingRateProperties;
};
#endif

#if (RENDER_VULKAN_RT_ENABLED == 1)
struct PhysicalDeviceRayTracingStructsVk {
    VkPhysicalDeviceBufferDeviceAddressFeatures physicalDeviceBufferDeviceAddressFeatures;
    VkPhysicalDeviceRayTracingPipelineFeaturesKHR physicalDeviceRayTracingPipelineFeatures;
    VkPhysicalDeviceAccelerationStructureFeaturesKHR physicalDeviceAccelerationStructureFeatures;
    VkPhysicalDeviceRayQueryFeaturesKHR physicalDeviceRayQueryFeatures;
};
#endif

struct PhysicalDeviceMultiviewStructsVk {
    VkPhysicalDeviceMultiviewFeaturesKHR physicalDeviceMultiviewFeatures;
    VkPhysicalDeviceMultiviewPropertiesKHR physicalDeviceMultiviewProperties;
};

struct PhysicalDeviceDescriptorIndexingStructsVk {
    VkPhysicalDeviceDescriptorIndexingFeatures physicalDeviceDescriptorIndexingFeatures;
    VkPhysicalDeviceDescriptorIndexingProperties physicalDeviceDescriptorIndexingProperties;
};

struct PhysicalDeviceMaintenance4Vk {
    VkPhysicalDeviceMaintenance4Features maintenance4Features {};
};

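// Owns the chained extension structs so they stay alive until device creation has consumed them.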
struct ChainObjects {
    unique_ptr<PhysicalDeviceYcbcrStructsVk> ycbcr;
#if (RENDER_VULKAN_RT_ENABLED == 1)
    unique_ptr<PhysicalDeviceRayTracingStructsVk> rt;
#endif
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    unique_ptr<PhysicalDeviceFragmentShadingRateStructsVk> fsr;
#endif
    unique_ptr<PhysicalDeviceMultiviewStructsVk> mv;
    unique_ptr<PhysicalDeviceDescriptorIndexingStructsVk> di;
    unique_ptr<PhysicalDeviceMaintenance4Vk> maintenance4;
};

// fragment shading rate
#if (RENDER_VULKAN_FSR_ENABLED == 1)
// VK_KHR_fragment_shading_rate, requires VK_KHR_create_renderpass2, requires VK_KHR_get_physical_device_properties2
static constexpr string_view DEVICE_EXTENSION_FRAGMENT_SHADING_RATE { VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME };

void GetPhysicalDeviceFragmentShadingRateStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.fsr = make_unique<PhysicalDeviceFragmentShadingRateStructsVk>();
    auto& fsr = co.fsr;
    fsr->physicalDeviceFragmentShadingRateFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, // sType
        nullptr,                                                              // pNext
        VK_FALSE,                                                             // pipelineFragmentShadingRate
        VK_FALSE,                                                             // primitiveFragmentShadingRate
        VK_FALSE,                                                             // attachmentFragmentShadingRate
    };
    *cw.ppNextFeatures = &fsr->physicalDeviceFragmentShadingRateFeatures;
    cw.ppNextFeatures = &fsr->physicalDeviceFragmentShadingRateFeatures.pNext;

    fsr->physicalDeviceFragmentShadingRateProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, // sType
        nullptr,                                                                // pNext
    };
    *cw.ppNextProperties = &fsr->physicalDeviceFragmentShadingRateProperties;
    cw.ppNextProperties = &fsr->physicalDeviceFragmentShadingRateProperties.pNext;
}
#endif

void GetPhysicalDeviceMultiviewFeaturesStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.mv = make_unique<PhysicalDeviceMultiviewStructsVk>();
    auto& mv = co.mv;
    mv->physicalDeviceMultiviewFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR, // sType
        nullptr,                                                  // pNext
        VK_FALSE,                                                 // multiview
        VK_FALSE,                                                 // multiviewGeometryShader
        VK_FALSE,                                                 // multiviewTessellationShader
    };
    *cw.ppNextFeatures = &mv->physicalDeviceMultiviewFeatures;
    cw.ppNextFeatures = &mv->physicalDeviceMultiviewFeatures.pNext;

    mv->physicalDeviceMultiviewProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR, // sType
        nullptr,                                                    // pNext
        0,                                                          // maxMultiviewViewCount
        0,                                                          // maxMultiviewInstanceIndex
    };
    *cw.ppNextProperties = &mv->physicalDeviceMultiviewProperties;
    cw.ppNextProperties = &mv->physicalDeviceMultiviewProperties.pNext;
}

void GetPhysicalDeviceDescriptorIndexingFeaturesStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.di = make_unique<PhysicalDeviceDescriptorIndexingStructsVk>();
    auto& di = co.di;
    di->physicalDeviceDescriptorIndexingFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES, // sType
        nullptr,                                                        // pNext
        VK_FALSE,                                                       // shaderInputAttachmentArrayDynamicIndexing
        VK_FALSE,                                                       // shaderUniformTexelBufferArrayDynamicIndexing
        VK_FALSE,                                                       // shaderStorageTexelBufferArrayDynamicIndexing
        VK_FALSE,                                                       // shaderUniformBufferArrayNonUniformIndexing
        VK_FALSE,                                                       // shaderSampledImageArrayNonUniformIndexing
        VK_FALSE,                                                       // shaderStorageBufferArrayNonUniformIndexing
        VK_FALSE,                                                       // shaderStorageImageArrayNonUniformIndexing
        VK_FALSE,                                                       // shaderInputAttachmentArrayNonUniformIndexing
        VK_FALSE, // shaderUniformTexelBufferArrayNonUniformIndexing
        VK_FALSE, // shaderStorageTexelBufferArrayNonUniformIndexing
        VK_FALSE, // descriptorBindingUniformBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingSampledImageUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageImageUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingUniformTexelBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageTexelBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingUpdateUnusedWhilePending
        VK_FALSE, // descriptorBindingPartiallyBound
        VK_FALSE, // descriptorBindingVariableDescriptorCount
        VK_FALSE, // runtimeDescriptorArray
    };
    *cw.ppNextFeatures = &di->physicalDeviceDescriptorIndexingFeatures;
    cw.ppNextFeatures = &di->physicalDeviceDescriptorIndexingFeatures.pNext;

    di->physicalDeviceDescriptorIndexingProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES, // sType
        nullptr,                                                          // pNext
        0U,                                                               // maxUpdateAfterBindDescriptorsInAllPools
        VK_FALSE, // shaderUniformBufferArrayNonUniformIndexingNative
        VK_FALSE, // shaderSampledImageArrayNonUniformIndexingNative
        VK_FALSE, // shaderStorageBufferArrayNonUniformIndexingNative
        VK_FALSE, // shaderStorageImageArrayNonUniformIndexingNative
        VK_FALSE, // shaderInputAttachmentArrayNonUniformIndexingNative
        VK_FALSE, // robustBufferAccessUpdateAfterBind
        VK_FALSE, // quadDivergentImplicitLod
        0U,       // maxPerStageDescriptorUpdateAfterBindSamplers
        0U,       // maxPerStageDescriptorUpdateAfterBindUniformBuffers
        0U,       // maxPerStageDescriptorUpdateAfterBindStorageBuffers
        0U,       // maxPerStageDescriptorUpdateAfterBindSampledImages
        0U,       // maxPerStageDescriptorUpdateAfterBindStorageImages
        0U,       // maxPerStageDescriptorUpdateAfterBindInputAttachments
        0U,       // maxPerStageUpdateAfterBindResources
        0U,       // maxDescriptorSetUpdateAfterBindSamplers
        0U,       // maxDescriptorSetUpdateAfterBindUniformBuffers
        0U,       // maxDescriptorSetUpdateAfterBindUniformBuffersDynamic
        0U,       // maxDescriptorSetUpdateAfterBindStorageBuffers
        0U,       // maxDescriptorSetUpdateAfterBindStorageBuffersDynamic
        0U,       // maxDescriptorSetUpdateAfterBindSampledImages
        0U,       // maxDescriptorSetUpdateAfterBindStorageImages
        0U,       // maxDescriptorSetUpdateAfterBindInputAttachments
    };
    *cw.ppNextProperties = &di->physicalDeviceDescriptorIndexingProperties;
    cw.ppNextProperties = &di->physicalDeviceDescriptorIndexingProperties.pNext;
}

// ray-tracing
#if (RENDER_VULKAN_RT_ENABLED == 1)
static constexpr string_view DEVICE_EXTENSION_ACCELERATION_STRUCTURE { "VK_KHR_acceleration_structure" };
static constexpr string_view DEVICE_EXTENSION_RAY_QUERY { "VK_KHR_ray_query" };
static constexpr string_view DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS { "VK_KHR_deferred_host_operations" };
static constexpr string_view DEVICE_EXTENSION_RAY_TRACING_PIPELINE { "VK_KHR_ray_tracing_pipeline" };
static constexpr string_view DEVICE_EXTENSION_PIPELINE_LIBRARY { "VK_KHR_pipeline_library" };

void GetPhysicalDeviceRayTracingStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.rt = make_unique<PhysicalDeviceRayTracingStructsVk>();
    auto& rt = co.rt;
    rt->physicalDeviceBufferDeviceAddressFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, // sType
        nullptr,                                                          // pNext
        VK_FALSE,                                                         // bufferDeviceAddress
        VK_FALSE,                                                         // bufferDeviceAddressCaptureReplay
        VK_FALSE,                                                         // bufferDeviceAddressMultiDevice
    };
    rt->physicalDeviceRayTracingPipelineFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, // sType
        &rt->physicalDeviceBufferDeviceAddressFeatures,                      // pNext
        VK_FALSE,                                                            // rayTracingPipeline
        VK_FALSE, // rayTracingPipelineShaderGroupHandleCaptureReplay
        VK_FALSE, // rayTracingPipelineShaderGroupHandleCaptureReplayMixed
        VK_FALSE, // rayTracingPipelineTraceRaysIndirect
        VK_FALSE, // rayTraversalPrimitiveCulling
    };
    rt->physicalDeviceAccelerationStructureFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, // sType
        &rt->physicalDeviceRayTracingPipelineFeatures,                         // pNext
        VK_FALSE,                                                              // accelerationStructure
        VK_FALSE,                                                              // accelerationStructureCaptureReplay
        VK_FALSE,                                                              // accelerationStructureIndirectBuild
        VK_FALSE,                                                              // accelerationStructureHostCommands
        VK_FALSE, // descriptorBindingAccelerationStructureUpdateAfterBind
    };
    rt->physicalDeviceRayQueryFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, // sType
        &rt->physicalDeviceAccelerationStructureFeatures,         // pNext
        VK_TRUE,                                                  // rayQuery
    };

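    // The four RT structs above are pre-linked via their pNext members
    // (rayQuery -> accelerationStructure -> rayTracingPipeline -> bufferDeviceAddress),
    // so the whole sub-chain is attached in one step and the new chain tail is
    // bufferDeviceAddressFeatures.pNext.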
    *cw.ppNextFeatures = &rt->physicalDeviceRayQueryFeatures;
    cw.ppNextFeatures = &rt->physicalDeviceBufferDeviceAddressFeatures.pNext;
}
#endif

void GetPhysicalDeviceYcbcrStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.ycbcr = make_unique<PhysicalDeviceYcbcrStructsVk>();
    auto& ycbcr = co.ycbcr;
    ycbcr->ycbcrConversionFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES, // sType
        nullptr,                                                             // pNext
        VK_FALSE,                                                            // samplerYcbcrConversion
    };

    *cw.ppNextFeatures = &ycbcr->ycbcrConversionFeatures;
    cw.ppNextFeatures = &ycbcr->ycbcrConversionFeatures.pNext;
}

void GetYcbcrExtFunctions(const VkInstance instance, DeviceVk::ExtFunctions& extFunctions)
{
    extFunctions.vkCreateSamplerYcbcrConversion =
        (PFN_vkCreateSamplerYcbcrConversion)(void*)vkGetInstanceProcAddr(instance, "vkCreateSamplerYcbcrConversion");
    if (!extFunctions.vkCreateSamplerYcbcrConversion) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateSamplerYcbcrConversion");
    }
    extFunctions.vkDestroySamplerYcbcrConversion =
        (PFN_vkDestroySamplerYcbcrConversion)vkGetInstanceProcAddr(instance, "vkDestroySamplerYcbcrConversion");
    if (!extFunctions.vkDestroySamplerYcbcrConversion) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroySamplerYcbcrConversion");
    }
}

void GetPhysicalDeviceMaintenance4Structs(ChainObjects& co, ChainWrapper& cw)
{
    co.maintenance4 = make_unique<PhysicalDeviceMaintenance4Vk>();
    auto& m4 = co.maintenance4;
    m4->maintenance4Features = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES, // sType
        nullptr,                                                  // pNext
        VK_TRUE,                                                  // maintenance4
    };

    *cw.ppNextFeatures = &m4->maintenance4Features;
    cw.ppNextFeatures = &m4->maintenance4Features.pNext;
}

constexpr uint32_t MIN_ALLOCATION_BLOCK_SIZE { 4u * 1024u * 1024u };
constexpr uint32_t MAX_ALLOCATION_BLOCK_SIZE { 1024u * 1024u * 1024u };
constexpr const QueueProperties DEFAULT_QUEUE {
    VK_QUEUE_GRAPHICS_BIT, // requiredFlags
    1,                     // count
    1.0f,                  // priority
    false,                 // explicitFlags
    true,                  // canPresent
};

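// Builds the GPU memory allocator create info. Client-provided block sizes (if any) are
// clamped to [MIN_ALLOCATION_BLOCK_SIZE, MAX_ALLOCATION_BLOCK_SIZE], and two custom pools
// are always set up: one for single-shot staging buffers and one for dynamic uniform
// ring buffers.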
PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo GetAllocatorCreateInfo(const BackendExtraVk* backendExtra)
{
    // create default pools
    PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo createInfo;
    uint32_t dynamicUboByteSize = 16u * 1024u * 1024u;
    if (backendExtra) {
        const auto& sizes = backendExtra->gpuMemoryAllocatorSizes;
        if (sizes.defaultAllocationBlockSize != ~0u) {
            createInfo.preferredLargeHeapBlockSize = Math::min(
                MAX_ALLOCATION_BLOCK_SIZE, Math::max(sizes.defaultAllocationBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
        }
        if (sizes.customAllocationDynamicUboBlockSize != ~0u) {
            dynamicUboByteSize = Math::min(MAX_ALLOCATION_BLOCK_SIZE,
                Math::max(sizes.customAllocationDynamicUboBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
        }
    }

    // staging
    {
        GpuBufferDesc desc;
        desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_SINGLE_SHOT_STAGING;
        desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                   MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_SRC_BIT;
        createInfo.customPools.push_back({
            "STAGING_GPU_BUFFER",
            PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
            0u,
            // if a linear allocator is used, memory can easily be wasted depending on the client's usage pattern.
            false,
            { move(desc) },
        });
    }
    // dynamic uniform ring buffers
    {
        GpuBufferDesc desc;
        desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER;
        desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                   MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
        createInfo.customPools.push_back({
            "DYNAMIC_UNIFORM_GPU_BUFFER",
            PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
            dynamicUboByteSize,
            false,
            { move(desc) },
        });
    }

    return createInfo;
}

VkBool32 VKAPI_PTR DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
    VkDebugUtilsMessageTypeFlagsEXT /* messageTypes */, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
    void* /* pUserData */)
{
    if (pCallbackData && pCallbackData->pMessageIdName && pCallbackData->pMessage) {
        if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
            PLUGIN_LOG_E("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) {
            PLUGIN_LOG_W("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
            PLUGIN_LOG_I("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
            PLUGIN_LOG_V("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        }
    }

    // The application should always return VK_FALSE.
    return VK_FALSE;
}

VkBool32 VKAPI_PTR DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT, uint64_t, size_t,
    int32_t, const char*, const char* pMessage, void*)
{
    if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) {
        PLUGIN_LOG_E("%s", pMessage);
    } else if (flags & (VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)) {
        PLUGIN_LOG_W("%s", pMessage);
    } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) {
        PLUGIN_LOG_I("%s", pMessage);
    } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) {
        PLUGIN_LOG_D("%s", pMessage);
    }
    return VK_FALSE;
}

void EmplaceDeviceQueue(
    const VkDevice device, const LowLevelQueueInfo& aQueueInfo, vector<LowLevelGpuQueueVk>& aLowLevelQueues)
{
    if (!device) {
        return;
    }

    for (uint32_t idx = 0; idx < aQueueInfo.queueCount; ++idx) {
        VkQueue queue = VK_NULL_HANDLE;
        vkGetDeviceQueue(device,         // device
            aQueueInfo.queueFamilyIndex, // queueFamilyIndex
            idx,                         // queueIndex
            &queue);                     // pQueue
        aLowLevelQueues.push_back(LowLevelGpuQueueVk { queue, aQueueInfo });
    }
}

void CheckValidDepthFormats(const DevicePlatformDataVk& devicePlat, DevicePlatformInternalDataVk& dataInternal)
{
    constexpr uint32_t DEPTH_FORMAT_COUNT { 4 };
    constexpr Format DEPTH_FORMATS[DEPTH_FORMAT_COUNT] = { BASE_FORMAT_D24_UNORM_S8_UINT, BASE_FORMAT_D32_SFLOAT,
        BASE_FORMAT_D16_UNORM, BASE_FORMAT_X8_D24_UNORM_PACK32 };
    for (const Format& format : DEPTH_FORMATS) {
        VkFormatProperties formatProperties;
        vkGetPhysicalDeviceFormatProperties(devicePlat.physicalDevice, // physicalDevice
            (VkFormat)format,                                          // format
            &formatProperties);                                        // pFormatProperties
        const VkFormatFeatureFlags optimalTilingFeatureFlags = formatProperties.optimalTilingFeatures;
        if (optimalTilingFeatureFlags & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
            dataInternal.supportedDepthFormats.push_back(format);
        }
    }
}

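// Collects the device extensions the engine would like to enable. These are preferences
// only; availability is resolved against the physical device when the logical device is
// created.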
vector<string_view> GetPreferredDeviceExtensions(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
{
    vector<string_view> extensions { DEVICE_EXTENSION_SWAPCHAIN };
    extensions.push_back(DEVICE_EXTENSION_CREATE_RENDERPASS2);
    extensions.push_back(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE);
    extensions.push_back(DEVICE_EXTENSION_MAINTENANCE4);
    GetPlatformDeviceExtensions(extensions);
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    extensions.push_back(DEVICE_EXTENSION_FRAGMENT_SHADING_RATE);
#endif
#if (RENDER_VULKAN_RT_ENABLED == 1)
    extensions.push_back(DEVICE_EXTENSION_ACCELERATION_STRUCTURE);
    extensions.push_back(DEVICE_EXTENSION_RAY_TRACING_PIPELINE);
    extensions.push_back(DEVICE_EXTENSION_RAY_QUERY);
    extensions.push_back(DEVICE_EXTENSION_PIPELINE_LIBRARY);
    extensions.push_back(DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS);
#endif
    if (plat.deviceApiMinor >= 1) { // enable only for 1.1+
        extensions.push_back(DEVICE_EXTENSION_MULTIVIEW);
    }
    if (plat.deviceApiMinor >= 2) { // enable only for 1.2+
        extensions.push_back(DEVICE_EXTENSION_DESCRIPTOR_INDEXING);
    }
    if (backendExtra) {
        for (const auto str : backendExtra->extensions.extensionNames) {
            extensions.push_back(str);
        }
    }
    return extensions;
}

DeviceVk::CommonDeviceExtensions GetEnabledCommonDeviceExtensions(
    const unordered_map<string, uint32_t>& enabledDeviceExtensions)
{
    DeviceVk::CommonDeviceExtensions extensions;
    extensions.swapchain = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SWAPCHAIN);
    // render pass 2 is core in 1.2; we only use it when depth-stencil resolve is needed
    extensions.renderPass2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE) &&
                             enabledDeviceExtensions.contains(DEVICE_EXTENSION_CREATE_RENDERPASS2);
    extensions.externalMemory = enabledDeviceExtensions.contains(DEVICE_EXTENSION_EXTERNAL_MEMORY);
    extensions.getMemoryRequirements2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2);
    extensions.queueFamilyForeign = enabledDeviceExtensions.contains(DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN);
    extensions.samplerYcbcrConversion = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION);
    extensions.multiView = enabledDeviceExtensions.contains(DEVICE_EXTENSION_MULTIVIEW);
    extensions.descriptorIndexing = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DESCRIPTOR_INDEXING);
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    extensions.fragmentShadingRate = enabledDeviceExtensions.contains(DEVICE_EXTENSION_FRAGMENT_SHADING_RATE);
#endif

    return extensions;
}

CommonDeviceProperties GetCommonDevicePropertiesFunc(const ChainObjects& co)
{
    CommonDeviceProperties cdp;
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (co.fsr) {
        const auto& fsrVk = co.fsr->physicalDeviceFragmentShadingRateProperties;
        cdp.fragmentShadingRateProperties.minFragmentShadingRateAttachmentTexelSize = {
            fsrVk.minFragmentShadingRateAttachmentTexelSize.width,
            fsrVk.minFragmentShadingRateAttachmentTexelSize.height
        };
        cdp.fragmentShadingRateProperties.maxFragmentShadingRateAttachmentTexelSize = {
            fsrVk.maxFragmentShadingRateAttachmentTexelSize.width,
            fsrVk.maxFragmentShadingRateAttachmentTexelSize.height
        };
        cdp.fragmentShadingRateProperties.maxFragmentSize = { fsrVk.maxFragmentSize.width,
            fsrVk.maxFragmentSize.height };
    }
#endif
    return cdp;
}

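// Enables features by intersecting what the device supports with what the client requested.
// VkPhysicalDeviceFeatures is treated as a flat array of VkBool32s so the comparison does
// not need to name every member.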
void PreparePhysicalDeviceFeaturesForEnabling(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
{
    // enable all supported features by default and then disable a few
    plat.enabledPhysicalDeviceFeatures = plat.physicalDeviceProperties.physicalDeviceFeatures;
    // prepare feature disable for core engine
    plat.enabledPhysicalDeviceFeatures.geometryShader = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.tessellationShader = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.sampleRateShading = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.occlusionQueryPrecise = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.pipelineStatisticsQuery = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.shaderTessellationAndGeometryPointSize = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.inheritedQueries = VK_FALSE;
    if (backendExtra) {
        // check for support and prepare enabling
        if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
            const size_t valueCount = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
            const array_view<const VkBool32> supported(
                reinterpret_cast<VkBool32*>(&plat.physicalDeviceProperties.physicalDeviceFeatures), valueCount);
            VkPhysicalDeviceFeatures* wantedFeatures =
                (&backendExtra->extensions.physicalDeviceFeaturesToEnable->features);
            const array_view<const VkBool32> wanted(reinterpret_cast<VkBool32*>(wantedFeatures), valueCount);

            array_view<VkBool32> enabledPhysicalDeviceFeatures(
                reinterpret_cast<VkBool32*>(&plat.enabledPhysicalDeviceFeatures), valueCount);
            for (size_t idx = 0; idx < valueCount; ++idx) {
                if (supported[idx] && wanted[idx]) {
                    enabledPhysicalDeviceFeatures[idx] = VK_TRUE;
                } else if (wanted[idx]) {
                    PLUGIN_LOG_W(
                        "physical device feature not supported/enabled from idx: %u", static_cast<uint32_t>(idx));
                }
            }
        }
    }
}

FormatProperties FillDeviceFormatSupport(VkPhysicalDevice physicalDevice, const Format format)
{
    VkFormatProperties formatProperties;
    vkGetPhysicalDeviceFormatProperties(physicalDevice, // physicalDevice
        (VkFormat)format,                               // format
        &formatProperties);                             // pFormatProperties
    return FormatProperties {
        (FormatFeatureFlags)formatProperties.linearTilingFeatures,
        (FormatFeatureFlags)formatProperties.optimalTilingFeatures,
        (FormatFeatureFlags)formatProperties.bufferFeatures,
        GpuProgramUtil::FormatByteSize(format),
    };
}

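// formatProperties_ layout: plain format enums occupy indices [0, LINEAR_FORMAT_MAX_COUNT),
// while extension formats numbered from ADDITIONAL_FORMAT_START_NUMBER are remapped into
// [ADDITIONAL_FORMAT_BASE_IDX, ADDITIONAL_FORMAT_BASE_IDX + ADDITIONAL_FORMAT_MAX_COUNT).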
void FillFormatSupport(VkPhysicalDevice physicalDevice, vector<FormatProperties>& formats)
{
    const uint32_t fullSize = DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT +
                              DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT;
    formats.resize(fullSize);
    for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT; ++idx) {
        formats[idx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(idx));
    }
    // pre-build additional formats
    for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT; ++idx) {
        const uint32_t currIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
        PLUGIN_ASSERT(currIdx < static_cast<uint32_t>(formats.size()));
        const uint32_t formatIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER;
        formats[currIdx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(formatIdx));
    }
}

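// Creates default objects shared across the backend: an empty descriptor set layout with
// zero bindings (commonly used to occupy otherwise-unused set indices in pipeline layouts).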
void CreateDefaultVulkanObjects(VkDevice device, DeviceVk::DefaultVulkanObjects& dvo)
{
    constexpr VkDescriptorSetLayoutCreateInfo EMPTY_LAYOUT_INFO {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // sType
        nullptr,                                             // pNext
        0U,                                                  // flags
        0U,                                                  // bindingCount
        nullptr,                                             // pBindings
    };

    PLUGIN_ASSERT(!dvo.emptyDescriptorSetLayout);
    VALIDATE_VK_RESULT(vkCreateDescriptorSetLayout(device, // device
        &EMPTY_LAYOUT_INFO,                                // pCreateInfo
        nullptr,                                           // pAllocator
        &dvo.emptyDescriptorSetLayout));                   // pSetLayout
}

void DestroyDefaultVulkanObjects(VkDevice vkDevice, DeviceVk::DefaultVulkanObjects& dvo)
{
    PLUGIN_ASSERT(dvo.emptyDescriptorSetLayout);
    vkDestroyDescriptorSetLayout(vkDevice, // device
        dvo.emptyDescriptorSetLayout,      // descriptorSetLayout
        nullptr);                          // pAllocator
}
} // namespace

DeviceVk::DeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo) : Device(renderContext, createInfo)
{
    // assume instance and device will be created internally
    ownInstanceAndDevice_ = true;

    const auto* backendExtra = static_cast<const BackendExtraVk*>(createInfo.backendConfiguration);
    // update internal state based on the optional backend configuration given by the client. the size of
    // queueProperties will depend on the enableMultiQueue setting.
    const auto queueProperties = CheckExternalConfig(backendExtra);

    // these check ownInstanceAndDevice_ internally and skip creation if provided by the user
    CreateInstance();
    CreatePhysicalDevice();

    if ((!plat_.instance) || (!plat_.physicalDevice)) {
        PLUGIN_LOG_E("Invalid device.");
        SetDeviceStatus(false);
        return;
    }

    const auto availableQueues = CreateFunctionsVk::GetAvailableQueues(plat_.physicalDevice, queueProperties);

    // own device creation does a lot of work to figure out what to create, but for an external device
    // CheckExternalConfig stored the enabled extensions and features, and we just need to check what is available.
    if (ownInstanceAndDevice_) {
        CreateDevice(backendExtra, availableQueues);
    } else {
        commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
        platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
        // filling commonDeviceProperties_ isn't done, but at the moment that only contains fragment shading rate.
        // should walk through BackendExtraVk::extensions::physicalDeviceFeaturesToEnable::pNext and see what's
        // available.
    }

    if (!plat_.device) {
        PLUGIN_LOG_E("Invalid device.");
        SetDeviceStatus(false);
        return;
    }

    CreateDebugFunctions();
    CreateExtFunctions();
    CreatePlatformExtFunctions();
    SortAvailableQueues(availableQueues);

    CheckValidDepthFormats(plat_, platInternal_);
    FillFormatSupport(plat_.physicalDevice, formatProperties_);

    PLUGIN_ASSERT_MSG(!lowLevelGpuQueues_.graphicsQueues.empty(), "default queue not initialized");
    if (!lowLevelGpuQueues_.graphicsQueues.empty()) {
        lowLevelGpuQueues_.defaultQueue = lowLevelGpuQueues_.graphicsQueues[0];
    } else {
        PLUGIN_LOG_E("default vulkan queue not initialized");
    }

    gpuQueueCount_ =
        static_cast<uint32_t>(lowLevelGpuQueues_.computeQueues.size() + lowLevelGpuQueues_.graphicsQueues.size() +
                              lowLevelGpuQueues_.transferQueues.size());

    const PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo allocatorCreateInfo =
        GetAllocatorCreateInfo(backendExtra);
    platformGpuMemoryAllocator_ = make_unique<PlatformGpuMemoryAllocator>(
        plat_.instance, plat_.physicalDevice, plat_.device, allocatorCreateInfo);

    if (queueProperties.size() > 1) {
        PLUGIN_LOG_I("gpu queue count: %u", gpuQueueCount_);
    }

    SetDeviceStatus(true);

    const GpuResourceManager::CreateInfo grmCreateInfo {
        GpuResourceManager::GPU_RESOURCE_MANAGER_OPTIMIZE_STAGING_MEMORY,
    };
    gpuResourceMgr_ = make_unique<GpuResourceManager>(*this, grmCreateInfo);
    shaderMgr_ = make_unique<ShaderManager>(*this);
    globalDescriptorSetMgr_ = make_unique<DescriptorSetManagerVk>(*this);

    lowLevelDevice_ = make_unique<LowLevelDeviceVk>(*this);

    CreateDefaultVulkanObjects(plat_.device, defaultVulkanObjects_);
}

DeviceVk::~DeviceVk()
{
    WaitForIdle();

    DestroyDefaultVulkanObjects(plat_.device, defaultVulkanObjects_);

    globalDescriptorSetMgr_.reset();
    // must release handles before taking down gpu resource manager.
    swapchains_.clear();

    gpuResourceMgr_.reset();
    shaderMgr_.reset();

    platformGpuMemoryAllocator_.reset();

    if (plat_.pipelineCache) {
        CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
    }

    if (ownInstanceAndDevice_) {
        CreateFunctionsVk::DestroyDevice(plat_.device);
        CreateFunctionsVk::DestroyDebugMessenger(plat_.instance, debugFunctionUtilities_.debugMessenger);
        CreateFunctionsVk::DestroyDebugCallback(plat_.instance, debugFunctionUtilities_.debugCallback);
        CreateFunctionsVk::DestroyInstance(plat_.instance);
    }
}

void DeviceVk::CreateInstance()
{
    RENDER_CPU_PERF_SCOPE("CreateInstance", "");
    const auto instanceWrapper = (plat_.instance == VK_NULL_HANDLE)
                                     ? CreateFunctionsVk::CreateInstance(VersionInfo { "core_renderer", 0, 1, 0 },
                                                                         VersionInfo { "core_renderer_app", 0, 1, 0 })
                                     : CreateFunctionsVk::GetWrapper(plat_.instance);

    plat_.instance = instanceWrapper.instance;
    // updated with physical device creation
    plat_.deviceApiMajor = instanceWrapper.apiMajor;
    plat_.deviceApiMinor = instanceWrapper.apiMinor;
    if (instanceWrapper.debugUtilsSupported) {
        debugFunctionUtilities_.debugMessenger =
            CreateFunctionsVk::CreateDebugMessenger(plat_.instance, DebugMessengerCallback);
    }
    if (!debugFunctionUtilities_.debugMessenger && instanceWrapper.debugReportSupported) {
        debugFunctionUtilities_.debugCallback =
            CreateFunctionsVk::CreateDebugCallback(plat_.instance, DebugReportCallback);
    }

    extFunctions_.vkAcquireNextImageKHR =
        (PFN_vkAcquireNextImageKHR)(void*)vkGetInstanceProcAddr(plat_.instance, "vkAcquireNextImageKHR");
    if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
        extFunctions_.vkGetPhysicalDeviceFeatures2 =
            (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceFeatures2");
        extFunctions_.vkGetPhysicalDeviceProperties2 =
            (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceProperties2");
    }
}

void DeviceVk::CreatePhysicalDevice()
{
    RENDER_CPU_PERF_SCOPE("CreatePhysicalDevice", "");
    auto physicalDeviceWrapper = (plat_.physicalDevice == VK_NULL_HANDLE)
                                     ? CreateFunctionsVk::CreatePhysicalDevice(plat_.instance, DEFAULT_QUEUE)
                                     : CreateFunctionsVk::GetWrapper(plat_.physicalDevice);
    const uint32_t physicalDeviceApiMajor =
        VK_VERSION_MAJOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
    const uint32_t physicalDeviceApiMinor =
        VK_VERSION_MINOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
    plat_.deviceApiMajor = Math::min(plat_.deviceApiMajor, physicalDeviceApiMajor);
    plat_.deviceApiMinor = Math::min(plat_.deviceApiMinor, physicalDeviceApiMinor);
    PLUGIN_LOG_D("device api version %u.%u", plat_.deviceApiMajor, plat_.deviceApiMinor);

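    // deviceSharedMemoryPropertyFlags_ becomes the intersection of the property flags of
    // all usable memory types; lazily allocated and protected types are excluded below.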
    plat_.physicalDevice = physicalDeviceWrapper.physicalDevice;
    plat_.physicalDeviceProperties = move(physicalDeviceWrapper.physicalDeviceProperties);
    plat_.physicalDeviceExtensions = move(physicalDeviceWrapper.physicalDeviceExtensions);
    const auto& memoryProperties = plat_.physicalDeviceProperties.physicalDeviceMemoryProperties;
    deviceSharedMemoryPropertyFlags_ =
        (memoryProperties.memoryTypeCount > 0) ? (MemoryPropertyFlags)memoryProperties.memoryTypes[0].propertyFlags : 0;
    for (uint32_t idx = 1; idx < memoryProperties.memoryTypeCount; ++idx) {
        const auto memoryPropertyFlags = (MemoryPropertyFlags)memoryProperties.memoryTypes[idx].propertyFlags;
        // do not compare lazily allocated or protected memory blocks
        if ((memoryPropertyFlags & (CORE_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | CORE_MEMORY_PROPERTY_PROTECTED_BIT)) ==
            0) {
            deviceSharedMemoryPropertyFlags_ &= memoryPropertyFlags;
        }
    }
}

void DeviceVk::CreateDevice(const BackendExtraVk* backendExtra, const vector<LowLevelQueueInfo>& availableQueues)
{
    RENDER_CPU_PERF_SCOPE("CreateDevice", "");
    vector<string_view> preferredExtensions = GetPreferredDeviceExtensions(backendExtra, plat_);
    PreparePhysicalDeviceFeaturesForEnabling(backendExtra, plat_);

    ChainWrapper chainWrapper;
    ChainObjects chainObjects;

    VkPhysicalDeviceFeatures2* physicalDeviceFeatures2Ptr = nullptr;
    VkPhysicalDeviceFeatures2 physicalDeviceFeatures2 {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, // sType
        nullptr,                                      // pNext
        {},                                           // features
    };
    chainWrapper.ppNextFeatures = &physicalDeviceFeatures2.pNext;

    VkPhysicalDeviceProperties2 physicalDeviceProperties2 {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, // sType
        nullptr,                                        // pNext
        {},                                             // properties
    };
    chainWrapper.ppNextProperties = &physicalDeviceProperties2.pNext;

    GetPhysicalDeviceYcbcrStructs(chainObjects, chainWrapper);
#if (RENDER_VULKAN_RT_ENABLED == 1)
    GetPhysicalDeviceRayTracingStructs(chainObjects, chainWrapper);
#endif
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (CreateFunctionsVk::HasExtension(plat_.physicalDeviceExtensions, DEVICE_EXTENSION_FRAGMENT_SHADING_RATE)) {
        GetPhysicalDeviceFragmentShadingRateStructs(chainObjects, chainWrapper);
    }
#endif
    if (plat_.deviceApiMinor >= 1) { // enable only for 1.1+ for now
        GetPhysicalDeviceMultiviewFeaturesStructs(chainObjects, chainWrapper);
    }
    if (plat_.deviceApiMinor >= 2) { // enable only for 1.2+ for now
        GetPhysicalDeviceDescriptorIndexingFeaturesStructs(chainObjects, chainWrapper);
    }
    if (CreateFunctionsVk::HasExtension(plat_.physicalDeviceExtensions, DEVICE_EXTENSION_MAINTENANCE4)) {
        GetPhysicalDeviceMaintenance4Structs(chainObjects, chainWrapper);
    }
    if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
        // pipe user extension physical device features
        if (backendExtra) {
            if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
                *chainWrapper.ppNextFeatures = backendExtra->extensions.physicalDeviceFeaturesToEnable->pNext;
            }
        }
        if (extFunctions_.vkGetPhysicalDeviceFeatures2) {
            extFunctions_.vkGetPhysicalDeviceFeatures2(plat_.physicalDevice, &physicalDeviceFeatures2);
        }
        if (extFunctions_.vkGetPhysicalDeviceProperties2) {
            extFunctions_.vkGetPhysicalDeviceProperties2(plat_.physicalDevice, &physicalDeviceProperties2);
        }

        // vkGetPhysicalDeviceFeatures has already filled this and PreparePhysicalDeviceFeaturesForEnabling
        // disabled/enabled some features.
        physicalDeviceFeatures2.features = plat_.enabledPhysicalDeviceFeatures;
        physicalDeviceFeatures2Ptr = &physicalDeviceFeatures2;
    }
    const DeviceWrapper deviceWrapper =
        CreateFunctionsVk::CreateDevice(plat_.instance, plat_.physicalDevice, plat_.physicalDeviceExtensions,
            plat_.enabledPhysicalDeviceFeatures, physicalDeviceFeatures2Ptr, availableQueues, preferredExtensions);
    plat_.device = deviceWrapper.device;
    for (const auto& ref : deviceWrapper.extensions) {
        extensions_[ref] = 1u;
    }
    commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
    platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
    commonDeviceProperties_ = GetCommonDevicePropertiesFunc(chainObjects);
}

vector<QueueProperties> DeviceVk::CheckExternalConfig(const BackendExtraVk* backendConfiguration)
{
    vector<QueueProperties> queueProperties;
    queueProperties.push_back(DEFAULT_QUEUE);

    if (!backendConfiguration) {
        return queueProperties;
    }

    const auto& extra = *backendConfiguration;
    if (extra.enableMultiQueue) {
        queueProperties.push_back(QueueProperties {
            VK_QUEUE_COMPUTE_BIT, // requiredFlags
            1,                    // count
            1.0f,                 // priority
            true,                 // explicitFlags
            false,                // canPresent
        });
        PLUGIN_LOG_I("trying to enable gpu multi-queue, with queue count: %u", (uint32_t)queueProperties.size());
    }

    if (extra.instance != VK_NULL_HANDLE) {
        PLUGIN_LOG_D("trying to use application given vulkan instance, device, and physical device");
        PLUGIN_ASSERT((extra.instance && extra.physicalDevice && extra.device));
        plat_.instance = extra.instance;
        plat_.physicalDevice = extra.physicalDevice;
        plat_.device = extra.device;
        if (extra.extensions.physicalDeviceFeaturesToEnable) {
            plat_.enabledPhysicalDeviceFeatures = extra.extensions.physicalDeviceFeaturesToEnable->features;
        }
        ownInstanceAndDevice_ = false; // everything given by the application

        const auto myDevice = plat_.physicalDevice;
        auto& myProperties = plat_.physicalDeviceProperties;
        vkGetPhysicalDeviceProperties(myDevice, &myProperties.physicalDeviceProperties);
        vkGetPhysicalDeviceFeatures(myDevice, &myProperties.physicalDeviceFeatures);
        vkGetPhysicalDeviceMemoryProperties(myDevice, &myProperties.physicalDeviceMemoryProperties);

        for (const auto& extension : extra.extensions.extensionNames) {
            extensions_[extension] = 1u;
        }
    }
    return queueProperties;
}

void DeviceVk::SortAvailableQueues(const vector<LowLevelQueueInfo>& availableQueues)
{
    for (const auto& ref : availableQueues) {
        if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_COMPUTE_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.computeQueues);
        } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_GRAPHICS_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.graphicsQueues);
        } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_TRANSFER_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.transferQueues);
        }
    }
}

DeviceBackendType DeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformData& DeviceVk::GetPlatformData() const
{
    return plat_;
}

const DevicePlatformDataVk& DeviceVk::GetPlatformDataVk() const
{
    return plat_;
}

const DevicePlatformInternalDataVk& DeviceVk::GetPlatformInternalDataVk() const
{
    return platInternal_;
}

ILowLevelDevice& DeviceVk::GetLowLevelDevice() const
{
    return *lowLevelDevice_;
}

FormatProperties DeviceVk::GetFormatProperties(const Format format) const
{
    const auto formatSupportSize = static_cast<uint32_t>(formatProperties_.size());
    const auto formatIdx = static_cast<uint32_t>(format);
    if (formatIdx < formatSupportSize) {
        return formatProperties_[formatIdx];
    } else if ((formatIdx >= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) &&
               (formatIdx <= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_END_NUMBER)) {
        // remap the extension format number into the additional-format region, mirroring the
        // index mapping used in FillFormatSupport
        const uint32_t currIdx = (formatIdx - DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) +
                                 DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
        PLUGIN_ASSERT(currIdx < formatSupportSize);
        return formatProperties_[currIdx];
    }
    return {};
}

AccelerationStructureBuildSizes DeviceVk::GetAccelerationStructureBuildSizes(
    const AccelerationStructureBuildGeometryInfo& geometry,
    BASE_NS::array_view<const AccelerationStructureGeometryTrianglesInfo> triangles,
    BASE_NS::array_view<const AccelerationStructureGeometryAabbsInfo> aabbs,
    BASE_NS::array_view<const AccelerationStructureGeometryInstancesInfo> instances) const
{
#if (RENDER_VULKAN_RT_ENABLED == 1)
    const VkDevice device = plat_.device;

    const size_t arraySize = triangles.size() + aabbs.size() + instances.size();
    vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
    vector<uint32_t> maxPrimitiveCounts(arraySize);
    uint32_t arrayIndex = 0;
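    // Translate the engine's geometry descriptions into VkAccelerationStructureGeometryKHR:
    // triangles, AABBs, and instances are appended into one flat array, with a matching
    // max primitive count recorded per entry.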
    for (const auto& trianglesRef : triangles) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr,                                               // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR,     // geometryType
            {},                                                    // geometry
            VkGeometryFlagsKHR(trianglesRef.geometryFlags),        // flags
        };
        geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
            nullptr,                                                              // pNext
            VkFormat(trianglesRef.vertexFormat),                                  // vertexFormat
            {},                                                                   // vertexData
            VkDeviceSize(trianglesRef.vertexStride),                              // vertexStride
            trianglesRef.maxVertex,                                               // maxVertex
            VkIndexType(trianglesRef.indexType),                                  // indexType
            {},                                                                   // indexData
            {},                                                                   // transformData
        };
        maxPrimitiveCounts[arrayIndex] = trianglesRef.indexCount / 3u; // triangles
        arrayIndex++;
    }
    for (const auto& aabbsRef : aabbs) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr,                                               // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR,         // geometryType
            {},                                                    // geometry
            0,                                                     // flags
        };
        geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
            nullptr,                                                          // pNext
            {},                                                               // data
            aabbsRef.stride,                                                  // stride
        };
        maxPrimitiveCounts[arrayIndex] = 1u;
        arrayIndex++;
    }
    for (const auto& instancesRef : instances) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr,                                               // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR,     // geometryType
            {},                                                    // geometry
            0,                                                     // flags
        };
        geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
            nullptr,                                                              // pNext
            instancesRef.arrayOfPointers,                                         // arrayOfPointers
            {},                                                                   // data
        };
        maxPrimitiveCounts[arrayIndex] = 1u;
        arrayIndex++;
    }

    const VkAccelerationStructureBuildGeometryInfoKHR geometryInfoVk {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
        nullptr,                                                          // pNext
        VkAccelerationStructureTypeKHR(geometry.type),                    // type
        VkBuildAccelerationStructureFlagsKHR(geometry.flags),             // flags
        VkBuildAccelerationStructureModeKHR(geometry.mode),               // mode
        VK_NULL_HANDLE,                                                   // srcAccelerationStructure
        VK_NULL_HANDLE,                                                   // dstAccelerationStructure
        arrayIndex,                                                       // geometryCount
        geometryData.data(),                                              // pGeometries
        nullptr,                                                          // ppGeometries
        {},                                                               // scratchData
    };

    VkAccelerationStructureBuildSizesInfoKHR buildSizesInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // sType
        nullptr,                                                       // pNext
        0,                                                             // accelerationStructureSize
        0,                                                             // updateScratchSize
        0,                                                             // buildScratchSize
    };
    if ((arrayIndex > 0) && extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        extFunctions_.vkGetAccelerationStructureBuildSizesKHR(device, // device
            VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,          // buildType
            &geometryInfoVk,                                          // pBuildInfo
            maxPrimitiveCounts.data(),                                // pMaxPrimitiveCounts
1062             &buildSizesInfo);                                         // pSizeInfo
1063     }
1064 
1065     return AccelerationStructureBuildSizes {
1066         static_cast<uint32_t>(buildSizesInfo.accelerationStructureSize),
1067         static_cast<uint32_t>(buildSizesInfo.updateScratchSize),
1068         static_cast<uint32_t>(buildSizesInfo.buildScratchSize),
1069     };
1070 #else
1071     return AccelerationStructureBuildSizes { 0, 0, 0 };
1072 #endif
1073 }
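
// Illustrative only (deviceVk is an assumed DeviceVk instance): the returned
// sizes would typically drive the buffer allocations for the build, e.g.
//   const AccelerationStructureBuildSizes sizes =
//       deviceVk.GetAccelerationStructureBuildSizes(geometry, triangles, {}, {});
//   // one buffer of sizes.accelerationStructureSize bytes for the structure itself,
//   // one of sizes.buildScratchSize bytes as scratch for
//   // vkCmdBuildAccelerationStructuresKHR (sizes.updateScratchSize for updates).
// In this engine the allocation itself goes through
// CreateGpuBuffer(const GpuAccelerationStructureDesc&) below.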

unique_ptr<Swapchain> DeviceVk::CreateDeviceSwapchain(const SwapchainCreateInfo& swapchainCreateInfo)
{
    RENDER_CPU_PERF_SCOPE("CreateDeviceSwapchain", "");
    return make_unique<SwapchainVk>(*this, swapchainCreateInfo);
}

void DeviceVk::DestroyDeviceSwapchain() {}

PlatformGpuMemoryAllocator* DeviceVk::GetPlatformGpuMemoryAllocator()
{
    return platformGpuMemoryAllocator_.get();
}

GpuQueue DeviceVk::GetValidGpuQueue(const GpuQueue& gpuQueue) const
{
    // 1. tries to return the typed queue with the given index
    // 2. tries to return the typed queue with index 0
    // 3. returns the default queue
    const auto getSpecificQueue = [](const uint32_t queueIndex, const GpuQueue::QueueType queueType,
                                      const vector<LowLevelGpuQueueVk>& specificQueues, const GpuQueue& defaultQueue) {
        const auto queueCount = (uint32_t)specificQueues.size();
        if (queueIndex < queueCount) {
            return GpuQueue { queueType, queueIndex };
        } else if (queueCount > 0) {
            return GpuQueue { queueType, 0 };
        }
        return defaultQueue;
    };

    static GpuQueue defaultQueue { GpuQueue::QueueType::GRAPHICS, 0 };
    if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::COMPUTE, lowLevelGpuQueues_.computeQueues, defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::GRAPHICS, lowLevelGpuQueues_.graphicsQueues, defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::TRANSFER, lowLevelGpuQueues_.transferQueues, defaultQueue);
    } else {
        return defaultQueue;
    }
}

uint32_t DeviceVk::GetGpuQueueCount() const
{
    return gpuQueueCount_;
}

void DeviceVk::InitializePipelineCache(array_view<const uint8_t> initialData)
{
    RENDER_CPU_PERF_SCOPE("InitializePipelineCache", "");

    if (plat_.pipelineCache) {
        CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
    }
    // Matches the header layout the Vulkan spec mandates for pipeline cache data
    // (VK_PIPELINE_CACHE_HEADER_VERSION_ONE).
    struct CacheHeader {
        uint32_t bytes;
        uint32_t version;
        uint32_t vendorId;
        uint32_t deviceId;
        uint8_t pipelineCacheUUID[VK_UUID_SIZE];
    };
    if (initialData.data() && (initialData.size() > sizeof(CacheHeader))) {
        CacheHeader header;
        CloneData(&header, sizeof(header), initialData.data(), sizeof(header));
        const auto& props = plat_.physicalDeviceProperties.physicalDeviceProperties;
        // Discard caches produced by a different device or driver (the UUID changes
        // with driver updates).
        if (header.version != VkPipelineCacheHeaderVersion::VK_PIPELINE_CACHE_HEADER_VERSION_ONE ||
            header.vendorId != props.vendorID || header.deviceId != props.deviceID ||
            memcmp(header.pipelineCacheUUID, props.pipelineCacheUUID, VK_UUID_SIZE) != 0) {
            initialData = {};
        }
    }

    plat_.pipelineCache = CreateFunctionsVk::CreatePipelineCache(plat_.device, initialData);
}

vector<uint8_t> DeviceVk::GetPipelineCache() const
{
    vector<uint8_t> deviceData;
    if (plat_.pipelineCache) {
        // Standard two-call pattern: query the size first, then fetch the data.
        size_t dataSize = 0u;
        if (auto result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, nullptr);
            result == VK_SUCCESS && dataSize) {
            deviceData.resize(dataSize);
            dataSize = deviceData.size();
            result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, deviceData.data());
            if (result == VK_SUCCESS) {
                deviceData.resize(dataSize);
            } else {
                deviceData.clear();
            }
        }
    }
    return deviceData;
}
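
// Illustrative round trip (persistence outside this file is an assumption):
//   const vector<uint8_t> blob = deviceVk.GetPipelineCache(); // on shutdown
//   // ... write blob to storage, read it back on the next run ...
//   deviceVk.InitializePipelineCache(blob);                   // on startup
// InitializePipelineCache() above rejects blobs whose header does not match the
// current vendor/device/pipelineCacheUUID, so stale caches fall back to empty.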

LowLevelGpuQueueVk DeviceVk::GetGpuQueue(const GpuQueue& gpuQueue) const
{
    // 1. tries to return the typed queue with the given index
    // 2. tries to return the typed queue with index 0
    // 3. returns the default queue
    const auto getSpecificQueue = [](const uint32_t queueIndex, const vector<LowLevelGpuQueueVk>& specificQueues,
                                      const LowLevelGpuQueueVk& defaultQueue) {
        const auto queueCount = (uint32_t)specificQueues.size();
        if (queueIndex < queueCount) {
            return specificQueues[queueIndex];
        } else if (queueCount > 0) {
            return specificQueues[0];
        }
        return defaultQueue;
    };

    if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.computeQueues, lowLevelGpuQueues_.defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.graphicsQueues, lowLevelGpuQueues_.defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.transferQueues, lowLevelGpuQueues_.defaultQueue);
    } else {
        return lowLevelGpuQueues_.defaultQueue;
    }
}

LowLevelGpuQueueVk DeviceVk::GetPresentationGpuQueue() const
{
    // NOTE: the graphics queue at index 0 is expected to support presentation
    return GetGpuQueue(GpuQueue { GpuQueue::QueueType::GRAPHICS, 0 });
}

vector<LowLevelGpuQueueVk> DeviceVk::GetLowLevelGpuQueues() const
{
    vector<LowLevelGpuQueueVk> gpuQueues;
    gpuQueues.reserve(gpuQueueCount_);
    gpuQueues.append(lowLevelGpuQueues_.computeQueues.begin(), lowLevelGpuQueues_.computeQueues.end());
    gpuQueues.append(lowLevelGpuQueues_.graphicsQueues.begin(), lowLevelGpuQueues_.graphicsQueues.end());
    gpuQueues.append(lowLevelGpuQueues_.transferQueues.begin(), lowLevelGpuQueues_.transferQueues.end());
    return gpuQueues;
}

void DeviceVk::WaitForIdle()
{
    RENDER_CPU_PERF_SCOPE("WaitForIdle", "");
    if (plat_.device) {
        if (!isRenderbackendRunning_) {
            PLUGIN_LOG_D("Device - WaitForIdle");
            vkDeviceWaitIdle(plat_.device); // device
        } else {
            PLUGIN_LOG_E("Device WaitForIdle can only be called when the render backend is not running");
        }
    }
}
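
// NOTE: per the Vulkan spec, host access to all of the device's queues must be
// externally synchronized when vkDeviceWaitIdle is called; calling it while the
// render backend is still submitting work would be a race, hence the
// isRenderbackendRunning_ guard above.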

void DeviceVk::Activate() {}

void DeviceVk::Deactivate() {}

bool DeviceVk::AllowThreadedProcessing() const
{
    return true;
}

const DeviceVk::FeatureConfigurations& DeviceVk::GetFeatureConfigurations() const
{
    return featureConfigurations_;
}

const DeviceVk::CommonDeviceExtensions& DeviceVk::GetCommonDeviceExtensions() const
{
    return commonDeviceExtensions_;
}

const PlatformDeviceExtensions& DeviceVk::GetPlatformDeviceExtensions() const
{
    return platformDeviceExtensions_;
}

bool DeviceVk::HasDeviceExtension(const string_view extensionName) const
{
    return extensions_.contains(extensionName);
}

unique_ptr<Device> CreateDeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo)
{
    RENDER_CPU_PERF_SCOPE("CreateDeviceVk", "");
    return make_unique<DeviceVk>(renderContext, createInfo);
}

unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuBufferDesc& desc)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuBuffer", "");
    return make_unique<GpuBufferVk>(*this, desc);
}

unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuAccelerationStructureDesc& descAccel)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuBuffer", "");
    return make_unique<GpuBufferVk>(*this, descAccel);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImage(const GpuImageDesc& desc)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImage", "");
    return make_unique<GpuImageVk>(*this, desc);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const GpuImagePlatformData& platformData, const uintptr_t hwBuffer)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImageView", "");
    return make_unique<GpuImageVk>(*this, desc, platformData, hwBuffer);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(const GpuImageDesc& desc, const GpuImagePlatformData& platformData)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImageView", "");
    return CreateGpuImageView(desc, platformData, 0);
}

vector<unique_ptr<GpuImage>> DeviceVk::CreateGpuImageViews(const Swapchain& swapchain)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImageViews", "");
    const GpuImageDesc& desc = swapchain.GetDesc();
    const auto& swapchainPlat = static_cast<const SwapchainVk&>(swapchain).GetPlatformData();

    vector<unique_ptr<GpuImage>> gpuImages(swapchainPlat.swapchainImages.images.size());
    for (size_t idx = 0; idx < gpuImages.size(); ++idx) {
        GpuImagePlatformDataVk gpuImagePlat;
        gpuImagePlat.image = swapchainPlat.swapchainImages.images[idx];
        gpuImagePlat.imageView = swapchainPlat.swapchainImages.imageViews[idx];
        gpuImages[idx] = this->CreateGpuImageView(desc, gpuImagePlat);
    }
    return gpuImages;
}
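
// NOTE: the wrapped VkImage handles are owned by the swapchain (in Vulkan they
// come from vkGetSwapchainImagesKHR), so the GpuImageVk objects created from
// platform data above are expected to act as non-owning views rather than
// allocate or free image memory themselves.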

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const BackendSpecificImageDesc& platformData)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuImageView", "");
    const auto& imageDesc = (const ImageDescVk&)platformData;
    GpuImagePlatformDataVk platData;
    platData.image = imageDesc.image;
    platData.imageView = imageDesc.imageView;
    return CreateGpuImageView(desc, platData, imageDesc.platformHwBuffer);
}

unique_ptr<GpuSampler> DeviceVk::CreateGpuSampler(const GpuSamplerDesc& desc)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuSampler", "");
    return make_unique<GpuSamplerVk>(*this, desc);
}

unique_ptr<RenderFrameSync> DeviceVk::CreateRenderFrameSync()
{
    RENDER_CPU_PERF_SCOPE("CreateRenderFrameSync", "");
    return make_unique<RenderFrameSyncVk>(*this);
}

unique_ptr<RenderBackend> DeviceVk::CreateRenderBackend(
    GpuResourceManager& gpuResourceMgr, CORE_NS::ITaskQueue* const queue)
{
    return make_unique<RenderBackendVk>(*this, gpuResourceMgr, queue);
}

unique_ptr<ShaderModule> DeviceVk::CreateShaderModule(const ShaderModuleCreateInfo& data)
{
    RENDER_CPU_PERF_SCOPE("CreateShaderModule", "");
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<ShaderModule> DeviceVk::CreateComputeShaderModule(const ShaderModuleCreateInfo& data)
{
    RENDER_CPU_PERF_SCOPE("CreateComputeShaderModule", "");
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<GpuShaderProgram> DeviceVk::CreateGpuShaderProgram(const GpuShaderProgramCreateData& data)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuShaderProgram", "");
    return make_unique<GpuShaderProgramVk>(data);
}

unique_ptr<GpuComputeProgram> DeviceVk::CreateGpuComputeProgram(const GpuComputeProgramCreateData& data)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuComputeProgram", "");
    return make_unique<GpuComputeProgramVk>(data);
}

unique_ptr<NodeContextDescriptorSetManager> DeviceVk::CreateNodeContextDescriptorSetManager()
{
    return make_unique<NodeContextDescriptorSetManagerVk>(*this);
}

unique_ptr<NodeContextPoolManager> DeviceVk::CreateNodeContextPoolManager(
    GpuResourceManager& gpuResourceMgr, const GpuQueue& gpuQueue)
{
    return make_unique<NodeContextPoolManagerVk>(*this, gpuResourceMgr, gpuQueue);
}

unique_ptr<GraphicsPipelineStateObject> DeviceVk::CreateGraphicsPipelineStateObject(const GpuShaderProgram& gpuProgram,
    const GraphicsState& graphicsState, const PipelineLayout& pipelineLayout,
    const VertexInputDeclarationView& vertexInputDeclaration,
    const ShaderSpecializationConstantDataView& specializationConstants,
    const array_view<const DynamicStateEnum> dynamicStates, const RenderPassDesc& renderPassDesc,
    const array_view<const RenderPassSubpassDesc>& renderPassSubpassDescs, const uint32_t subpassIndex,
    const LowLevelRenderPassData* renderPassData, const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    RENDER_CPU_PERF_SCOPE("CreateGraphicsPipelineStateObject", "");
    PLUGIN_ASSERT(renderPassData);
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<GraphicsPipelineStateObjectVk>(*this, gpuProgram, graphicsState, pipelineLayout,
        vertexInputDeclaration, specializationConstants, dynamicStates, renderPassSubpassDescs, subpassIndex,
        *renderPassData, *pipelineLayoutData);
}

unique_ptr<ComputePipelineStateObject> DeviceVk::CreateComputePipelineStateObject(const GpuComputeProgram& gpuProgram,
    const PipelineLayout& pipelineLayout, const ShaderSpecializationConstantDataView& specializationConstants,
    const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    RENDER_CPU_PERF_SCOPE("CreateComputePipelineStateObject", "");
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<ComputePipelineStateObjectVk>(
        *this, gpuProgram, pipelineLayout, specializationConstants, *pipelineLayoutData);
}

unique_ptr<GpuSemaphore> DeviceVk::CreateGpuSemaphore()
{
    RENDER_CPU_PERF_SCOPE("CreateGpuSemaphore", "");
    return make_unique<GpuSemaphoreVk>(*this);
}

unique_ptr<GpuSemaphore> DeviceVk::CreateGpuSemaphoreView(const uint64_t handle)
{
    RENDER_CPU_PERF_SCOPE("CreateGpuSemaphoreView", "");
    return make_unique<GpuSemaphoreVk>(*this, handle);
}

const DebugFunctionUtilitiesVk& DeviceVk::GetDebugFunctionUtilities() const
{
    return debugFunctionUtilities_;
}

void DeviceVk::CreateDebugFunctions()
{
    RENDER_CPU_PERF_SCOPE("CreateDebugFunctions", "");
    if (!plat_.device) {
        return;
    }

#if (RENDER_VULKAN_VALIDATION_ENABLED == 1)
    debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT =
        (PFN_vkSetDebugUtilsObjectNameEXT)(void*)vkGetDeviceProcAddr(plat_.device, "vkSetDebugUtilsObjectNameEXT");
#endif
#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
    debugFunctionUtilities_.vkCmdBeginDebugUtilsLabelEXT =
        (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdBeginDebugUtilsLabelEXT");
    debugFunctionUtilities_.vkCmdEndDebugUtilsLabelEXT =
        (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdEndDebugUtilsLabelEXT");
#endif
}
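
// Illustrative use of the loaded name function (the buffer handle and label are
// placeholders):
//   VkDebugUtilsObjectNameInfoEXT nameInfo {
//       VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, // sType
//       nullptr,                                            // pNext
//       VK_OBJECT_TYPE_BUFFER,                              // objectType
//       (uint64_t)vkBuffer,                                 // objectHandle
//       "MyVertexBuffer",                                   // pObjectName
//   };
//   if (debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT) {
//       debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT(plat_.device, &nameInfo);
//   }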

const DeviceVk::ExtFunctions& DeviceVk::GetExtFunctions() const
{
    return extFunctions_;
}

const PlatformExtFunctions& DeviceVk::GetPlatformExtFunctions() const
{
    return platformExtFunctions_;
}

const DeviceVk::DefaultVulkanObjects& DeviceVk::GetDefaultVulkanObjects() const
{
    return defaultVulkanObjects_;
}

void DeviceVk::CreateExtFunctions()
{
    RENDER_CPU_PERF_SCOPE("CreateExtFunctions", "");
    if (commonDeviceExtensions_.renderPass2) {
        extFunctions_.vkCreateRenderPass2KHR =
            (PFN_vkCreateRenderPass2KHR)(void*)vkGetInstanceProcAddr(plat_.instance, "vkCreateRenderPass2KHR");
        if (!extFunctions_.vkCreateRenderPass2KHR) {
            commonDeviceExtensions_.renderPass2 = false;
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateRenderPass2KHR");
        }
    }
    if (commonDeviceExtensions_.getMemoryRequirements2) {
        extFunctions_.vkGetImageMemoryRequirements2 = (PFN_vkGetImageMemoryRequirements2)vkGetInstanceProcAddr(
            plat_.instance, "vkGetImageMemoryRequirements2KHR");
        if (!extFunctions_.vkGetImageMemoryRequirements2) {
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetImageMemoryRequirements2KHR");
        }
    }
    if (commonDeviceExtensions_.samplerYcbcrConversion) {
        GetYcbcrExtFunctions(plat_.instance, extFunctions_);
    }
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (commonDeviceExtensions_.fragmentShadingRate) {
        extFunctions_.vkCmdSetFragmentShadingRateKHR =
            (PFN_vkCmdSetFragmentShadingRateKHR)vkGetInstanceProcAddr(plat_.instance, "vkCmdSetFragmentShadingRateKHR");
    }
#endif

#if (RENDER_VULKAN_RT_ENABLED == 1)
    extFunctions_.vkGetAccelerationStructureBuildSizesKHR =
        (PFN_vkGetAccelerationStructureBuildSizesKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureBuildSizesKHR");
    if (!extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureBuildSizesKHR");
    }
    extFunctions_.vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkCmdBuildAccelerationStructuresKHR");
    if (!extFunctions_.vkCmdBuildAccelerationStructuresKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCmdBuildAccelerationStructuresKHR");
    }
    extFunctions_.vkCreateAccelerationStructureKHR =
        (PFN_vkCreateAccelerationStructureKHR)vkGetInstanceProcAddr(plat_.instance, "vkCreateAccelerationStructureKHR");
    if (!extFunctions_.vkCreateAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateAccelerationStructureKHR");
    }
    extFunctions_.vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkDestroyAccelerationStructureKHR");
    if (!extFunctions_.vkDestroyAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroyAccelerationStructureKHR");
    }
    extFunctions_.vkGetAccelerationStructureDeviceAddressKHR =
        (PFN_vkGetAccelerationStructureDeviceAddressKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureDeviceAddressKHR");
    if (!extFunctions_.vkGetAccelerationStructureDeviceAddressKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureDeviceAddressKHR");
    }
#endif
}
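
// NOTE: device-level entry points resolved through vkGetInstanceProcAddr go via
// the loader's dispatch trampoline; resolving them with vkGetDeviceProcAddr on
// plat_.device instead would skip one indirection per call, at the cost of the
// pointers being valid only for this VkDevice.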

LowLevelDeviceVk::LowLevelDeviceVk(DeviceVk& deviceVk)
    : deviceVk_(deviceVk), gpuResourceMgr_(static_cast<GpuResourceManager&>(deviceVk_.GetGpuResourceManager()))
{}

DeviceBackendType LowLevelDeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformDataVk& LowLevelDeviceVk::GetPlatformDataVk() const
{
    return deviceVk_.GetPlatformDataVk();
}

GpuBufferPlatformDataVk LowLevelDeviceVk::GetBuffer(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        auto* buffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(handle);
        if (buffer) {
            return buffer->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuImagePlatformDataVk LowLevelDeviceVk::GetImage(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        auto* image = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
        if (image) {
            return image->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuSamplerPlatformDataVk LowLevelDeviceVk::GetSampler(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        auto* sampler = gpuResourceMgr_.GetSampler<GpuSamplerVk>(handle);
        if (sampler) {
            return sampler->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}
RENDER_END_NAMESPACE()