1 /*
2  * Copyright (C) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "device_vk.h"
17 
18 #include <algorithm>
19 #include <cinttypes>
20 #include <cstdint>
21 #include <vulkan/vulkan.h>
22 
23 #include <base/containers/vector.h>
24 #include <base/math/mathf.h>
25 #include <core/engine_info.h>
26 #include <render/intf_render_context.h>
27 #include <render/namespace.h>
28 
29 #include "device/device.h"
30 #include "device/gpu_program_util.h"
31 #include "device/gpu_resource_manager.h"
32 #include "device/shader_manager.h"
33 #include "device/shader_module.h"
34 #include "platform_vk.h"
35 #include "util/log.h"
36 #include "vulkan/create_functions_vk.h"
37 #include "vulkan/gpu_acceleration_structure_vk.h"
38 #include "vulkan/gpu_buffer_vk.h"
39 #include "vulkan/gpu_image_vk.h"
40 #include "vulkan/gpu_memory_allocator_vk.h"
41 #include "vulkan/gpu_program_vk.h"
42 #include "vulkan/gpu_query_vk.h"
43 #include "vulkan/gpu_sampler_vk.h"
44 #include "vulkan/node_context_descriptor_set_manager_vk.h"
45 #include "vulkan/node_context_pool_manager_vk.h"
46 #include "vulkan/pipeline_state_object_vk.h"
47 #include "vulkan/render_backend_vk.h"
48 #include "vulkan/render_frame_sync_vk.h"
49 #include "vulkan/shader_module_vk.h"
50 #include "vulkan/swapchain_vk.h"
51 #include "vulkan/validate_vk.h"
52 
53 using namespace BASE_NS;
54 
55 RENDER_BEGIN_NAMESPACE()
56 namespace {
57 static constexpr string_view DEVICE_EXTENSION_SWAPCHAIN { VK_KHR_SWAPCHAIN_EXTENSION_NAME };
58 
59 // promoted to 1.2, requires VK_KHR_create_renderpass2
60 static constexpr string_view DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE { VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME };
61 static constexpr string_view DEVICE_EXTENSION_CREATE_RENDERPASS2 { VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME };
62 
63 static constexpr string_view DEVICE_EXTENSION_EXTERNAL_MEMORY { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME };
64 static constexpr string_view DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2 {
65     VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME
66 };
67 static constexpr string_view DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION {
68     VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME
69 };
70 static constexpr string_view DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN { VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME };
71 
72 void GetYcbcrExtFunctions(const VkInstance instance, DeviceVk::ExtFunctions& extFunctions)
73 {
74     extFunctions.vkCreateSamplerYcbcrConversion =
75         (PFN_vkCreateSamplerYcbcrConversion)vkGetInstanceProcAddr(instance, "vkCreateSamplerYcbcrConversion");
76     if (!extFunctions.vkCreateSamplerYcbcrConversion) {
77         PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateSamplerYcbcrConversion");
78     }
79     extFunctions.vkDestroySamplerYcbcrConversion =
80         (PFN_vkDestroySamplerYcbcrConversion)vkGetInstanceProcAddr(instance, "vkDestroySamplerYcbcrConversion");
81     if (!extFunctions.vkDestroySamplerYcbcrConversion) {
82         PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroySamplerYcbcrConversion");
83     }
84 }
85 
86 // ray-tracing
87 #if (RENDER_VULKAN_RT_ENABLED == 1)
88 static constexpr string_view DEVICE_EXTENSION_ACCELERATION_STRUCTURE { "VK_KHR_acceleration_structure" };
89 static constexpr string_view DEVICE_EXTENSION_RAY_QUERY { "VK_KHR_ray_query" };
90 static constexpr string_view DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS { "VK_KHR_deferred_host_operations" };
91 static constexpr string_view DEVICE_EXTENSION_RAY_TRACING_PIPELINE { "VK_KHR_ray_tracing_pipeline" };
92 static constexpr string_view DEVICE_EXTENSION_PIPELINE_LIBRARY { "VK_KHR_pipeline_library" };
93 #endif
94 
95 constexpr uint32_t MIN_ALLOCATION_BLOCK_SIZE { 4u * 1024u * 1024u };
96 constexpr uint32_t MAX_ALLOCATION_BLOCK_SIZE { 1024u * 1024u * 1024u };
97 static constexpr const QueueProperties DEFAULT_QUEUE {
98     VK_QUEUE_GRAPHICS_BIT, // requiredFlags
99     1,                     // count
100     1.0f,                  // priority
101     false,                 // explicitFlags
102     true,                  // canPresent
103 };
104 
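// Builds the default GPU memory allocator configuration. Two custom pools are set up: a
// "STAGING_GPU_BUFFER" pool for single-shot, host-visible staging buffers and a
// "DYNAMIC_UNIFORM_GPU_BUFFER" ring-buffer pool (16 MiB by default). Client-provided block sizes from
// BackendExtraVk::gpuMemoryAllocatorSizes are clamped to [MIN_ALLOCATION_BLOCK_SIZE, MAX_ALLOCATION_BLOCK_SIZE].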
105 PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo GetAllocatorCreateInfo(const BackendExtraVk* backendExtra)
106 {
107     // create default pools
108     PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo createInfo;
109     uint32_t dynamicUboByteSize = 16u * 1024u * 1024u;
110     if (backendExtra) {
111         const auto& sizes = backendExtra->gpuMemoryAllocatorSizes;
112         if (sizes.defaultAllocationBlockSize != ~0u) {
113             createInfo.preferredLargeHeapBlockSize = Math::min(
114                 MAX_ALLOCATION_BLOCK_SIZE, Math::max(sizes.defaultAllocationBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
115         }
116         if (sizes.customAllocationDynamicUboBlockSize != ~0u) {
117             dynamicUboByteSize = Math::min(MAX_ALLOCATION_BLOCK_SIZE,
118                 Math::max(sizes.customAllocationDynamicUboBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
119         }
120     }
121 
122     // staging
123     {
124         GpuBufferDesc desc;
125         desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_SINGLE_SHOT_STAGING;
126         desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
127                                    MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
128         desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_SRC_BIT;
129         createInfo.customPools.push_back({
130             "STAGING_GPU_BUFFER",
131             PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
132             0u,
133             // if a linear allocator is used, memory can easily be wasted depending on the client's usage pattern.
134             false,
135             { move(desc) },
136         });
137     }
138     // dynamic uniform ring buffers
139     {
140         GpuBufferDesc desc;
141         desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER;
142         desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
143                                    MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
144         desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
145         createInfo.customPools.push_back({
146             "DYNAMIC_UNIFORM_GPU_BUFFER",
147             PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
148             dynamicUboByteSize,
149             false,
150             { move(desc) },
151         });
152     }
153 
154     return createInfo;
155 }
156 
157 VkBool32 VKAPI_PTR DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
158     VkDebugUtilsMessageTypeFlagsEXT messageTypes, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
159     void* pUserData)
160 {
161     if (pCallbackData && pCallbackData->pMessage) {
162         if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
163             PLUGIN_LOG_E("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
164         } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
165                    (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) {
166             PLUGIN_LOG_W("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
167         } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
168                    VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
169             PLUGIN_LOG_I("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
170         } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
171                    VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
172             PLUGIN_LOG_V("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
173         }
174         PLUGIN_ASSERT_MSG(
175             ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) == 0,
176             "VALIDATION ERROR");
177     }
178 
179     // The application should always return VK_FALSE.
180     return VK_FALSE;
181 }
182 
183 VkBool32 VKAPI_PTR DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT, uint64_t, size_t,
184     int32_t, const char*, const char* pMessage, void*)
185 {
186     if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) {
187         PLUGIN_LOG_E("%s", pMessage);
188     } else if (flags & (VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)) {
189         PLUGIN_LOG_W("%s", pMessage);
190     } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) {
191         PLUGIN_LOG_I("%s", pMessage);
192     } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) {
193         PLUGIN_LOG_D("%s", pMessage);
194     }
195     PLUGIN_ASSERT_MSG((flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) == 0, "VALIDATION ERROR");
196     return VK_TRUE;
197 }
198 
199 void EmplaceDeviceQueue(
200     const VkDevice device, const LowLevelQueueInfo& aQueueInfo, vector<LowLevelGpuQueueVk>& aLowLevelQueues)
201 {
202     for (uint32_t idx = 0; idx < aQueueInfo.queueCount; ++idx) {
203         VkQueue queue = VK_NULL_HANDLE;
204         vkGetDeviceQueue(device,         // device
205             aQueueInfo.queueFamilyIndex, // queueFamilyIndex
206             idx,                         // queueIndex
207             &queue);                     // pQueue
208         aLowLevelQueues.push_back(LowLevelGpuQueueVk { queue, aQueueInfo });
209     }
210 }
211 
212 void CheckValidDepthFormats(const DevicePlatformDataVk& devicePlat, DevicePlatformInternalDataVk& dataInternal)
213 {
214     constexpr uint32_t DEPTH_FORMAT_COUNT { 4 };
215     constexpr Format DEPTH_FORMATS[DEPTH_FORMAT_COUNT] = { BASE_FORMAT_D24_UNORM_S8_UINT, BASE_FORMAT_D32_SFLOAT,
216         BASE_FORMAT_D16_UNORM, BASE_FORMAT_X8_D24_UNORM_PACK32 };
217     for (uint32_t idx = 0; idx < DEPTH_FORMAT_COUNT; ++idx) {
218         VkFormatProperties formatProperties;
219         Format format = DEPTH_FORMATS[idx];
220         vkGetPhysicalDeviceFormatProperties(devicePlat.physicalDevice, // physicalDevice
221             (VkFormat)format,                                          // format
222             &formatProperties);                                        // pFormatProperties
223         const VkFormatFeatureFlags optimalTilingFeatureFlags = formatProperties.optimalTilingFeatures;
224         if (optimalTilingFeatureFlags & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
225             dataInternal.supportedDepthFormats.push_back(format);
226         }
227     }
228 }
229 
230 vector<string_view> GetPreferredDeviceExtensions(const BackendExtraVk* backendExtra)
231 {
232     vector<string_view> extensions { DEVICE_EXTENSION_SWAPCHAIN };
233     extensions.push_back(DEVICE_EXTENSION_CREATE_RENDERPASS2);
234     extensions.push_back(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE);
235     GetPlatformDeviceExtensions(extensions);
236 #if (RENDER_VULKAN_RT_ENABLED == 1)
237     extensions.push_back(DEVICE_EXTENSION_ACCELERATION_STRUCTURE);
238     extensions.push_back(DEVICE_EXTENSION_RAY_TRACING_PIPELINE);
239     extensions.push_back(DEVICE_EXTENSION_RAY_QUERY);
240     extensions.push_back(DEVICE_EXTENSION_PIPELINE_LIBRARY);
241     extensions.push_back(DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS);
242 #endif
243     if (backendExtra) {
244         for (const auto str : backendExtra->extensions.extensionNames) {
245             extensions.push_back(str);
246         }
247     }
248     return extensions;
249 }
250 
251 DeviceVk::CommonDeviceExtensions GetEnabledCommonDeviceExtensions(
252     const unordered_map<string, uint32_t>& enabledDeviceExtensions)
253 {
254     DeviceVk::CommonDeviceExtensions extensions;
255     extensions.swapchain = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SWAPCHAIN);
256     // renderpass2 is promoted to core in 1.2; we only use render pass 2 when we need depth stencil resolve
257     extensions.renderPass2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE) &&
258                              enabledDeviceExtensions.contains(DEVICE_EXTENSION_CREATE_RENDERPASS2);
259     extensions.externalMemory = enabledDeviceExtensions.contains(DEVICE_EXTENSION_EXTERNAL_MEMORY);
260     extensions.getMemoryRequirements2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2);
261     extensions.queueFamilyForeign = enabledDeviceExtensions.contains(DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN);
262     extensions.samplerYcbcrConversion = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION);
263 
264     return extensions;
265 }
266 
267 void PreparePhysicalDeviceFeaturesForEnabling(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
268 {
269     // enable all by default and then disable a few
270     plat.enabledPhysicalDeviceFeatures = plat.physicalDeviceProperties.physicalDeviceFeatures;
271     // prepare feature disable for core engine
272     plat.enabledPhysicalDeviceFeatures.geometryShader = VK_FALSE;
273     plat.enabledPhysicalDeviceFeatures.tessellationShader = VK_FALSE;
274     plat.enabledPhysicalDeviceFeatures.sampleRateShading = VK_FALSE;
275     plat.enabledPhysicalDeviceFeatures.occlusionQueryPrecise = VK_FALSE;
276     plat.enabledPhysicalDeviceFeatures.pipelineStatisticsQuery = VK_FALSE;
277     plat.enabledPhysicalDeviceFeatures.shaderTessellationAndGeometryPointSize = VK_FALSE;
278     plat.enabledPhysicalDeviceFeatures.inheritedQueries = VK_FALSE;
279     if (backendExtra) {
280         // check for support and prepare enabling
281         if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
282             const size_t valueCount = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
283             const array_view<const VkBool32> supported(
284                 reinterpret_cast<VkBool32*>(&plat.physicalDeviceProperties.physicalDeviceFeatures), valueCount);
285             VkPhysicalDeviceFeatures* wantedFeatures =
286                 (&backendExtra->extensions.physicalDeviceFeaturesToEnable->features);
287             const array_view<const VkBool32> wanted(reinterpret_cast<VkBool32*>(wantedFeatures), valueCount);
288 
289             array_view<VkBool32> enabledPhysicalDeviceFeatures(
290                 reinterpret_cast<VkBool32*>(&plat.enabledPhysicalDeviceFeatures), valueCount);
291             for (size_t idx = 0; idx < valueCount; ++idx) {
292                 if (supported[idx] && wanted[idx]) {
293                     enabledPhysicalDeviceFeatures[idx] = VK_TRUE;
294                 } else if (wanted[idx]) {
295                     PLUGIN_LOG_W(
296                         "physical device feature not supported/enabled from idx: %u", static_cast<uint32_t>(idx));
297                 }
298             }
299         }
300     }
301 }
302 
303 FormatProperties FillDeviceFormatSupport(VkPhysicalDevice physicalDevice, const Format format)
304 {
305     VkFormatProperties formatProperties;
306     vkGetPhysicalDeviceFormatProperties(physicalDevice, // physicalDevice
307         (VkFormat)format,                               // format
308         &formatProperties);                             // pFormatProperties
309     return FormatProperties {
310         (FormatFeatureFlags)formatProperties.linearTilingFeatures,
311         (FormatFeatureFlags)formatProperties.optimalTilingFeatures,
312         (FormatFeatureFlags)formatProperties.bufferFeatures,
313         GpuProgramUtil::FormatByteSize(format),
314     };
315 }
316 
317 void FillFormatSupport(VkPhysicalDevice physicalDevice, vector<FormatProperties>& formats)
318 {
319     const uint32_t fullSize = DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT +
320                               DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT;
321     formats.resize(fullSize);
322     for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT; ++idx) {
323         formats[idx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(idx));
324     }
325     // pre-build additional formats
326     for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT; ++idx) {
327         const uint32_t currIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
328         PLUGIN_ASSERT(currIdx < static_cast<uint32_t>(formats.size()));
329         const uint32_t formatIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER;
330         formats[currIdx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(formatIdx));
331     }
332 }
333 } // namespace
334 
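// Construction order: CheckExternalConfig() first inspects the optional BackendExtraVk (and may adopt
// client-provided instance/physical device/device handles); the instance, physical device and logical
// device are created only when owned. Extension entry points are then resolved, queues are sorted by
// type, depth and general format support tables are filled, and finally the memory allocator, GPU
// resource manager, shader manager and low-level device wrapper are created.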
335 DeviceVk::DeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo) : Device(renderContext, createInfo)
336 {
337     // assume instance and device will be created internally
338     ownInstanceAndDevice_ = true;
339 
340     const BackendExtraVk* backendExtra = static_cast<const BackendExtraVk*>(createInfo.backendConfiguration);
341     // update internal state based on the optional backend configuration given by the client. the size of queueProperties
342     // will depend on the enableMultiQueue setting.
343     const auto queueProperties = CheckExternalConfig(backendExtra);
344 
345     // the client didn't provide a vulkan instance, so create our own
346     if (ownInstanceAndDevice_) {
347         CreateInstanceAndPhysicalDevice();
348     }
349     const auto availableQueues = CreateFunctionsVk::GetAvailableQueues(plat_.physicalDevice, queueProperties);
350     if (ownInstanceAndDevice_) {
351         CreateDevice(backendExtra, availableQueues);
352         CreateDebugFunctions();
353     }
354     CreateExtFunctions();
355     CreatePlatformExtFunctions();
356     SortAvailableQueues(availableQueues);
357 
358     CheckValidDepthFormats(plat_, platInternal_);
359     FillFormatSupport(plat_.physicalDevice, formatProperties_);
360 
361     PLUGIN_ASSERT_MSG(!lowLevelGpuQueues_.graphicsQueues.empty(), "default queue not initialized");
362     if (!lowLevelGpuQueues_.graphicsQueues.empty()) {
363         lowLevelGpuQueues_.defaultQueue = lowLevelGpuQueues_.graphicsQueues[0];
364     } else {
365         PLUGIN_LOG_E("default vulkan queue not initialized");
366     }
367 
368     gpuQueueCount_ =
369         static_cast<uint32_t>(lowLevelGpuQueues_.computeQueues.size() + lowLevelGpuQueues_.graphicsQueues.size() +
370                               lowLevelGpuQueues_.transferQueues.size());
371 
372     const PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo allocatorCreateInfo =
373         GetAllocatorCreateInfo(backendExtra);
374     platformGpuMemoryAllocator_ = make_unique<PlatformGpuMemoryAllocator>(
375         plat_.instance, plat_.physicalDevice, plat_.device, allocatorCreateInfo);
376 
377     if (queueProperties.size() > 1) {
378         PLUGIN_LOG_I("gpu queue count: %u", gpuQueueCount_);
379     }
380 
381     SetDeviceStatus(true);
382 
383     const GpuResourceManager::CreateInfo grmCreateInfo {
384         GpuResourceManager::GPU_RESOURCE_MANAGER_OPTIMIZE_STAGING_MEMORY,
385     };
386     gpuResourceMgr_ = make_unique<GpuResourceManager>(*this, grmCreateInfo);
387     shaderMgr_ = make_unique<ShaderManager>(*this);
388 
389     lowLevelDevice_ = make_unique<LowLevelDeviceVk>(*this);
390 }
391 
392 DeviceVk::~DeviceVk()
393 {
394     WaitForIdle();
395 
396     gpuResourceMgr_.reset();
397     shaderMgr_.reset();
398 
399     platformGpuMemoryAllocator_.reset();
400     swapchain_.reset();
401 
402     if (plat_.pipelineCache) {
403         CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
404     }
405 
406     if (ownInstanceAndDevice_) {
407         CreateFunctionsVk::DestroyDevice(plat_.device);
408         CreateFunctionsVk::DestroyDebugMessenger(plat_.instance, debugFunctionUtilities_.debugMessenger);
409         CreateFunctionsVk::DestroyDebugCallback(plat_.instance, debugFunctionUtilities_.debugCallback);
410         CreateFunctionsVk::DestroyInstance(plat_.instance);
411     }
412 }
413 
414 void DeviceVk::CreateInstanceAndPhysicalDevice()
415 {
416     const VersionInfo engineInfo { "core_prototype", 0, 1, 0 };
417     const VersionInfo appInfo { "core_prototype_app", 0, 1, 0 };
418 
419     const auto instanceWrapper = CreateFunctionsVk::CreateInstance(engineInfo, appInfo);
420     plat_.instance = instanceWrapper.instance;
421     if (instanceWrapper.debugUtilsSupported) {
422         debugFunctionUtilities_.debugMessenger =
423             CreateFunctionsVk::CreateDebugMessenger(plat_.instance, DebugMessengerCallback);
424     }
425     if (!debugFunctionUtilities_.debugMessenger && instanceWrapper.debugReportSupported) {
426         debugFunctionUtilities_.debugCallback =
427             CreateFunctionsVk::CreateDebugCallback(plat_.instance, DebugReportCallback);
428     }
429     auto physicalDeviceWrapper = CreateFunctionsVk::CreatePhysicalDevice(plat_.instance, DEFAULT_QUEUE);
430     const uint32_t physicalDeviceApiMajor =
431         VK_VERSION_MAJOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
432     const uint32_t physicalDeviceApiMinor =
433         VK_VERSION_MINOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
434     plat_.deviceApiMajor = std::min(instanceWrapper.apiMajor, physicalDeviceApiMajor);
435     plat_.deviceApiMinor = std::min(instanceWrapper.apiMinor, physicalDeviceApiMinor);
436     PLUGIN_LOG_D("device api version %u.%u", plat_.deviceApiMajor, plat_.deviceApiMinor);
437 
438     plat_.physicalDevice = physicalDeviceWrapper.physicalDevice;
439     plat_.physicalDeviceProperties = move(physicalDeviceWrapper.physicalDeviceProperties);
440     plat_.physicalDeviceExtensions = move(physicalDeviceWrapper.physicalDeviceExtensions);
441     const auto& memoryProperties = plat_.physicalDeviceProperties.physicalDeviceMemoryProperties;
442     deviceSharedMemoryPropertyFlags_ =
443         (memoryProperties.memoryTypeCount > 0) ? (MemoryPropertyFlags)memoryProperties.memoryTypes[0].propertyFlags : 0;
444     for (uint32_t idx = 1; idx < memoryProperties.memoryTypeCount; ++idx) {
445         const MemoryPropertyFlags memoryPropertyFlags =
446             (MemoryPropertyFlags)memoryProperties.memoryTypes[idx].propertyFlags;
447         // do not compare lazily allocated or protected memory blocks
448         if ((memoryPropertyFlags & (CORE_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | CORE_MEMORY_PROPERTY_PROTECTED_BIT)) ==
449             0) {
450             deviceSharedMemoryPropertyFlags_ &= memoryPropertyFlags;
451         }
452     }
453 }
454 
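// Creates the logical device. The VkPhysicalDeviceFeatures2 pNext chain always contains the sampler
// YCbCr conversion features; with RENDER_VULKAN_RT_ENABLED it additionally chains ray query,
// acceleration structure, ray tracing pipeline and buffer device address features. On API >= 1.1 the
// supported features are queried through a dynamically fetched vkGetPhysicalDeviceFeatures2 and the
// core features prepared by PreparePhysicalDeviceFeaturesForEnabling() are copied into features2.features.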
455 void DeviceVk::CreateDevice(const BackendExtraVk* backendExtra, const vector<LowLevelQueueInfo>& availableQueues)
456 {
457     vector<string_view> preferredExtensions = GetPreferredDeviceExtensions(backendExtra);
458     PreparePhysicalDeviceFeaturesForEnabling(backendExtra, plat_);
459 
460     VkPhysicalDeviceFeatures2* physicalDeviceFeatures2Ptr = nullptr;
461     VkPhysicalDeviceSamplerYcbcrConversionFeatures ycbcrConversionFeatures {
462         VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES, // sType
463         nullptr,                                                             // pNext
464         true,                                                                // samplerYcbcrConversion
465     };
466     VkPhysicalDeviceFeatures2 physicalDeviceFeatures2 {
467         VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, // sType
468         &ycbcrConversionFeatures,                     // pNext
469         {},                                           // features
470     };
471     void* pNextForBackendExtra = ycbcrConversionFeatures.pNext;
472 #if (RENDER_VULKAN_RT_ENABLED == 1)
473     VkPhysicalDeviceBufferDeviceAddressFeatures pdBufferDeviceAddressFeatures {
474         VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, // sType
475         nullptr,                                                          // pNext
476         true,                                                             // bufferDeviceAddress;
477         false,                                                            // bufferDeviceAddressCaptureReplay
478         false,                                                            // bufferDeviceAddressMultiDevice
479     };
480     VkPhysicalDeviceRayTracingPipelineFeaturesKHR pdRayTracingPipelineFeatures {
481         VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, // sType
482         &pdBufferDeviceAddressFeatures,                                      // pNext
483         true,                                                                // rayTracingPipeline;
484         false, // rayTracingPipelineShaderGroupHandleCaptureReplay;
485         false, // rayTracingPipelineShaderGroupHandleCaptureReplayMixed;
486         false, // rayTracingPipelineTraceRaysIndirect;
487         false, // rayTraversalPrimitiveCulling;
488     };
489     VkPhysicalDeviceAccelerationStructureFeaturesKHR pdAccelerationStructureFeatures {
490         VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, // sType
491         &pdRayTracingPipelineFeatures,                                         // pNext
492         true,                                                                  // accelerationStructure;
493         false,                                                                 // accelerationStructureCaptureReplay
494         false,                                                                 // accelerationStructureIndirectBuild
495         false,                                                                 // accelerationStructureHostCommands
496         false, // descriptorBindingAccelerationStructureUpdateAfterBind
497     };
498     VkPhysicalDeviceRayQueryFeaturesKHR pdRayQueryFeatures {
499         VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, // sType
500         &pdAccelerationStructureFeatures,                         // pNext
501         true,                                                     // rayQuery
502     };
503 
504     // ray tracing to pNext first
505     ycbcrConversionFeatures.pNext = &pdRayQueryFeatures;
506     // backend extra will be put to pNext of ray tracing extensions
507     pNextForBackendExtra = pdBufferDeviceAddressFeatures.pNext;
508 #endif
509     if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
510         // pipe user extension physical device features
511         if (backendExtra) {
512             if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
513                 pNextForBackendExtra = backendExtra->extensions.physicalDeviceFeaturesToEnable->pNext;
514             }
515         }
516         // NOTE: on some platforms the Vulkan library only exposes the 1.0 entry points. To avoid variation,
517         // always fetch the function dynamically.
518         PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2 =
519             (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceFeatures2");
520         vkGetPhysicalDeviceFeatures2(plat_.physicalDevice, &physicalDeviceFeatures2);
521 
522         // vkGetPhysicalDeviceFeatures has already filled this, and PreparePhysicalDeviceFeaturesForEnabling
523         // has disabled/enabled some features.
524         physicalDeviceFeatures2.features = plat_.enabledPhysicalDeviceFeatures;
525         physicalDeviceFeatures2Ptr = &physicalDeviceFeatures2;
526     }
527     const DeviceWrapper deviceWrapper =
528         CreateFunctionsVk::CreateDevice(plat_.instance, plat_.physicalDevice, plat_.physicalDeviceExtensions,
529             plat_.enabledPhysicalDeviceFeatures, physicalDeviceFeatures2Ptr, availableQueues, preferredExtensions);
530     plat_.device = deviceWrapper.device;
531     for (const auto& ref : deviceWrapper.extensions) {
532         extensions_[ref] = 1u;
533     }
534     commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
535     platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
536 }
537 
538 vector<QueueProperties> DeviceVk::CheckExternalConfig(const BackendExtraVk* backendConfiguration)
539 {
540     vector<QueueProperties> queueProperties;
541     queueProperties.push_back(DEFAULT_QUEUE);
542 
543     if (!backendConfiguration) {
544         return queueProperties;
545     }
546 
547     const auto& extra = *backendConfiguration;
548     if (extra.enableMultiQueue) {
549         queueProperties.push_back(QueueProperties {
550             VK_QUEUE_COMPUTE_BIT, // requiredFlags
551             1,                    // count
552             1.0f,                 // priority
553             true,                 // explicitFlags
554             false,                // canPresent
555         });
556         PLUGIN_LOG_I("trying to enable gpu multi-queue, with queue count: %u", (uint32_t)queueProperties.size());
557     }
558 
559     if (extra.instance != VK_NULL_HANDLE) {
560         PLUGIN_LOG_D("trying to use application given vulkan instance, device, and physical device");
561         PLUGIN_ASSERT((extra.instance && extra.physicalDevice && extra.device));
562         plat_.instance = extra.instance;
563         plat_.physicalDevice = extra.physicalDevice;
564         plat_.device = extra.device;
565         ownInstanceAndDevice_ = false; // everything given from the application
566 
567         const auto myDevice = plat_.physicalDevice;
568         auto& myProperties = plat_.physicalDeviceProperties;
569         vkGetPhysicalDeviceProperties(myDevice, &myProperties.physicalDeviceProperties);
570         vkGetPhysicalDeviceFeatures(myDevice, &myProperties.physicalDeviceFeatures);
571         vkGetPhysicalDeviceMemoryProperties(myDevice, &myProperties.physicalDeviceMemoryProperties);
572     }
573     return queueProperties;
574 }
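// Illustrative sketch (hypothetical handles named my*; only the BackendExtraVk fields referenced above
// are assumed): a client that owns its Vulkan objects could fill the extra configuration roughly as
//     BackendExtraVk extra;
//     extra.enableMultiQueue = true;            // also request an async compute queue
//     extra.instance = myInstance;              // application-owned instance/device handles
//     extra.physicalDevice = myPhysicalDevice;
//     extra.device = myDevice;
// and pass &extra as DeviceCreateInfo::backendConfiguration.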
575 
576 void DeviceVk::SortAvailableQueues(const vector<LowLevelQueueInfo>& availableQueues)
577 {
578     for (const auto& ref : availableQueues) {
579         if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_COMPUTE_BIT) {
580             EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.computeQueues);
581         } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_GRAPHICS_BIT) {
582             EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.graphicsQueues);
583         } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_TRANSFER_BIT) {
584             EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.transferQueues);
585         }
586     }
587 }
588 
589 DeviceBackendType DeviceVk::GetBackendType() const
590 {
591     return DeviceBackendType::VULKAN;
592 }
593 
594 const DevicePlatformData& DeviceVk::GetPlatformData() const
595 {
596     return plat_;
597 }
598 
599 const DevicePlatformDataVk& DeviceVk::GetPlatformDataVk() const
600 {
601     return plat_;
602 }
603 
604 const DevicePlatformInternalDataVk& DeviceVk::GetPlatformInternalDataVk() const
605 {
606     return platInternal_;
607 }
608 
609 ILowLevelDevice& DeviceVk::GetLowLevelDevice() const
610 {
611     return *lowLevelDevice_;
612 }
613 
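// Format properties are stored in two ranges (see FillFormatSupport): plain format numbers below
// LINEAR_FORMAT_MAX_COUNT index the table directly, while formats in the ADDITIONAL_FORMAT range are
// stored at ADDITIONAL_FORMAT_BASE_IDX + (format - ADDITIONAL_FORMAT_START_NUMBER).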
614 FormatProperties DeviceVk::GetFormatProperties(const Format format) const
615 {
616     const uint32_t formatSupportSize = static_cast<uint32_t>(formatProperties_.size());
617     const uint32_t formatIdx = static_cast<uint32_t>(format);
618     if (formatIdx < formatSupportSize) {
619         return formatProperties_[formatIdx];
620     } else if ((formatIdx >= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) &&
621                (formatIdx <= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_END_NUMBER)) {
622         const uint32_t currIdx = (formatIdx - DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) +
623                                  DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
624         PLUGIN_ASSERT(currIdx < formatSupportSize);
625         return formatProperties_[currIdx];
626     }
627     return {};
628 }
629 
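// Translates the engine geometry descriptions into VkAccelerationStructureGeometryKHR entries and asks
// the driver for build sizes via vkGetAccelerationStructureBuildSizesKHR (device builds). Triangle
// primitive counts are derived as indexCount / 3; AABB and instance entries use a count of 1. Without
// RT support, or if the extension function is unavailable, zero sizes are returned.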
630 AccelerationStructureBuildSizes DeviceVk::GetAccelerationStructureBuildSizes(
631     const AccelerationStructureBuildGeometryInfo& geometry,
632     BASE_NS::array_view<const AccelerationStructureGeometryTrianglesInfo> triangles,
633     BASE_NS::array_view<const AccelerationStructureGeometryAabbsInfo> aabbs,
634     BASE_NS::array_view<const AccelerationStructureGeometryInstancesInfo> instances) const
635 {
636 #if (RENDER_VULKAN_RT_ENABLED == 1)
637     const VkDevice device = plat_.device;
638 
639     const size_t arraySize = triangles.size() + aabbs.size() + instances.size();
640     vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
641     vector<uint32_t> maxPrimitiveCounts(arraySize);
642     uint32_t arrayIndex = 0;
643     for (const auto& trianglesRef : triangles) {
644         geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
645             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
646             nullptr,                                               // pNext
647             VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR,     // geometryType
648             {},                                                    // geometry;
649             VkGeometryFlagsKHR(trianglesRef.geometryFlags),        // flags
650         };
651         geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
652             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
653             nullptr,                                                              // pNext
654             VkFormat(trianglesRef.vertexFormat),                                  // vertexFormat
655             {},                                                                   // vertexData
656             VkDeviceSize(trianglesRef.vertexStride),                              // vertexStride
657             trianglesRef.maxVertex,                                               // maxVertex
658             VkIndexType(trianglesRef.indexType),                                  // indexType
659             {},                                                                   // indexData
660             {},                                                                   // transformData
661         };
662         maxPrimitiveCounts[arrayIndex] = trianglesRef.indexCount / 3u; // triangles;
663         arrayIndex++;
664     }
665     for (const auto& aabbsRef : aabbs) {
666         geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
667             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
668             nullptr,                                               // pNext
669             VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR,         // geometryType
670             {},                                                    // geometry;
671             0,                                                     // flags
672         };
673         geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
674             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
675             nullptr,                                                          // pNext
676             {},                                                               // data
677             aabbsRef.stride,                                                  // stride
678         };
679         maxPrimitiveCounts[arrayIndex] = 1u;
680         arrayIndex++;
681     }
682     for (const auto& instancesRef : instances) {
683         geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
684             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
685             nullptr,                                               // pNext
686             VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR,     // geometryType
687             {},                                                    // geometry;
688             0,                                                     // flags
689         };
690         geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
691             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
692             nullptr,                                                              // pNext
693             instancesRef.arrayOfPointers,                                         // arrayOfPointers
694             {},                                                                   // data
695         };
696         maxPrimitiveCounts[arrayIndex] = 1u;
697         arrayIndex++;
698     }
699 
700     const VkAccelerationStructureBuildGeometryInfoKHR geometryInfoVk {
701         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
702         nullptr,                                                          // pNext
703         VkAccelerationStructureTypeKHR(geometry.type),                    // type
704         VkBuildAccelerationStructureFlagsKHR(geometry.flags),             // flags
705         VkBuildAccelerationStructureModeKHR(geometry.mode),               // mode
706         VK_NULL_HANDLE,                                                   // srcAccelerationStructure
707         VK_NULL_HANDLE,                                                   // dstAccelerationStructure
708         arrayIndex,                                                       // geometryCount
709         geometryData.data(),                                              // pGeometries
710         nullptr,                                                          // ppGeometries
711         {},                                                               // scratchData
712     };
713 
714     VkAccelerationStructureBuildSizesInfoKHR buildSizesInfo {
715         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // sType
716         nullptr,                                                       // pNext
717         0,                                                             // accelerationStructureSize
718         0,                                                             // updateScratchSize
719         0,                                                             // buildScratchSize
720     };
721     if ((arrayIndex > 0) && extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
722         extFunctions_.vkGetAccelerationStructureBuildSizesKHR(device, // device
723             VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,          // buildType,
724             &geometryInfoVk,                                          // pBuildInfo
725             maxPrimitiveCounts.data(),                                // pMaxPrimitiveCounts
726             &buildSizesInfo);                                         // pSizeInfo
727     }
728 
729     return AccelerationStructureBuildSizes {
730         static_cast<uint32_t>(buildSizesInfo.accelerationStructureSize),
731         static_cast<uint32_t>(buildSizesInfo.updateScratchSize),
732         static_cast<uint32_t>(buildSizesInfo.buildScratchSize),
733     };
734 #else
735     return AccelerationStructureBuildSizes { 0, 0, 0 };
736 #endif
737 }
738 
739 void DeviceVk::CreateDeviceSwapchain(const SwapchainCreateInfo& swapchainCreateInfo)
740 {
741     WaitForIdle();
742     swapchain_.reset();
743     swapchain_ = make_unique<SwapchainVk>(*this, swapchainCreateInfo);
744 }
745 
746 void DeviceVk::DestroyDeviceSwapchain()
747 {
748     WaitForIdle();
749     swapchain_.reset();
750 }
751 
752 PlatformGpuMemoryAllocator* DeviceVk::GetPlatformGpuMemoryAllocator()
753 {
754     return platformGpuMemoryAllocator_.get();
755 }
756 
757 GpuQueue DeviceVk::GetValidGpuQueue(const GpuQueue& gpuQueue) const
758 {
759     const auto getSpecificQueue = [](const uint32_t queueIndex, const GpuQueue::QueueType queueType,
760                                       const vector<LowLevelGpuQueueVk>& specificQueues, const GpuQueue& defaultQueue) {
761         const uint32_t queueCount = (uint32_t)specificQueues.size();
762         if (queueIndex < queueCount) {
763             return GpuQueue { queueType, queueIndex };
764         } else if (queueCount > 0) {
765             return GpuQueue { queueType, 0 };
766         }
767         return defaultQueue;
768     };
769 
770     GpuQueue defaultQueue { GpuQueue::QueueType::GRAPHICS, 0 };
771     if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
772         return getSpecificQueue(
773             gpuQueue.index, GpuQueue::QueueType::COMPUTE, lowLevelGpuQueues_.computeQueues, defaultQueue);
774     } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
775         return getSpecificQueue(
776             gpuQueue.index, GpuQueue::QueueType::GRAPHICS, lowLevelGpuQueues_.graphicsQueues, defaultQueue);
777     } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
778         return getSpecificQueue(
779             gpuQueue.index, GpuQueue::QueueType::TRANSFER, lowLevelGpuQueues_.transferQueues, defaultQueue);
780     } else {
781         return defaultQueue;
782     }
783 }
784 
785 uint32_t DeviceVk::GetGpuQueueCount() const
786 {
787     return gpuQueueCount_;
788 }
789 
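// (Re)creates the VkPipelineCache. The local CacheHeader mirrors the Vulkan pipeline cache header
// (length, header version, vendorID, deviceID, pipelineCacheUUID); previously serialized data is only
// passed on when it matches the current physical device, otherwise an empty cache is created.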
790 void DeviceVk::InitializePipelineCache(array_view<const uint8_t> initialData)
791 {
792     if (plat_.pipelineCache) {
793         CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
794     }
795     struct CacheHeader {
796         uint32_t bytes;
797         uint32_t version;
798         uint32_t vendorId;
799         uint32_t deviceId;
800         uint8_t pipelineCacheUUID[VK_UUID_SIZE];
801     };
802     if (initialData.size() > sizeof(CacheHeader)) {
803         CacheHeader header;
804         CloneData(&header, sizeof(header), initialData.data(), sizeof(header));
805         const auto& props = plat_.physicalDeviceProperties.physicalDeviceProperties;
806         if (header.version != VkPipelineCacheHeaderVersion::VK_PIPELINE_CACHE_HEADER_VERSION_ONE ||
807             header.vendorId != props.vendorID || header.deviceId != props.deviceID ||
808             memcmp(header.pipelineCacheUUID, props.pipelineCacheUUID, VK_UUID_SIZE)) {
809             initialData = {};
810         }
811     }
812 
813     plat_.pipelineCache = CreateFunctionsVk::CreatePipelineCache(plat_.device, initialData);
814 }
815 
816 vector<uint8_t> DeviceVk::GetPipelineCache() const
817 {
818     vector<uint8_t> deviceData;
819     if (plat_.pipelineCache) {
820         size_t dataSize = 0u;
821         if (auto result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, nullptr);
822             result == VK_SUCCESS && dataSize) {
823             deviceData.resize(dataSize);
824             dataSize = deviceData.size();
825             result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, deviceData.data());
826             if (result == VK_SUCCESS) {
827                 deviceData.resize(dataSize);
828             } else {
829                 deviceData.clear();
830             }
831         }
832     }
833     return deviceData;
834 }
835 
836 LowLevelGpuQueueVk DeviceVk::GetGpuQueue(const GpuQueue& gpuQueue) const
837 {
838     // 1. tries to return the typed queue with given index
839     // 2. tries to return the typed queue with an index 0
840     // 3. returns the default queue
841     const auto getSpecificQueue = [](const uint32_t queueIndex, const vector<LowLevelGpuQueueVk>& specificQueues,
842                                       const LowLevelGpuQueueVk& defaultQueue) {
843         const uint32_t queueCount = (uint32_t)specificQueues.size();
844         if (queueIndex < queueCount) {
845             return specificQueues[queueIndex];
846         } else if (queueCount > 0) {
847             return specificQueues[0];
848         }
849         return defaultQueue;
850     };
851 
852     if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
853         return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.computeQueues, lowLevelGpuQueues_.defaultQueue);
854     } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
855         return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.graphicsQueues, lowLevelGpuQueues_.defaultQueue);
856     } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
857         return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.transferQueues, lowLevelGpuQueues_.defaultQueue);
858     } else {
859         return lowLevelGpuQueues_.defaultQueue;
860     }
861 }
862 
863 LowLevelGpuQueueVk DeviceVk::GetPresentationGpuQueue() const
864 {
865     // NOTE: the default graphics queue is expected to support presentation
866     return GetGpuQueue(GpuQueue { GpuQueue::QueueType::GRAPHICS, 0 });
867 }
868 
869 vector<LowLevelGpuQueueVk> DeviceVk::GetLowLevelGpuQueues() const
870 {
871     vector<LowLevelGpuQueueVk> gpuQueues;
872     gpuQueues.reserve(gpuQueueCount_);
873     gpuQueues.insert(gpuQueues.end(), lowLevelGpuQueues_.computeQueues.begin(), lowLevelGpuQueues_.computeQueues.end());
874     gpuQueues.insert(
875         gpuQueues.end(), lowLevelGpuQueues_.graphicsQueues.begin(), lowLevelGpuQueues_.graphicsQueues.end());
876     gpuQueues.insert(
877         gpuQueues.end(), lowLevelGpuQueues_.transferQueues.begin(), lowLevelGpuQueues_.transferQueues.end());
878     return gpuQueues;
879 }
880 
881 void DeviceVk::WaitForIdle()
882 {
883     if (plat_.device) {
884         if (!isRenderbackendRunning_) {
885             PLUGIN_LOG_D("Device - WaitForIdle");
886             vkDeviceWaitIdle(plat_.device); // device
887         } else {
888             PLUGIN_LOG_E("Device WaitForIdle can only be called when the render backend is not running");
889         }
890     }
891 }
892 
893 void DeviceVk::Activate() {}
894 
895 void DeviceVk::Deactivate() {}
896 
897 bool DeviceVk::AllowThreadedProcessing() const
898 {
899     return true;
900 }
901 
902 const DeviceVk::FeatureConfigurations& DeviceVk::GetFeatureConfigurations() const
903 {
904     return featureConfigurations_;
905 }
906 
907 const DeviceVk::CommonDeviceExtensions& DeviceVk::GetCommonDeviceExtensions() const
908 {
909     return commonDeviceExtensions_;
910 }
911 
912 const PlatformDeviceExtensions& DeviceVk::GetPlatformDeviceExtensions() const
913 {
914     return platformDeviceExtensions_;
915 }
916 
917 bool DeviceVk::HasDeviceExtension(const string_view extensionName) const
918 {
919     return extensions_.contains(extensionName);
920 }
921 
922 unique_ptr<Device> CreateDeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo)
923 {
924     return make_unique<DeviceVk>(renderContext, createInfo);
925 }
926 
927 unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuBufferDesc& desc)
928 {
929     return make_unique<GpuBufferVk>(*this, desc);
930 }
931 
932 unique_ptr<GpuImage> DeviceVk::CreateGpuImage(const GpuImageDesc& desc)
933 {
934     return make_unique<GpuImageVk>(*this, desc);
935 }
936 
937 unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
938     const GpuImageDesc& desc, const GpuImagePlatformData& platformData, const uintptr_t hwBuffer)
939 {
940     return make_unique<GpuImageVk>(*this, desc, platformData, hwBuffer);
941 }
942 
943 unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(const GpuImageDesc& desc, const GpuImagePlatformData& platformData)
944 {
945     return CreateGpuImageView(desc, platformData, 0);
946 }
947 
948 vector<unique_ptr<GpuImage>> DeviceVk::CreateGpuImageViews(const Swapchain& swapchain)
949 {
950     const GpuImageDesc& desc = swapchain.GetDesc();
951     const auto& swapchainPlat = static_cast<const SwapchainVk&>(swapchain).GetPlatformData();
952 
953     vector<unique_ptr<GpuImage>> gpuImages(swapchainPlat.swapchainImages.images.size());
954     for (size_t idx = 0; idx < gpuImages.size(); ++idx) {
955         GpuImagePlatformDataVk gpuImagePlat;
956         gpuImagePlat.image = swapchainPlat.swapchainImages.images[idx];
957         gpuImagePlat.imageView = swapchainPlat.swapchainImages.imageViews[idx];
958         gpuImages[idx] = this->CreateGpuImageView(desc, gpuImagePlat);
959     }
960     return gpuImages;
961 }
962 
963 unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
964     const GpuImageDesc& desc, const BackendSpecificImageDesc& platformData)
965 {
966     const ImageDescVk& imageDesc = (const ImageDescVk&)platformData;
967     GpuImagePlatformDataVk platData;
968     platData.image = imageDesc.image;
969     platData.imageView = imageDesc.imageView;
970     return CreateGpuImageView(desc, platData, imageDesc.platformHwBuffer);
971 }
972 
973 unique_ptr<GpuSampler> DeviceVk::CreateGpuSampler(const GpuSamplerDesc& desc)
974 {
975     return make_unique<GpuSamplerVk>(*this, desc);
976 }
977 
978 unique_ptr<GpuAccelerationStructure> DeviceVk::CreateGpuAccelerationStructure(const GpuAccelerationStructureDesc& desc)
979 {
980     return make_unique<GpuAccelerationStructureVk>(*this, desc);
981 }
982 
983 unique_ptr<RenderFrameSync> DeviceVk::CreateRenderFrameSync()
984 {
985     return make_unique<RenderFrameSyncVk>(*this);
986 }
987 
988 unique_ptr<RenderBackend> DeviceVk::CreateRenderBackend(
989     GpuResourceManager& gpuResourceMgr, const CORE_NS::IParallelTaskQueue::Ptr& queue)
990 {
991     return make_unique<RenderBackendVk>(*this, gpuResourceMgr, queue);
992 }
993 
994 unique_ptr<ShaderModule> DeviceVk::CreateShaderModule(const ShaderModuleCreateInfo& data)
995 {
996     return make_unique<ShaderModuleVk>(*this, data);
997 }
998 
999 unique_ptr<ShaderModule> DeviceVk::CreateComputeShaderModule(const ShaderModuleCreateInfo& data)
1000 {
1001     return make_unique<ShaderModuleVk>(*this, data);
1002 }
1003 
1004 unique_ptr<GpuShaderProgram> DeviceVk::CreateGpuShaderProgram(const GpuShaderProgramCreateData& data)
1005 {
1006     return make_unique<GpuShaderProgramVk>(*this, data);
1007 }
1008 
1009 unique_ptr<GpuComputeProgram> DeviceVk::CreateGpuComputeProgram(const GpuComputeProgramCreateData& data)
1010 {
1011     return make_unique<GpuComputeProgramVk>(*this, data);
1012 }
1013 
1014 unique_ptr<NodeContextDescriptorSetManager> DeviceVk::CreateNodeContextDescriptorSetManager()
1015 {
1016     return make_unique<NodeContextDescriptorSetManagerVk>(*this);
1017 }
1018 
1019 unique_ptr<NodeContextPoolManager> DeviceVk::CreateNodeContextPoolManager(
1020     GpuResourceManager& gpuResourceMgr, const GpuQueue& gpuQueue)
1021 {
1022     return make_unique<NodeContextPoolManagerVk>(*this, gpuResourceMgr, gpuQueue);
1023 }
1024 
1025 unique_ptr<GraphicsPipelineStateObject> DeviceVk::CreateGraphicsPipelineStateObject(const GpuShaderProgram& gpuProgram,
1026     const GraphicsState& graphicsState, const PipelineLayout& pipelineLayout,
1027     const VertexInputDeclarationView& vertexInputDeclaration,
1028     const ShaderSpecializationConstantDataView& specializationConstants, const DynamicStateFlags dynamicStateFlags,
1029     const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc>& renderPassSubpassDescs,
1030     const uint32_t subpassIndex, const LowLevelRenderPassData* renderPassData,
1031     const LowLevelPipelineLayoutData* pipelineLayoutData)
1032 {
1033     PLUGIN_ASSERT(renderPassData);
1034     PLUGIN_ASSERT(pipelineLayoutData);
1035     return make_unique<GraphicsPipelineStateObjectVk>(*this, gpuProgram, graphicsState, pipelineLayout,
1036         vertexInputDeclaration, specializationConstants, dynamicStateFlags, renderPassDesc, renderPassSubpassDescs,
1037         subpassIndex, *renderPassData, *pipelineLayoutData);
1038 }
1039 
1040 unique_ptr<ComputePipelineStateObject> DeviceVk::CreateComputePipelineStateObject(const GpuComputeProgram& gpuProgram,
1041     const PipelineLayout& pipelineLayout, const ShaderSpecializationConstantDataView& specializationConstants,
1042     const LowLevelPipelineLayoutData* pipelineLayoutData)
1043 {
1044     PLUGIN_ASSERT(pipelineLayoutData);
1045     return make_unique<ComputePipelineStateObjectVk>(
1046         *this, gpuProgram, pipelineLayout, specializationConstants, *pipelineLayoutData);
1047 }
1048 
const DebugFunctionUtilitiesVk& DeviceVk::GetDebugFunctionUtilities() const
{
    return debugFunctionUtilities_;
}

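// Debug utils entry points (VK_EXT_debug_utils) are resolved only when validation or debug marker
// support is compiled in.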
void DeviceVk::CreateDebugFunctions()
{
#if (RENDER_VULKAN_VALIDATION_ENABLED == 1)
    debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT =
        (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(plat_.device, "vkSetDebugUtilsObjectNameEXT");
#endif
#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
    debugFunctionUtilities_.vkCmdBeginDebugUtilsLabelEXT =
        (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdBeginDebugUtilsLabelEXT");
    debugFunctionUtilities_.vkCmdEndDebugUtilsLabelEXT =
        (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdEndDebugUtilsLabelEXT");
#endif
}

const DeviceVk::ExtFunctions& DeviceVk::GetExtFunctions() const
{
    return extFunctions_;
}

const PlatformExtFunctions& DeviceVk::GetPlatformExtFunctions() const
{
    return platformExtFunctions_;
}

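// Extension entry points are resolved at runtime with vkGetInstanceProcAddr; a failed lookup is logged
// and, for render pass 2, the corresponding extension flag is cleared.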
void DeviceVk::CreateExtFunctions()
{
    if (commonDeviceExtensions_.renderPass2) {
        extFunctions_.vkCreateRenderPass2KHR =
            (PFN_vkCreateRenderPass2KHR)vkGetInstanceProcAddr(plat_.instance, "vkCreateRenderPass2KHR");
        if (!extFunctions_.vkCreateRenderPass2KHR) {
            commonDeviceExtensions_.renderPass2 = false;
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateRenderPass2KHR");
        }
    }

    if (commonDeviceExtensions_.getMemoryRequirements2) {
        extFunctions_.vkGetImageMemoryRequirements2 = (PFN_vkGetImageMemoryRequirements2)vkGetInstanceProcAddr(
            plat_.instance, "vkGetImageMemoryRequirements2KHR");
        if (!extFunctions_.vkGetImageMemoryRequirements2) {
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetImageMemoryRequirements2");
        }
    }

    if (commonDeviceExtensions_.samplerYcbcrConversion) {
        GetYcbcrExtFunctions(plat_.instance, extFunctions_);
    }

    extFunctions_.vkAcquireNextImageKHR =
        (PFN_vkAcquireNextImageKHR)vkGetInstanceProcAddr(plat_.instance, "vkAcquireNextImageKHR");

#if (RENDER_VULKAN_RT_ENABLED == 1)
    extFunctions_.vkGetAccelerationStructureBuildSizesKHR =
        (PFN_vkGetAccelerationStructureBuildSizesKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureBuildSizesKHR");
    if (!extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureBuildSizesKHR");
    }
    extFunctions_.vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkCmdBuildAccelerationStructuresKHR");
    if (!extFunctions_.vkCmdBuildAccelerationStructuresKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCmdBuildAccelerationStructuresKHR");
    }
    extFunctions_.vkCreateAccelerationStructureKHR =
        (PFN_vkCreateAccelerationStructureKHR)vkGetInstanceProcAddr(plat_.instance, "vkCreateAccelerationStructureKHR");
    if (!extFunctions_.vkCreateAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateAccelerationStructureKHR");
    }
    extFunctions_.vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkDestroyAccelerationStructureKHR");
    if (!extFunctions_.vkDestroyAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroyAccelerationStructureKHR");
    }
    extFunctions_.vkGetAccelerationStructureDeviceAddressKHR =
        (PFN_vkGetAccelerationStructureDeviceAddressKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureDeviceAddressKHR");
    if (!extFunctions_.vkGetAccelerationStructureDeviceAddressKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureDeviceAddressKHR");
    }
#endif
}

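// LowLevelDeviceVk exposes native Vulkan platform data for registered GPU resources. Access is gated by
// DeviceVk::GetLockResourceBackendAccess() so handles are only queried while the backend allows it.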
LowLevelDeviceVk::LowLevelDeviceVk(DeviceVk& deviceVk)
    : deviceVk_(deviceVk), gpuResourceMgr_(static_cast<GpuResourceManager&>(deviceVk_.GetGpuResourceManager()))
{}

DeviceBackendType LowLevelDeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformDataVk& LowLevelDeviceVk::GetPlatformDataVk() const
{
    return deviceVk_.GetPlatformDataVk();
}

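// A minimal usage sketch for the resource getters below (assumptions: the caller runs where
// GetLockResourceBackendAccess() is true; 'lowLevelDevice' and 'bufferHandle' are hypothetical names,
// and GpuBufferPlatformDataVk is assumed to expose the native VkBuffer as 'buffer'):
//   const GpuBufferPlatformDataVk bufferPlat = lowLevelDevice.GetBuffer(bufferHandle);
//   if (bufferPlat.buffer != VK_NULL_HANDLE) {
//       // record native Vulkan commands that reference bufferPlat.buffer
//   }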
GpuBufferPlatformDataVk LowLevelDeviceVk::GetBuffer(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuBufferVk* buffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(handle);
        if (buffer) {
            return buffer->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuImagePlatformDataVk LowLevelDeviceVk::GetImage(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuImageVk* image = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
        if (image) {
            return image->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuSamplerPlatformDataVk LowLevelDeviceVk::GetSampler(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuSamplerVk* sampler = gpuResourceMgr_.GetSampler<GpuSamplerVk>(handle);
        if (sampler) {
            return sampler->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}
RENDER_END_NAMESPACE()