/*
 * Copyright (C) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "device_vk.h"

#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <vulkan/vulkan.h>

#include <base/containers/vector.h>
#include <base/math/mathf.h>
#include <core/engine_info.h>
#include <render/intf_render_context.h>
#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_program_util.h"
#include "device/gpu_resource_manager.h"
#include "device/shader_manager.h"
#include "device/shader_module.h"
#include "platform_vk.h"
#include "util/log.h"
#include "vulkan/create_functions_vk.h"
#include "vulkan/gpu_acceleration_structure_vk.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/gpu_program_vk.h"
#include "vulkan/gpu_query_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_backend_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/shader_module_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
static constexpr string_view DEVICE_EXTENSION_SWAPCHAIN { VK_KHR_SWAPCHAIN_EXTENSION_NAME };

// promoted to 1.2, requires VK_KHR_create_renderpass2
static constexpr string_view DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE { VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_CREATE_RENDERPASS2 { VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME };

static constexpr string_view DEVICE_EXTENSION_EXTERNAL_MEMORY { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2 {
    VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME
};
static constexpr string_view DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION {
    VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME
};
static constexpr string_view DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN { VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME };

void GetYcbcrExtFunctions(const VkInstance instance, DeviceVk::ExtFunctions& extFunctions)
{
    extFunctions.vkCreateSamplerYcbcrConversion =
        (PFN_vkCreateSamplerYcbcrConversion)vkGetInstanceProcAddr(instance, "vkCreateSamplerYcbcrConversion");
    if (!extFunctions.vkCreateSamplerYcbcrConversion) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateSamplerYcbcrConversion");
    }
    extFunctions.vkDestroySamplerYcbcrConversion =
        (PFN_vkDestroySamplerYcbcrConversion)vkGetInstanceProcAddr(instance, "vkDestroySamplerYcbcrConversion");
    if (!extFunctions.vkDestroySamplerYcbcrConversion) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroySamplerYcbcrConversion");
    }
}

// ray-tracing
#if (RENDER_VULKAN_RT_ENABLED == 1)
static constexpr string_view DEVICE_EXTENSION_ACCELERATION_STRUCTURE { "VK_KHR_acceleration_structure" };
static constexpr string_view DEVICE_EXTENSION_RAY_QUERY { "VK_KHR_ray_query" };
static constexpr string_view DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS { "VK_KHR_deferred_host_operations" };
static constexpr string_view DEVICE_EXTENSION_RAY_TRACING_PIPELINE { "VK_KHR_ray_tracing_pipeline" };
static constexpr string_view DEVICE_EXTENSION_PIPELINE_LIBRARY { "VK_KHR_pipeline_library" };
#endif

constexpr uint32_t MIN_ALLOCATION_BLOCK_SIZE { 4u * 1024u * 1024u };
constexpr uint32_t MAX_ALLOCATION_BLOCK_SIZE { 1024u * 1024u * 1024u };
static constexpr const QueueProperties DEFAULT_QUEUE {
    VK_QUEUE_GRAPHICS_BIT, // requiredFlags
    1, // count
    1.0f, // priority
    false, // explicitFlags
    true, // canPresent
};

PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo GetAllocatorCreateInfo(const BackendExtraVk* backendExtra)
{
    // create default pools
    PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo createInfo;
    uint32_t dynamicUboByteSize = 16u * 1024u * 1024u;
    if (backendExtra) {
        const auto& sizes = backendExtra->gpuMemoryAllocatorSizes;
        if (sizes.defaultAllocationBlockSize != ~0u) {
            createInfo.preferredLargeHeapBlockSize = Math::min(
                MAX_ALLOCATION_BLOCK_SIZE, Math::max(sizes.defaultAllocationBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
        }
        if (sizes.customAllocationDynamicUboBlockSize != ~0u) {
            dynamicUboByteSize = Math::min(MAX_ALLOCATION_BLOCK_SIZE,
                Math::max(sizes.customAllocationDynamicUboBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
        }
    }

    // staging
    {
        GpuBufferDesc desc;
        desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_SINGLE_SHOT_STAGING;
        desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                   MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_SRC_BIT;
        createInfo.customPools.push_back({
            "STAGING_GPU_BUFFER",
            PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
            0u,
            // if a linear allocator is used, memory can easily be wasted depending on the client's usage pattern.
            false,
            { move(desc) },
        });
    }
    // dynamic uniform ring buffers
    {
        GpuBufferDesc desc;
        desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER;
        desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                   MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
        createInfo.customPools.push_back({
            "DYNAMIC_UNIFORM_GPU_BUFFER",
            PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
            dynamicUboByteSize,
            false,
            { move(desc) },
        });
    }

    return createInfo;
}

VkBool32 VKAPI_PTR DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
    VkDebugUtilsMessageTypeFlagsEXT messageTypes, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
    void* pUserData)
{
    if (pCallbackData && pCallbackData->pMessage) {
        if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
            PLUGIN_LOG_E("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) {
            PLUGIN_LOG_W("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
            PLUGIN_LOG_I("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
            PLUGIN_LOG_V("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        }
        PLUGIN_ASSERT_MSG(
            ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) == 0,
            "VALIDATION ERROR");
    }

    // The application should always return VK_FALSE.
    return VK_FALSE;
}

VkBool32 VKAPI_PTR DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT, uint64_t, size_t,
    int32_t, const char*, const char* pMessage, void*)
{
    if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) {
        PLUGIN_LOG_E("%s", pMessage);
    } else if (flags & (VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)) {
        PLUGIN_LOG_W("%s", pMessage);
    } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) {
        PLUGIN_LOG_I("%s", pMessage);
    } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) {
        PLUGIN_LOG_D("%s", pMessage);
    }
    PLUGIN_ASSERT_MSG((flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) == 0, "VALIDATION ERROR");
    return VK_TRUE;
}

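// Fetches every VkQueue of the given queue family from the device and stores them as low level queue wrappers.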
void EmplaceDeviceQueue(
    const VkDevice device, const LowLevelQueueInfo& aQueueInfo, vector<LowLevelGpuQueueVk>& aLowLevelQueues)
{
    for (uint32_t idx = 0; idx < aQueueInfo.queueCount; ++idx) {
        VkQueue queue = VK_NULL_HANDLE;
        vkGetDeviceQueue(device, // device
            aQueueInfo.queueFamilyIndex, // queueFamilyIndex
            idx, // queueIndex
            &queue); // pQueue
        aLowLevelQueues.push_back(LowLevelGpuQueueVk { queue, aQueueInfo });
    }
}

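// Collects the depth formats that support optimal-tiling depth/stencil attachment usage on this physical device.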
void CheckValidDepthFormats(const DevicePlatformDataVk& devicePlat, DevicePlatformInternalDataVk& dataInternal)
{
    constexpr uint32_t DEPTH_FORMAT_COUNT { 4 };
    constexpr Format DEPTH_FORMATS[DEPTH_FORMAT_COUNT] = { BASE_FORMAT_D24_UNORM_S8_UINT, BASE_FORMAT_D32_SFLOAT,
        BASE_FORMAT_D16_UNORM, BASE_FORMAT_X8_D24_UNORM_PACK32 };
    for (uint32_t idx = 0; idx < DEPTH_FORMAT_COUNT; ++idx) {
        VkFormatProperties formatProperties;
        Format format = DEPTH_FORMATS[idx];
        vkGetPhysicalDeviceFormatProperties(devicePlat.physicalDevice, // physicalDevice
            (VkFormat)format, // format
            &formatProperties); // pFormatProperties
        const VkFormatFeatureFlags optimalTilingFeatureFlags = formatProperties.optimalTilingFeatures;
        if (optimalTilingFeatureFlags & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
            dataInternal.supportedDepthFormats.push_back(format);
        }
    }
}

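// Builds the list of device extensions to request: swapchain and render pass 2 related extensions, platform
// specific extensions, optional ray tracing extensions, and any extensions supplied by the client.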
vector<string_view> GetPreferredDeviceExtensions(const BackendExtraVk* backendExtra)
{
    vector<string_view> extensions { DEVICE_EXTENSION_SWAPCHAIN };
    extensions.push_back(DEVICE_EXTENSION_CREATE_RENDERPASS2);
    extensions.push_back(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE);
    GetPlatformDeviceExtensions(extensions);
#if (RENDER_VULKAN_RT_ENABLED == 1)
    extensions.push_back(DEVICE_EXTENSION_ACCELERATION_STRUCTURE);
    extensions.push_back(DEVICE_EXTENSION_RAY_TRACING_PIPELINE);
    extensions.push_back(DEVICE_EXTENSION_RAY_QUERY);
    extensions.push_back(DEVICE_EXTENSION_PIPELINE_LIBRARY);
    extensions.push_back(DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS);
#endif
    if (backendExtra) {
        for (const auto str : backendExtra->extensions.extensionNames) {
            extensions.push_back(str);
        }
    }
    return extensions;
}

DeviceVk::CommonDeviceExtensions GetEnabledCommonDeviceExtensions(
    const unordered_map<string, uint32_t>& enabledDeviceExtensions)
{
    DeviceVk::CommonDeviceExtensions extensions;
    extensions.swapchain = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SWAPCHAIN);
    // create_renderpass2 is core in 1.2; we only use render pass 2 when depth stencil resolve is needed
    extensions.renderPass2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE) &&
                             enabledDeviceExtensions.contains(DEVICE_EXTENSION_CREATE_RENDERPASS2);
    extensions.externalMemory = enabledDeviceExtensions.contains(DEVICE_EXTENSION_EXTERNAL_MEMORY);
    extensions.getMemoryRequirements2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2);
    extensions.queueFamilyForeign = enabledDeviceExtensions.contains(DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN);
    extensions.samplerYcbcrConversion = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION);

    return extensions;
}

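// Starts from all features supported by the physical device, disables the ones the engine does not use, and then
// re-enables any client requested features that the device actually supports.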
void PreparePhysicalDeviceFeaturesForEnabling(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
{
    // enable all by default and then disable a few
    plat.enabledPhysicalDeviceFeatures = plat.physicalDeviceProperties.physicalDeviceFeatures;
    // features disabled for the core engine
    plat.enabledPhysicalDeviceFeatures.geometryShader = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.tessellationShader = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.sampleRateShading = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.occlusionQueryPrecise = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.pipelineStatisticsQuery = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.shaderTessellationAndGeometryPointSize = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.inheritedQueries = VK_FALSE;
    if (backendExtra) {
        // check for support and prepare enabling
        if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
            const size_t valueCount = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
            const array_view<const VkBool32> supported(
                reinterpret_cast<VkBool32*>(&plat.physicalDeviceProperties.physicalDeviceFeatures), valueCount);
            VkPhysicalDeviceFeatures* wantedFeatures =
                (&backendExtra->extensions.physicalDeviceFeaturesToEnable->features);
            const array_view<const VkBool32> wanted(reinterpret_cast<VkBool32*>(wantedFeatures), valueCount);

            array_view<VkBool32> enabledPhysicalDeviceFeatures(
                reinterpret_cast<VkBool32*>(&plat.enabledPhysicalDeviceFeatures), valueCount);
            for (size_t idx = 0; idx < valueCount; ++idx) {
                if (supported[idx] && wanted[idx]) {
                    enabledPhysicalDeviceFeatures[idx] = VK_TRUE;
                } else if (wanted[idx]) {
                    PLUGIN_LOG_W(
                        "physical device feature not supported/enabled from idx: %u", static_cast<uint32_t>(idx));
                }
            }
        }
    }
}

FormatProperties FillDeviceFormatSupport(VkPhysicalDevice physicalDevice, const Format format)
{
    VkFormatProperties formatProperties;
    vkGetPhysicalDeviceFormatProperties(physicalDevice, // physicalDevice
        (VkFormat)format, // format
        &formatProperties); // pFormatProperties
    return FormatProperties {
        (FormatFeatureFlags)formatProperties.linearTilingFeatures,
        (FormatFeatureFlags)formatProperties.optimalTilingFeatures,
        (FormatFeatureFlags)formatProperties.bufferFeatures,
        GpuProgramUtil::FormatByteSize(format),
    };
}

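// Caches format properties for the linear range of core formats and for the additional (extension) formats,
// which are stored after the linear range starting at ADDITIONAL_FORMAT_BASE_IDX.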
void FillFormatSupport(VkPhysicalDevice physicalDevice, vector<FormatProperties>& formats)
{
    const uint32_t fullSize = DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT +
                              DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT;
    formats.resize(fullSize);
    for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT; ++idx) {
        formats[idx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(idx));
    }
    // pre-build additional formats
    for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT; ++idx) {
        const uint32_t currIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
        PLUGIN_ASSERT(currIdx < static_cast<uint32_t>(formats.size()));
        const uint32_t formatIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER;
        formats[currIdx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(formatIdx));
    }
}
} // namespace

DeviceVk::DeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo) : Device(renderContext, createInfo)
{
    // assume instance and device will be created internally
    ownInstanceAndDevice_ = true;

    const BackendExtraVk* backendExtra = static_cast<const BackendExtraVk*>(createInfo.backendConfiguration);
    // update internal state based on the optional backend configuration given by the client. the size of
    // queueProperties will depend on the enableMultiQueue setting.
    const auto queueProperties = CheckExternalConfig(backendExtra);

    // if the client did not provide a Vulkan instance, create our own
    if (ownInstanceAndDevice_) {
        CreateInstanceAndPhysicalDevice();
    }
    const auto availableQueues = CreateFunctionsVk::GetAvailableQueues(plat_.physicalDevice, queueProperties);
    if (ownInstanceAndDevice_) {
        CreateDevice(backendExtra, availableQueues);
        CreateDebugFunctions();
    }
    CreateExtFunctions();
    CreatePlatformExtFunctions();
    SortAvailableQueues(availableQueues);

    CheckValidDepthFormats(plat_, platInternal_);
    FillFormatSupport(plat_.physicalDevice, formatProperties_);

    PLUGIN_ASSERT_MSG(!lowLevelGpuQueues_.graphicsQueues.empty(), "default queue not initialized");
    if (!lowLevelGpuQueues_.graphicsQueues.empty()) {
        lowLevelGpuQueues_.defaultQueue = lowLevelGpuQueues_.graphicsQueues[0];
    } else {
        PLUGIN_LOG_E("default vulkan queue not initialized");
    }

    gpuQueueCount_ =
        static_cast<uint32_t>(lowLevelGpuQueues_.computeQueues.size() + lowLevelGpuQueues_.graphicsQueues.size() +
                              lowLevelGpuQueues_.transferQueues.size());

    const PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo allocatorCreateInfo =
        GetAllocatorCreateInfo(backendExtra);
    platformGpuMemoryAllocator_ = make_unique<PlatformGpuMemoryAllocator>(
        plat_.instance, plat_.physicalDevice, plat_.device, allocatorCreateInfo);

    if (queueProperties.size() > 1) {
        PLUGIN_LOG_I("gpu queue count: %u", gpuQueueCount_);
    }

    SetDeviceStatus(true);

    const GpuResourceManager::CreateInfo grmCreateInfo {
        GpuResourceManager::GPU_RESOURCE_MANAGER_OPTIMIZE_STAGING_MEMORY,
    };
    gpuResourceMgr_ = make_unique<GpuResourceManager>(*this, grmCreateInfo);
    shaderMgr_ = make_unique<ShaderManager>(*this);

    lowLevelDevice_ = make_unique<LowLevelDeviceVk>(*this);
}

DeviceVk::~DeviceVk()
{
    WaitForIdle();

    gpuResourceMgr_.reset();
    shaderMgr_.reset();

    platformGpuMemoryAllocator_.reset();
    swapchain_.reset();

    if (plat_.pipelineCache) {
        CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
    }

    if (ownInstanceAndDevice_) {
        CreateFunctionsVk::DestroyDevice(plat_.device);
        CreateFunctionsVk::DestroyDebugMessenger(plat_.instance, debugFunctionUtilities_.debugMessenger);
        CreateFunctionsVk::DestroyDebugCallback(plat_.instance, debugFunctionUtilities_.debugCallback);
        CreateFunctionsVk::DestroyInstance(plat_.instance);
    }
}

void DeviceVk::CreateInstanceAndPhysicalDevice()
{
    const VersionInfo engineInfo { "core_prototype", 0, 1, 0 };
    const VersionInfo appInfo { "core_prototype_app", 0, 1, 0 };

    const auto instanceWrapper = CreateFunctionsVk::CreateInstance(engineInfo, appInfo);
    plat_.instance = instanceWrapper.instance;
    if (instanceWrapper.debugUtilsSupported) {
        debugFunctionUtilities_.debugMessenger =
            CreateFunctionsVk::CreateDebugMessenger(plat_.instance, DebugMessengerCallback);
    }
    if (!debugFunctionUtilities_.debugMessenger && instanceWrapper.debugReportSupported) {
        debugFunctionUtilities_.debugCallback =
            CreateFunctionsVk::CreateDebugCallback(plat_.instance, DebugReportCallback);
    }
    auto physicalDeviceWrapper = CreateFunctionsVk::CreatePhysicalDevice(plat_.instance, DEFAULT_QUEUE);
    const uint32_t physicalDeviceApiMajor =
        VK_VERSION_MAJOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
    const uint32_t physicalDeviceApiMinor =
        VK_VERSION_MINOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
    plat_.deviceApiMajor = std::min(instanceWrapper.apiMajor, physicalDeviceApiMajor);
    plat_.deviceApiMinor = std::min(instanceWrapper.apiMinor, physicalDeviceApiMinor);
    PLUGIN_LOG_D("device api version %u.%u", plat_.deviceApiMajor, plat_.deviceApiMinor);

    plat_.physicalDevice = physicalDeviceWrapper.physicalDevice;
    plat_.physicalDeviceProperties = move(physicalDeviceWrapper.physicalDeviceProperties);
    plat_.physicalDeviceExtensions = move(physicalDeviceWrapper.physicalDeviceExtensions);
    const auto& memoryProperties = plat_.physicalDeviceProperties.physicalDeviceMemoryProperties;
    deviceSharedMemoryPropertyFlags_ =
        (memoryProperties.memoryTypeCount > 0) ? (MemoryPropertyFlags)memoryProperties.memoryTypes[0].propertyFlags : 0;
    for (uint32_t idx = 1; idx < memoryProperties.memoryTypeCount; ++idx) {
        const MemoryPropertyFlags memoryPropertyFlags =
            (MemoryPropertyFlags)memoryProperties.memoryTypes[idx].propertyFlags;
        // do not compare lazily allocated or protected memory blocks
        if ((memoryPropertyFlags & (CORE_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | CORE_MEMORY_PROPERTY_PROTECTED_BIT)) ==
            0) {
            deviceSharedMemoryPropertyFlags_ &= memoryPropertyFlags;
        }
    }
}

void DeviceVk::CreateDevice(const BackendExtraVk* backendExtra, const vector<LowLevelQueueInfo>& availableQueues)
{
    vector<string_view> preferredExtensions = GetPreferredDeviceExtensions(backendExtra);
    PreparePhysicalDeviceFeaturesForEnabling(backendExtra, plat_);

    VkPhysicalDeviceFeatures2* physicalDeviceFeatures2Ptr = nullptr;
    VkPhysicalDeviceSamplerYcbcrConversionFeatures ycbcrConversionFeatures {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES, // sType
        nullptr, // pNext
        true, // samplerYcbcrConversion
    };
    VkPhysicalDeviceFeatures2 physicalDeviceFeatures2 {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, // sType
        &ycbcrConversionFeatures, // pNext
        {}, // features
    };
    void* pNextForBackendExtra = ycbcrConversionFeatures.pNext;
#if (RENDER_VULKAN_RT_ENABLED == 1)
    VkPhysicalDeviceBufferDeviceAddressFeatures pdBufferDeviceAddressFeatures {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, // sType
        nullptr, // pNext
        true, // bufferDeviceAddress
        false, // bufferDeviceAddressCaptureReplay
        false, // bufferDeviceAddressMultiDevice
    };
    VkPhysicalDeviceRayTracingPipelineFeaturesKHR pdRayTracingPipelineFeatures {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, // sType
        &pdBufferDeviceAddressFeatures, // pNext
        true, // rayTracingPipeline
        false, // rayTracingPipelineShaderGroupHandleCaptureReplay
        false, // rayTracingPipelineShaderGroupHandleCaptureReplayMixed
        false, // rayTracingPipelineTraceRaysIndirect
        false, // rayTraversalPrimitiveCulling
    };
    VkPhysicalDeviceAccelerationStructureFeaturesKHR pdAccelerationStructureFeatures {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, // sType
        &pdRayTracingPipelineFeatures, // pNext
        true, // accelerationStructure
        false, // accelerationStructureCaptureReplay
        false, // accelerationStructureIndirectBuild
        false, // accelerationStructureHostCommands
        false, // descriptorBindingAccelerationStructureUpdateAfterBind
    };
    VkPhysicalDeviceRayQueryFeaturesKHR pdRayQueryFeatures {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, // sType
        &pdAccelerationStructureFeatures, // pNext
        true, // rayQuery
    };

    // chain ray tracing features to pNext first
    ycbcrConversionFeatures.pNext = &pdRayQueryFeatures;
    // backend extra features will be chained to the pNext of the ray tracing extensions
    pNextForBackendExtra = pdBufferDeviceAddressFeatures.pNext;
#endif
    if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
        // pipe user extension physical device features
        if (backendExtra) {
            if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
                pNextForBackendExtra = backendExtra->extensions.physicalDeviceFeaturesToEnable->pNext;
            }
        }
        // NOTE: on some platforms the Vulkan library only has the 1.0 entry points. To avoid variation, always fetch
        // the function through vkGetInstanceProcAddr.
        PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2 =
            (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceFeatures2");
        vkGetPhysicalDeviceFeatures2(plat_.physicalDevice, &physicalDeviceFeatures2);

        // vkGetPhysicalDeviceFeatures has already filled this, and PreparePhysicalDeviceFeaturesForEnabling
        // disabled/enabled some features.
        physicalDeviceFeatures2.features = plat_.enabledPhysicalDeviceFeatures;
        physicalDeviceFeatures2Ptr = &physicalDeviceFeatures2;
    }
    const DeviceWrapper deviceWrapper =
        CreateFunctionsVk::CreateDevice(plat_.instance, plat_.physicalDevice, plat_.physicalDeviceExtensions,
            plat_.enabledPhysicalDeviceFeatures, physicalDeviceFeatures2Ptr, availableQueues, preferredExtensions);
    plat_.device = deviceWrapper.device;
    for (const auto& ref : deviceWrapper.extensions) {
        extensions_[ref] = 1u;
    }
    commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
    platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
}

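// Applies the client provided backend configuration: adds an optional compute queue when multi-queue is enabled
// and adopts an application supplied instance, physical device, and device instead of creating our own.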
vector<QueueProperties> DeviceVk::CheckExternalConfig(const BackendExtraVk* backendConfiguration)
{
    vector<QueueProperties> queueProperties;
    queueProperties.push_back(DEFAULT_QUEUE);

    if (!backendConfiguration) {
        return queueProperties;
    }

    const auto& extra = *backendConfiguration;
    if (extra.enableMultiQueue) {
        queueProperties.push_back(QueueProperties {
            VK_QUEUE_COMPUTE_BIT, // requiredFlags
            1, // count
            1.0f, // priority
            true, // explicitFlags
            false, // canPresent
        });
        PLUGIN_LOG_I("trying to enable gpu multi-queue, with queue count: %u", (uint32_t)queueProperties.size());
    }

    if (extra.instance != VK_NULL_HANDLE) {
        PLUGIN_LOG_D("trying to use application given vulkan instance, device, and physical device");
        PLUGIN_ASSERT((extra.instance && extra.physicalDevice && extra.device));
        plat_.instance = extra.instance;
        plat_.physicalDevice = extra.physicalDevice;
        plat_.device = extra.device;
        ownInstanceAndDevice_ = false; // everything given from the application

        const auto myDevice = plat_.physicalDevice;
        auto& myProperties = plat_.physicalDeviceProperties;
        vkGetPhysicalDeviceProperties(myDevice, &myProperties.physicalDeviceProperties);
        vkGetPhysicalDeviceFeatures(myDevice, &myProperties.physicalDeviceFeatures);
        vkGetPhysicalDeviceMemoryProperties(myDevice, &myProperties.physicalDeviceMemoryProperties);
    }
    return queueProperties;
}

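// Sorts the available queues by their exact queue flags into compute, graphics, and transfer buckets.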
void DeviceVk::SortAvailableQueues(const vector<LowLevelQueueInfo>& availableQueues)
{
    for (const auto& ref : availableQueues) {
        if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_COMPUTE_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.computeQueues);
        } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_GRAPHICS_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.graphicsQueues);
        } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_TRANSFER_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.transferQueues);
        }
    }
}

DeviceBackendType DeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformData& DeviceVk::GetPlatformData() const
{
    return plat_;
}

const DevicePlatformDataVk& DeviceVk::GetPlatformDataVk() const
{
    return plat_;
}

const DevicePlatformInternalDataVk& DeviceVk::GetPlatformInternalDataVk() const
{
    return platInternal_;
}

ILowLevelDevice& DeviceVk::GetLowLevelDevice() const
{
    return *lowLevelDevice_;
}

FormatProperties DeviceVk::GetFormatProperties(const Format format) const
{
    const uint32_t formatSupportSize = static_cast<uint32_t>(formatProperties_.size());
    const uint32_t formatIdx = static_cast<uint32_t>(format);
    if (formatIdx < formatSupportSize) {
        return formatProperties_[formatIdx];
    } else if ((formatIdx >= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) &&
               (formatIdx <= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_END_NUMBER)) {
        // additional formats are stored after the linear range (see FillFormatSupport)
        const uint32_t currIdx = (formatIdx - DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) +
                                 DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
        PLUGIN_ASSERT(currIdx < formatSupportSize);
        return formatProperties_[currIdx];
    }
    return {};
}


AccelerationStructureBuildSizes DeviceVk::GetAccelerationStructureBuildSizes(
    const AccelerationStructureBuildGeometryInfo& geometry,
    BASE_NS::array_view<const AccelerationStructureGeometryTrianglesInfo> triangles,
    BASE_NS::array_view<const AccelerationStructureGeometryAabbsInfo> aabbs,
    BASE_NS::array_view<const AccelerationStructureGeometryInstancesInfo> instances) const
{
#if (RENDER_VULKAN_RT_ENABLED == 1)
    const VkDevice device = plat_.device;

    const size_t arraySize = triangles.size() + aabbs.size() + instances.size();
    vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
    vector<uint32_t> maxPrimitiveCounts(arraySize);
    uint32_t arrayIndex = 0;
    for (const auto& trianglesRef : triangles) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr, // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR, // geometryType
            {}, // geometry
            VkGeometryFlagsKHR(trianglesRef.geometryFlags), // flags
        };
        geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
            nullptr, // pNext
            VkFormat(trianglesRef.vertexFormat), // vertexFormat
            {}, // vertexData
            VkDeviceSize(trianglesRef.vertexStride), // vertexStride
            trianglesRef.maxVertex, // maxVertex
            VkIndexType(trianglesRef.indexType), // indexType
            {}, // indexData
            {}, // transformData
        };
        maxPrimitiveCounts[arrayIndex] = trianglesRef.indexCount / 3u; // triangles
        arrayIndex++;
    }
    for (const auto& aabbsRef : aabbs) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr, // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR, // geometryType
            {}, // geometry
            0, // flags
        };
        geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
            nullptr, // pNext
            {}, // data
            aabbsRef.stride, // stride
        };
        maxPrimitiveCounts[arrayIndex] = 1u;
        arrayIndex++;
    }
    for (const auto& instancesRef : instances) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr, // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR, // geometryType
            {}, // geometry
            0, // flags
        };
        geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
            nullptr, // pNext
            instancesRef.arrayOfPointers, // arrayOfPointers
            {}, // data
        };
        maxPrimitiveCounts[arrayIndex] = 1u;
        arrayIndex++;
    }

    const VkAccelerationStructureBuildGeometryInfoKHR geometryInfoVk {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
        nullptr, // pNext
        VkAccelerationStructureTypeKHR(geometry.type), // type
        VkBuildAccelerationStructureFlagsKHR(geometry.flags), // flags
        VkBuildAccelerationStructureModeKHR(geometry.mode), // mode
        VK_NULL_HANDLE, // srcAccelerationStructure
        VK_NULL_HANDLE, // dstAccelerationStructure
        arrayIndex, // geometryCount
        geometryData.data(), // pGeometries
        nullptr, // ppGeometries
        {}, // scratchData
    };

    VkAccelerationStructureBuildSizesInfoKHR buildSizesInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // sType
        nullptr, // pNext
        0, // accelerationStructureSize
        0, // updateScratchSize
        0, // buildScratchSize
    };
    if ((arrayIndex > 0) && extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        extFunctions_.vkGetAccelerationStructureBuildSizesKHR(device, // device
            VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, // buildType
            &geometryInfoVk, // pBuildInfo
            maxPrimitiveCounts.data(), // pMaxPrimitiveCounts
            &buildSizesInfo); // pSizeInfo
    }

    return AccelerationStructureBuildSizes {
        static_cast<uint32_t>(buildSizesInfo.accelerationStructureSize),
        static_cast<uint32_t>(buildSizesInfo.updateScratchSize),
        static_cast<uint32_t>(buildSizesInfo.buildScratchSize),
    };
#else
    return AccelerationStructureBuildSizes { 0, 0, 0 };
#endif
}

void DeviceVk::CreateDeviceSwapchain(const SwapchainCreateInfo& swapchainCreateInfo)
{
    WaitForIdle();
    swapchain_.reset();
    swapchain_ = make_unique<SwapchainVk>(*this, swapchainCreateInfo);
}

void DeviceVk::DestroyDeviceSwapchain()
{
    WaitForIdle();
    swapchain_.reset();
}

PlatformGpuMemoryAllocator* DeviceVk::GetPlatformGpuMemoryAllocator()
{
    return platformGpuMemoryAllocator_.get();
}

GpuQueue DeviceVk::GetValidGpuQueue(const GpuQueue& gpuQueue) const
{
    const auto getSpecificQueue = [](const uint32_t queueIndex, const GpuQueue::QueueType queueType,
                                      const vector<LowLevelGpuQueueVk>& specificQueues, const GpuQueue& defaultQueue) {
        const uint32_t queueCount = (uint32_t)specificQueues.size();
        if (queueIndex < queueCount) {
            return GpuQueue { queueType, queueIndex };
        } else if (queueCount > 0) {
            return GpuQueue { queueType, 0 };
        }
        return defaultQueue;
    };

    GpuQueue defaultQueue { GpuQueue::QueueType::GRAPHICS, 0 };
    if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::COMPUTE, lowLevelGpuQueues_.computeQueues, defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::GRAPHICS, lowLevelGpuQueues_.graphicsQueues, defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::TRANSFER, lowLevelGpuQueues_.transferQueues, defaultQueue);
    } else {
        return defaultQueue;
    }
}

uint32_t DeviceVk::GetGpuQueueCount() const
{
    return gpuQueueCount_;
}

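// (Re)creates the pipeline cache. The provided initial data is only used when its header matches the current
// device (header version, vendor id, device id, and pipeline cache UUID).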
void DeviceVk::InitializePipelineCache(array_view<const uint8_t> initialData)
{
    if (plat_.pipelineCache) {
        CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
    }
    struct CacheHeader {
        uint32_t bytes;
        uint32_t version;
        uint32_t vendorId;
        uint32_t deviceId;
        uint8_t pipelineCacheUUID[VK_UUID_SIZE];
    };
    if (initialData.size() > sizeof(CacheHeader)) {
        CacheHeader header;
        CloneData(&header, sizeof(header), initialData.data(), sizeof(header));
        const auto& props = plat_.physicalDeviceProperties.physicalDeviceProperties;
        if (header.version != VkPipelineCacheHeaderVersion::VK_PIPELINE_CACHE_HEADER_VERSION_ONE ||
            header.vendorId != props.vendorID || header.deviceId != props.deviceID ||
            memcmp(header.pipelineCacheUUID, props.pipelineCacheUUID, VK_UUID_SIZE)) {
            initialData = {};
        }
    }

    plat_.pipelineCache = CreateFunctionsVk::CreatePipelineCache(plat_.device, initialData);
}

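// Reads the pipeline cache back with the usual two-call pattern: first query the size, then fetch the data.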
vector<uint8_t> DeviceVk::GetPipelineCache() const
{
    vector<uint8_t> deviceData;
    if (plat_.pipelineCache) {
        size_t dataSize = 0u;
        if (auto result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, nullptr);
            result == VK_SUCCESS && dataSize) {
            deviceData.resize(dataSize);
            dataSize = deviceData.size();
            result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, deviceData.data());
            if (result == VK_SUCCESS) {
                deviceData.resize(dataSize);
            } else {
                deviceData.clear();
            }
        }
    }
    return deviceData;
}

LowLevelGpuQueueVk DeviceVk::GetGpuQueue(const GpuQueue& gpuQueue) const
{
    // 1. tries to return the typed queue with the given index
    // 2. tries to return the typed queue with index 0
    // 3. returns the default queue
    const auto getSpecificQueue = [](const uint32_t queueIndex, const vector<LowLevelGpuQueueVk>& specificQueues,
                                      const LowLevelGpuQueueVk& defaultQueue) {
        const uint32_t queueCount = (uint32_t)specificQueues.size();
        if (queueIndex < queueCount) {
            return specificQueues[queueIndex];
        } else if (queueCount > 0) {
            return specificQueues[0];
        }
        return defaultQueue;
    };

    if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.computeQueues, lowLevelGpuQueues_.defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.graphicsQueues, lowLevelGpuQueues_.defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.transferQueues, lowLevelGpuQueues_.defaultQueue);
    } else {
        return lowLevelGpuQueues_.defaultQueue;
    }
}

LowLevelGpuQueueVk DeviceVk::GetPresentationGpuQueue() const
{
    // NOTE: presentation is expected to happen on the default graphics queue
    return GetGpuQueue(GpuQueue { GpuQueue::QueueType::GRAPHICS, 0 });
}

vector<LowLevelGpuQueueVk> DeviceVk::GetLowLevelGpuQueues() const
{
    vector<LowLevelGpuQueueVk> gpuQueues;
    gpuQueues.reserve(gpuQueueCount_);
    gpuQueues.insert(gpuQueues.end(), lowLevelGpuQueues_.computeQueues.begin(), lowLevelGpuQueues_.computeQueues.end());
    gpuQueues.insert(
        gpuQueues.end(), lowLevelGpuQueues_.graphicsQueues.begin(), lowLevelGpuQueues_.graphicsQueues.end());
    gpuQueues.insert(
        gpuQueues.end(), lowLevelGpuQueues_.transferQueues.begin(), lowLevelGpuQueues_.transferQueues.end());
    return gpuQueues;
}

void DeviceVk::WaitForIdle()
{
    if (plat_.device) {
        if (!isRenderbackendRunning_) {
            PLUGIN_LOG_D("Device - WaitForIdle");
            vkDeviceWaitIdle(plat_.device); // device
        } else {
            PLUGIN_LOG_E("Device WaitForIdle can only be called when the render backend is not running");
        }
    }
}

void DeviceVk::Activate() {}

void DeviceVk::Deactivate() {}

bool DeviceVk::AllowThreadedProcessing() const
{
    return true;
}

const DeviceVk::FeatureConfigurations& DeviceVk::GetFeatureConfigurations() const
{
    return featureConfigurations_;
}

const DeviceVk::CommonDeviceExtensions& DeviceVk::GetCommonDeviceExtensions() const
{
    return commonDeviceExtensions_;
}

const PlatformDeviceExtensions& DeviceVk::GetPlatformDeviceExtensions() const
{
    return platformDeviceExtensions_;
}

bool DeviceVk::HasDeviceExtension(const string_view extensionName) const
{
    return extensions_.contains(extensionName);
}

unique_ptr<Device> CreateDeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo)
{
    return make_unique<DeviceVk>(renderContext, createInfo);
}

unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuBufferDesc& desc)
{
    return make_unique<GpuBufferVk>(*this, desc);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImage(const GpuImageDesc& desc)
{
    return make_unique<GpuImageVk>(*this, desc);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const GpuImagePlatformData& platformData, const uintptr_t hwBuffer)
{
    return make_unique<GpuImageVk>(*this, desc, platformData, hwBuffer);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(const GpuImageDesc& desc, const GpuImagePlatformData& platformData)
{
    return CreateGpuImageView(desc, platformData, 0);
}

vector<unique_ptr<GpuImage>> DeviceVk::CreateGpuImageViews(const Swapchain& swapchain)
{
    const GpuImageDesc& desc = swapchain.GetDesc();
    const auto& swapchainPlat = static_cast<const SwapchainVk&>(swapchain).GetPlatformData();

    vector<unique_ptr<GpuImage>> gpuImages(swapchainPlat.swapchainImages.images.size());
    for (size_t idx = 0; idx < gpuImages.size(); ++idx) {
        GpuImagePlatformDataVk gpuImagePlat;
        gpuImagePlat.image = swapchainPlat.swapchainImages.images[idx];
        gpuImagePlat.imageView = swapchainPlat.swapchainImages.imageViews[idx];
        gpuImages[idx] = this->CreateGpuImageView(desc, gpuImagePlat);
    }
    return gpuImages;
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const BackendSpecificImageDesc& platformData)
{
    const ImageDescVk& imageDesc = (const ImageDescVk&)platformData;
    GpuImagePlatformDataVk platData;
    platData.image = imageDesc.image;
    platData.imageView = imageDesc.imageView;
    return CreateGpuImageView(desc, platData, imageDesc.platformHwBuffer);
}

unique_ptr<GpuSampler> DeviceVk::CreateGpuSampler(const GpuSamplerDesc& desc)
{
    return make_unique<GpuSamplerVk>(*this, desc);
}

unique_ptr<GpuAccelerationStructure> DeviceVk::CreateGpuAccelerationStructure(const GpuAccelerationStructureDesc& desc)
{
    return make_unique<GpuAccelerationStructureVk>(*this, desc);
}

unique_ptr<RenderFrameSync> DeviceVk::CreateRenderFrameSync()
{
    return make_unique<RenderFrameSyncVk>(*this);
}

unique_ptr<RenderBackend> DeviceVk::CreateRenderBackend(
    GpuResourceManager& gpuResourceMgr, const CORE_NS::IParallelTaskQueue::Ptr& queue)
{
    return make_unique<RenderBackendVk>(*this, gpuResourceMgr, queue);
}

unique_ptr<ShaderModule> DeviceVk::CreateShaderModule(const ShaderModuleCreateInfo& data)
{
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<ShaderModule> DeviceVk::CreateComputeShaderModule(const ShaderModuleCreateInfo& data)
{
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<GpuShaderProgram> DeviceVk::CreateGpuShaderProgram(const GpuShaderProgramCreateData& data)
{
    return make_unique<GpuShaderProgramVk>(*this, data);
}

unique_ptr<GpuComputeProgram> DeviceVk::CreateGpuComputeProgram(const GpuComputeProgramCreateData& data)
{
    return make_unique<GpuComputeProgramVk>(*this, data);
}

unique_ptr<NodeContextDescriptorSetManager> DeviceVk::CreateNodeContextDescriptorSetManager()
{
    return make_unique<NodeContextDescriptorSetManagerVk>(*this);
}

unique_ptr<NodeContextPoolManager> DeviceVk::CreateNodeContextPoolManager(
    GpuResourceManager& gpuResourceMgr, const GpuQueue& gpuQueue)
{
    return make_unique<NodeContextPoolManagerVk>(*this, gpuResourceMgr, gpuQueue);
}

unique_ptr<GraphicsPipelineStateObject> DeviceVk::CreateGraphicsPipelineStateObject(const GpuShaderProgram& gpuProgram,
    const GraphicsState& graphicsState, const PipelineLayout& pipelineLayout,
    const VertexInputDeclarationView& vertexInputDeclaration,
    const ShaderSpecializationConstantDataView& specializationConstants, const DynamicStateFlags dynamicStateFlags,
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc>& renderPassSubpassDescs,
    const uint32_t subpassIndex, const LowLevelRenderPassData* renderPassData,
    const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    PLUGIN_ASSERT(renderPassData);
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<GraphicsPipelineStateObjectVk>(*this, gpuProgram, graphicsState, pipelineLayout,
        vertexInputDeclaration, specializationConstants, dynamicStateFlags, renderPassDesc, renderPassSubpassDescs,
        subpassIndex, *renderPassData, *pipelineLayoutData);
}

unique_ptr<ComputePipelineStateObject> DeviceVk::CreateComputePipelineStateObject(const GpuComputeProgram& gpuProgram,
    const PipelineLayout& pipelineLayout, const ShaderSpecializationConstantDataView& specializationConstants,
    const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<ComputePipelineStateObjectVk>(
        *this, gpuProgram, pipelineLayout, specializationConstants, *pipelineLayoutData);
}

const DebugFunctionUtilitiesVk& DeviceVk::GetDebugFunctionUtilities() const
{
    return debugFunctionUtilities_;
}

void DeviceVk::CreateDebugFunctions()
{
#if (RENDER_VULKAN_VALIDATION_ENABLED == 1)
    debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT =
        (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(plat_.device, "vkSetDebugUtilsObjectNameEXT");
#endif
#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
    debugFunctionUtilities_.vkCmdBeginDebugUtilsLabelEXT =
        (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdBeginDebugUtilsLabelEXT");
    debugFunctionUtilities_.vkCmdEndDebugUtilsLabelEXT =
        (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdEndDebugUtilsLabelEXT");
#endif
}

const DeviceVk::ExtFunctions& DeviceVk::GetExtFunctions() const
{
    return extFunctions_;
}

const PlatformExtFunctions& DeviceVk::GetPlatformExtFunctions() const
{
    return platformExtFunctions_;
}

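// Resolves the extension entry points enabled for this device; render pass 2 support is dropped if its entry point
// cannot be fetched.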
void DeviceVk::CreateExtFunctions()
{
    if (commonDeviceExtensions_.renderPass2) {
        extFunctions_.vkCreateRenderPass2KHR =
            (PFN_vkCreateRenderPass2KHR)vkGetInstanceProcAddr(plat_.instance, "vkCreateRenderPass2KHR");
        if (!extFunctions_.vkCreateRenderPass2KHR) {
            commonDeviceExtensions_.renderPass2 = false;
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateRenderPass2KHR");
        }
    }

    if (commonDeviceExtensions_.getMemoryRequirements2) {
        extFunctions_.vkGetImageMemoryRequirements2 = (PFN_vkGetImageMemoryRequirements2)vkGetInstanceProcAddr(
            plat_.instance, "vkGetImageMemoryRequirements2KHR");
        if (!extFunctions_.vkGetImageMemoryRequirements2) {
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetImageMemoryRequirements2");
        }
    }

    if (commonDeviceExtensions_.samplerYcbcrConversion) {
        GetYcbcrExtFunctions(plat_.instance, extFunctions_);
    }

    extFunctions_.vkAcquireNextImageKHR =
        (PFN_vkAcquireNextImageKHR)vkGetInstanceProcAddr(plat_.instance, "vkAcquireNextImageKHR");

#if (RENDER_VULKAN_RT_ENABLED == 1)
    extFunctions_.vkGetAccelerationStructureBuildSizesKHR =
        (PFN_vkGetAccelerationStructureBuildSizesKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureBuildSizesKHR");
    if (!extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureBuildSizesKHR");
    }
    extFunctions_.vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkCmdBuildAccelerationStructuresKHR");
    if (!extFunctions_.vkCmdBuildAccelerationStructuresKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCmdBuildAccelerationStructuresKHR");
    }
    extFunctions_.vkCreateAccelerationStructureKHR =
        (PFN_vkCreateAccelerationStructureKHR)vkGetInstanceProcAddr(plat_.instance, "vkCreateAccelerationStructureKHR");
    if (!extFunctions_.vkCreateAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateAccelerationStructureKHR");
    }
    extFunctions_.vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkDestroyAccelerationStructureKHR");
    if (!extFunctions_.vkDestroyAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroyAccelerationStructureKHR");
    }
    extFunctions_.vkGetAccelerationStructureDeviceAddressKHR =
        (PFN_vkGetAccelerationStructureDeviceAddressKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureDeviceAddressKHR");
    if (!extFunctions_.vkGetAccelerationStructureDeviceAddressKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureDeviceAddressKHR");
    }
#endif
}

LowLevelDeviceVk::LowLevelDeviceVk(DeviceVk& deviceVk)
    : deviceVk_(deviceVk), gpuResourceMgr_(static_cast<GpuResourceManager&>(deviceVk_.GetGpuResourceManager()))
{}

DeviceBackendType LowLevelDeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformDataVk& LowLevelDeviceVk::GetPlatformDataVk() const
{
    return deviceVk_.GetPlatformDataVk();
}

GpuBufferPlatformDataVk LowLevelDeviceVk::GetBuffer(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuBufferVk* buffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(handle);
        if (buffer) {
            return buffer->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuImagePlatformDataVk LowLevelDeviceVk::GetImage(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuImageVk* image = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
        if (image) {
            return image->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuSamplerPlatformDataVk LowLevelDeviceVk::GetSampler(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuSamplerVk* sampler = gpuResourceMgr_.GetSampler<GpuSamplerVk>(handle);
        if (sampler) {
            return sampler->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}
RENDER_END_NAMESPACE()