1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "dzn_private.h"
25
26 #include "vk_alloc.h"
27 #include "vk_common_entrypoints.h"
28 #include "vk_cmd_enqueue_entrypoints.h"
29 #include "vk_debug_report.h"
30 #include "vk_format.h"
31 #include "vk_sync_dummy.h"
32 #include "vk_util.h"
33
34 #include "git_sha1.h"
35
36 #include "util/u_debug.h"
37 #include "util/disk_cache.h"
38 #include "util/macros.h"
39 #include "util/mesa-sha1.h"
40 #include "util/u_dl.h"
41
42 #include "util/driconf.h"
43
44 #include "glsl_types.h"
45
46 #include "dxil_validator.h"
47
48 #include "git_sha1.h"
49
50 #include <string.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <c99_alloca.h>
54
55 #ifdef _WIN32
56 #include <windows.h>
57 #include <shlobj.h>
58 #include "dzn_dxgi.h"
59 #endif
60
61 #include <directx/d3d12sdklayers.h>
62
63 #define DZN_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
64
65 #define MAX_TIER2_MEMORY_TYPES 4
66
67 const VkExternalMemoryHandleTypeFlags opaque_external_flag =
68 #ifdef _WIN32
69 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
70 #else
71 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
72 #endif
73
74 static const struct vk_instance_extension_table instance_extensions = {
75 .KHR_get_physical_device_properties2 = true,
76 .KHR_device_group_creation = true,
77 #ifdef DZN_USE_WSI_PLATFORM
78 .KHR_surface = true,
79 .KHR_get_surface_capabilities2 = true,
80 #endif
81 #ifdef VK_USE_PLATFORM_WIN32_KHR
82 .KHR_win32_surface = true,
83 #endif
84 #ifdef VK_USE_PLATFORM_XCB_KHR
85 .KHR_xcb_surface = true,
86 #endif
87 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
88 .KHR_wayland_surface = true,
89 #endif
90 #ifdef VK_USE_PLATFORM_XLIB_KHR
91 .KHR_xlib_surface = true,
92 #endif
93 #ifndef VK_USE_PLATFORM_WIN32_KHR
94 .EXT_headless_surface = true,
95 #endif
96 .EXT_debug_report = true,
97 .EXT_debug_utils = true,
98 };
99
100 static void
101 dzn_physical_device_get_extensions(struct dzn_physical_device *pdev)
102 {
103 pdev->vk.supported_extensions = (struct vk_device_extension_table) {
104 .KHR_16bit_storage = pdev->options4.Native16BitShaderOpsSupported,
105 .KHR_bind_memory2 = true,
106 .KHR_create_renderpass2 = true,
107 .KHR_dedicated_allocation = true,
108 .KHR_depth_stencil_resolve = true,
109 .KHR_descriptor_update_template = true,
110 .KHR_device_group = true,
111 .KHR_draw_indirect_count = true,
112 .KHR_driver_properties = true,
113 .KHR_dynamic_rendering = true,
114 .KHR_external_memory = true,
115 .KHR_external_semaphore = true,
116 #ifdef _WIN32
117 .KHR_external_memory_win32 = true,
118 .KHR_external_semaphore_win32 = true,
119 #else
120 .KHR_external_memory_fd = true,
121 .KHR_external_semaphore_fd = true,
122 #endif
123 .KHR_image_format_list = true,
124 .KHR_imageless_framebuffer = true,
125 .KHR_get_memory_requirements2 = true,
126 .KHR_maintenance1 = true,
127 .KHR_maintenance2 = true,
128 .KHR_maintenance3 = true,
129 .KHR_multiview = true,
130 .KHR_relaxed_block_layout = true,
131 .KHR_sampler_mirror_clamp_to_edge = true,
132 .KHR_separate_depth_stencil_layouts = true,
133 .KHR_shader_draw_parameters = true,
134 .KHR_shader_expect_assume = true,
135 .KHR_shader_float16_int8 = pdev->options4.Native16BitShaderOpsSupported,
136 .KHR_shader_float_controls = true,
137 .KHR_shader_integer_dot_product = true,
138 .KHR_spirv_1_4 = true,
139 .KHR_storage_buffer_storage_class = true,
140 #ifdef DZN_USE_WSI_PLATFORM
141 .KHR_swapchain = true,
142 #endif
143 .KHR_synchronization2 = true,
144 .KHR_timeline_semaphore = true,
145 .KHR_uniform_buffer_standard_layout = true,
146 .EXT_descriptor_indexing = pdev->shader_model >= D3D_SHADER_MODEL_6_6,
147 #if defined(_WIN32)
148 .EXT_external_memory_host = pdev->dev13,
149 #endif
150 .EXT_scalar_block_layout = true,
151 .EXT_separate_stencil_usage = true,
152 .EXT_shader_subgroup_ballot = true,
153 .EXT_shader_subgroup_vote = true,
154 .EXT_subgroup_size_control = true,
155 .EXT_vertex_attribute_divisor = true,
156 .MSFT_layered_driver = true,
157 };
158 }
159
160 VKAPI_ATTR VkResult VKAPI_CALL
161 dzn_EnumerateInstanceExtensionProperties(const char *pLayerName,
162 uint32_t *pPropertyCount,
163 VkExtensionProperties *pProperties)
164 {
165 /* We don't support any layers */
166 if (pLayerName)
167 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
168
169 return vk_enumerate_instance_extension_properties(
170 &instance_extensions, pPropertyCount, pProperties);
171 }
172
173 static const struct debug_control dzn_debug_options[] = {
174 { "sync", DZN_DEBUG_SYNC },
175 { "nir", DZN_DEBUG_NIR },
176 { "dxil", DZN_DEBUG_DXIL },
177 { "warp", DZN_DEBUG_WARP },
178 { "internal", DZN_DEBUG_INTERNAL },
179 { "signature", DZN_DEBUG_SIG },
180 { "gbv", DZN_DEBUG_GBV },
181 { "d3d12", DZN_DEBUG_D3D12 },
182 { "debugger", DZN_DEBUG_DEBUGGER },
183 { "redirects", DZN_DEBUG_REDIRECTS },
184 { "bindless", DZN_DEBUG_BINDLESS },
185 { "nobindless", DZN_DEBUG_NO_BINDLESS },
186 { "experimental", DZN_DEBUG_EXPERIMENTAL },
187 { "multiview", DZN_DEBUG_MULTIVIEW },
188 { NULL, 0 }
189 };
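/* These flags are parsed elsewhere in the driver from a comma-separated debug
 * string (the DZN_DEBUG environment variable), e.g. a hypothetical
 * DZN_DEBUG=warp,sync to select the WARP adapter and add extra
 * synchronization around submissions.
 */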
190
191 static void
192 dzn_physical_device_destroy(struct vk_physical_device *physical)
193 {
194 struct dzn_physical_device *pdev = container_of(physical, struct dzn_physical_device, vk);
195 struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk);
196
197 if (pdev->dev)
198 ID3D12Device1_Release(pdev->dev);
199
200 if (pdev->dev10)
201 ID3D12Device1_Release(pdev->dev10);
202
203 if (pdev->dev11)
204 ID3D12Device1_Release(pdev->dev11);
205
206 if (pdev->dev12)
207 ID3D12Device1_Release(pdev->dev12);
208
209 if (pdev->dev13)
210 ID3D12Device1_Release(pdev->dev13);
211
212 if (pdev->adapter)
213 IUnknown_Release(pdev->adapter);
214
215 dzn_wsi_finish(pdev);
216 vk_physical_device_finish(&pdev->vk);
217 vk_free(&instance->vk.alloc, pdev);
218 }
219
220 static void
221 dzn_instance_destroy(struct dzn_instance *instance, const VkAllocationCallbacks *alloc)
222 {
223 if (!instance)
224 return;
225
226 vk_instance_finish(&instance->vk);
227
228 #ifdef _WIN32
229 dxil_destroy_validator(instance->dxil_validator);
230 #endif
231
232 if (instance->factory)
233 ID3D12DeviceFactory_Release(instance->factory);
234
235 if (instance->d3d12_mod)
236 util_dl_close(instance->d3d12_mod);
237
238 vk_free2(vk_default_allocator(), alloc, instance);
239 }
240
241 #ifdef _WIN32
242 extern IMAGE_DOS_HEADER __ImageBase;
243 static const char *
244 try_find_d3d12core_next_to_self(char *path, size_t path_arr_size)
245 {
246 uint32_t path_size = GetModuleFileNameA((HINSTANCE)&__ImageBase,
247 path, path_arr_size);
248 if (!path_arr_size || path_size == path_arr_size) {
249 mesa_loge("Unable to get path to self\n");
250 return NULL;
251 }
252
253 char *last_slash = strrchr(path, '\\');
254 if (!last_slash) {
255 mesa_loge("Unable to get path to self\n");
256 return NULL;
257 }
258
259 *(last_slash + 1) = '\0';
260 if (strcat_s(path, path_arr_size, "D3D12Core.dll") != 0) {
261 mesa_loge("Unable to get path to D3D12Core.dll next to self\n");
262 return NULL;
263 }
264
265 if (GetFileAttributesA(path) == INVALID_FILE_ATTRIBUTES) {
266 return NULL;
267 }
268
269 *(last_slash + 1) = '\0';
270 return path;
271 }
272 #endif
273
274 static ID3D12DeviceFactory *
275 try_create_device_factory(struct util_dl_library *d3d12_mod)
276 {
277 /* A device factory allows us to isolate things like debug layer enablement from other callers,
278 * and can potentially even refer to a different D3D12 redist implementation than the one other callers use.
279 */
280 ID3D12DeviceFactory *factory = NULL;
281
282 PFN_D3D12_GET_INTERFACE D3D12GetInterface = (PFN_D3D12_GET_INTERFACE)util_dl_get_proc_address(d3d12_mod, "D3D12GetInterface");
283 if (!D3D12GetInterface) {
284 mesa_loge("Failed to retrieve D3D12GetInterface\n");
285 return NULL;
286 }
287
288 #ifdef _WIN32
289 /* First, try to create a device factory from a DLL-parallel D3D12Core.dll */
290 ID3D12SDKConfiguration *sdk_config = NULL;
291 if (SUCCEEDED(D3D12GetInterface(&CLSID_D3D12SDKConfiguration, &IID_ID3D12SDKConfiguration, (void **)&sdk_config))) {
292 ID3D12SDKConfiguration1 *sdk_config1 = NULL;
293 if (SUCCEEDED(IUnknown_QueryInterface(sdk_config, &IID_ID3D12SDKConfiguration1, (void **)&sdk_config1))) {
294 char self_path[MAX_PATH];
295 const char *d3d12core_path = try_find_d3d12core_next_to_self(self_path, sizeof(self_path));
296 if (d3d12core_path) {
297 if (SUCCEEDED(ID3D12SDKConfiguration1_CreateDeviceFactory(sdk_config1, D3D12_PREVIEW_SDK_VERSION, d3d12core_path, &IID_ID3D12DeviceFactory, (void **)&factory)) ||
298 SUCCEEDED(ID3D12SDKConfiguration1_CreateDeviceFactory(sdk_config1, D3D12_SDK_VERSION, d3d12core_path, &IID_ID3D12DeviceFactory, (void **)&factory))) {
299 ID3D12SDKConfiguration_Release(sdk_config);
300 ID3D12SDKConfiguration1_Release(sdk_config1);
301 return factory;
302 }
303 }
304
305 /* Nope, seems we don't have a matching D3D12Core.dll next to ourselves */
306 ID3D12SDKConfiguration1_Release(sdk_config1);
307 }
308
309 /* It's also possible there's a D3D12Core.dll next to the .exe for development/testing purposes. If so, environment
310 * variables tell us the relative path to it and the SDK version to use.
311 */
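/* For example (hypothetical layout and version number): with D3D12Core.dll
 * copied into a D3D12\ subdirectory next to the executable, one would set
 *   DZN_AGILITY_RELATIVE_PATH=.\D3D12\
 *   DZN_AGILITY_SDK_VERSION=613
 * before launching, and the values are forwarded to SetSDKVersion() below.
 */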
312 const char *d3d12core_relative_path = getenv("DZN_AGILITY_RELATIVE_PATH");
313 const char *d3d12core_sdk_version = getenv("DZN_AGILITY_SDK_VERSION");
314 if (d3d12core_relative_path && d3d12core_sdk_version) {
315 ID3D12SDKConfiguration_SetSDKVersion(sdk_config, atoi(d3d12core_sdk_version), d3d12core_relative_path);
316 }
317 ID3D12SDKConfiguration_Release(sdk_config);
318 }
319 #endif
320
321 (void)D3D12GetInterface(&CLSID_D3D12DeviceFactory, &IID_ID3D12DeviceFactory, (void **)&factory);
322 return factory;
323 }
324
325 VKAPI_ATTR void VKAPI_CALL
326 dzn_DestroyInstance(VkInstance instance,
327 const VkAllocationCallbacks *pAllocator)
328 {
329 dzn_instance_destroy(dzn_instance_from_handle(instance), pAllocator);
330 }
331
332 static void
333 dzn_physical_device_init_uuids(struct dzn_physical_device *pdev)
334 {
335 const char *mesa_version = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1;
336
337 struct mesa_sha1 sha1_ctx;
338 uint8_t sha1[SHA1_DIGEST_LENGTH];
339 STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
340
341 /* The pipeline cache UUID is used for determining when a pipeline cache is
342 * invalid. Our cache is device-agnostic, but it does depend on the features
343 * provided by the D3D12 driver, so let's hash the build ID plus some
344 * caps that might impact our NIR lowering passes.
345 */
346 _mesa_sha1_init(&sha1_ctx);
347 _mesa_sha1_update(&sha1_ctx, mesa_version, strlen(mesa_version));
348 disk_cache_get_function_identifier(dzn_physical_device_init_uuids, &sha1_ctx);
349 _mesa_sha1_update(&sha1_ctx, &pdev->options, sizeof(pdev->options));
350 _mesa_sha1_update(&sha1_ctx, &pdev->options2, sizeof(pdev->options2));
351 _mesa_sha1_final(&sha1_ctx, sha1);
352 memcpy(pdev->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
353
354 /* The driver UUID is used for determining shareability of images and memory
355 * between two Vulkan instances in separate processes. People who want to
356 * share memory need to also check the device UUID (below) so all this
357 * needs to be is the build-id.
358 */
359 _mesa_sha1_compute(mesa_version, strlen(mesa_version), sha1);
360 memcpy(pdev->driver_uuid, sha1, VK_UUID_SIZE);
361
362 /* The device UUID uniquely identifies the given device within the machine. */
363 _mesa_sha1_init(&sha1_ctx);
364 _mesa_sha1_update(&sha1_ctx, &pdev->desc.vendor_id, sizeof(pdev->desc.vendor_id));
365 _mesa_sha1_update(&sha1_ctx, &pdev->desc.device_id, sizeof(pdev->desc.device_id));
366 _mesa_sha1_update(&sha1_ctx, &pdev->desc.subsys_id, sizeof(pdev->desc.subsys_id));
367 _mesa_sha1_update(&sha1_ctx, &pdev->desc.revision, sizeof(pdev->desc.revision));
368 _mesa_sha1_final(&sha1_ctx, sha1);
369 memcpy(pdev->device_uuid, sha1, VK_UUID_SIZE);
370 }
371
372 const struct vk_pipeline_cache_object_ops *const dzn_pipeline_cache_import_ops[] = {
373 &dzn_cached_blob_ops,
374 NULL,
375 };
376
377 static void
378 dzn_physical_device_cache_caps(struct dzn_physical_device *pdev)
379 {
380 D3D_FEATURE_LEVEL checklist[] = {
381 D3D_FEATURE_LEVEL_11_0,
382 D3D_FEATURE_LEVEL_11_1,
383 D3D_FEATURE_LEVEL_12_0,
384 D3D_FEATURE_LEVEL_12_1,
385 D3D_FEATURE_LEVEL_12_2,
386 };
387
388 D3D12_FEATURE_DATA_FEATURE_LEVELS levels = {
389 .NumFeatureLevels = ARRAY_SIZE(checklist),
390 .pFeatureLevelsRequested = checklist,
391 };
392
393 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FEATURE_LEVELS, &levels, sizeof(levels));
394 pdev->feature_level = levels.MaxSupportedFeatureLevel;
395
396 static const D3D_SHADER_MODEL valid_shader_models[] = {
397 D3D_SHADER_MODEL_6_8, D3D_SHADER_MODEL_6_7, D3D_SHADER_MODEL_6_6, D3D_SHADER_MODEL_6_5,
398 D3D_SHADER_MODEL_6_4, D3D_SHADER_MODEL_6_3, D3D_SHADER_MODEL_6_2, D3D_SHADER_MODEL_6_1,
399 };
400 for (UINT i = 0; i < ARRAY_SIZE(valid_shader_models); ++i) {
401 D3D12_FEATURE_DATA_SHADER_MODEL shader_model = { valid_shader_models[i] };
402 if (SUCCEEDED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)))) {
403 pdev->shader_model = shader_model.HighestShaderModel;
404 break;
405 }
406 }
407
408 D3D_ROOT_SIGNATURE_VERSION root_sig_versions[] = {
409 D3D_ROOT_SIGNATURE_VERSION_1_2,
410 D3D_ROOT_SIGNATURE_VERSION_1_1
411 };
412 for (UINT i = 0; i < ARRAY_SIZE(root_sig_versions); ++i) {
413 D3D12_FEATURE_DATA_ROOT_SIGNATURE root_sig = { root_sig_versions[i] };
414 if (SUCCEEDED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_ROOT_SIGNATURE, &root_sig, sizeof(root_sig)))) {
415 pdev->root_sig_version = root_sig.HighestVersion;
416 break;
417 }
418 }
419
420 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_ARCHITECTURE1, &pdev->architecture, sizeof(pdev->architecture));
421 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS, &pdev->options, sizeof(pdev->options));
422 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS1, &pdev->options1, sizeof(pdev->options1));
423 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS2, &pdev->options2, sizeof(pdev->options2));
424 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS3, &pdev->options3, sizeof(pdev->options3));
425 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS4, &pdev->options4, sizeof(pdev->options4));
426 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS12, &pdev->options12, sizeof(pdev->options12));
427 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS13, &pdev->options13, sizeof(pdev->options13));
428 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS14, &pdev->options14, sizeof(pdev->options14));
429 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS15, &pdev->options15, sizeof(pdev->options15));
430 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS16, &pdev->options16, sizeof(pdev->options16));
431 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS17, &pdev->options17, sizeof(pdev->options17));
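/* OPTIONS19 may not be known to older runtimes, so this query is allowed to
 * fail; in that case fall back to the architecture-defined limits
 * (2048 shader-visible samplers, 1,000,000 tier-1 view descriptors).
 */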
432 if (FAILED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS19, &pdev->options19, sizeof(pdev->options19)))) {
433 pdev->options19.MaxSamplerDescriptorHeapSize = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
434 pdev->options19.MaxSamplerDescriptorHeapSizeWithStaticSamplers = pdev->options19.MaxSamplerDescriptorHeapSize;
435 pdev->options19.MaxViewDescriptorHeapSize = D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1;
436 }
437 {
438 D3D12_FEATURE_DATA_FORMAT_SUPPORT a4b4g4r4_support = {
439 .Format = DXGI_FORMAT_A4B4G4R4_UNORM
440 };
441 pdev->support_a4b4g4r4 =
442 SUCCEEDED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FORMAT_SUPPORT, &a4b4g4r4_support, sizeof(a4b4g4r4_support)));
443 }
444
445 pdev->queue_families[pdev->queue_family_count++] = (struct dzn_queue_family) {
446 .props = {
447 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
448 VK_QUEUE_COMPUTE_BIT |
449 VK_QUEUE_TRANSFER_BIT,
450 .queueCount = 4,
451 .timestampValidBits = 64,
452 .minImageTransferGranularity = { 0, 0, 0 },
453 },
454 .desc = {
455 .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
456 },
457 };
458
459 pdev->queue_families[pdev->queue_family_count++] = (struct dzn_queue_family) {
460 .props = {
461 .queueFlags = VK_QUEUE_COMPUTE_BIT |
462 VK_QUEUE_TRANSFER_BIT,
463 .queueCount = 8,
464 .timestampValidBits = 64,
465 .minImageTransferGranularity = { 0, 0, 0 },
466 },
467 .desc = {
468 .Type = D3D12_COMMAND_LIST_TYPE_COMPUTE,
469 },
470 };
471
472 assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
473
474 D3D12_COMMAND_QUEUE_DESC queue_desc = {
475 .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
476 .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL,
477 .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
478 .NodeMask = 0,
479 };
480
481 ID3D12CommandQueue *cmdqueue;
482 ID3D12Device1_CreateCommandQueue(pdev->dev, &queue_desc,
483 &IID_ID3D12CommandQueue,
484 (void **)&cmdqueue);
485
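/* GetTimestampFrequency() reports timestamp ticks per second, while Vulkan's
 * timestampPeriod is nanoseconds per tick, hence the 1e9 / freq conversion
 * below. E.g. a hypothetical 10 MHz timestamp clock yields a timestampPeriod
 * of 100.0.
 */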
486 uint64_t ts_freq;
487 ID3D12CommandQueue_GetTimestampFrequency(cmdqueue, &ts_freq);
488 pdev->timestamp_period = 1000000000.0f / ts_freq;
489 ID3D12CommandQueue_Release(cmdqueue);
490 }
491
492 static void
493 dzn_physical_device_init_memory(struct dzn_physical_device *pdev)
494 {
495 VkPhysicalDeviceMemoryProperties *mem = &pdev->memory;
496
497 /* For each pair of elements X and Y returned in memoryTypes, X must be placed at a lower index position than Y if:
498 * - the set of bit flags returned in the propertyFlags member of X is a strict subset of the set of bit flags
499 * returned in the propertyFlags member of Y; or
500 * - the propertyFlags members of X and Y are equal, and X belongs to a memory heap with greater performance
501 * (as determined in an implementation-specific manner); or
502 * - the propertyFlags members of Y includes VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD or
503 * VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD and X does not
504 * See: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceMemoryProperties.html
505 */
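/* Concretely, the code below yields (sketch of the two common cases):
 *   UMA:     { DEVICE_LOCAL }, { DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT },
 *            { DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT | HOST_CACHED }
 *   non-UMA: { 0 }, { HOST_VISIBLE | HOST_COHERENT },
 *            { HOST_VISIBLE | HOST_COHERENT | HOST_CACHED } on the system-RAM
 *            heap, then { DEVICE_LOCAL } on the VRAM heap,
 * which satisfies the subset-before-superset ordering quoted above.
 */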
506
507 mem->memoryHeapCount = 0;
508 mem->memoryTypeCount = 0;
509
510 VkMemoryPropertyFlags ram_device_local_property = 0;
511 VkMemoryHeapFlags ram_device_local_heap_flag = 0;
512
513 if (pdev->architecture.UMA) {
514 /* All memory is considered device-local for UMA even though it's just RAM */
515 ram_device_local_property = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
516 ram_device_local_heap_flag = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
517 }
518
519 mem->memoryHeaps[mem->memoryHeapCount++] = (VkMemoryHeap) {
520 .size = pdev->desc.shared_system_memory,
521 .flags = ram_device_local_heap_flag,
522 };
523
524 /* Three system-RAM memory types (device-local only on UMA): host non-visible, host write-combined, and host cached */
525 mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType){
526 .propertyFlags = ram_device_local_property,
527 .heapIndex = mem->memoryHeapCount - 1,
528 };
529 mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType){
530 .propertyFlags = ram_device_local_property |
531 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
532 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
533 .heapIndex = mem->memoryHeapCount - 1,
534 };
535 mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType) {
536 .propertyFlags = ram_device_local_property |
537 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
538 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
539 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
540 .heapIndex = mem->memoryHeapCount - 1,
541 };
542
543 if (!pdev->architecture.UMA) {
544 /* Add a device-local memory heap/type */
545 mem->memoryHeaps[mem->memoryHeapCount++] = (VkMemoryHeap){
546 .size = pdev->desc.dedicated_video_memory,
547 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
548 };
549 mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType){
550 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
551 .heapIndex = mem->memoryHeapCount - 1,
552 };
553 }
554
555 assert(mem->memoryTypeCount <= MAX_TIER2_MEMORY_TYPES);
556
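/* Resource heap tier 1 can't mix buffers, RT/DS textures and other textures
 * in a single heap, so each memory type above gets split into three heap-flag
 * variants below. Rough count: 3 base types on UMA (4 on non-UMA) become
 * 9 (12) types, comfortably below VK_MAX_MEMORY_TYPES (32).
 */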
557 if (pdev->options.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_1) {
558 unsigned oldMemoryTypeCount = mem->memoryTypeCount;
559 VkMemoryType oldMemoryTypes[MAX_TIER2_MEMORY_TYPES];
560
561 memcpy(oldMemoryTypes, mem->memoryTypes, oldMemoryTypeCount * sizeof(VkMemoryType));
562
563 mem->memoryTypeCount = 0;
564 for (unsigned oldMemoryTypeIdx = 0; oldMemoryTypeIdx < oldMemoryTypeCount; ++oldMemoryTypeIdx) {
565 D3D12_HEAP_FLAGS flags[] = {
566 D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
567 D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES,
568 /* Note: Vulkan requires *all* images to come from the same memory type as long as
569 * the tiling property (and a few other misc properties) are the same. So, this
570 * non-RT/DS texture flag will only be used for TILING_LINEAR textures, which
571 * can't be render targets.
572 */
573 D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES
574 };
575 for (int i = 0; i < ARRAY_SIZE(flags); ++i) {
576 D3D12_HEAP_FLAGS flag = flags[i];
577 pdev->heap_flags_for_mem_type[mem->memoryTypeCount] = flag;
578 mem->memoryTypes[mem->memoryTypeCount] = oldMemoryTypes[oldMemoryTypeIdx];
579 mem->memoryTypeCount++;
580 }
581 }
582 }
583 }
584
585 static D3D12_HEAP_FLAGS
586 dzn_physical_device_get_heap_flags_for_mem_type(const struct dzn_physical_device *pdev,
587 uint32_t mem_type)
588 {
589 return pdev->heap_flags_for_mem_type[mem_type];
590 }
591
592 uint32_t
593 dzn_physical_device_get_mem_type_mask_for_resource(const struct dzn_physical_device *pdev,
594 const D3D12_RESOURCE_DESC *desc,
595 bool shared)
596 {
597 if (pdev->options.ResourceHeapTier > D3D12_RESOURCE_HEAP_TIER_1 && !shared)
598 return (1u << pdev->memory.memoryTypeCount) - 1;
599
600 D3D12_HEAP_FLAGS deny_flag = D3D12_HEAP_FLAG_NONE;
601 if (pdev->options.ResourceHeapTier <= D3D12_RESOURCE_HEAP_TIER_1) {
602 if (desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
603 deny_flag = D3D12_HEAP_FLAG_DENY_BUFFERS;
604 else if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
605 deny_flag = D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES;
606 else
607 deny_flag = D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES;
608 }
609
610 uint32_t mask = 0;
611 for (unsigned i = 0; i < pdev->memory.memoryTypeCount; ++i) {
612 if (shared && (pdev->memory.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
613 continue;
614 if ((pdev->heap_flags_for_mem_type[i] & deny_flag) == D3D12_HEAP_FLAG_NONE)
615 mask |= (1 << i);
616 }
617 return mask;
618 }
619
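/* D3D12 caps 1D/2D textures at 16384 (2^14) texels per dimension and 3D
 * textures at 2048 (2^11), which is where the mip-level counts below and the
 * 1 << max_mip extents derived from them come from.
 */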
620 static uint32_t
621 dzn_physical_device_get_max_mip_level(bool is_3d)
622 {
623 return is_3d ? 11 : 14;
624 }
625
626 static uint32_t
627 dzn_physical_device_get_max_extent(bool is_3d)
628 {
629 uint32_t max_mip = dzn_physical_device_get_max_mip_level(is_3d);
630
631 return 1 << max_mip;
632 }
633
634 static uint32_t
635 dzn_physical_device_get_max_array_layers()
636 {
637 return dzn_physical_device_get_max_extent(false);
638 }
639
640 static void
641 dzn_physical_device_get_features(const struct dzn_physical_device *pdev,
642 struct vk_features *features)
643 {
644 struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk);
645
646 bool support_descriptor_indexing = pdev->shader_model >= D3D_SHADER_MODEL_6_6 &&
647 !(instance->debug_flags & DZN_DEBUG_NO_BINDLESS);
648 bool support_8bit = driQueryOptionb(&instance->dri_options, "dzn_enable_8bit_loads_stores") &&
649 pdev->options4.Native16BitShaderOpsSupported;
650
651 *features = (struct vk_features) {
652 .robustBufferAccess = true, /* This feature is mandatory */
653 .fullDrawIndexUint32 = false,
654 .imageCubeArray = true,
655 .independentBlend = true,
656 .geometryShader = true,
657 .tessellationShader = false,
658 .sampleRateShading = true,
659 .dualSrcBlend = false,
660 .logicOp = false,
661 .multiDrawIndirect = true,
662 .drawIndirectFirstInstance = true,
663 .depthClamp = true,
664 .depthBiasClamp = true,
665 .fillModeNonSolid = true,
666 .depthBounds = pdev->options2.DepthBoundsTestSupported,
667 .wideLines = driQueryOptionb(&instance->dri_options, "dzn_claim_wide_lines"),
668 .largePoints = false,
669 .alphaToOne = false,
670 .multiViewport = false,
671 .samplerAnisotropy = true,
672 .textureCompressionETC2 = false,
673 .textureCompressionASTC_LDR = false,
674 .textureCompressionBC = true,
675 .occlusionQueryPrecise = true,
676 .pipelineStatisticsQuery = true,
677 .vertexPipelineStoresAndAtomics = true,
678 .fragmentStoresAndAtomics = true,
679 .shaderTessellationAndGeometryPointSize = false,
680 .shaderImageGatherExtended = true,
681 .shaderStorageImageExtendedFormats = pdev->options.TypedUAVLoadAdditionalFormats,
682 .shaderStorageImageMultisample = false,
683 .shaderStorageImageReadWithoutFormat = true,
684 .shaderStorageImageWriteWithoutFormat = true,
685 .shaderUniformBufferArrayDynamicIndexing = true,
686 .shaderSampledImageArrayDynamicIndexing = true,
687 .shaderStorageBufferArrayDynamicIndexing = true,
688 .shaderStorageImageArrayDynamicIndexing = true,
689 .shaderClipDistance = true,
690 .shaderCullDistance = true,
691 .shaderFloat64 = pdev->options.DoublePrecisionFloatShaderOps,
692 .shaderInt64 = pdev->options1.Int64ShaderOps,
693 .shaderInt16 = pdev->options4.Native16BitShaderOpsSupported,
694 .shaderResourceResidency = false,
695 .shaderResourceMinLod = false,
696 .sparseBinding = false,
697 .sparseResidencyBuffer = false,
698 .sparseResidencyImage2D = false,
699 .sparseResidencyImage3D = false,
700 .sparseResidency2Samples = false,
701 .sparseResidency4Samples = false,
702 .sparseResidency8Samples = false,
703 .sparseResidency16Samples = false,
704 .sparseResidencyAliased = false,
705 .variableMultisampleRate = false,
706 .inheritedQueries = false,
707
708 .storageBuffer16BitAccess = pdev->options4.Native16BitShaderOpsSupported,
709 .uniformAndStorageBuffer16BitAccess = pdev->options4.Native16BitShaderOpsSupported,
710 .storagePushConstant16 = false,
711 .storageInputOutput16 = false,
712 .multiview = true,
713 .multiviewGeometryShader = true,
714 .multiviewTessellationShader = false,
715 .variablePointersStorageBuffer = false,
716 .variablePointers = false,
717 .protectedMemory = false,
718 .samplerYcbcrConversion = false,
719 .shaderDrawParameters = true,
720
721 .samplerMirrorClampToEdge = true,
722 .drawIndirectCount = true,
723 .storageBuffer8BitAccess = support_8bit,
724 .uniformAndStorageBuffer8BitAccess = support_8bit,
725 .storagePushConstant8 = support_8bit,
726 .shaderBufferInt64Atomics = false,
727 .shaderSharedInt64Atomics = false,
728 .shaderFloat16 = pdev->options4.Native16BitShaderOpsSupported,
729 .shaderInt8 = support_8bit,
730
731 .descriptorIndexing = support_descriptor_indexing,
732 .shaderInputAttachmentArrayDynamicIndexing = true,
733 .shaderUniformTexelBufferArrayDynamicIndexing = true,
734 .shaderStorageTexelBufferArrayDynamicIndexing = true,
735 .shaderUniformBufferArrayNonUniformIndexing = support_descriptor_indexing,
736 .shaderSampledImageArrayNonUniformIndexing = support_descriptor_indexing,
737 .shaderStorageBufferArrayNonUniformIndexing = support_descriptor_indexing,
738 .shaderStorageImageArrayNonUniformIndexing = support_descriptor_indexing,
739 .shaderInputAttachmentArrayNonUniformIndexing = support_descriptor_indexing,
740 .shaderUniformTexelBufferArrayNonUniformIndexing = support_descriptor_indexing,
741 .shaderStorageTexelBufferArrayNonUniformIndexing = support_descriptor_indexing,
742 .descriptorBindingUniformBufferUpdateAfterBind = support_descriptor_indexing,
743 .descriptorBindingSampledImageUpdateAfterBind = support_descriptor_indexing,
744 .descriptorBindingStorageImageUpdateAfterBind = support_descriptor_indexing,
745 .descriptorBindingStorageBufferUpdateAfterBind = support_descriptor_indexing,
746 .descriptorBindingUniformTexelBufferUpdateAfterBind = support_descriptor_indexing,
747 .descriptorBindingStorageTexelBufferUpdateAfterBind = support_descriptor_indexing,
748 .descriptorBindingUpdateUnusedWhilePending = support_descriptor_indexing,
749 .descriptorBindingPartiallyBound = support_descriptor_indexing,
750 .descriptorBindingVariableDescriptorCount = support_descriptor_indexing,
751 .runtimeDescriptorArray = support_descriptor_indexing,
752
753 .samplerFilterMinmax = false,
754 .scalarBlockLayout = true,
755 .imagelessFramebuffer = true,
756 .uniformBufferStandardLayout = true,
757 .shaderSubgroupExtendedTypes = true,
758 .separateDepthStencilLayouts = true,
759 .hostQueryReset = true,
760 .timelineSemaphore = true,
761 .bufferDeviceAddress = false,
762 .bufferDeviceAddressCaptureReplay = false,
763 .bufferDeviceAddressMultiDevice = false,
764 .vulkanMemoryModel = false,
765 .vulkanMemoryModelDeviceScope = false,
766 .vulkanMemoryModelAvailabilityVisibilityChains = false,
767 .shaderOutputViewportIndex = false,
768 .shaderOutputLayer = false,
769 .subgroupBroadcastDynamicId = true,
770
771 .robustImageAccess = false,
772 .inlineUniformBlock = false,
773 .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
774 .pipelineCreationCacheControl = false,
775 .privateData = true,
776 .shaderDemoteToHelperInvocation = false,
777 .shaderTerminateInvocation = false,
778 .subgroupSizeControl = pdev->options1.WaveOps && pdev->shader_model >= D3D_SHADER_MODEL_6_6,
779 .computeFullSubgroups = true,
780 .synchronization2 = true,
781 .textureCompressionASTC_HDR = false,
782 .shaderZeroInitializeWorkgroupMemory = false,
783 .dynamicRendering = true,
784 .shaderIntegerDotProduct = true,
785 .maintenance4 = false,
786 .shaderExpectAssume = true,
787
788 .vertexAttributeInstanceRateDivisor = true,
789 .vertexAttributeInstanceRateZeroDivisor = true,
790 };
791 }
792
793 static void
794 dzn_physical_device_get_properties(const struct dzn_physical_device *pdev,
795 struct vk_properties *properties)
796 {
797 /* minimum from the D3D and Vulkan specs */
798 const VkSampleCountFlags supported_sample_counts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
799
800 VkPhysicalDeviceType devtype = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
801 if (pdev->desc.is_warp)
802 devtype = VK_PHYSICAL_DEVICE_TYPE_CPU;
803 else if (!pdev->architecture.UMA) {
804 devtype = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
805 }
806
807 *properties = (struct vk_properties){
808 .apiVersion = DZN_API_VERSION,
809 .driverVersion = vk_get_driver_version(),
810
811 .vendorID = pdev->desc.vendor_id,
812 .deviceID = pdev->desc.device_id,
813 .deviceType = devtype,
814
815 /* Limits */
816 .maxImageDimension1D = D3D12_REQ_TEXTURE1D_U_DIMENSION,
817 .maxImageDimension2D = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION,
818 .maxImageDimension3D = D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION,
819 .maxImageDimensionCube = D3D12_REQ_TEXTURECUBE_DIMENSION,
820 .maxImageArrayLayers = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION,
821
822 /* from here on, we mostly use D3D12 limits, falling back to the Vulkan spec minimums where no D3D12 cap applies */
823 .maxTexelBufferElements = 1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP,
824 .maxUniformBufferRange = D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * D3D12_STANDARD_VECTOR_SIZE * sizeof(float),
825 .maxStorageBufferRange = 1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP,
826 .maxPushConstantsSize = 128,
827 .maxMemoryAllocationCount = 4096,
828 .maxSamplerAllocationCount = 4000,
829 .bufferImageGranularity = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
830 .sparseAddressSpaceSize = 0,
831 .maxBoundDescriptorSets = MAX_SETS,
832 .maxPerStageDescriptorSamplers =
833 pdev->options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ?
834 16u : MAX_DESCS_PER_SAMPLER_HEAP,
835 .maxPerStageDescriptorUniformBuffers =
836 pdev->options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ?
837 14u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
838 .maxPerStageDescriptorStorageBuffers =
839 pdev->options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ?
840 64u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
841 .maxPerStageDescriptorSampledImages =
842 pdev->options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ?
843 128u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
844 .maxPerStageDescriptorStorageImages =
845 pdev->options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ?
846 64u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
847 .maxPerStageDescriptorInputAttachments =
848 pdev->options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ?
849 128u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
850 .maxPerStageResources = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
851 .maxDescriptorSetSamplers = MAX_DESCS_PER_SAMPLER_HEAP,
852 .maxDescriptorSetUniformBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
853 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
854 .maxDescriptorSetStorageBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
855 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
856 .maxDescriptorSetSampledImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
857 .maxDescriptorSetStorageImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
858 .maxDescriptorSetInputAttachments = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
859 .maxVertexInputAttributes = MIN2(D3D12_STANDARD_VERTEX_ELEMENT_COUNT, MAX_VERTEX_GENERIC_ATTRIBS),
860 .maxVertexInputBindings = MAX_VBS,
861 .maxVertexInputAttributeOffset = D3D12_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES - 1,
862 .maxVertexInputBindingStride = D3D12_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES,
863 .maxVertexOutputComponents = D3D12_VS_OUTPUT_REGISTER_COUNT * D3D12_VS_OUTPUT_REGISTER_COMPONENTS,
864 .maxTessellationGenerationLevel = 0,
865 .maxTessellationPatchSize = 0,
866 .maxTessellationControlPerVertexInputComponents = 0,
867 .maxTessellationControlPerVertexOutputComponents = 0,
868 .maxTessellationControlPerPatchOutputComponents = 0,
869 .maxTessellationControlTotalOutputComponents = 0,
870 .maxTessellationEvaluationInputComponents = 0,
871 .maxTessellationEvaluationOutputComponents = 0,
872 .maxGeometryShaderInvocations = D3D12_GS_MAX_INSTANCE_COUNT,
873 .maxGeometryInputComponents = D3D12_GS_INPUT_REGISTER_COUNT * D3D12_GS_INPUT_REGISTER_COMPONENTS,
874 .maxGeometryOutputComponents = D3D12_GS_OUTPUT_REGISTER_COUNT * D3D12_GS_OUTPUT_REGISTER_COMPONENTS,
875 .maxGeometryOutputVertices = D3D12_GS_MAX_OUTPUT_VERTEX_COUNT_ACROSS_INSTANCES,
876 .maxGeometryTotalOutputComponents = D3D12_REQ_GS_INVOCATION_32BIT_OUTPUT_COMPONENT_LIMIT,
877 .maxFragmentInputComponents = D3D12_PS_INPUT_REGISTER_COUNT * D3D12_PS_INPUT_REGISTER_COMPONENTS,
878 .maxFragmentOutputAttachments = D3D12_PS_OUTPUT_REGISTER_COUNT,
879 .maxFragmentDualSrcAttachments = 0,
880 .maxFragmentCombinedOutputResources = D3D12_PS_OUTPUT_REGISTER_COUNT,
881 .maxComputeSharedMemorySize = D3D12_CS_TGSM_REGISTER_COUNT * sizeof(float),
882 .maxComputeWorkGroupCount = { D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION,
883 D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION,
884 D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION },
885 .maxComputeWorkGroupInvocations = D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP,
886 .maxComputeWorkGroupSize = { D3D12_CS_THREAD_GROUP_MAX_X, D3D12_CS_THREAD_GROUP_MAX_Y, D3D12_CS_THREAD_GROUP_MAX_Z },
887 .subPixelPrecisionBits = D3D12_SUBPIXEL_FRACTIONAL_BIT_COUNT,
888 .subTexelPrecisionBits = D3D12_SUBTEXEL_FRACTIONAL_BIT_COUNT,
889 .mipmapPrecisionBits = D3D12_MIP_LOD_FRACTIONAL_BIT_COUNT,
890 .maxDrawIndexedIndexValue = 0x00ffffff,
891 .maxDrawIndirectCount = UINT32_MAX,
892 .maxSamplerLodBias = D3D12_MIP_LOD_BIAS_MAX,
893 .maxSamplerAnisotropy = D3D12_REQ_MAXANISOTROPY,
894 .maxViewports = MAX_VP,
895 .maxViewportDimensions = { D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION, D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION },
896 .viewportBoundsRange = { D3D12_VIEWPORT_BOUNDS_MIN, D3D12_VIEWPORT_BOUNDS_MAX },
897 .viewportSubPixelBits = 0,
898 .minMemoryMapAlignment = 64,
899 .minTexelBufferOffsetAlignment = 32,
900 .minUniformBufferOffsetAlignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT,
901 .minStorageBufferOffsetAlignment = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT,
902 .minTexelOffset = D3D12_COMMONSHADER_TEXEL_OFFSET_MAX_NEGATIVE,
903 .maxTexelOffset = D3D12_COMMONSHADER_TEXEL_OFFSET_MAX_POSITIVE,
904 .minTexelGatherOffset = -32,
905 .maxTexelGatherOffset = 31,
906 .minInterpolationOffset = -0.5f,
907 .maxInterpolationOffset = 0.5f,
908 .subPixelInterpolationOffsetBits = 4,
909 .maxFramebufferWidth = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION,
910 .maxFramebufferHeight = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION,
911 .maxFramebufferLayers = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION,
912 .framebufferColorSampleCounts = supported_sample_counts,
913 .framebufferDepthSampleCounts = supported_sample_counts,
914 .framebufferStencilSampleCounts = supported_sample_counts,
915 .framebufferNoAttachmentsSampleCounts = supported_sample_counts,
916 .maxColorAttachments = MAX_RTS,
917 .sampledImageColorSampleCounts = supported_sample_counts,
918 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
919 .sampledImageDepthSampleCounts = supported_sample_counts,
920 .sampledImageStencilSampleCounts = supported_sample_counts,
921 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
922 .maxSampleMaskWords = 1,
923 .timestampComputeAndGraphics = true,
924 .timestampPeriod = pdev->timestamp_period,
925 .maxClipDistances = D3D12_CLIP_OR_CULL_DISTANCE_COUNT,
926 .maxCullDistances = D3D12_CLIP_OR_CULL_DISTANCE_COUNT,
927 .maxCombinedClipAndCullDistances = D3D12_CLIP_OR_CULL_DISTANCE_COUNT,
928 .discreteQueuePriorities = 2,
929 .pointSizeRange = { 1.0f, 1.0f },
930 .lineWidthRange = { 1.0f, 1.0f },
931 .pointSizeGranularity = 0.0f,
932 .lineWidthGranularity = 0.0f,
933 .strictLines = 0,
934 .standardSampleLocations = true,
935 .optimalBufferCopyOffsetAlignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
936 .optimalBufferCopyRowPitchAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT,
937 .nonCoherentAtomSize = 256,
938
939 /* Core 1.1 */
940 .deviceLUIDValid = true,
941 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
942 .maxMultiviewViewCount = 6,
943 .maxMultiviewInstanceIndex = UINT_MAX,
944 .protectedNoFault = false,
945 /* Vulkan 1.1 wants this value to be at least 1024. Let's stick to this
946 * minimum requirement for now, and hope the total number of samplers
947 * across all descriptor sets doesn't exceed 2048, otherwise we'd exceed
948 * the maximum number of samplers per heap. For any descriptor set
949 * containing more than 1024 descriptors,
950 * vkGetDescriptorSetLayoutSupport() can be called to determine if the
951 * layout is within D3D12 descriptor heap bounds.
952 */
953 .maxPerSetDescriptors = 1024,
954 /* According to the spec, the maximum D3D12 resource size is
955 * min(max(128MB, 0.25f * (amount of dedicated VRAM)), 2GB),
956 * but the limit actually depends on the max(system_ram, VRAM) not
957 * just the VRAM.
958 */
959 .maxMemoryAllocationSize =
960 CLAMP(MAX2(pdev->desc.dedicated_video_memory,
961 pdev->desc.dedicated_system_memory +
962 pdev->desc.shared_system_memory) / 4,
963 128ull * 1024 * 1024, 2ull * 1024 * 1024 * 1024),
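/* Worked example (hypothetical adapter): 8 GiB of dedicated VRAM and 16 GiB
 * of shared system memory give MAX2(8, 0 + 16) / 4 = 4 GiB, which the CLAMP
 * above then caps at the 2 GiB upper bound.
 */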
964 .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
965 VK_SUBGROUP_FEATURE_BALLOT_BIT |
966 VK_SUBGROUP_FEATURE_VOTE_BIT |
967 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
968 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
969 VK_SUBGROUP_FEATURE_QUAD_BIT |
970 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT,
971 .subgroupSupportedStages = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT |
972 VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_VERTEX_BIT,
973 .subgroupQuadOperationsInAllStages = true,
974 .subgroupSize = pdev->options1.WaveOps ? pdev->options1.WaveLaneCountMin : 1,
975
976 /* Core 1.2 */
977 .driverID = VK_DRIVER_ID_MESA_DOZEN,
978 .conformanceVersion = (VkConformanceVersion){
979 .major = 0,
980 .minor = 0,
981 .subminor = 0,
982 .patch = 0,
983 },
984 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
985 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
986 .shaderSignedZeroInfNanPreserveFloat16 = false,
987 .shaderSignedZeroInfNanPreserveFloat32 = false,
988 .shaderSignedZeroInfNanPreserveFloat64 = false,
989 .shaderDenormPreserveFloat16 = true,
990 .shaderDenormPreserveFloat32 = pdev->shader_model >= D3D_SHADER_MODEL_6_2,
991 .shaderDenormPreserveFloat64 = true,
992 .shaderDenormFlushToZeroFloat16 = false,
993 .shaderDenormFlushToZeroFloat32 = true,
994 .shaderDenormFlushToZeroFloat64 = false,
995 .shaderRoundingModeRTEFloat16 = true,
996 .shaderRoundingModeRTEFloat32 = true,
997 .shaderRoundingModeRTEFloat64 = true,
998 .shaderRoundingModeRTZFloat16 = false,
999 .shaderRoundingModeRTZFloat32 = false,
1000 .shaderRoundingModeRTZFloat64 = false,
1001 .shaderUniformBufferArrayNonUniformIndexingNative = true,
1002 .shaderSampledImageArrayNonUniformIndexingNative = true,
1003 .shaderStorageBufferArrayNonUniformIndexingNative = true,
1004 .shaderStorageImageArrayNonUniformIndexingNative = true,
1005 .shaderInputAttachmentArrayNonUniformIndexingNative = true,
1006 .robustBufferAccessUpdateAfterBind = true,
1007 .quadDivergentImplicitLod = false,
1008 .maxUpdateAfterBindDescriptorsInAllPools = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1009 .maxPerStageDescriptorUpdateAfterBindSamplers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1010 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1011 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1012 .maxPerStageDescriptorUpdateAfterBindSampledImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1013 .maxPerStageDescriptorUpdateAfterBindStorageImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1014 .maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1015 .maxPerStageUpdateAfterBindResources = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1016 .maxDescriptorSetUpdateAfterBindSamplers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1017 .maxDescriptorSetUpdateAfterBindUniformBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1018 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1019 .maxDescriptorSetUpdateAfterBindStorageBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1020 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1021 .maxDescriptorSetUpdateAfterBindSampledImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1022 .maxDescriptorSetUpdateAfterBindStorageImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1023 .maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
1024
1025 .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
1026 VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
1027 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
1028 .independentResolveNone = true,
1029 .independentResolve = true,
1030 .filterMinmaxSingleComponentFormats = false,
1031 .filterMinmaxImageComponentMapping = false,
1032 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
1033 .framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1034
1035 /* Core 1.3 */
1036 .minSubgroupSize = pdev->options1.WaveOps ? pdev->options1.WaveLaneCountMin : 1,
1037 .maxSubgroupSize = pdev->options1.WaveOps ? pdev->options1.WaveLaneCountMax : 1,
1038 .maxComputeWorkgroupSubgroups = D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP /
1039 (pdev->options1.WaveOps ? pdev->options1.WaveLaneCountMin : 1),
1040 .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT,
1041 .integerDotProduct4x8BitPackedSignedAccelerated = pdev->shader_model >= D3D_SHADER_MODEL_6_4,
1042 .integerDotProduct4x8BitPackedUnsignedAccelerated = pdev->shader_model >= D3D_SHADER_MODEL_6_4,
1043 .integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdev->shader_model >= D3D_SHADER_MODEL_6_4,
1044 .integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdev->shader_model >= D3D_SHADER_MODEL_6_4,
1045
1046 /* VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT */
1047 .maxVertexAttribDivisor = UINT32_MAX,
1048
1049 /* VkPhysicalDeviceExternalMemoryHostPropertiesEXT */
1050 .minImportedHostPointerAlignment = 65536,
1051
1052 /* VkPhysicalDeviceLayeredDriverPropertiesMSFT */
1053 .underlyingAPI = VK_LAYERED_DRIVER_UNDERLYING_API_D3D12_MSFT,
1054 };
1055
1056 snprintf(properties->deviceName,
1057 sizeof(properties->deviceName),
1058 "Microsoft Direct3D12 (%s)", pdev->desc.description);
1059 memcpy(properties->pipelineCacheUUID,
1060 pdev->pipeline_cache_uuid, VK_UUID_SIZE);
1061 memcpy(properties->driverUUID, pdev->driver_uuid, VK_UUID_SIZE);
1062 memcpy(properties->deviceUUID, pdev->device_uuid, VK_UUID_SIZE);
1063 memcpy(properties->deviceLUID, &pdev->desc.adapter_luid, VK_LUID_SIZE);
1064
1065 STATIC_ASSERT(sizeof(pdev->desc.adapter_luid) == sizeof(properties->deviceLUID));
1066
1067 snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "Dozen");
1068 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
1069 }
1070
1071 static VkResult
1072 dzn_physical_device_create(struct vk_instance *instance,
1073 IUnknown *adapter,
1074 const struct dzn_physical_device_desc *desc)
1075 {
1076 struct dzn_physical_device *pdev =
1077 vk_zalloc(&instance->alloc, sizeof(*pdev), 8,
1078 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1079
1080 if (!pdev)
1081 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1082
1083 struct vk_physical_device_dispatch_table dispatch_table;
1084 vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
1085 &dzn_physical_device_entrypoints,
1086 true);
1087 vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
1088 &wsi_physical_device_entrypoints,
1089 false);
1090
1091 VkResult result =
1092 vk_physical_device_init(&pdev->vk, instance,
1093 NULL, NULL, NULL, /* We set up extensions later */
1094 &dispatch_table);
1095 if (result != VK_SUCCESS) {
1096 vk_free(&instance->alloc, pdev);
1097 return result;
1098 }
1099
1100 pdev->desc = *desc;
1101 pdev->adapter = adapter;
1102 IUnknown_AddRef(adapter);
1103 list_addtail(&pdev->vk.link, &instance->physical_devices.list);
1104
1105 vk_warn_non_conformant_implementation("dzn");
1106
1107 struct dzn_instance *dzn_instance = container_of(instance, struct dzn_instance, vk);
1108
1109 uint32_t num_sync_types = 0;
1110 pdev->sync_types[num_sync_types++] = &dzn_sync_type;
1111 pdev->sync_types[num_sync_types++] = &dzn_instance->sync_binary_type.sync;
1112 pdev->sync_types[num_sync_types++] = &vk_sync_dummy_type;
1113 pdev->sync_types[num_sync_types] = NULL;
1114 assert(num_sync_types <= MAX_SYNC_TYPES);
1115 pdev->vk.supported_sync_types = pdev->sync_types;
1116
1117 pdev->vk.pipeline_cache_import_ops = dzn_pipeline_cache_import_ops;
1118
1119 pdev->dev = d3d12_create_device(dzn_instance->d3d12_mod,
1120 pdev->adapter,
1121 dzn_instance->factory,
1122 !dzn_instance->dxil_validator);
1123 if (!pdev->dev) {
1124 list_del(&pdev->vk.link);
1125 dzn_physical_device_destroy(&pdev->vk);
1126 return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
1127 }
1128
1129 if (FAILED(ID3D12Device1_QueryInterface(pdev->dev, &IID_ID3D12Device10, (void **)&pdev->dev10)))
1130 pdev->dev10 = NULL;
1131 if (FAILED(ID3D12Device1_QueryInterface(pdev->dev, &IID_ID3D12Device11, (void **)&pdev->dev11)))
1132 pdev->dev11 = NULL;
1133 if (FAILED(ID3D12Device1_QueryInterface(pdev->dev, &IID_ID3D12Device12, (void **)&pdev->dev12)))
1134 pdev->dev12 = NULL;
1135 if (FAILED(ID3D12Device1_QueryInterface(pdev->dev, &IID_ID3D12Device13, (void **)&pdev->dev13)))
1136 pdev->dev13 = NULL;
1137 dzn_physical_device_cache_caps(pdev);
1138 dzn_physical_device_init_memory(pdev);
1139 dzn_physical_device_init_uuids(pdev);
1140
1141 if (dzn_instance->debug_flags & DZN_DEBUG_MULTIVIEW)
1142 pdev->options3.ViewInstancingTier = D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED;
1143
1144 dzn_physical_device_get_extensions(pdev);
1145 if (driQueryOptionb(&dzn_instance->dri_options, "dzn_enable_8bit_loads_stores") &&
1146 pdev->options4.Native16BitShaderOpsSupported)
1147 pdev->vk.supported_extensions.KHR_8bit_storage = true;
1148 if (dzn_instance->debug_flags & DZN_DEBUG_NO_BINDLESS)
1149 pdev->vk.supported_extensions.EXT_descriptor_indexing = false;
1150 dzn_physical_device_get_features(pdev, &pdev->vk.supported_features);
1151 dzn_physical_device_get_properties(pdev, &pdev->vk.properties);
1152
1153 result = dzn_wsi_init(pdev);
1154 if (result != VK_SUCCESS || !pdev->dev) {
1155 list_del(&pdev->vk.link);
1156 dzn_physical_device_destroy(&pdev->vk);
1157 return result;
1158 }
1159
1160 return VK_SUCCESS;
1161 }
1162
1163 static DXGI_FORMAT
1164 dzn_get_most_capable_format_for_casting(VkFormat format, VkImageCreateFlags create_flags)
1165 {
1166 enum pipe_format pfmt = vk_format_to_pipe_format(format);
1167 bool block_compressed = util_format_is_compressed(pfmt);
1168 if (block_compressed &&
1169 !(create_flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT))
1170 return dzn_image_get_dxgi_format(NULL, format, 0, 0);
1171 unsigned blksz = util_format_get_blocksize(pfmt);
1172 switch (blksz) {
1173 case 1: return DXGI_FORMAT_R8_UNORM;
1174 case 2: return DXGI_FORMAT_R16_UNORM;
1175 case 4: return DXGI_FORMAT_R32_FLOAT;
1176 case 8: return DXGI_FORMAT_R32G32_FLOAT;
1177 case 12: return DXGI_FORMAT_R32G32B32_FLOAT;
1178 case 16: return DXGI_FORMAT_R32G32B32A32_FLOAT;
1179 default: unreachable("Unsupported format bit size");
1180 }
1181 }
1182
1183 D3D12_FEATURE_DATA_FORMAT_SUPPORT
1184 dzn_physical_device_get_format_support(struct dzn_physical_device *pdev,
1185 VkFormat format,
1186 VkImageCreateFlags create_flags)
1187 {
1188 VkImageUsageFlags usage =
1189 vk_format_is_depth_or_stencil(format) ?
1190 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0;
1191 VkImageAspectFlags aspects = 0;
1192
1193 if (vk_format_has_depth(format))
1194 aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
1195 if (vk_format_has_stencil(format))
1196 aspects = VK_IMAGE_ASPECT_STENCIL_BIT;
1197
1198 D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = {
1199 .Format = dzn_image_get_dxgi_format(pdev, format, usage, aspects),
1200 };
1201
1202 /* KHR_maintenance2: If an image is created with the extended usage flag
1203 * (or if properties are queried with that flag), then if any compatible
1204 * format can support a given usage, it should be considered supported.
1205 * With the exception of depth formats, which are limited in their cast set,
1206 * we can do this by just picking a single most-capable format to query
1207 * the support for, instead of the originally requested format. */
1208 if (aspects == 0 && dfmt_info.Format != DXGI_FORMAT_UNKNOWN &&
1209 (create_flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) {
1210 dfmt_info.Format = dzn_get_most_capable_format_for_casting(format, create_flags);
1211 }
1212
1213 ASSERTED HRESULT hres =
1214 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FORMAT_SUPPORT,
1215 &dfmt_info, sizeof(dfmt_info));
1216 assert(!FAILED(hres));
1217
1218 if (usage != VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
1219 return dfmt_info;
1220
1221 /* Depth/stencil resources have a different format when they're accessed
1222 * as textures, query the capabilities for this format too.
1223 */
1224 dzn_foreach_aspect(aspect, aspects) {
1225 D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info2 = {
1226 .Format = dzn_image_get_dxgi_format(pdev, format, 0, aspect),
1227 };
1228
1229 hres = ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FORMAT_SUPPORT,
1230 &dfmt_info2, sizeof(dfmt_info2));
1231 assert(!FAILED(hres));
1232
1233 #define DS_SRV_FORMAT_SUPPORT1_MASK \
1234 (D3D12_FORMAT_SUPPORT1_SHADER_LOAD | \
1235 D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE | \
1236 D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON | \
1237 D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_MONO_TEXT | \
1238 D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE | \
1239 D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD | \
1240 D3D12_FORMAT_SUPPORT1_SHADER_GATHER | \
1241 D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW | \
1242 D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON)
1243
1244 dfmt_info.Support1 |= dfmt_info2.Support1 & DS_SRV_FORMAT_SUPPORT1_MASK;
1245 dfmt_info.Support2 |= dfmt_info2.Support2;
1246 }
1247
1248 return dfmt_info;
1249 }
1250
1251 static void
1252 dzn_physical_device_get_format_properties(struct dzn_physical_device *pdev,
1253 VkFormat format,
1254 VkFormatProperties2 *properties)
1255 {
1256 D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
1257 dzn_physical_device_get_format_support(pdev, format, 0);
1258 VkFormatProperties *base_props = &properties->formatProperties;
1259
1260 vk_foreach_struct(ext, properties->pNext) {
1261 dzn_debug_ignored_stype(ext->sType);
1262 }
1263
1264 if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) {
1265 if (dzn_graphics_pipeline_patch_vi_format(format) != format)
1266 *base_props = (VkFormatProperties){
1267 .bufferFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT,
1268 };
1269 else
1270 *base_props = (VkFormatProperties) { 0 };
1271 return;
1272 }
1273
1274 *base_props = (VkFormatProperties) {
1275 .linearTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
1276 .optimalTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
1277 .bufferFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
1278 };
1279
1280 if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER)
1281 base_props->bufferFeatures |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
1282
1283 #define TEX_FLAGS (D3D12_FORMAT_SUPPORT1_TEXTURE1D | \
1284 D3D12_FORMAT_SUPPORT1_TEXTURE2D | \
1285 D3D12_FORMAT_SUPPORT1_TEXTURE3D | \
1286 D3D12_FORMAT_SUPPORT1_TEXTURECUBE)
1287 if ((dfmt_info.Support1 & TEX_FLAGS) &&
1288 (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD)) {
1289 base_props->optimalTilingFeatures |=
1290 VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT;
1291 }
1292
1293 if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) {
1294 base_props->optimalTilingFeatures |=
1295 VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
1296 }
1297
1298 if ((dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) &&
1299 (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW)) {
1300 base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
1301 if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BUFFER)
1302 base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
1303 }
1304
1305 #define ATOMIC_FLAGS (D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD | \
1306 D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS | \
1307 D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE | \
1308 D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE | \
1309 D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX | \
1310 D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX)
1311 if ((dfmt_info.Support2 & ATOMIC_FLAGS) == ATOMIC_FLAGS) {
1312 base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
1313 base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
1314 }
1315
1316 if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BUFFER)
1317 base_props->bufferFeatures |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
1318
1319 /* Color/depth/stencil attachment support implies input attachment support,
1320 * and input attachment loads are lowered to texture loads in dozen, hence
1321 * the requirement for shader-load support.
1322 */
1323 if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) {
1324 if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) {
1325 base_props->optimalTilingFeatures |=
1326 VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
1327 }
1328
1329 if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BLENDABLE)
1330 base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
1331
1332 if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) {
1333 base_props->optimalTilingFeatures |=
1334 VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
1335 }
1336 }
1337
1338 /* B4G4R4A4 support is required, but d3d12 doesn't support it. The needed
1339 * d3d12 format would be A4R4G4B4. We map this format to d3d12's B4G4R4A4,
1340 * which is Vulkan's A4R4G4B4, and adjust the SRV component-mapping to fake
1341 * B4G4R4A4, but that forces us to limit the usage to sampling, which,
1342 * luckily, is exactly what we need to support the required features.
1343 *
1344 * However, since this involves swizzling the alpha channel, it can cause
1345 * problems for border colors. Fortunately, d3d12 added an A4B4G4R4 format,
1346 * which still isn't quite right (it'd be Vulkan R4G4B4A4), but can be
1347 * swizzled by just swapping R and B, so no border color issues arise.
1348 */
1349 if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
1350 VkFormatFeatureFlags bgra4_req_features =
1351 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
1352 VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
1353 VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
1354 VK_FORMAT_FEATURE_BLIT_SRC_BIT |
1355 VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
1356 base_props->optimalTilingFeatures &= bgra4_req_features;
1357 base_props->bufferFeatures =
1358 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
1359 }
1360
1361 /* depth/stencil format shouldn't advertise buffer features */
1362 if (vk_format_is_depth_or_stencil(format))
1363 base_props->bufferFeatures = 0;
1364 }
1365
1366 static VkResult
1367 dzn_physical_device_get_image_format_properties(struct dzn_physical_device *pdev,
1368 const VkPhysicalDeviceImageFormatInfo2 *info,
1369 VkImageFormatProperties2 *properties)
1370 {
1371 const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
1372 VkExternalImageFormatProperties *external_props = NULL;
1373
1374 properties->imageFormatProperties = (VkImageFormatProperties) { 0 };
1375
1376 VkImageUsageFlags usage = info->usage;
1377
1378 /* Extract input structs */
1379 vk_foreach_struct_const(s, info->pNext) {
1380 switch (s->sType) {
1381 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
1382 external_info = (const VkPhysicalDeviceExternalImageFormatInfo *)s;
1383 break;
1384 case VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO:
1385 usage |= ((const VkImageStencilUsageCreateInfo *)s)->stencilUsage;
1386 break;
1387 default:
1388 dzn_debug_ignored_stype(s->sType);
1389 break;
1390 }
1391 }
1392
1393 assert(info->tiling == VK_IMAGE_TILING_OPTIMAL || info->tiling == VK_IMAGE_TILING_LINEAR);
1394
1395 /* Extract output structs */
1396 vk_foreach_struct(s, properties->pNext) {
1397 switch (s->sType) {
1398 case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
1399 external_props = (VkExternalImageFormatProperties *)s;
1400 external_props->externalMemoryProperties = (VkExternalMemoryProperties) { 0 };
1401 break;
1402 default:
1403 dzn_debug_ignored_stype(s->sType);
1404 break;
1405 }
1406 }
1407
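   /* For each handle type we support, report the set of compatible handle types,
    * which types can be re-exported after import, and whether the allocation must
    * be dedicated; any other handle type is rejected below.
    */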
1408 if (external_info && external_info->handleType != 0) {
1409 const VkExternalMemoryHandleTypeFlags d3d12_resource_handle_types =
1410 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT | opaque_external_flag;
1411 const VkExternalMemoryHandleTypeFlags d3d11_texture_handle_types =
1412 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT | d3d12_resource_handle_types;
1413 const VkExternalMemoryFeatureFlags import_export_feature_flags =
1414 VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
1415 const VkExternalMemoryFeatureFlags dedicated_feature_flags =
1416 VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | import_export_feature_flags;
1417
1418 switch (external_info->handleType) {
1419 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT:
1420 external_props->externalMemoryProperties.compatibleHandleTypes = d3d11_texture_handle_types;
1421 external_props->externalMemoryProperties.exportFromImportedHandleTypes = d3d11_texture_handle_types;
1422 external_props->externalMemoryProperties.externalMemoryFeatures = dedicated_feature_flags;
1423 break;
1424 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
1425 external_props->externalMemoryProperties.compatibleHandleTypes = d3d12_resource_handle_types;
1426 external_props->externalMemoryProperties.exportFromImportedHandleTypes = d3d12_resource_handle_types;
1427 external_props->externalMemoryProperties.externalMemoryFeatures = dedicated_feature_flags;
1428 break;
1429 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
1430 external_props->externalMemoryProperties.compatibleHandleTypes =
1431 external_props->externalMemoryProperties.exportFromImportedHandleTypes =
1432 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | opaque_external_flag;
1433 external_props->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
1434 break;
1435 #ifdef _WIN32
1436 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:
1437 #else
1438 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
1439 #endif
1440 external_props->externalMemoryProperties.compatibleHandleTypes = d3d11_texture_handle_types;
1441 external_props->externalMemoryProperties.exportFromImportedHandleTypes = d3d11_texture_handle_types;
1442 external_props->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
1443 break;
1444 #if defined(_WIN32)
1445 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
1446 if (pdev->dev13) {
1447 external_props->externalMemoryProperties.compatibleHandleTypes =
1448 external_props->externalMemoryProperties.exportFromImportedHandleTypes =
1449 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | opaque_external_flag;
1450 external_props->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
1451 break;
1452 }
1453 FALLTHROUGH;
1454 #endif
1455 default:
1456 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1457 }
1458
1459 /* Linear textures not supported, but there's nothing else we can deduce from just a handle type */
1460 if (info->tiling != VK_IMAGE_TILING_OPTIMAL &&
1461 external_info->handleType != VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT)
1462 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1463 }
1464
1465 if (info->tiling != VK_IMAGE_TILING_OPTIMAL &&
1466 (usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)))
1467 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1468
1469 if (info->tiling != VK_IMAGE_TILING_OPTIMAL &&
1470 vk_format_is_depth_or_stencil(info->format))
1471 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1472
1473 D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
1474 dzn_physical_device_get_format_support(pdev, info->format, info->flags);
1475 if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN)
1476 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1477
1478 bool is_bgra4 = info->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16 &&
1479 !(info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT);
1480
1481 if ((info->type == VK_IMAGE_TYPE_1D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE1D)) ||
1482 (info->type == VK_IMAGE_TYPE_2D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D)) ||
1483 (info->type == VK_IMAGE_TYPE_3D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE3D)) ||
1484 ((info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
1485 !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURECUBE)))
1486 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1487
1488 /* Due to extended capability querying, we might see 1D support for BC, but we don't actually have it */
1489 if (vk_format_is_block_compressed(info->format) && info->type == VK_IMAGE_TYPE_1D)
1490 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1491
1492 if ((usage & VK_IMAGE_USAGE_SAMPLED_BIT) &&
1493 /* Note: format support for SAMPLED is not necessarily accurate for integer formats */
1494 !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD))
1495 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1496
1497 if ((usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) &&
1498 (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) || is_bgra4))
1499 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1500
1501 if ((usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
1502 (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) || is_bgra4))
1503 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1504
1505 if ((usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
1506 (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) || is_bgra4))
1507 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1508
1509 if ((usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
1510 (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) || is_bgra4))
1511 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1512
1513 if (info->type == VK_IMAGE_TYPE_3D && info->tiling != VK_IMAGE_TILING_OPTIMAL)
1514 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1515
1516 bool is_3d = info->type == VK_IMAGE_TYPE_3D;
1517 uint32_t max_extent = dzn_physical_device_get_max_extent(is_3d);
1518
1519 if (info->tiling == VK_IMAGE_TILING_OPTIMAL &&
1520 dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_MIP)
1521 properties->imageFormatProperties.maxMipLevels = dzn_physical_device_get_max_mip_level(is_3d) + 1;
1522 else
1523 properties->imageFormatProperties.maxMipLevels = 1;
1524
1525 if (info->tiling == VK_IMAGE_TILING_OPTIMAL && info->type != VK_IMAGE_TYPE_3D)
1526 properties->imageFormatProperties.maxArrayLayers = dzn_physical_device_get_max_array_layers();
1527 else
1528 properties->imageFormatProperties.maxArrayLayers = 1;
1529
1530 switch (info->type) {
1531 case VK_IMAGE_TYPE_1D:
1532 properties->imageFormatProperties.maxExtent.width = max_extent;
1533 properties->imageFormatProperties.maxExtent.height = 1;
1534 properties->imageFormatProperties.maxExtent.depth = 1;
1535 break;
1536 case VK_IMAGE_TYPE_2D:
1537 properties->imageFormatProperties.maxExtent.width = max_extent;
1538 properties->imageFormatProperties.maxExtent.height = max_extent;
1539 properties->imageFormatProperties.maxExtent.depth = 1;
1540 break;
1541 case VK_IMAGE_TYPE_3D:
1542 properties->imageFormatProperties.maxExtent.width = max_extent;
1543 properties->imageFormatProperties.maxExtent.height = max_extent;
1544 properties->imageFormatProperties.maxExtent.depth = max_extent;
1545 break;
1546 default:
1547 unreachable("bad VkImageType");
1548 }
1549
1550 /* From the Vulkan 1.0 spec, section 34.1.1. Supported Sample Counts:
1551 *
1552 * sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the
1553 * following conditions is true:
1554 *
1555 * - tiling is VK_IMAGE_TILING_LINEAR
1556 * - type is not VK_IMAGE_TYPE_2D
1557 * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT
1558 * - neither the VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT flag nor the
1559 * VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT flag in
1560 * VkFormatProperties::optimalTilingFeatures returned by
1561 * vkGetPhysicalDeviceFormatProperties is set.
1562 *
1563 * D3D12 has a few more constraints:
1564 * - no UAVs on multisample resources
1565 */
1566 properties->imageFormatProperties.sampleCounts = VK_SAMPLE_COUNT_1_BIT;
1567 if (info->tiling != VK_IMAGE_TILING_LINEAR &&
1568 info->type == VK_IMAGE_TYPE_2D &&
1569 !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
1570 (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD) &&
1571 !is_bgra4 &&
1572 !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
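      /* Probe each power-of-two sample count and advertise the ones for which
       * D3D12 reports at least one quality level.
       */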
1573 for (uint32_t s = VK_SAMPLE_COUNT_2_BIT; s < VK_SAMPLE_COUNT_64_BIT; s <<= 1) {
1574 D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS ms_info = {
1575 .Format = dfmt_info.Format,
1576 .SampleCount = s,
1577 };
1578
1579 HRESULT hres =
1580 ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS,
1581 &ms_info, sizeof(ms_info));
1582 if (!FAILED(hres) && ms_info.NumQualityLevels > 0)
1583 properties->imageFormatProperties.sampleCounts |= s;
1584 }
1585 }
1586
1587 /* TODO: set correct value here */
1588 properties->imageFormatProperties.maxResourceSize = UINT32_MAX;
1589
1590 return VK_SUCCESS;
1591 }
1592
1593 VKAPI_ATTR void VKAPI_CALL
1594 dzn_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
1595 VkFormat format,
1596 VkFormatProperties2 *pFormatProperties)
1597 {
1598 VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
1599
1600 dzn_physical_device_get_format_properties(pdev, format, pFormatProperties);
1601 }
1602
1603 VKAPI_ATTR VkResult VKAPI_CALL
1604 dzn_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
1605 const VkPhysicalDeviceImageFormatInfo2 *info,
1606 VkImageFormatProperties2 *props)
1607 {
1608 VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
1609
1610 return dzn_physical_device_get_image_format_properties(pdev, info, props);
1611 }
1612
1613 VKAPI_ATTR VkResult VKAPI_CALL
1614 dzn_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice,
1615 VkFormat format,
1616 VkImageType type,
1617 VkImageTiling tiling,
1618 VkImageUsageFlags usage,
1619 VkImageCreateFlags createFlags,
1620 VkImageFormatProperties *pImageFormatProperties)
1621 {
1622 const VkPhysicalDeviceImageFormatInfo2 info = {
1623 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
1624 .format = format,
1625 .type = type,
1626 .tiling = tiling,
1627 .usage = usage,
1628 .flags = createFlags,
1629 };
1630
1631 VkImageFormatProperties2 props = { 0 };
1632
1633 VkResult result =
1634 dzn_GetPhysicalDeviceImageFormatProperties2(physicalDevice, &info, &props);
1635 *pImageFormatProperties = props.imageFormatProperties;
1636
1637 return result;
1638 }
1639
1640 VKAPI_ATTR void VKAPI_CALL
1641 dzn_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice,
1642 VkFormat format,
1643 VkImageType type,
1644 VkSampleCountFlagBits samples,
1645 VkImageUsageFlags usage,
1646 VkImageTiling tiling,
1647 uint32_t *pPropertyCount,
1648 VkSparseImageFormatProperties *pProperties)
1649 {
1650 *pPropertyCount = 0;
1651 }
1652
1653 VKAPI_ATTR void VKAPI_CALL
1654 dzn_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice,
1655 const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
1656 uint32_t *pPropertyCount,
1657 VkSparseImageFormatProperties2 *pProperties)
1658 {
1659 *pPropertyCount = 0;
1660 }
1661
1662 VKAPI_ATTR void VKAPI_CALL
1663 dzn_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice,
1664 const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
1665 VkExternalBufferProperties *pExternalBufferProperties)
1666 {
1667 #if defined(_WIN32)
1668 VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
1669 #endif
1670
1671 const VkExternalMemoryHandleTypeFlags d3d12_resource_handle_types =
1672 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT | opaque_external_flag;
1673 const VkExternalMemoryFeatureFlags import_export_feature_flags =
1674 VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
1675 const VkExternalMemoryFeatureFlags dedicated_feature_flags =
1676 VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | import_export_feature_flags;
1677 switch (pExternalBufferInfo->handleType) {
1678 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
1679 pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes = d3d12_resource_handle_types;
1680 pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes = d3d12_resource_handle_types;
1681 pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = dedicated_feature_flags;
1682 break;
1683 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
1684 pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes =
1685 pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes =
1686 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | opaque_external_flag;
1687 pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
1688 break;
1689 #ifdef _WIN32
1690 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:
1691 #else
1692 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
1693 #endif
1694 pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes =
1695 pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes =
1696 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | d3d12_resource_handle_types;
1697 pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
1698 break;
1699 #if defined(_WIN32)
1700 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
1701 if (pdev->dev13) {
1702 pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes =
1703 pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes =
1704 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | opaque_external_flag;
1705 pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
1706 break;
1707 }
1708 FALLTHROUGH;
1709 #endif
1710 default:
1711 pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties){ 0 };
1712 break;
1713 }
1714 }
1715
1716 VkResult
1717 dzn_instance_add_physical_device(struct vk_instance *instance,
1718 IUnknown *adapter,
1719 const struct dzn_physical_device_desc *desc)
1720 {
1721 struct dzn_instance *dzn_instance = container_of(instance, struct dzn_instance, vk);
1722 if ((dzn_instance->debug_flags & DZN_DEBUG_WARP) &&
1723 !desc->is_warp)
1724 return VK_SUCCESS;
1725
1726 return dzn_physical_device_create(instance, adapter, desc);
1727 }
1728
1729 static VkResult
1730 dzn_enumerate_physical_devices(struct vk_instance *instance)
1731 {
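   /* Enumerate adapters through DXCore first and, on Windows, fall back to DXGI
    * if DXCore enumeration fails.
    */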
1732 VkResult result = dzn_enumerate_physical_devices_dxcore(instance);
1733 #ifdef _WIN32
1734 if (result != VK_SUCCESS)
1735 result = dzn_enumerate_physical_devices_dxgi(instance);
1736 #endif
1737
1738 return result;
1739 }
1740
1741 static const driOptionDescription dzn_dri_options[] = {
1742 DRI_CONF_SECTION_DEBUG
1743 DRI_CONF_DZN_CLAIM_WIDE_LINES(false)
1744 DRI_CONF_DZN_ENABLE_8BIT_LOADS_STORES(false)
1745 DRI_CONF_DZN_DISABLE(false)
1746 DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
1747 DRI_CONF_SECTION_END
1748 };
1749
1750 static void
1751 dzn_init_dri_config(struct dzn_instance *instance)
1752 {
1753 driParseOptionInfo(&instance->available_dri_options, dzn_dri_options,
1754 ARRAY_SIZE(dzn_dri_options));
1755 driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "dzn", NULL, NULL,
1756 instance->vk.app_info.app_name, instance->vk.app_info.app_version,
1757 instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
1758 }
1759
1760 static VkResult
1761 dzn_instance_create(const VkInstanceCreateInfo *pCreateInfo,
1762 const VkAllocationCallbacks *pAllocator,
1763 VkInstance *out)
1764 {
1765 struct dzn_instance *instance =
1766 vk_zalloc2(vk_default_allocator(), pAllocator, sizeof(*instance), 8,
1767 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1768 if (!instance)
1769 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1770
1771 struct vk_instance_dispatch_table dispatch_table;
1772 vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
1773 &dzn_instance_entrypoints,
1774 true);
1775 vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
1776 &wsi_instance_entrypoints,
1777 false);
1778
1779 VkResult result =
1780 vk_instance_init(&instance->vk, &instance_extensions,
1781 &dispatch_table, pCreateInfo,
1782 pAllocator ? pAllocator : vk_default_allocator());
1783 if (result != VK_SUCCESS) {
1784 vk_free2(vk_default_allocator(), pAllocator, instance);
1785 return result;
1786 }
1787
1788 instance->vk.physical_devices.enumerate = dzn_enumerate_physical_devices;
1789 instance->vk.physical_devices.destroy = dzn_physical_device_destroy;
1790 instance->debug_flags =
1791 parse_debug_string(getenv("DZN_DEBUG"), dzn_debug_options);
1792
1793 #ifdef _WIN32
1794 if (instance->debug_flags & DZN_DEBUG_DEBUGGER) {
1795 /* wait for debugger to attach... */
1796 while (!IsDebuggerPresent()) {
1797 Sleep(100);
1798 }
1799 }
1800
1801 if (instance->debug_flags & DZN_DEBUG_REDIRECTS) {
1802 char home[MAX_PATH], path[MAX_PATH];
1803 if (SUCCEEDED(SHGetFolderPathA(NULL, CSIDL_PROFILE, NULL, 0, home))) {
1804 snprintf(path, sizeof(path), "%s\\stderr.txt", home);
1805 freopen(path, "w", stderr);
1806 snprintf(path, sizeof(path), "%s\\stdout.txt", home);
1807 freopen(path, "w", stdout);
1808 }
1809 }
1810 #endif
1811
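   /* Unless experimental features are enabled, a DXIL validator is required to
    * validate/sign the DXIL we generate so D3D12 will accept it; instance
    * creation fails below if it can't be created.
    */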
1812 bool missing_validator = false;
1813 #ifdef _WIN32
1814 if ((instance->debug_flags & DZN_DEBUG_EXPERIMENTAL) == 0) {
1815 instance->dxil_validator = dxil_create_validator(NULL);
1816 missing_validator = !instance->dxil_validator;
1817 }
1818 #endif
1819
1820 if (missing_validator) {
1821 dzn_instance_destroy(instance, pAllocator);
1822 return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
1823 }
1824
1825 instance->d3d12_mod = util_dl_open(UTIL_DL_PREFIX "d3d12" UTIL_DL_EXT);
1826 if (!instance->d3d12_mod) {
1827 dzn_instance_destroy(instance, pAllocator);
1828 return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
1829 }
1830
1831 instance->d3d12.serialize_root_sig = d3d12_get_serialize_root_sig(instance->d3d12_mod);
1832 if (!instance->d3d12.serialize_root_sig) {
1833 dzn_instance_destroy(instance, pAllocator);
1834 return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
1835 }
1836
1837 instance->factory = try_create_device_factory(instance->d3d12_mod);
1838
1839 if (instance->debug_flags & DZN_DEBUG_D3D12)
1840 d3d12_enable_debug_layer(instance->d3d12_mod, instance->factory);
1841 if (instance->debug_flags & DZN_DEBUG_GBV)
1842 d3d12_enable_gpu_validation(instance->d3d12_mod, instance->factory);
1843
1844 instance->sync_binary_type = vk_sync_binary_get_type(&dzn_sync_type);
1845 dzn_init_dri_config(instance);
1846
1847 if (driQueryOptionb(&instance->dri_options, "dzn_disable")) {
1848 dzn_instance_destroy(instance, pAllocator);
1849 return vk_errorf(NULL, VK_ERROR_INITIALIZATION_FAILED, "dzn_disable set, failing instance creation");
1850 }
1851
1852 *out = dzn_instance_to_handle(instance);
1853 return VK_SUCCESS;
1854 }
1855
1856 VKAPI_ATTR VkResult VKAPI_CALL
1857 dzn_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
1858 const VkAllocationCallbacks *pAllocator,
1859 VkInstance *pInstance)
1860 {
1861 return dzn_instance_create(pCreateInfo, pAllocator, pInstance);
1862 }
1863
1864 VKAPI_ATTR VkResult VKAPI_CALL
1865 dzn_EnumerateInstanceVersion(uint32_t *pApiVersion)
1866 {
1867 *pApiVersion = DZN_API_VERSION;
1868 return VK_SUCCESS;
1869 }
1870
1871
1872 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
1873 dzn_GetInstanceProcAddr(VkInstance _instance,
1874 const char *pName)
1875 {
1876 VK_FROM_HANDLE(dzn_instance, instance, _instance);
1877 return vk_instance_get_proc_addr(&instance->vk,
1878 &dzn_instance_entrypoints,
1879 pName);
1880 }
1881
1882 /* On Windows, symbol export is handled by a dll definition file, so PUBLIC is redefined to nothing to avoid build errors. */
1883 #ifdef _WIN32
1884 #undef PUBLIC
1885 #define PUBLIC
1886 #endif
1887
1888 PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
1889 vk_icdGetInstanceProcAddr(VkInstance instance,
1890 const char *pName)
1891 {
1892 return dzn_GetInstanceProcAddr(instance, pName);
1893 }
1894
1895 VKAPI_ATTR void VKAPI_CALL
1896 dzn_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
1897 uint32_t *pQueueFamilyPropertyCount,
1898 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1899 {
1900 VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
1901 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1902 pQueueFamilyProperties, pQueueFamilyPropertyCount);
1903
1904 for (uint32_t i = 0; i < pdev->queue_family_count; i++) {
1905 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1906 p->queueFamilyProperties = pdev->queue_families[i].props;
1907
1908 vk_foreach_struct(ext, pQueueFamilyProperties->pNext) {
1909 dzn_debug_ignored_stype(ext->sType);
1910 }
1911 }
1912 }
1913 }
1914
1915 VKAPI_ATTR void VKAPI_CALL
1916 dzn_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
1917 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
1918 {
1919 VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
1920
1921 *pMemoryProperties = pdev->memory;
1922 }
1923
1924 VKAPI_ATTR void VKAPI_CALL
1925 dzn_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
1926 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1927 {
1928 dzn_GetPhysicalDeviceMemoryProperties(physicalDevice,
1929 &pMemoryProperties->memoryProperties);
1930
1931 vk_foreach_struct(ext, pMemoryProperties->pNext) {
1932 dzn_debug_ignored_stype(ext->sType);
1933 }
1934 }
1935
1936 VKAPI_ATTR VkResult VKAPI_CALL
1937 dzn_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
1938 VkLayerProperties *pProperties)
1939 {
1940 if (pProperties == NULL) {
1941 *pPropertyCount = 0;
1942 return VK_SUCCESS;
1943 }
1944
1945 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1946 }
1947
1948 static VkResult
1949 dzn_queue_sync_wait(struct dzn_queue *queue, const struct vk_sync_wait *wait)
1950 {
1951 if (wait->sync->type == &vk_sync_dummy_type)
1952 return VK_SUCCESS;
1953
1954 struct dzn_device *device = container_of(queue->vk.base.device, struct dzn_device, vk);
1955 assert(wait->sync->type == &dzn_sync_type);
1956 struct dzn_sync *sync = container_of(wait->sync, struct dzn_sync, vk);
1957 uint64_t value =
1958 (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? wait->wait_value : 1;
1959
1960 assert(sync->fence != NULL);
1961
1962 if (value > 0 && FAILED(ID3D12CommandQueue_Wait(queue->cmdqueue, sync->fence, value)))
1963 return vk_error(device, VK_ERROR_UNKNOWN);
1964
1965 return VK_SUCCESS;
1966 }
1967
1968 static VkResult
1969 dzn_queue_sync_signal(struct dzn_queue *queue, const struct vk_sync_signal *signal)
1970 {
1971 if (signal->sync->type == &vk_sync_dummy_type)
1972 return VK_SUCCESS;
1973
1974 struct dzn_device *device = container_of(queue->vk.base.device, struct dzn_device, vk);
1975 assert(signal->sync->type == &dzn_sync_type);
1976 struct dzn_sync *sync = container_of(signal->sync, struct dzn_sync, vk);
1977 uint64_t value =
1978 (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? signal->signal_value : 1;
1979 assert(value > 0);
1980
1981 assert(sync->fence != NULL);
1982
1983 if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, sync->fence, value)))
1984 return vk_error(device, VK_ERROR_UNKNOWN);
1985
1986 return VK_SUCCESS;
1987 }
1988
1989 static VkResult
1990 dzn_queue_submit(struct vk_queue *q,
1991 struct vk_queue_submit *info)
1992 {
1993 struct dzn_queue *queue = container_of(q, struct dzn_queue, vk);
1994 struct dzn_device *device = container_of(q->base.device, struct dzn_device, vk);
1995 VkResult result = VK_SUCCESS;
1996
1997 for (uint32_t i = 0; i < info->wait_count; i++) {
1998 result = dzn_queue_sync_wait(queue, &info->waits[i]);
1999 if (result != VK_SUCCESS)
2000 return result;
2001 }
2002
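   /* Collect the native command lists so they can be submitted with a single
    * ExecuteCommandLists() call; the array only needs to live for this submit.
    */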
2003 ID3D12CommandList **cmdlists = alloca(info->command_buffer_count * sizeof(ID3D12CommandList*));
2004
2005 for (uint32_t i = 0; i < info->command_buffer_count; i++) {
2006 struct dzn_cmd_buffer *cmd_buffer =
2007 container_of(info->command_buffers[i], struct dzn_cmd_buffer, vk);
2008
2009 cmdlists[i] = (ID3D12CommandList *)cmd_buffer->cmdlist;
2010
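      /* Queries reset by this command buffer no longer depend on a previously
       * signaled fence: drop the stale fence references before execution.
       */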
2011 util_dynarray_foreach(&cmd_buffer->queries.reset, struct dzn_cmd_buffer_query_range, range) {
2012 mtx_lock(&range->qpool->queries_lock);
2013 for (uint32_t q = range->start; q < range->start + range->count; q++) {
2014 struct dzn_query *query = &range->qpool->queries[q];
2015 if (query->fence) {
2016 ID3D12Fence_Release(query->fence);
2017 query->fence = NULL;
2018 }
2019 query->fence_value = 0;
2020 }
2021 mtx_unlock(&range->qpool->queries_lock);
2022 }
2023 }
2024
2025 ID3D12CommandQueue_ExecuteCommandLists(queue->cmdqueue, info->command_buffer_count, cmdlists);
2026
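   /* After execution, forward event signals to the D3D12 queue and attach the
    * queue fence (at the upcoming fence point) to every query signaled by the
    * submitted command buffers.
    */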
2027 for (uint32_t i = 0; i < info->command_buffer_count; i++) {
2028 struct dzn_cmd_buffer* cmd_buffer =
2029 container_of(info->command_buffers[i], struct dzn_cmd_buffer, vk);
2030
2031 util_dynarray_foreach(&cmd_buffer->events.signal, struct dzn_cmd_event_signal, evt) {
2032 if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, evt->event->fence, evt->value ? 1 : 0)))
2033 return vk_error(device, VK_ERROR_UNKNOWN);
2034 }
2035
2036 util_dynarray_foreach(&cmd_buffer->queries.signal, struct dzn_cmd_buffer_query_range, range) {
2037 mtx_lock(&range->qpool->queries_lock);
2038 for (uint32_t q = range->start; q < range->start + range->count; q++) {
2039 struct dzn_query *query = &range->qpool->queries[q];
2040 query->fence_value = queue->fence_point + 1;
2041 query->fence = queue->fence;
2042 ID3D12Fence_AddRef(query->fence);
2043 }
2044 mtx_unlock(&range->qpool->queries_lock);
2045 }
2046 }
2047
2048 for (uint32_t i = 0; i < info->signal_count; i++) {
2049 result = dzn_queue_sync_signal(queue, &info->signals[i]);
2050 if (result != VK_SUCCESS)
2051 return vk_error(device, VK_ERROR_UNKNOWN);
2052 }
2053
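   /* Bump the queue timeline; this is the fence point the signaled queries above
    * were tagged with, so their results become visible once all submitted work
    * completes.
    */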
2054 if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, queue->fence, ++queue->fence_point)))
2055 return vk_error(device, VK_ERROR_UNKNOWN);
2056
2057 return VK_SUCCESS;
2058 }
2059
2060 static void
2061 dzn_queue_finish(struct dzn_queue *queue)
2062 {
2063 if (queue->cmdqueue)
2064 ID3D12CommandQueue_Release(queue->cmdqueue);
2065
2066 if (queue->fence)
2067 ID3D12Fence_Release(queue->fence);
2068
2069 vk_queue_finish(&queue->vk);
2070 }
2071
2072 static VkResult
2073 dzn_queue_init(struct dzn_queue *queue,
2074 struct dzn_device *device,
2075 const VkDeviceQueueCreateInfo *pCreateInfo,
2076 uint32_t index_in_family)
2077 {
2078 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2079
2080 VkResult result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
2081 if (result != VK_SUCCESS)
2082 return result;
2083
2084 queue->vk.driver_submit = dzn_queue_submit;
2085
2086 assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count);
2087
2088 D3D12_COMMAND_QUEUE_DESC queue_desc =
2089 pdev->queue_families[pCreateInfo->queueFamilyIndex].desc;
2090
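   /* Map the normalized Vulkan queue priority onto D3D12's NORMAL/HIGH levels. */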
2091 float priority_in = pCreateInfo->pQueuePriorities[index_in_family];
2092 queue_desc.Priority =
2093 priority_in > 0.5f ? D3D12_COMMAND_QUEUE_PRIORITY_HIGH : D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
2094 queue_desc.NodeMask = 0;
2095
2096 if (FAILED(ID3D12Device1_CreateCommandQueue(device->dev, &queue_desc,
2097 &IID_ID3D12CommandQueue,
2098 (void **)&queue->cmdqueue))) {
2099 dzn_queue_finish(queue);
2100 return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED);
2101 }
2102
2103 if (FAILED(ID3D12Device1_CreateFence(device->dev, 0, D3D12_FENCE_FLAG_NONE,
2104 &IID_ID3D12Fence,
2105 (void **)&queue->fence))) {
2106 dzn_queue_finish(queue);
2107 return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED);
2108 }
2109
2110 return VK_SUCCESS;
2111 }
2112
2113 static VkResult
2114 dzn_device_create_sync_for_memory(struct vk_device *device,
2115 VkDeviceMemory memory,
2116 bool signal_memory,
2117 struct vk_sync **sync_out)
2118 {
2119 return vk_sync_create(device, &vk_sync_dummy_type,
2120 0, 1, sync_out);
2121 }
2122
2123 static VkResult
2124 dzn_device_query_init(struct dzn_device *device)
2125 {
2126 /* FIXME: create the resource in the default heap */
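   /* This small upload-heap buffer is filled with all-ones and all-zeros
    * reference patterns that the query handling code can use as copy sources.
    */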
2127 D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, D3D12_HEAP_TYPE_UPLOAD);
2128 D3D12_RESOURCE_DESC rdesc = {
2129 .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
2130 .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
2131 .Width = DZN_QUERY_REFS_RES_SIZE,
2132 .Height = 1,
2133 .DepthOrArraySize = 1,
2134 .MipLevels = 1,
2135 .Format = DXGI_FORMAT_UNKNOWN,
2136 .SampleDesc = { .Count = 1, .Quality = 0 },
2137 .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
2138 .Flags = D3D12_RESOURCE_FLAG_NONE,
2139 };
2140
2141 if (FAILED(ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
2142 D3D12_HEAP_FLAG_NONE,
2143 &rdesc,
2144 D3D12_RESOURCE_STATE_COMMON,
2145 NULL,
2146 &IID_ID3D12Resource,
2147 (void **)&device->queries.refs)))
2148 return vk_error(device->vk.physical, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2149
2150 uint8_t *queries_ref;
2151 if (FAILED(ID3D12Resource_Map(device->queries.refs, 0, NULL, (void **)&queries_ref)))
2152 return vk_error(device->vk.physical, VK_ERROR_OUT_OF_HOST_MEMORY);
2153
2154 memset(queries_ref + DZN_QUERY_REFS_ALL_ONES_OFFSET, 0xff, DZN_QUERY_REFS_SECTION_SIZE);
2155 memset(queries_ref + DZN_QUERY_REFS_ALL_ZEROS_OFFSET, 0x0, DZN_QUERY_REFS_SECTION_SIZE);
2156 ID3D12Resource_Unmap(device->queries.refs, 0, NULL);
2157
2158 return VK_SUCCESS;
2159 }
2160
2161 static void
2162 dzn_device_query_finish(struct dzn_device *device)
2163 {
2164 if (device->queries.refs)
2165 ID3D12Resource_Release(device->queries.refs);
2166 }
2167
2168 static void
2169 dzn_device_destroy(struct dzn_device *device, const VkAllocationCallbacks *pAllocator)
2170 {
2171 if (!device)
2172 return;
2173
2174 struct dzn_instance *instance =
2175 container_of(device->vk.physical->instance, struct dzn_instance, vk);
2176
2177 vk_foreach_queue_safe(q, &device->vk) {
2178 struct dzn_queue *queue = container_of(q, struct dzn_queue, vk);
2179
2180 dzn_queue_finish(queue);
2181 }
2182
2183 dzn_device_query_finish(device);
2184 dzn_meta_finish(device);
2185
2186 dzn_foreach_pool_type(type) {
2187 dzn_descriptor_heap_finish(&device->device_heaps[type].heap);
2188 util_dynarray_fini(&device->device_heaps[type].slot_freelist);
2189 mtx_destroy(&device->device_heaps[type].lock);
2190 }
2191
2192 if (device->dev_config)
2193 ID3D12DeviceConfiguration_Release(device->dev_config);
2194
2195 if (device->dev)
2196 ID3D12Device1_Release(device->dev);
2197
2198 if (device->dev10)
2199 ID3D12Device1_Release(device->dev10);
2200
2201 if (device->dev11)
2202 ID3D12Device1_Release(device->dev11);
2203
2204 if (device->dev12)
2205 ID3D12Device1_Release(device->dev12);
2206
2207 if (device->dev13)
2208 ID3D12Device1_Release(device->dev13);
2209
2210 vk_device_finish(&device->vk);
2211 vk_free2(&instance->vk.alloc, pAllocator, device);
2212 }
2213
2214 static VkResult
2215 dzn_device_check_status(struct vk_device *dev)
2216 {
2217 struct dzn_device *device = container_of(dev, struct dzn_device, vk);
2218
2219 if (FAILED(ID3D12Device_GetDeviceRemovedReason(device->dev)))
2220 return vk_device_set_lost(&device->vk, "D3D12 device removed");
2221
2222 return VK_SUCCESS;
2223 }
2224
2225 static VkResult
2226 dzn_device_create(struct dzn_physical_device *pdev,
2227 const VkDeviceCreateInfo *pCreateInfo,
2228 const VkAllocationCallbacks *pAllocator,
2229 VkDevice *out)
2230 {
2231 struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk);
2232
2233 uint32_t graphics_queue_count = 0;
2234 uint32_t queue_count = 0;
2235 for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) {
2236 const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf];
2237 queue_count += qinfo->queueCount;
2238 if (pdev->queue_families[qinfo->queueFamilyIndex].props.queueFlags & VK_QUEUE_GRAPHICS_BIT)
2239 graphics_queue_count += qinfo->queueCount;
2240 }
2241
2242 /* Add a dedicated swapchain queue if there is no graphics queue or more than one */
2243 if (graphics_queue_count != 1)
2244 queue_count++;
2245
2246 VK_MULTIALLOC(ma);
2247 VK_MULTIALLOC_DECL(&ma, struct dzn_device, device, 1);
2248 VK_MULTIALLOC_DECL(&ma, struct dzn_queue, queues, queue_count);
2249
2250 if (!vk_multialloc_zalloc2(&ma, &instance->vk.alloc, pAllocator,
2251 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
2252 return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY);
2253
2254 struct vk_device_dispatch_table dispatch_table;
2255
2256 /* For secondary command buffer support, overwrite any command entrypoints
2257 * in the main device-level dispatch table with
2258 * vk_cmd_enqueue_unless_primary_Cmd*.
2259 */
2260 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2261 &vk_cmd_enqueue_unless_primary_device_entrypoints, true);
2262 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2263 &dzn_device_entrypoints, false);
2264 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2265 &wsi_device_entrypoints, false);
2266
2267 /* Populate our primary cmd_dispatch table. */
2268 vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
2269 &dzn_device_entrypoints, true);
2270 vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
2271 &vk_common_device_entrypoints,
2272 false);
2273
2274 /* Override entrypoints with alternatives based on supported features. */
2275 if (pdev->options12.EnhancedBarriersSupported) {
2276 device->cmd_dispatch.CmdPipelineBarrier2 = dzn_CmdPipelineBarrier2_enhanced;
2277 }
2278
2279 VkResult result =
2280 vk_device_init(&device->vk, &pdev->vk, &dispatch_table, pCreateInfo, pAllocator);
2281 if (result != VK_SUCCESS) {
2282 vk_free2(&device->vk.alloc, pAllocator, device);
2283 return result;
2284 }
2285
2286 /* Must be done after vk_device_init() because that function zeroes the
2287 * whole struct with memset().
2288 */
2289 device->vk.command_dispatch_table = &device->cmd_dispatch;
2290 device->vk.create_sync_for_memory = dzn_device_create_sync_for_memory;
2291 device->vk.check_status = dzn_device_check_status;
2292
2293 device->dev = pdev->dev;
2294
2295 ID3D12Device1_AddRef(device->dev);
2296
2297 if (pdev->dev10) {
2298 device->dev10 = pdev->dev10;
2299 ID3D12Device1_AddRef(device->dev10);
2300 }
2301 if (pdev->dev11) {
2302 device->dev11 = pdev->dev11;
2303 ID3D12Device1_AddRef(device->dev11);
2304 }
2305
2306 if (pdev->dev12) {
2307 device->dev12 = pdev->dev12;
2308 ID3D12Device1_AddRef(device->dev12);
2309 }
2310
2311 if (pdev->dev13) {
2312 device->dev13 = pdev->dev13;
2313 ID3D12Device1_AddRef(device->dev13);
2314 }
2315
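   /* When the debug layer's info queue is available, mute informational/warning
    * messages and a known-noisy clear-value mismatch message.
    */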
2316 ID3D12InfoQueue *info_queue;
2317 if (SUCCEEDED(ID3D12Device1_QueryInterface(device->dev,
2318 &IID_ID3D12InfoQueue,
2319 (void **)&info_queue))) {
2320 D3D12_MESSAGE_SEVERITY severities[] = {
2321 D3D12_MESSAGE_SEVERITY_INFO,
2322 D3D12_MESSAGE_SEVERITY_WARNING,
2323 };
2324
2325 D3D12_MESSAGE_ID msg_ids[] = {
2326 D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
2327 };
2328
2329 D3D12_INFO_QUEUE_FILTER NewFilter = { 0 };
2330 NewFilter.DenyList.NumSeverities = ARRAY_SIZE(severities);
2331 NewFilter.DenyList.pSeverityList = severities;
2332 NewFilter.DenyList.NumIDs = ARRAY_SIZE(msg_ids);
2333 NewFilter.DenyList.pIDList = msg_ids;
2334
2335 ID3D12InfoQueue_PushStorageFilter(info_queue, &NewFilter);
2336 ID3D12InfoQueue_Release(info_queue);
2337 }
2338
2339 IUnknown_QueryInterface(device->dev, &IID_ID3D12DeviceConfiguration, (void **)&device->dev_config);
2340
2341 result = dzn_meta_init(device);
2342 if (result != VK_SUCCESS) {
2343 dzn_device_destroy(device, pAllocator);
2344 return result;
2345 }
2346
2347 result = dzn_device_query_init(device);
2348 if (result != VK_SUCCESS) {
2349 dzn_device_destroy(device, pAllocator);
2350 return result;
2351 }
2352
2353 uint32_t qindex = 0;
2354 for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) {
2355 const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf];
2356
2357 for (uint32_t q = 0; q < qinfo->queueCount; q++) {
2358 result =
2359 dzn_queue_init(&queues[qindex++], device, qinfo, q);
2360 if (result != VK_SUCCESS) {
2361 dzn_device_destroy(device, pAllocator);
2362 return result;
2363 }
2364 if (graphics_queue_count == 1 &&
2365 pdev->queue_families[qinfo->queueFamilyIndex].props.queueFlags & VK_QUEUE_GRAPHICS_BIT)
2366 device->swapchain_queue = &queues[qindex - 1];
2367 }
2368 }
2369
2370 if (!device->swapchain_queue) {
2371 const float swapchain_queue_priority = 0.0f;
2372 VkDeviceQueueCreateInfo swapchain_queue_info = {
2373 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
2374 .flags = 0,
2375 .queueCount = 1,
2376 .pQueuePriorities = &swapchain_queue_priority,
2377 };
2378 for (uint32_t qf = 0; qf < pdev->queue_family_count; qf++) {
2379 if (pdev->queue_families[qf].props.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
2380 swapchain_queue_info.queueFamilyIndex = qf;
2381 break;
2382 }
2383 }
2384 result = dzn_queue_init(&queues[qindex], device, &swapchain_queue_info, 0);
2385 if (result != VK_SUCCESS) {
2386 dzn_device_destroy(device, pAllocator);
2387 return result;
2388 }
2389 device->swapchain_queue = &queues[qindex++];
2390 device->need_swapchain_blits = true;
2391 }
2392
2393 device->support_static_samplers = true;
2394 device->bindless = (instance->debug_flags & DZN_DEBUG_BINDLESS) != 0 ||
2395 device->vk.enabled_features.descriptorIndexing ||
2396 device->vk.enabled_extensions.EXT_descriptor_indexing;
2397
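   /* In bindless mode, allocate one big device-wide heap per descriptor heap
    * type; descriptor slots are sub-allocated from it under a per-type lock,
    * with a free-list for recycling.
    */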
2398 if (device->bindless) {
2399 uint32_t sampler_count = MIN2(pdev->options19.MaxSamplerDescriptorHeapSize, 4000);
2400 device->support_static_samplers = pdev->options19.MaxSamplerDescriptorHeapSizeWithStaticSamplers >= sampler_count;
2401 dzn_foreach_pool_type(type) {
2402 uint32_t descriptor_count = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ?
2403 sampler_count : D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1;
2404 result = dzn_descriptor_heap_init(&device->device_heaps[type].heap, device, type, descriptor_count, true);
2405 if (result != VK_SUCCESS) {
2406 dzn_device_destroy(device, pAllocator);
2407 return result;
2408 }
2409
2410 mtx_init(&device->device_heaps[type].lock, mtx_plain);
2411 util_dynarray_init(&device->device_heaps[type].slot_freelist, NULL);
2412 device->device_heaps[type].next_alloc_slot = 0;
2413 }
2414 }
2415
2416 assert(queue_count == qindex);
2417 *out = dzn_device_to_handle(device);
2418 return VK_SUCCESS;
2419 }
2420
2421 static ID3DBlob *
2422 serialize_root_sig(struct dzn_device *device,
2423 const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc)
2424 {
2425 struct dzn_instance *instance =
2426 container_of(device->vk.physical->instance, struct dzn_instance, vk);
2427 ID3DBlob *sig = NULL, *error = NULL;
2428
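   /* Prefer the ID3D12DeviceConfiguration interface when present, otherwise use
    * the root-signature serialization entry point resolved from d3d12.dll at
    * instance creation.
    */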
2429 HRESULT hr = device->dev_config ?
2430 ID3D12DeviceConfiguration_SerializeVersionedRootSignature(device->dev_config, desc, &sig, &error) :
2431 instance->d3d12.serialize_root_sig(desc, &sig, &error);
2432
2433 if (FAILED(hr)) {
2434 if (instance->debug_flags & DZN_DEBUG_SIG) {
2435 const char *error_msg = (const char *)ID3D10Blob_GetBufferPointer(error);
2436 fprintf(stderr,
2437 "== SERIALIZE ROOT SIG ERROR =============================================\n"
2438 "%s\n"
2439 "== END ==========================================================\n",
2440 error_msg);
2441 }
2442 }
2443
2444 if (error)
2445 ID3D10Blob_Release(error);
2446
2447 return sig;
2448 }
2449
2450 ID3D12RootSignature *
2451 dzn_device_create_root_sig(struct dzn_device *device,
2452 const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc)
2453 {
2454 ID3DBlob *sig = serialize_root_sig(device, desc);
2455 if (!sig)
2456 return NULL;
2457
2458 ID3D12RootSignature *root_sig = NULL;
2459 ID3D12Device1_CreateRootSignature(device->dev, 0,
2460 ID3D10Blob_GetBufferPointer(sig),
2461 ID3D10Blob_GetBufferSize(sig),
2462 &IID_ID3D12RootSignature,
2463 (void **)&root_sig);
2464 ID3D10Blob_Release(sig);
2465 return root_sig;
2466 }
2467
2468 VKAPI_ATTR VkResult VKAPI_CALL
2469 dzn_CreateDevice(VkPhysicalDevice physicalDevice,
2470 const VkDeviceCreateInfo *pCreateInfo,
2471 const VkAllocationCallbacks *pAllocator,
2472 VkDevice *pDevice)
2473 {
2474 VK_FROM_HANDLE(dzn_physical_device, physical_device, physicalDevice);
2475 VkResult result;
2476
2477 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
2478
2479 /* Check enabled features */
2480 if (pCreateInfo->pEnabledFeatures) {
2481 result = vk_physical_device_check_device_features(&physical_device->vk, pCreateInfo);
2482 if (result != VK_SUCCESS)
2483 return vk_error(physical_device, result);
2484 }
2485
2486 /* Check requested queues and fail if we are requested to create any
2487 * queues with flags we don't support.
2488 */
2489 assert(pCreateInfo->queueCreateInfoCount > 0);
2490 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2491 if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
2492 return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
2493 }
2494
2495 return dzn_device_create(physical_device, pCreateInfo, pAllocator, pDevice);
2496 }
2497
2498 VKAPI_ATTR void VKAPI_CALL
2499 dzn_DestroyDevice(VkDevice dev,
2500 const VkAllocationCallbacks *pAllocator)
2501 {
2502 VK_FROM_HANDLE(dzn_device, device, dev);
2503
2504 device->vk.dispatch_table.DeviceWaitIdle(dev);
2505
2506 dzn_device_destroy(device, pAllocator);
2507 }
2508
2509 static void
2510 dzn_device_memory_destroy(struct dzn_device_memory *mem,
2511 const VkAllocationCallbacks *pAllocator)
2512 {
2513 if (!mem)
2514 return;
2515
2516 struct dzn_device *device = container_of(mem->base.device, struct dzn_device, vk);
2517
2518 if (mem->map && mem->map_res)
2519 ID3D12Resource_Unmap(mem->map_res, 0, NULL);
2520
2521 if (mem->map_res)
2522 ID3D12Resource_Release(mem->map_res);
2523
2524 if (mem->heap)
2525 ID3D12Heap_Release(mem->heap);
2526
2527 if (mem->dedicated_res)
2528 ID3D12Resource_Release(mem->dedicated_res);
2529
2530 #ifdef _WIN32
2531 if (mem->export_handle)
2532 CloseHandle(mem->export_handle);
2533 #else
2534 if ((intptr_t)mem->export_handle >= 0)
2535 close((int)(intptr_t)mem->export_handle);
2536 #endif
2537
2538 vk_object_base_finish(&mem->base);
2539 vk_free2(&device->vk.alloc, pAllocator, mem);
2540 }
2541
2542 static D3D12_HEAP_PROPERTIES
2543 deduce_heap_properties_from_memory(struct dzn_physical_device *pdevice,
2544 const VkMemoryType *mem_type)
2545 {
2546 D3D12_HEAP_PROPERTIES properties = { .Type = D3D12_HEAP_TYPE_CUSTOM };
2547 properties.MemoryPoolPreference =
2548 ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
2549 !pdevice->architecture.UMA) ?
2550 D3D12_MEMORY_POOL_L1 : D3D12_MEMORY_POOL_L0;
2551 if ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) ||
2552 ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && pdevice->architecture.CacheCoherentUMA)) {
2553 properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
2554 } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
2555 properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
2556 } else {
2557 properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE;
2558 }
2559 return properties;
2560 }
2561
2562 static VkResult
2563 dzn_device_memory_create(struct dzn_device *device,
2564 const VkMemoryAllocateInfo *pAllocateInfo,
2565 const VkAllocationCallbacks *pAllocator,
2566 VkDeviceMemory *out)
2567 {
2568 struct dzn_physical_device *pdevice =
2569 container_of(device->vk.physical, struct dzn_physical_device, vk);
2570
2571 const struct dzn_buffer *buffer = NULL;
2572 const struct dzn_image *image = NULL;
2573
2574 VkExternalMemoryHandleTypeFlags export_flags = 0;
2575 HANDLE import_handle = NULL;
2576 bool imported_from_d3d11 = false;
2577 void *host_pointer = NULL;
2578 #ifdef _WIN32
2579 const wchar_t *import_name = NULL;
2580 const VkExportMemoryWin32HandleInfoKHR *win32_export = NULL;
2581 #endif
2582 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
2583 switch (ext->sType) {
2584 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO: {
2585 const VkExportMemoryAllocateInfo *exp =
2586 (const VkExportMemoryAllocateInfo *)ext;
2587
2588 export_flags = exp->handleTypes;
2589 break;
2590 }
2591 #ifdef _WIN32
2592 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR: {
2593 const VkImportMemoryWin32HandleInfoKHR *imp =
2594 (const VkImportMemoryWin32HandleInfoKHR *)ext;
2595 switch (imp->handleType) {
2596 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT:
2597 imported_from_d3d11 = true;
2598 FALLTHROUGH;
2599 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:
2600 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
2601 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
2602 break;
2603 default:
2604 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2605 }
2606 import_handle = imp->handle;
2607 import_name = imp->name;
2608 break;
2609 }
2610 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR:
2611 win32_export = (const VkExportMemoryWin32HandleInfoKHR *)ext;
2612 break;
2613 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT: {
2614 const VkImportMemoryHostPointerInfoEXT *imp =
2615 (const VkImportMemoryHostPointerInfoEXT *)ext;
2616 host_pointer = imp->pHostPointer;
2617 break;
2618 }
2619 #else
2620 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR: {
2621 const VkImportMemoryFdInfoKHR *imp =
2622 (const VkImportMemoryFdInfoKHR *)ext;
2623 switch (imp->handleType) {
2624 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
2625 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
2626 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
2627 break;
2628 default:
2629 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2630 }
2631 import_handle = (HANDLE)(intptr_t)imp->fd;
2632 break;
2633 }
2634 #endif
2635 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO: {
2636 const VkMemoryDedicatedAllocateInfo *dedicated =
2637 (const VkMemoryDedicatedAllocateInfo *)ext;
2638
2639 buffer = dzn_buffer_from_handle(dedicated->buffer);
2640 image = dzn_image_from_handle(dedicated->image);
2641 assert(!buffer || !image);
2642 break;
2643 }
2644 default:
2645 dzn_debug_ignored_stype(ext->sType);
2646 break;
2647 }
2648 }
2649
2650 const VkMemoryType *mem_type =
2651 &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex];
2652
2653 D3D12_HEAP_DESC heap_desc = { 0 };
2654
2655 heap_desc.SizeInBytes = pAllocateInfo->allocationSize;
2656 if (buffer) {
2657 heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
2658 } else if (image) {
2659 heap_desc.Alignment =
2660 image->vk.samples > 1 ?
2661 D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT :
2662 D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
2663 } else {
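      /* Non-dedicated allocation: the heap contents aren't known yet, so use the
       * stricter MSAA placement alignment for allocations large enough to hold
       * an MSAA resource.
       */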
2664 heap_desc.Alignment =
2665 heap_desc.SizeInBytes >= D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT ?
2666 D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT :
2667 D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
2668 }
2669
2670 if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
2671 image = NULL;
2672
2673 VkExternalMemoryHandleTypeFlags valid_flags =
2674 opaque_external_flag |
2675 (buffer || image ?
2676 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT :
2677 VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT);
2678 if (image && imported_from_d3d11)
2679 valid_flags |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT;
2680
2681 if (export_flags & ~valid_flags)
2682 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2683
2684 struct dzn_device_memory *mem =
2685 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
2686 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2687 if (!mem)
2688 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2689
2690 vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
2691 #ifndef _WIN32
2692 mem->export_handle = (HANDLE)(intptr_t)-1;
2693 #endif
2694
2695 /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
2696 assert(pAllocateInfo->allocationSize > 0);
2697
2698 mem->size = pAllocateInfo->allocationSize;
2699
2700 heap_desc.SizeInBytes = ALIGN_POT(heap_desc.SizeInBytes, heap_desc.Alignment);
2701 if (!image && !buffer)
2702 heap_desc.Flags =
2703 dzn_physical_device_get_heap_flags_for_mem_type(pdevice, pAllocateInfo->memoryTypeIndex);
2704 heap_desc.Properties = deduce_heap_properties_from_memory(pdevice, mem_type);
2705 if (export_flags) {
2706 heap_desc.Flags |= D3D12_HEAP_FLAG_SHARED;
2707 assert(host_pointer || heap_desc.Properties.CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE);
2708 }
2709
2710 VkResult error = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2711
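   /* Named imports are resolved through OpenSharedHandleByName(); the
    * temporary handle is closed again on the error path.
    */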
2712 #ifdef _WIN32
2713 HANDLE handle_from_name = NULL;
2714 if (import_name) {
2715 if (FAILED(ID3D12Device_OpenSharedHandleByName(device->dev, import_name, GENERIC_ALL, &handle_from_name))) {
2716 error = VK_ERROR_INVALID_EXTERNAL_HANDLE;
2717 goto cleanup;
2718 }
2719 import_handle = handle_from_name;
2720 }
2721 #endif
2722
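   /* Host-pointer imports go through ID3D12Device13::OpenExistingHeapFromAddress1()
    * and are only accepted if the resulting heap is compatible with the flags
    * and heap properties implied by the selected memory type.
    */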
2723 if (host_pointer) {
2724 error = VK_ERROR_INVALID_EXTERNAL_HANDLE;
2725
2726 #if defined(_WIN32)
2727 if (!device->dev13)
2728 goto cleanup;
2729
2730 if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, host_pointer, heap_desc.SizeInBytes, &IID_ID3D12Heap, (void**)&mem->heap)))
2731 goto cleanup;
2732
2733 D3D12_HEAP_DESC desc = dzn_ID3D12Heap_GetDesc(mem->heap);
2734 if (desc.Properties.Type != D3D12_HEAP_TYPE_CUSTOM)
2735 desc.Properties = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, desc.Properties.Type);
2736
2737 if ((heap_desc.Flags & ~desc.Flags) ||
2738 desc.Properties.CPUPageProperty != heap_desc.Properties.CPUPageProperty ||
2739 desc.Properties.MemoryPoolPreference != heap_desc.Properties.MemoryPoolPreference)
2740 goto cleanup;
2741
2742 mem->map = host_pointer;
2743 mem->res_flags = D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER;
2744 #else
2745 goto cleanup;
2746 #endif
2747 } else if (import_handle) {
2748 error = VK_ERROR_INVALID_EXTERNAL_HANDLE;
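      /* Dedicated allocations open the shared handle as an ID3D12Resource,
       * everything else as an ID3D12Heap; in both cases what was opened is
       * checked against the properties the caller asked for.
       */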
2749 if (image || buffer) {
2750 if (FAILED(ID3D12Device_OpenSharedHandle(device->dev, import_handle, &IID_ID3D12Resource, (void **)&mem->dedicated_res)))
2751 goto cleanup;
2752
2753 /* Verify compatibility */
2754 D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(mem->dedicated_res);
2755 D3D12_HEAP_PROPERTIES opened_props = { 0 };
2756 D3D12_HEAP_FLAGS opened_flags = 0;
2757 ID3D12Resource_GetHeapProperties(mem->dedicated_res, &opened_props, &opened_flags);
2758 if (opened_props.Type != D3D12_HEAP_TYPE_CUSTOM)
2759 opened_props = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, opened_props.Type);
2760
2761 /* Don't validate format, cast lists aren't reflectable so it could be valid */
2762 if (image) {
2763 if (desc.Dimension != image->desc.Dimension ||
2764 desc.MipLevels != image->desc.MipLevels ||
2765 desc.Width != image->desc.Width ||
2766 desc.Height != image->desc.Height ||
2767 desc.DepthOrArraySize != image->desc.DepthOrArraySize ||
2768 (image->desc.Flags & ~desc.Flags) ||
2769 desc.SampleDesc.Count != image->desc.SampleDesc.Count)
2770 goto cleanup;
2771 } else if (desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER ||
2772 desc.Width != buffer->desc.Width ||
2773 buffer->desc.Flags & ~(desc.Flags))
2774 goto cleanup;
2775 if (opened_props.CPUPageProperty != heap_desc.Properties.CPUPageProperty ||
2776 opened_props.MemoryPoolPreference != heap_desc.Properties.MemoryPoolPreference)
2777 goto cleanup;
2778 if ((heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS) && desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
2779 goto cleanup;
2780 if ((heap_desc.Flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) && (desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET))
2781 goto cleanup;
2782 else if ((heap_desc.Flags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) && !(desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET))
2783 goto cleanup;
2784 } else {
2785 if (FAILED(ID3D12Device_OpenSharedHandle(device->dev, import_handle, &IID_ID3D12Heap, (void **)&mem->heap)))
2786 goto cleanup;
2787
2788 D3D12_HEAP_DESC desc = dzn_ID3D12Heap_GetDesc(mem->heap);
2789 if (desc.Properties.Type != D3D12_HEAP_TYPE_CUSTOM)
2790 desc.Properties = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, desc.Properties.Type);
2791
2792 if (desc.Alignment < heap_desc.Alignment ||
2793 desc.SizeInBytes < heap_desc.SizeInBytes ||
2794 (heap_desc.Flags & ~desc.Flags) ||
2795 desc.Properties.CPUPageProperty != heap_desc.Properties.CPUPageProperty ||
2796 desc.Properties.MemoryPoolPreference != heap_desc.Properties.MemoryPoolPreference)
2797 goto cleanup;
2798 }
2799 } else if (image) {
2800 if (device->dev10 && image->castable_format_count > 0) {
2801 D3D12_RESOURCE_DESC1 desc = {
2802 .Dimension = image->desc.Dimension,
2803 .Alignment = image->desc.Alignment,
2804 .Width = image->desc.Width,
2805 .Height = image->desc.Height,
2806 .DepthOrArraySize = image->desc.DepthOrArraySize,
2807 .MipLevels = image->desc.MipLevels,
2808 .Format = image->desc.Format,
2809 .SampleDesc = image->desc.SampleDesc,
2810 .Layout = image->desc.Layout,
2811 .Flags = image->desc.Flags,
2812 };
2813 if (FAILED(ID3D12Device10_CreateCommittedResource3(device->dev10, &heap_desc.Properties,
2814 heap_desc.Flags, &desc,
2815 D3D12_BARRIER_LAYOUT_COMMON,
2816 NULL, NULL,
2817 image->castable_format_count,
2818 image->castable_formats,
2819 &IID_ID3D12Resource,
2820 (void **)&mem->dedicated_res)))
2821 goto cleanup;
2822 } else if (FAILED(ID3D12Device1_CreateCommittedResource(device->dev, &heap_desc.Properties,
2823 heap_desc.Flags, &image->desc,
2824 D3D12_RESOURCE_STATE_COMMON,
2825 NULL,
2826 &IID_ID3D12Resource,
2827 (void **)&mem->dedicated_res)))
2828 goto cleanup;
2829 } else if (buffer) {
2830 if (FAILED(ID3D12Device1_CreateCommittedResource(device->dev, &heap_desc.Properties,
2831 heap_desc.Flags, &buffer->desc,
2832 D3D12_RESOURCE_STATE_COMMON,
2833 NULL,
2834 &IID_ID3D12Resource,
2835 (void **)&mem->dedicated_res)))
2836 goto cleanup;
2837 } else {
2838 if (FAILED(ID3D12Device1_CreateHeap(device->dev, &heap_desc,
2839 &IID_ID3D12Heap,
2840 (void **)&mem->heap)))
2841 goto cleanup;
2842 }
2843
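   /* D3D12 heaps cannot be mapped directly, so host-visible, non-dedicated
    * memory gets a buffer placed over the whole heap that vkMapMemory() can
    * map. Dedicated buffer allocations just reuse the committed resource.
    */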
2844 if ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
2845 !(heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS) &&
2846 !mem->map){
2847 assert(!image);
2848 if (buffer) {
2849 mem->map_res = mem->dedicated_res;
2850 ID3D12Resource_AddRef(mem->map_res);
2851 } else {
2852 D3D12_RESOURCE_DESC res_desc = { 0 };
2853 res_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
2854 res_desc.Format = DXGI_FORMAT_UNKNOWN;
2855 res_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
2856 res_desc.Width = heap_desc.SizeInBytes;
2857 res_desc.Height = 1;
2858 res_desc.DepthOrArraySize = 1;
2859 res_desc.MipLevels = 1;
2860 res_desc.SampleDesc.Count = 1;
2861 res_desc.SampleDesc.Quality = 0;
2862 res_desc.Flags = D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
2863 res_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
2864 HRESULT hr = ID3D12Device1_CreatePlacedResource(device->dev, mem->heap, 0, &res_desc,
2865 D3D12_RESOURCE_STATE_COMMON,
2866 NULL,
2867 &IID_ID3D12Resource,
2868 (void **)&mem->map_res);
2869 if (FAILED(hr))
2870 goto cleanup;
2871 }
2872 }
2873
2874 if (export_flags) {
2875 error = VK_ERROR_INVALID_EXTERNAL_HANDLE;
2876 ID3D12DeviceChild *shareable = mem->heap ? (void *)mem->heap : (void *)mem->dedicated_res;
2877 DWORD dwAccess = GENERIC_ALL; /* Ignore any provided access, this is the only one D3D allows */
2878 #ifdef _WIN32
2879 const SECURITY_ATTRIBUTES *pAttributes = win32_export ? win32_export->pAttributes : NULL;
2880 const wchar_t *name = win32_export ? win32_export->name : NULL;
2881 #else
2882 const SECURITY_ATTRIBUTES *pAttributes = NULL;
2883 const wchar_t *name = NULL;
2884 #endif
2885 if (FAILED(ID3D12Device_CreateSharedHandle(device->dev, shareable, pAttributes,
2886 dwAccess, name, &mem->export_handle)))
2887 goto cleanup;
2888 }
2889
2890 *out = dzn_device_memory_to_handle(mem);
2891 return VK_SUCCESS;
2892
2893 cleanup:
2894 #ifdef _WIN32
2895 if (handle_from_name)
2896 CloseHandle(handle_from_name);
2897 #endif
2898 dzn_device_memory_destroy(mem, pAllocator);
2899 return vk_error(device, error);
2900 }
2901
2902 VKAPI_ATTR VkResult VKAPI_CALL
2903 dzn_AllocateMemory(VkDevice device,
2904 const VkMemoryAllocateInfo *pAllocateInfo,
2905 const VkAllocationCallbacks *pAllocator,
2906 VkDeviceMemory *pMem)
2907 {
2908 return dzn_device_memory_create(dzn_device_from_handle(device),
2909 pAllocateInfo, pAllocator, pMem);
2910 }
2911
2912 VKAPI_ATTR void VKAPI_CALL
2913 dzn_FreeMemory(VkDevice device,
2914 VkDeviceMemory mem,
2915 const VkAllocationCallbacks *pAllocator)
2916 {
2917 dzn_device_memory_destroy(dzn_device_memory_from_handle(mem), pAllocator);
2918 }
2919
2920 VKAPI_ATTR VkResult VKAPI_CALL
2921 dzn_MapMemory(VkDevice _device,
2922 VkDeviceMemory _memory,
2923 VkDeviceSize offset,
2924 VkDeviceSize size,
2925 VkMemoryMapFlags flags,
2926 void **ppData)
2927 {
2928 VK_FROM_HANDLE(dzn_device, device, _device);
2929 VK_FROM_HANDLE(dzn_device_memory, mem, _memory);
2930
2931 if (mem == NULL) {
2932 *ppData = NULL;
2933 return VK_SUCCESS;
2934 }
2935
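   /* Memory imported from a host pointer is already CPU-visible (mem->map was
    * set at allocation time and there is no map_res), so just apply the offset.
    */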
2936 if (mem->map && !mem->map_res) {
2937 *ppData = ((uint8_t *)mem->map) + offset;
2938 return VK_SUCCESS;
2939 }
2940
2941 if (size == VK_WHOLE_SIZE)
2942 size = mem->size - offset;
2943
2944 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
2945 *
2946 * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
2947 *
2948 * * If size is not equal to VK_WHOLE_SIZE, size must be less than or
2949 * equal to the size of the memory minus offset
2950 */
2951 assert(size > 0);
2952 assert(offset + size <= mem->size);
2953
2954 assert(mem->map_res);
2955 D3D12_RANGE range = { 0 };
2956 range.Begin = offset;
2957 range.End = offset + size;
2958 void *map = NULL;
2959 if (FAILED(ID3D12Resource_Map(mem->map_res, 0, &range, &map)))
2960 return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
2961
2962 mem->map = map;
2963 mem->map_size = size;
2964
2965 *ppData = ((uint8_t *) map) + offset;
2966
2967 return VK_SUCCESS;
2968 }
2969
2970 VKAPI_ATTR void VKAPI_CALL
2971 dzn_UnmapMemory(VkDevice _device,
2972 VkDeviceMemory _memory)
2973 {
2974 VK_FROM_HANDLE(dzn_device_memory, mem, _memory);
2975
2976 if (mem == NULL)
2977 return;
2978
2979 if (!mem->map_res)
2980 return;
2981
2982 ID3D12Resource_Unmap(mem->map_res, 0, NULL);
2983
2984 mem->map = NULL;
2985 mem->map_size = 0;
2986 }
2987
2988 VKAPI_ATTR VkResult VKAPI_CALL
2989 dzn_FlushMappedMemoryRanges(VkDevice _device,
2990 uint32_t memoryRangeCount,
2991 const VkMappedMemoryRange *pMemoryRanges)
2992 {
2993 return VK_SUCCESS;
2994 }
2995
2996 VKAPI_ATTR VkResult VKAPI_CALL
2997 dzn_InvalidateMappedMemoryRanges(VkDevice _device,
2998 uint32_t memoryRangeCount,
2999 const VkMappedMemoryRange *pMemoryRanges)
3000 {
3001 return VK_SUCCESS;
3002 }
3003
3004 static void
3005 dzn_buffer_destroy(struct dzn_buffer *buf, const VkAllocationCallbacks *pAllocator)
3006 {
3007 if (!buf)
3008 return;
3009
3010 struct dzn_device *device = container_of(buf->base.device, struct dzn_device, vk);
3011
3012 if (buf->res)
3013 ID3D12Resource_Release(buf->res);
3014
3015 dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->cbv_bindless_slot);
3016 dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->uav_bindless_slot);
3017 if (buf->custom_views) {
3018 hash_table_foreach(buf->custom_views, entry) {
3019 free((void *)entry->key);
3020 dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, (int)(intptr_t)entry->data);
3021 }
3022 _mesa_hash_table_destroy(buf->custom_views, NULL);
3023 }
3024
3025 vk_object_base_finish(&buf->base);
3026 vk_free2(&device->vk.alloc, pAllocator, buf);
3027 }
3028
3029 static VkResult
3030 dzn_buffer_create(struct dzn_device *device,
3031 const VkBufferCreateInfo *pCreateInfo,
3032 const VkAllocationCallbacks *pAllocator,
3033 VkBuffer *out)
3034 {
3035 struct dzn_buffer *buf =
3036 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*buf), 8,
3037 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3038 if (!buf)
3039 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3040
3041 vk_object_base_init(&device->vk, &buf->base, VK_OBJECT_TYPE_BUFFER);
3042 buf->create_flags = pCreateInfo->flags;
3043 buf->size = pCreateInfo->size;
3044 buf->usage = pCreateInfo->usage;
3045
3046 if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
3047 buf->size = MAX2(buf->size, ALIGN_POT(buf->size, 256));
3048 if (buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
3049 buf->size = MAX2(buf->size, ALIGN_POT(buf->size, 4));
3050
3051 buf->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
3052 buf->desc.Format = DXGI_FORMAT_UNKNOWN;
3053 buf->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
3054 buf->desc.Width = buf->size;
3055 buf->desc.Height = 1;
3056 buf->desc.DepthOrArraySize = 1;
3057 buf->desc.MipLevels = 1;
3058 buf->desc.SampleDesc.Count = 1;
3059 buf->desc.SampleDesc.Quality = 0;
3060 buf->desc.Flags = D3D12_RESOURCE_FLAG_NONE;
3061 buf->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
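   /* Access mask used with D3D12 enhanced barriers; UAV access is only added
    * below when the buffer can be used for storage.
    */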
3062 buf->valid_access =
3063 D3D12_BARRIER_ACCESS_VERTEX_BUFFER |
3064 D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
3065 D3D12_BARRIER_ACCESS_INDEX_BUFFER |
3066 D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
3067 D3D12_BARRIER_ACCESS_STREAM_OUTPUT |
3068 D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
3069 D3D12_BARRIER_ACCESS_PREDICATION |
3070 D3D12_BARRIER_ACCESS_COPY_DEST |
3071 D3D12_BARRIER_ACCESS_COPY_SOURCE |
3072 D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ |
3073 D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
3074
3075 if (buf->usage &
3076 (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
3077 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) {
3078 buf->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
3079 buf->valid_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
3080 }
3081
3082 buf->cbv_bindless_slot = buf->uav_bindless_slot = -1;
3083 if (device->bindless) {
3084 if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
3085 buf->cbv_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
3086 if (buf->cbv_bindless_slot < 0) {
3087 dzn_buffer_destroy(buf, pAllocator);
3088 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3089 }
3090 }
3091 if (buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) {
3092 buf->uav_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
3093 if (buf->uav_bindless_slot < 0) {
3094 dzn_buffer_destroy(buf, pAllocator);
3095 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3096 }
3097 }
3098 }
3099
3100 if (device->bindless)
3101 mtx_init(&buf->bindless_view_lock, mtx_plain);
3102
3103 const VkExternalMemoryBufferCreateInfo *external_info =
3104 vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO);
3105 if (external_info && external_info->handleTypes != 0)
3106 buf->shared = true;
3107
3108 *out = dzn_buffer_to_handle(buf);
3109 return VK_SUCCESS;
3110 }
3111
3112 DXGI_FORMAT
3113 dzn_buffer_get_dxgi_format(VkFormat format)
3114 {
3115 enum pipe_format pfmt = vk_format_to_pipe_format(format);
3116
3117 return dzn_pipe_to_dxgi_format(pfmt);
3118 }
3119
3120 D3D12_TEXTURE_COPY_LOCATION
3121 dzn_buffer_get_copy_loc(const struct dzn_buffer *buf,
3122 VkFormat format,
3123 const VkBufferImageCopy2 *region,
3124 VkImageAspectFlagBits aspect,
3125 uint32_t layer)
3126 {
3127 struct dzn_physical_device *pdev =
3128 container_of(buf->base.device->physical, struct dzn_physical_device, vk);
3129 const uint32_t buffer_row_length =
3130 region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width;
3131
3132 VkFormat plane_format = dzn_image_get_plane_format(format, aspect);
3133
3134 enum pipe_format pfmt = vk_format_to_pipe_format(plane_format);
3135 uint32_t blksz = util_format_get_blocksize(pfmt);
3136 uint32_t blkw = util_format_get_blockwidth(pfmt);
3137 uint32_t blkh = util_format_get_blockheight(pfmt);
3138
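   /* Buffer copies use a placed footprint: RowPitch and the per-layer offset
    * are derived from the plane format's block size, so compressed and
    * multi-planar formats are handled the same way as linear ones.
    */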
3139 D3D12_TEXTURE_COPY_LOCATION loc = {
3140 .pResource = buf->res,
3141 .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
3142 .PlacedFootprint = {
3143 .Footprint = {
3144 .Format =
3145 dzn_image_get_placed_footprint_format(pdev, format, aspect),
3146 .Width = region->imageExtent.width,
3147 .Height = region->imageExtent.height,
3148 .Depth = region->imageExtent.depth,
3149 .RowPitch = blksz * DIV_ROUND_UP(buffer_row_length, blkw),
3150 },
3151 },
3152 };
3153
3154 uint32_t buffer_layer_stride =
3155 loc.PlacedFootprint.Footprint.RowPitch *
3156 DIV_ROUND_UP(loc.PlacedFootprint.Footprint.Height, blkh);
3157
3158 loc.PlacedFootprint.Offset =
3159 region->bufferOffset + (layer * buffer_layer_stride);
3160
3161 return loc;
3162 }
3163
3164 D3D12_TEXTURE_COPY_LOCATION
3165 dzn_buffer_get_line_copy_loc(const struct dzn_buffer *buf, VkFormat format,
3166 const VkBufferImageCopy2 *region,
3167 const D3D12_TEXTURE_COPY_LOCATION *loc,
3168 uint32_t y, uint32_t z, uint32_t *start_x)
3169 {
3170 uint32_t buffer_row_length =
3171 region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width;
3172 uint32_t buffer_image_height =
3173 region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height;
3174
3175 format = dzn_image_get_plane_format(format, region->imageSubresource.aspectMask);
3176
3177 enum pipe_format pfmt = vk_format_to_pipe_format(format);
3178 uint32_t blksz = util_format_get_blocksize(pfmt);
3179 uint32_t blkw = util_format_get_blockwidth(pfmt);
3180 uint32_t blkh = util_format_get_blockheight(pfmt);
3181 uint32_t blkd = util_format_get_blockdepth(pfmt);
3182 D3D12_TEXTURE_COPY_LOCATION new_loc = *loc;
3183 uint32_t buffer_row_stride =
3184 DIV_ROUND_UP(buffer_row_length, blkw) * blksz;
3185 uint32_t buffer_layer_stride =
3186 buffer_row_stride *
3187 DIV_ROUND_UP(buffer_image_height, blkh);
3188
3189 uint64_t tex_offset =
3190 ((y / blkh) * buffer_row_stride) +
3191 ((z / blkd) * buffer_layer_stride);
3192 uint64_t offset = loc->PlacedFootprint.Offset + tex_offset;
3193 uint32_t offset_alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;
3194
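   /* Find the smallest multiple of D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512)
    * that is also a multiple of the block size, so the rebased footprint offset
    * below stays block-aligned. E.g. for a 12-byte R32G32B32 texel this loops
    * 512 -> 1024 -> 1536.
    */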
3195 while (offset_alignment % blksz)
3196 offset_alignment += D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;
3197
3198 new_loc.PlacedFootprint.Footprint.Height = blkh;
3199 new_loc.PlacedFootprint.Footprint.Depth = 1;
3200 new_loc.PlacedFootprint.Offset = (offset / offset_alignment) * offset_alignment;
3201 *start_x = ((offset % offset_alignment) / blksz) * blkw;
3202 new_loc.PlacedFootprint.Footprint.Width = *start_x + region->imageExtent.width;
3203 new_loc.PlacedFootprint.Footprint.RowPitch =
3204 ALIGN_POT(DIV_ROUND_UP(new_loc.PlacedFootprint.Footprint.Width, blkw) * blksz,
3205 D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
3206 return new_loc;
3207 }
3208
3209 bool
3210 dzn_buffer_supports_region_copy(struct dzn_physical_device *pdev,
3211 const D3D12_TEXTURE_COPY_LOCATION *loc)
3212 {
3213 if (pdev->options13.UnrestrictedBufferTextureCopyPitchSupported)
3214 return true;
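   /* Without unrestricted copy pitch support, placed-footprint copies need a
    * 512-byte aligned offset and a 256-byte aligned row pitch.
    */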
3215 return !(loc->PlacedFootprint.Offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1)) &&
3216 !(loc->PlacedFootprint.Footprint.RowPitch & (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1));
3217 }
3218
3219 VKAPI_ATTR VkResult VKAPI_CALL
3220 dzn_CreateBuffer(VkDevice device,
3221 const VkBufferCreateInfo *pCreateInfo,
3222 const VkAllocationCallbacks *pAllocator,
3223 VkBuffer *pBuffer)
3224 {
3225 return dzn_buffer_create(dzn_device_from_handle(device),
3226 pCreateInfo, pAllocator, pBuffer);
3227 }
3228
3229 VKAPI_ATTR void VKAPI_CALL
3230 dzn_DestroyBuffer(VkDevice device,
3231 VkBuffer buffer,
3232 const VkAllocationCallbacks *pAllocator)
3233 {
3234 dzn_buffer_destroy(dzn_buffer_from_handle(buffer), pAllocator);
3235 }
3236
3237 VKAPI_ATTR void VKAPI_CALL
3238 dzn_GetBufferMemoryRequirements2(VkDevice dev,
3239 const VkBufferMemoryRequirementsInfo2 *pInfo,
3240 VkMemoryRequirements2 *pMemoryRequirements)
3241 {
3242 VK_FROM_HANDLE(dzn_device, device, dev);
3243 VK_FROM_HANDLE(dzn_buffer, buffer, pInfo->buffer);
3244 struct dzn_physical_device *pdev =
3245 container_of(device->vk.physical, struct dzn_physical_device, vk);
3246
3247 uint32_t alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
3248 VkDeviceSize size = buffer->size;
3249
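   /* D3D12 constant buffer views must be 256-byte aligned, so uniform buffers
    * get their reported size and alignment bumped accordingly.
    */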
3250 if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
3251 alignment = MAX2(alignment, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
3252 size = ALIGN_POT(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
3253 }
3254
3255 pMemoryRequirements->memoryRequirements.size = size;
3256 pMemoryRequirements->memoryRequirements.alignment = alignment;
3257 pMemoryRequirements->memoryRequirements.memoryTypeBits =
3258 dzn_physical_device_get_mem_type_mask_for_resource(pdev, &buffer->desc, buffer->shared);
3259
3260 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3261 switch (ext->sType) {
3262 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3263 VkMemoryDedicatedRequirements *requirements =
3264 (VkMemoryDedicatedRequirements *)ext;
3265 requirements->requiresDedicatedAllocation = false;
3266 requirements->prefersDedicatedAllocation = false;
3267 break;
3268 }
3269
3270 default:
3271 dzn_debug_ignored_stype(ext->sType);
3272 break;
3273 }
3274 }
3275 }
3276
3277 VKAPI_ATTR VkResult VKAPI_CALL
3278 dzn_BindBufferMemory2(VkDevice _device,
3279 uint32_t bindInfoCount,
3280 const VkBindBufferMemoryInfo *pBindInfos)
3281 {
3282 VK_FROM_HANDLE(dzn_device, device, _device);
3283
3284 for (uint32_t i = 0; i < bindInfoCount; i++) {
3285 assert(pBindInfos[i].sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
3286
3287 VK_FROM_HANDLE(dzn_device_memory, mem, pBindInfos[i].memory);
3288 VK_FROM_HANDLE(dzn_buffer, buffer, pBindInfos[i].buffer);
3289
3290 if (mem->dedicated_res) {
3291 assert(pBindInfos[i].memoryOffset == 0 &&
3292 buffer->size == mem->size);
3293 buffer->res = mem->dedicated_res;
3294 ID3D12Resource_AddRef(buffer->res);
3295 } else {
3296 D3D12_RESOURCE_DESC desc = buffer->desc;
3297 desc.Flags |= mem->res_flags;
3298 if (FAILED(ID3D12Device1_CreatePlacedResource(device->dev, mem->heap,
3299 pBindInfos[i].memoryOffset,
3300 &desc,
3301 D3D12_RESOURCE_STATE_COMMON,
3302 NULL,
3303 &IID_ID3D12Resource,
3304 (void **)&buffer->res)))
3305 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3306 }
3307
3308 buffer->gpuva = ID3D12Resource_GetGPUVirtualAddress(buffer->res);
3309
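      /* In bindless mode, whole-buffer CBV/UAV descriptors are written into
       * the device-global descriptor heap now that the GPU VA is known.
       */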
3310 if (device->bindless) {
3311 struct dzn_buffer_desc buf_desc = {
3312 .buffer = buffer,
3313 .offset = 0,
3314 .range = VK_WHOLE_SIZE,
3315 };
3316 if (buffer->cbv_bindless_slot >= 0) {
3317 buf_desc.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
3318 dzn_descriptor_heap_write_buffer_desc(device,
3319 &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
3320 buffer->cbv_bindless_slot,
3321 false,
3322 &buf_desc);
3323 }
3324 if (buffer->uav_bindless_slot >= 0) {
3325 buf_desc.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3326 dzn_descriptor_heap_write_buffer_desc(device,
3327 &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
3328 buffer->uav_bindless_slot,
3329 true,
3330 &buf_desc);
3331 }
3332 }
3333 }
3334
3335 return VK_SUCCESS;
3336 }
3337
3338 static void
3339 dzn_event_destroy(struct dzn_event *event,
3340 const VkAllocationCallbacks *pAllocator)
3341 {
3342 if (!event)
3343 return;
3344
3345 struct dzn_device *device =
3346 container_of(event->base.device, struct dzn_device, vk);
3347
3348 if (event->fence)
3349 ID3D12Fence_Release(event->fence);
3350
3351 vk_object_base_finish(&event->base);
3352 vk_free2(&device->vk.alloc, pAllocator, event);
3353 }
3354
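/* VkEvent is backed by an ID3D12Fence: a completed value of 0 means reset,
 * 1 means set (see dzn_ResetEvent()/dzn_SetEvent() below).
 */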
3355 static VkResult
3356 dzn_event_create(struct dzn_device *device,
3357 const VkEventCreateInfo *pCreateInfo,
3358 const VkAllocationCallbacks *pAllocator,
3359 VkEvent *out)
3360 {
3361 struct dzn_event *event =
3362 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
3363 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3364 if (!event)
3365 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3366
3367 vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
3368
3369 if (FAILED(ID3D12Device1_CreateFence(device->dev, 0, D3D12_FENCE_FLAG_NONE,
3370 &IID_ID3D12Fence,
3371 (void **)&event->fence))) {
3372 dzn_event_destroy(event, pAllocator);
3373 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3374 }
3375
3376 *out = dzn_event_to_handle(event);
3377 return VK_SUCCESS;
3378 }
3379
3380 VKAPI_ATTR VkResult VKAPI_CALL
3381 dzn_CreateEvent(VkDevice device,
3382 const VkEventCreateInfo *pCreateInfo,
3383 const VkAllocationCallbacks *pAllocator,
3384 VkEvent *pEvent)
3385 {
3386 return dzn_event_create(dzn_device_from_handle(device),
3387 pCreateInfo, pAllocator, pEvent);
3388 }
3389
3390 VKAPI_ATTR void VKAPI_CALL
3391 dzn_DestroyEvent(VkDevice device,
3392 VkEvent event,
3393 const VkAllocationCallbacks *pAllocator)
3394 {
3395 dzn_event_destroy(dzn_event_from_handle(event), pAllocator);
3396 }
3397
3398 VKAPI_ATTR VkResult VKAPI_CALL
3399 dzn_ResetEvent(VkDevice dev,
3400 VkEvent evt)
3401 {
3402 VK_FROM_HANDLE(dzn_device, device, dev);
3403 VK_FROM_HANDLE(dzn_event, event, evt);
3404
3405 if (FAILED(ID3D12Fence_Signal(event->fence, 0)))
3406 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3407
3408 return VK_SUCCESS;
3409 }
3410
3411 VKAPI_ATTR VkResult VKAPI_CALL
3412 dzn_SetEvent(VkDevice dev,
3413 VkEvent evt)
3414 {
3415 VK_FROM_HANDLE(dzn_device, device, dev);
3416 VK_FROM_HANDLE(dzn_event, event, evt);
3417
3418 if (FAILED(ID3D12Fence_Signal(event->fence, 1)))
3419 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3420
3421 return VK_SUCCESS;
3422 }
3423
3424 VKAPI_ATTR VkResult VKAPI_CALL
3425 dzn_GetEventStatus(VkDevice device,
3426 VkEvent evt)
3427 {
3428 VK_FROM_HANDLE(dzn_event, event, evt);
3429
3430 return ID3D12Fence_GetCompletedValue(event->fence) == 0 ?
3431 VK_EVENT_RESET : VK_EVENT_SET;
3432 }
3433
3434 VKAPI_ATTR void VKAPI_CALL
3435 dzn_GetDeviceMemoryCommitment(VkDevice device,
3436 VkDeviceMemory memory,
3437 VkDeviceSize *pCommittedMemoryInBytes)
3438 {
3439 VK_FROM_HANDLE(dzn_device_memory, mem, memory);
3440
3441 // TODO: find if there's a way to query/track actual heap residency
3442 *pCommittedMemoryInBytes = mem->size;
3443 }
3444
3445 VKAPI_ATTR VkResult VKAPI_CALL
3446 dzn_QueueBindSparse(VkQueue queue,
3447 uint32_t bindInfoCount,
3448 const VkBindSparseInfo *pBindInfo,
3449 VkFence fence)
3450 {
3451 // FIXME: add proper implem
3452 dzn_stub();
3453 return VK_SUCCESS;
3454 }
3455
3456 static D3D12_TEXTURE_ADDRESS_MODE
3457 dzn_sampler_translate_addr_mode(VkSamplerAddressMode in)
3458 {
3459 switch (in) {
3460 case VK_SAMPLER_ADDRESS_MODE_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
3461 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
3462 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3463 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER;
3464 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
3465 default: unreachable("Invalid address mode");
3466 }
3467 }
3468
3469 static void
3470 dzn_sampler_destroy(struct dzn_sampler *sampler,
3471 const VkAllocationCallbacks *pAllocator)
3472 {
3473 if (!sampler)
3474 return;
3475
3476 struct dzn_device *device =
3477 container_of(sampler->base.device, struct dzn_device, vk);
3478
3479 dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, sampler->bindless_slot);
3480
3481 vk_object_base_finish(&sampler->base);
3482 vk_free2(&device->vk.alloc, pAllocator, sampler);
3483 }
3484
3485 static VkResult
3486 dzn_sampler_create(struct dzn_device *device,
3487 const VkSamplerCreateInfo *pCreateInfo,
3488 const VkAllocationCallbacks *pAllocator,
3489 VkSampler *out)
3490 {
3491 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
3492 struct dzn_sampler *sampler =
3493 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
3494 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3495 if (!sampler)
3496 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3497
3498 vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);
3499
3500 const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = (const VkSamplerCustomBorderColorCreateInfoEXT *)
3501 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
3502
3503 /* TODO: have a sampler pool to allocate shader-invisible descs which we
3504 * can copy to the desc_set when UpdateDescriptorSets() is called.
3505 */
3506 sampler->desc.Filter = dzn_translate_sampler_filter(pdev, pCreateInfo);
3507 sampler->desc.AddressU = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeU);
3508 sampler->desc.AddressV = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeV);
3509 sampler->desc.AddressW = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeW);
3510 sampler->desc.MipLODBias = pCreateInfo->mipLodBias;
3511 sampler->desc.MaxAnisotropy = pCreateInfo->maxAnisotropy;
3512 sampler->desc.MinLOD = pCreateInfo->minLod;
3513 sampler->desc.MaxLOD = pCreateInfo->maxLod;
3514
3515 if (pCreateInfo->compareEnable)
3516 sampler->desc.ComparisonFunc = dzn_translate_compare_op(pCreateInfo->compareOp);
3517
3518 bool reads_border_color =
3519 pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
3520 pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
3521 pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
3522
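   /* static_border_color is kept around for the static/immutable sampler path;
    * custom border colors are marked with -1 since D3D12 static samplers
    * cannot encode arbitrary border values.
    */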
3523 if (reads_border_color) {
3524 switch (pCreateInfo->borderColor) {
3525 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3526 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3527 sampler->desc.FloatBorderColor[0] = 0.0f;
3528 sampler->desc.FloatBorderColor[1] = 0.0f;
3529 sampler->desc.FloatBorderColor[2] = 0.0f;
3530 sampler->desc.FloatBorderColor[3] =
3531 pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? 0.0f : 1.0f;
3532 sampler->static_border_color =
3533 pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ?
3534 D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK :
3535 D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
3536 break;
3537 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3538 sampler->desc.FloatBorderColor[0] = sampler->desc.FloatBorderColor[1] = 1.0f;
3539 sampler->desc.FloatBorderColor[2] = sampler->desc.FloatBorderColor[3] = 1.0f;
3540 sampler->static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE;
3541 break;
3542 case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
3543 sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1;
3544 for (unsigned i = 0; i < ARRAY_SIZE(sampler->desc.FloatBorderColor); i++)
3545 sampler->desc.FloatBorderColor[i] = pBorderColor->customBorderColor.float32[i];
3546 break;
3547 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3548 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3549 sampler->desc.UintBorderColor[0] = 0;
3550 sampler->desc.UintBorderColor[1] = 0;
3551 sampler->desc.UintBorderColor[2] = 0;
3552 sampler->desc.UintBorderColor[3] =
3553 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_TRANSPARENT_BLACK ? 0 : 1;
3554 sampler->static_border_color =
3555 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_TRANSPARENT_BLACK ?
3556 D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK :
3557 D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK_UINT;
3558 sampler->desc.Flags = D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR;
3559 break;
3560 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3561 sampler->desc.UintBorderColor[0] = sampler->desc.UintBorderColor[1] = 1;
3562 sampler->desc.UintBorderColor[2] = sampler->desc.UintBorderColor[3] = 1;
3563 sampler->static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE_UINT;
3564 sampler->desc.Flags = D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR;
3565 break;
3566 case VK_BORDER_COLOR_INT_CUSTOM_EXT:
3567 sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1;
3568 for (unsigned i = 0; i < ARRAY_SIZE(sampler->desc.UintBorderColor); i++)
3569 sampler->desc.UintBorderColor[i] = pBorderColor->customBorderColor.uint32[i];
3570 sampler->desc.Flags = D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR;
3571 break;
3572 default:
3573 unreachable("Unsupported border color");
3574 }
3575 }
3576
3577 if (pCreateInfo->unnormalizedCoordinates && pdev->options17.NonNormalizedCoordinateSamplersSupported)
3578 sampler->desc.Flags |= D3D12_SAMPLER_FLAG_NON_NORMALIZED_COORDINATES;
3579
3580 sampler->bindless_slot = -1;
3581 if (device->bindless) {
3582 sampler->bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
3583 if (sampler->bindless_slot < 0) {
3584 dzn_sampler_destroy(sampler, pAllocator);
3585 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3586 }
3587
3588 dzn_descriptor_heap_write_sampler_desc(device,
3589 &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER].heap,
3590 sampler->bindless_slot,
3591 sampler);
3592 }
3593
3594 *out = dzn_sampler_to_handle(sampler);
3595 return VK_SUCCESS;
3596 }
3597
3598 VKAPI_ATTR VkResult VKAPI_CALL
3599 dzn_CreateSampler(VkDevice device,
3600 const VkSamplerCreateInfo *pCreateInfo,
3601 const VkAllocationCallbacks *pAllocator,
3602 VkSampler *pSampler)
3603 {
3604 return dzn_sampler_create(dzn_device_from_handle(device),
3605 pCreateInfo, pAllocator, pSampler);
3606 }
3607
3608 VKAPI_ATTR void VKAPI_CALL
3609 dzn_DestroySampler(VkDevice device,
3610 VkSampler sampler,
3611 const VkAllocationCallbacks *pAllocator)
3612 {
3613 dzn_sampler_destroy(dzn_sampler_from_handle(sampler), pAllocator);
3614 }
3615
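/* Bindless descriptor slots come from a per-heap-type free-list, falling back
 * to bump allocation while the device-global heap still has room; -1 means
 * the heap is exhausted.
 */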
3616 int
3617 dzn_device_descriptor_heap_alloc_slot(struct dzn_device *device,
3618 D3D12_DESCRIPTOR_HEAP_TYPE type)
3619 {
3620 struct dzn_device_descriptor_heap *heap = &device->device_heaps[type];
3621 mtx_lock(&heap->lock);
3622
3623 int ret = -1;
3624 if (heap->slot_freelist.size)
3625 ret = util_dynarray_pop(&heap->slot_freelist, int);
3626 else if (heap->next_alloc_slot < heap->heap.desc_count)
3627 ret = heap->next_alloc_slot++;
3628
3629 mtx_unlock(&heap->lock);
3630 return ret;
3631 }
3632
3633 void
3634 dzn_device_descriptor_heap_free_slot(struct dzn_device *device,
3635 D3D12_DESCRIPTOR_HEAP_TYPE type,
3636 int slot)
3637 {
3638 struct dzn_device_descriptor_heap *heap = &device->device_heaps[type];
3639 assert(slot < 0 || slot < heap->heap.desc_count);
3640
3641 if (slot < 0)
3642 return;
3643
3644 mtx_lock(&heap->lock);
3645 util_dynarray_append(&heap->slot_freelist, int, slot);
3646 mtx_unlock(&heap->lock);
3647 }
3648
3649 VKAPI_ATTR void VKAPI_CALL
3650 dzn_GetDeviceGroupPeerMemoryFeatures(VkDevice device,
3651 uint32_t heapIndex,
3652 uint32_t localDeviceIndex,
3653 uint32_t remoteDeviceIndex,
3654 VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
3655 {
3656 *pPeerMemoryFeatures = 0;
3657 }
3658
3659 VKAPI_ATTR void VKAPI_CALL
3660 dzn_GetImageSparseMemoryRequirements2(VkDevice device,
3661 const VkImageSparseMemoryRequirementsInfo2* pInfo,
3662 uint32_t *pSparseMemoryRequirementCount,
3663 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
3664 {
3665 *pSparseMemoryRequirementCount = 0;
3666 }
3667
3668 VKAPI_ATTR VkResult VKAPI_CALL
3669 dzn_CreateSamplerYcbcrConversion(VkDevice device,
3670 const VkSamplerYcbcrConversionCreateInfo *pCreateInfo,
3671 const VkAllocationCallbacks *pAllocator,
3672 VkSamplerYcbcrConversion *pYcbcrConversion)
3673 {
3674 unreachable("Ycbcr sampler conversion is not supported");
3675 return VK_SUCCESS;
3676 }
3677
3678 VKAPI_ATTR void VKAPI_CALL
3679 dzn_DestroySamplerYcbcrConversion(VkDevice device,
3680 VkSamplerYcbcrConversion YcbcrConversion,
3681 const VkAllocationCallbacks *pAllocator)
3682 {
3683 unreachable("Ycbcr sampler conversion is not supported");
3684 }
3685
3686 VKAPI_ATTR VkDeviceAddress VKAPI_CALL
3687 dzn_GetBufferDeviceAddress(VkDevice device,
3688 const VkBufferDeviceAddressInfo* pInfo)
3689 {
3690 struct dzn_buffer *buffer = dzn_buffer_from_handle(pInfo->buffer);
3691
3692 return buffer->gpuva;
3693 }
3694
3695 VKAPI_ATTR uint64_t VKAPI_CALL
3696 dzn_GetBufferOpaqueCaptureAddress(VkDevice device,
3697 const VkBufferDeviceAddressInfo *pInfo)
3698 {
3699 return 0;
3700 }
3701
3702 VKAPI_ATTR uint64_t VKAPI_CALL
3703 dzn_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
3704 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3705 {
3706 return 0;
3707 }
3708
3709 #ifdef _WIN32
3710 VKAPI_ATTR VkResult VKAPI_CALL
3711 dzn_GetMemoryWin32HandleKHR(VkDevice device,
3712 const VkMemoryGetWin32HandleInfoKHR *pGetWin32HandleInfo,
3713 HANDLE *pHandle)
3714 {
3715 VK_FROM_HANDLE(dzn_device_memory, mem, pGetWin32HandleInfo->memory);
3716 if (!mem->export_handle)
3717 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3718
3719 switch (pGetWin32HandleInfo->handleType) {
3720 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:
3721 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT:
3722 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
3723 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
3724 if (!DuplicateHandle(GetCurrentProcess(), mem->export_handle, GetCurrentProcess(), pHandle,
3725 0, false, DUPLICATE_SAME_ACCESS))
3726 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3727 return VK_SUCCESS;
3728 default:
3729 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3730 }
3731 }
3732 #else
3733 VKAPI_ATTR VkResult VKAPI_CALL
3734 dzn_GetMemoryFdKHR(VkDevice device,
3735 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3736 int *pFd)
3737 {
3738 VK_FROM_HANDLE(dzn_device_memory, mem, pGetFdInfo->memory);
3739 if (!mem->export_handle)
3740 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3741
3742 switch (pGetFdInfo->handleType) {
3743 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
3744 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
3745 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
3746 *pFd = (int)(intptr_t)mem->export_handle;
3747 mem->export_handle = (HANDLE)(intptr_t)-1;
3748 return VK_SUCCESS;
3749 default:
3750 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3751 }
3752 }
3753 #endif
3754
3755 #ifdef _WIN32
3756 VKAPI_ATTR VkResult VKAPI_CALL
3757 dzn_GetMemoryWin32HandlePropertiesKHR(VkDevice _device,
3758 VkExternalMemoryHandleTypeFlagBits handleType,
3759 HANDLE handle,
3760 VkMemoryWin32HandlePropertiesKHR *pProperties)
3761 {
3762 #else
3763 VKAPI_ATTR VkResult VKAPI_CALL
3764 dzn_GetMemoryFdPropertiesKHR(VkDevice _device,
3765 VkExternalMemoryHandleTypeFlagBits handleType,
3766 int fd,
3767 VkMemoryFdPropertiesKHR *pProperties)
3768 {
3769 HANDLE handle = (HANDLE)(intptr_t)fd;
3770 #endif
3771 VK_FROM_HANDLE(dzn_device, device, _device);
3772 IUnknown *opened_object;
3773 if (FAILED(ID3D12Device_OpenSharedHandle(device->dev, handle, &IID_IUnknown, (void **)&opened_object)))
3774 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3775
3776 VkResult result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
3777 ID3D12Resource *res = NULL;
3778 ID3D12Heap *heap = NULL;
3779 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
3780
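   /* Opaque handles may wrap either a resource or a heap; D3D11/D3D12 resource
    * handles must resolve to a resource and heap handles to a heap. The heap
    * properties of whatever was opened are then matched against each exposed
    * memory type.
    */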
3781 switch (handleType) {
3782 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:
3783 (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Resource, (void **)&res);
3784 (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Heap, (void **)&heap);
3785 break;
3786 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT:
3787 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
3788 (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Resource, (void **)&res);
3789 break;
3790 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
3791 (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Heap, (void **)&heap);
3792 break;
3793 default:
3794 goto cleanup;
3795 }
3796 if (!res && !heap)
3797 goto cleanup;
3798
3799 D3D12_HEAP_DESC heap_desc;
3800 if (res)
3801 ID3D12Resource_GetHeapProperties(res, &heap_desc.Properties, &heap_desc.Flags);
3802 else
3803 heap_desc = dzn_ID3D12Heap_GetDesc(heap);
3804 if (heap_desc.Properties.Type != D3D12_HEAP_TYPE_CUSTOM)
3805 heap_desc.Properties = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, heap_desc.Properties.Type);
3806
3807 for (uint32_t i = 0; i < pdev->memory.memoryTypeCount; ++i) {
3808 const VkMemoryType *mem_type = &pdev->memory.memoryTypes[i];
3809 D3D12_HEAP_PROPERTIES required_props = deduce_heap_properties_from_memory(pdev, mem_type);
3810 if (heap_desc.Properties.CPUPageProperty != required_props.CPUPageProperty ||
3811 heap_desc.Properties.MemoryPoolPreference != required_props.MemoryPoolPreference)
3812 continue;
3813
3814 D3D12_HEAP_FLAGS required_flags = dzn_physical_device_get_heap_flags_for_mem_type(pdev, i);
3815 if ((heap_desc.Flags & required_flags) != required_flags)
3816 continue;
3817
3818 pProperties->memoryTypeBits |= (1 << i);
3819 }
3820 result = VK_SUCCESS;
3821
3822 cleanup:
3823 IUnknown_Release(opened_object);
3824 if (res)
3825 ID3D12Resource_Release(res);
3826 if (heap)
3827 ID3D12Heap_Release(heap);
3828 return result;
3829 }
3830
3831 #if defined(_WIN32)
3832 VKAPI_ATTR VkResult VKAPI_CALL
3833 dzn_GetMemoryHostPointerPropertiesEXT(VkDevice _device,
3834 VkExternalMemoryHandleTypeFlagBits handleType,
3835 const void *pHostPointer,
3836 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
3837 {
3838 VK_FROM_HANDLE(dzn_device, device, _device);
3839
3840 if (!device->dev13)
3841 return VK_ERROR_FEATURE_NOT_PRESENT;
3842
3843 ID3D12Heap *heap;
3844 if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, pHostPointer, 1, &IID_ID3D12Heap, (void **)&heap)))
3845 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
3846
3847 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
3848 D3D12_HEAP_DESC heap_desc = dzn_ID3D12Heap_GetDesc(heap);
3849 for (uint32_t i = 0; i < pdev->memory.memoryTypeCount; ++i) {
3850 const VkMemoryType *mem_type = &pdev->memory.memoryTypes[i];
3851 D3D12_HEAP_PROPERTIES required_props = deduce_heap_properties_from_memory(pdev, mem_type);
3852 if (heap_desc.Properties.CPUPageProperty != required_props.CPUPageProperty ||
3853 heap_desc.Properties.MemoryPoolPreference != required_props.MemoryPoolPreference)
3854 continue;
3855
3856 pMemoryHostPointerProperties->memoryTypeBits |= (1 << i);
3857 }
3858 ID3D12Heap_Release(heap);
3859 return VK_SUCCESS;
3860 }
3861 #endif
3862