1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "nvk_physical_device.h"
6
7 #include "nak.h"
8 #include "nvk_buffer.h"
9 #include "nvk_descriptor_types.h"
10 #include "nvk_entrypoints.h"
11 #include "nvk_format.h"
12 #include "nvk_image.h"
13 #include "nvk_image_view.h"
14 #include "nvk_instance.h"
15 #include "nvk_sampler.h"
16 #include "nvk_shader.h"
17 #include "nvk_wsi.h"
18 #include "nvkmd/nvkmd.h"
19 #include "nvkmd/nouveau/nvkmd_nouveau.h"
20 #include "git_sha1.h"
21 #include "util/detect_os.h"
22 #include "util/disk_cache.h"
23 #include "util/mesa-sha1.h"
24
25 #if DETECT_OS_ANDROID
26 #include <vulkan/vk_android_native_buffer.h>
27 #include "util/u_gralloc/u_gralloc.h"
28 #endif
29
30 #include "vk_android.h"
31 #include "vk_device.h"
32 #include "vk_drm_syncobj.h"
33 #include "vk_shader_module.h"
34 #include "vulkan/wsi/wsi_common.h"
35
36 #include <sys/sysmacros.h>
37
38 #include "nv_push.h"
39 #include "cl90c0.h"
40 #include "cl91c0.h"
41 #include "cla097.h"
42 #include "cla0c0.h"
43 #include "cla1c0.h"
44 #include "clb097.h"
45 #include "clb0c0.h"
46 #include "clb197.h"
47 #include "clb1c0.h"
48 #include "clc097.h"
49 #include "clc0c0.h"
50 #include "clc1c0.h"
51 #include "clc397.h"
52 #include "clc3c0.h"
53 #include "clc597.h"
54 #include "clc5c0.h"
55 #include "clc797.h"
56 #include "clc997.h"
57
58 static bool
nvk_use_nak(const struct nv_device_info *info)
60 {
61 const VkShaderStageFlags vk10_stages =
62 VK_SHADER_STAGE_VERTEX_BIT |
63 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
64 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
65 VK_SHADER_STAGE_GEOMETRY_BIT |
66 VK_SHADER_STAGE_FRAGMENT_BIT |
67 VK_SHADER_STAGE_COMPUTE_BIT;
68
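   /* Use NAK only if it supports every Vulkan 1.0 shader stage; otherwise at
    * least one stage still goes through codegen.
    */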
69 return !(vk10_stages & ~nvk_nak_stages(info));
70 }
71
72 static uint32_t
nvk_get_vk_version(const struct nv_device_info *info)
74 {
75 /* Version override takes priority */
76 const uint32_t version_override = vk_get_version_override();
77 if (version_override)
78 return version_override;
79
80 /* If we're using codegen for anything, lock to version 1.0 */
81 if (!nvk_use_nak(info))
82 return VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
83
84 #if defined(ANDROID_STRICT) && ANDROID_API_LEVEL <= 32
85 return VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION);
86 #endif
87
88 /* Vulkan 1.4 requires hostImageCopy which is currently only supported on
89 * Turing+.
90 */
91 if (info->cls_eng3d < TURING_A)
92 return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
93
94 return VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION);
95 }
96
97 static void
nvk_get_device_extensions(const struct nvk_instance *instance,
                          const struct nv_device_info *info,
                          bool has_tiled_bos,
                          struct vk_device_extension_table *ext)
102 {
103 *ext = (struct vk_device_extension_table) {
104 .KHR_8bit_storage = true,
105 .KHR_16bit_storage = true,
106 .KHR_bind_memory2 = true,
107 .KHR_buffer_device_address = true,
108 .KHR_calibrated_timestamps = true,
109 .KHR_compute_shader_derivatives = nvk_use_nak(info) &&
110 info->cls_eng3d >= TURING_A,
111 .KHR_copy_commands2 = true,
112 .KHR_create_renderpass2 = true,
113 .KHR_dedicated_allocation = true,
114 .KHR_depth_stencil_resolve = true,
115 .KHR_descriptor_update_template = true,
116 .KHR_device_group = true,
117 .KHR_draw_indirect_count = info->cls_eng3d >= TURING_A,
118 .KHR_driver_properties = true,
119 .KHR_dynamic_rendering = true,
120 .KHR_dynamic_rendering_local_read = true,
121 .KHR_external_fence = true,
122 .KHR_external_fence_fd = true,
123 .KHR_external_memory = true,
124 .KHR_external_memory_fd = true,
125 .KHR_external_semaphore = true,
126 .KHR_external_semaphore_fd = true,
127 .KHR_format_feature_flags2 = true,
128 .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
129 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
130 .KHR_fragment_shading_rate = info->cls_eng3d >= TURING_A,
131 .KHR_get_memory_requirements2 = true,
132 .KHR_global_priority = true,
133 .KHR_image_format_list = true,
134 .KHR_imageless_framebuffer = true,
135 #ifdef NVK_USE_WSI_PLATFORM
136 .KHR_incremental_present = true,
137 #endif
138 .KHR_index_type_uint8 = true,
139 .KHR_line_rasterization = true,
140 .KHR_load_store_op_none = true,
141 .KHR_maintenance1 = true,
142 .KHR_maintenance2 = true,
143 .KHR_maintenance3 = true,
144 .KHR_maintenance4 = true,
145 .KHR_maintenance5 = true,
146 .KHR_maintenance6 = true,
147 .KHR_maintenance7 = true,
148 .KHR_map_memory2 = true,
149 .KHR_multiview = true,
150 .KHR_pipeline_executable_properties = true,
151 .KHR_pipeline_library = true,
152 #ifdef NVK_USE_WSI_PLATFORM
153 /* Hide these behind dri configs for now since we cannot implement it
154 * reliably on all surfaces yet. There is no surface capability query
155 * for present wait/id, but the feature is useful enough to hide behind
156 * an opt-in mechanism for now. If the instance only enables surface
157 * extensions that unconditionally support present wait, we can also
158 * expose the extension that way.
159 */
160 .KHR_present_id = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
161 wsi_common_vk_instance_supports_present_wait(&instance->vk),
162 .KHR_present_wait = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
163 wsi_common_vk_instance_supports_present_wait(&instance->vk),
164 #endif
165 .KHR_push_descriptor = true,
166 .KHR_relaxed_block_layout = true,
167 .KHR_sampler_mirror_clamp_to_edge = true,
168 .KHR_sampler_ycbcr_conversion = true,
169 .KHR_separate_depth_stencil_layouts = true,
170 .KHR_shader_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
171 nvk_use_nak(info),
172 .KHR_shader_clock = true,
173 .KHR_shader_draw_parameters = true,
174 .KHR_shader_expect_assume = true,
175 .KHR_shader_float_controls = true,
176 .KHR_shader_float_controls2 = true,
177 .KHR_shader_float16_int8 = true,
178 .KHR_shader_integer_dot_product = true,
179 .KHR_shader_maximal_reconvergence = true,
180 .KHR_shader_non_semantic_info = true,
181 .KHR_shader_quad_control = true,
182 .KHR_shader_relaxed_extended_instruction = true,
183 .KHR_shader_subgroup_extended_types = true,
184 .KHR_shader_subgroup_rotate = nvk_use_nak(info),
185 .KHR_shader_subgroup_uniform_control_flow = nvk_use_nak(info),
186 .KHR_shader_terminate_invocation =
187 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
188 .KHR_spirv_1_4 = true,
189 .KHR_storage_buffer_storage_class = true,
190 .KHR_timeline_semaphore = true,
191 #ifdef NVK_USE_WSI_PLATFORM
192 .KHR_swapchain = true,
193 .KHR_swapchain_mutable_format = true,
194 #endif
195 .KHR_synchronization2 = true,
196 .KHR_uniform_buffer_standard_layout = true,
197 .KHR_variable_pointers = true,
198 .KHR_vertex_attribute_divisor = true,
199 .KHR_vulkan_memory_model = nvk_use_nak(info),
200 .KHR_workgroup_memory_explicit_layout = true,
201 .KHR_zero_initialize_workgroup_memory = true,
202 .EXT_4444_formats = true,
203 .EXT_attachment_feedback_loop_layout = true,
204 .EXT_border_color_swizzle = true,
205 .EXT_buffer_device_address = true,
206 .EXT_calibrated_timestamps = true,
207 .EXT_conditional_rendering = true,
208 .EXT_conservative_rasterization = info->cls_eng3d >= MAXWELL_B,
209 .EXT_color_write_enable = true,
210 .EXT_custom_border_color = true,
211 .EXT_depth_bias_control = true,
212 .EXT_depth_clamp_control = true,
213 .EXT_depth_clamp_zero_one = true,
214 .EXT_depth_clip_control = true,
215 .EXT_depth_clip_enable = true,
216 .EXT_depth_range_unrestricted = info->cls_eng3d >= VOLTA_A,
217 .EXT_descriptor_buffer = true,
218 .EXT_descriptor_indexing = true,
219 .EXT_device_generated_commands = true,
220 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
221 .EXT_display_control = true,
222 #endif
223 .EXT_image_drm_format_modifier = has_tiled_bos,
224 .EXT_dynamic_rendering_unused_attachments = true,
225 .EXT_extended_dynamic_state = true,
226 .EXT_extended_dynamic_state2 = true,
227 .EXT_extended_dynamic_state3 = true,
228 .EXT_external_memory_dma_buf = true,
229 .EXT_global_priority = true,
230 .EXT_global_priority_query = true,
231 .EXT_graphics_pipeline_library = true,
232 .EXT_host_query_reset = true,
233 .EXT_host_image_copy = info->cls_eng3d >= TURING_A,
234 .EXT_image_2d_view_of_3d = true,
235 .EXT_image_robustness = true,
236 .EXT_image_sliced_view_of_3d = true,
237 .EXT_image_view_min_lod = true,
238 .EXT_index_type_uint8 = true,
239 .EXT_inline_uniform_block = true,
240 .EXT_legacy_vertex_attributes = true,
241 .EXT_line_rasterization = true,
242 .EXT_load_store_op_none = true,
243 .EXT_map_memory_placed = true,
244 .EXT_memory_budget = true,
245 .EXT_multi_draw = true,
246 .EXT_mutable_descriptor_type = true,
247 .EXT_nested_command_buffer = true,
248 .EXT_non_seamless_cube_map = true,
249 .EXT_pci_bus_info = info->type == NV_DEVICE_TYPE_DIS,
250 .EXT_pipeline_creation_cache_control = true,
251 .EXT_pipeline_creation_feedback = true,
252 .EXT_pipeline_robustness = true,
253 .EXT_physical_device_drm = true,
254 .EXT_post_depth_coverage = true,
255 .EXT_primitive_topology_list_restart = true,
256 .EXT_private_data = true,
257 .EXT_primitives_generated_query = true,
258 .EXT_provoking_vertex = true,
259 .EXT_queue_family_foreign = true,
260 .EXT_robustness2 = true,
261 .EXT_sample_locations = info->cls_eng3d >= MAXWELL_B,
262 .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B,
263 .EXT_scalar_block_layout = nvk_use_nak(info),
264 .EXT_separate_stencil_usage = true,
265 .EXT_shader_image_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
266 nvk_use_nak(info),
267 .EXT_shader_demote_to_helper_invocation = true,
268 .EXT_shader_module_identifier = true,
269 .EXT_shader_object = true,
270 .EXT_shader_replicated_composites = true,
271 .EXT_shader_subgroup_ballot = true,
272 .EXT_shader_subgroup_vote = true,
273 .EXT_shader_viewport_index_layer = info->cls_eng3d >= MAXWELL_B,
274 .EXT_subgroup_size_control = true,
275 #ifdef NVK_USE_WSI_PLATFORM
276 .EXT_swapchain_maintenance1 = true,
277 #endif
278 .EXT_texel_buffer_alignment = true,
279 .EXT_tooling_info = true,
280 .EXT_transform_feedback = true,
281 .EXT_vertex_attribute_divisor = true,
282 .EXT_vertex_input_dynamic_state = true,
283 .EXT_ycbcr_2plane_444_formats = true,
284 .EXT_ycbcr_image_arrays = true,
285 #if DETECT_OS_ANDROID
286 .ANDROID_native_buffer = vk_android_get_ugralloc() != NULL,
287 #endif
288 .GOOGLE_decorate_string = true,
289 .GOOGLE_hlsl_functionality1 = true,
290 .GOOGLE_user_type = true,
291 .NV_compute_shader_derivatives = nvk_use_nak(info) &&
292 info->cls_eng3d >= TURING_A,
293 .NV_shader_sm_builtins = true,
294 .VALVE_mutable_descriptor_type = true,
295 };
296 }
297
298 static void
nvk_get_device_features(const struct nv_device_info *info,
                        const struct vk_device_extension_table *supported_extensions,
                        struct vk_features *features)
302 {
303 *features = (struct vk_features) {
304 /* Vulkan 1.0 */
305 .robustBufferAccess = true,
306 .fullDrawIndexUint32 = true,
307 .imageCubeArray = true,
308 .independentBlend = true,
309 .geometryShader = true,
310 .tessellationShader = true,
311 .sampleRateShading = true,
312 .dualSrcBlend = true,
313 .logicOp = true,
314 .multiDrawIndirect = true,
315 .drawIndirectFirstInstance = true,
316 .depthClamp = true,
317 .depthBiasClamp = true,
318 .fillModeNonSolid = true,
319 .depthBounds = true,
320 .wideLines = true,
321 .largePoints = true,
322 .alphaToOne = true,
323 .multiViewport = true,
324 .samplerAnisotropy = true,
325 .textureCompressionETC2 = false,
326 .textureCompressionBC = true,
327 .textureCompressionASTC_LDR = false,
328 .occlusionQueryPrecise = true,
329 .pipelineStatisticsQuery = true,
330 .vertexPipelineStoresAndAtomics = true,
331 .fragmentStoresAndAtomics = true,
332 .shaderTessellationAndGeometryPointSize = true,
333 .shaderImageGatherExtended = true,
334 .shaderStorageImageExtendedFormats = true,
335 .shaderStorageImageMultisample = true,
336 .shaderStorageImageReadWithoutFormat = info->cls_eng3d >= MAXWELL_A,
337 .shaderStorageImageWriteWithoutFormat = true,
338 .shaderUniformBufferArrayDynamicIndexing = true,
339 .shaderSampledImageArrayDynamicIndexing = true,
340 .shaderStorageBufferArrayDynamicIndexing = true,
341 .shaderStorageImageArrayDynamicIndexing = true,
342 .shaderClipDistance = true,
343 .shaderCullDistance = true,
344 .shaderFloat64 = true,
345 .shaderInt64 = true,
346 .shaderInt16 = true,
347 .shaderResourceResidency = info->cls_eng3d >= VOLTA_A,
348 .shaderResourceMinLod = info->cls_eng3d >= VOLTA_A,
349 .sparseBinding = true,
350 .sparseResidency2Samples = info->cls_eng3d >= MAXWELL_B,
351 .sparseResidency4Samples = info->cls_eng3d >= MAXWELL_B,
352 .sparseResidency8Samples = info->cls_eng3d >= MAXWELL_B,
353 .sparseResidencyAliased = info->cls_eng3d >= MAXWELL_B,
354 .sparseResidencyBuffer = info->cls_eng3d >= MAXWELL_B,
355 .sparseResidencyImage2D = info->cls_eng3d >= MAXWELL_B,
356 .sparseResidencyImage3D = info->cls_eng3d >= MAXWELL_B,
357 .variableMultisampleRate = true,
358 .inheritedQueries = true,
359
360 /* Vulkan 1.1 */
361 .storageBuffer16BitAccess = true,
362 .uniformAndStorageBuffer16BitAccess = true,
363 .storagePushConstant16 = true,
364 .multiview = true,
365 .multiviewGeometryShader = true,
366 .multiviewTessellationShader = true,
367 .variablePointersStorageBuffer = true,
368 .variablePointers = true,
369 .shaderDrawParameters = true,
370 .samplerYcbcrConversion = true,
371
372 /* Vulkan 1.2 */
373 .samplerMirrorClampToEdge = true,
374 .drawIndirectCount = info->cls_eng3d >= TURING_A,
375 .storageBuffer8BitAccess = true,
376 .uniformAndStorageBuffer8BitAccess = true,
377 .storagePushConstant8 = true,
378 .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
379 nvk_use_nak(info),
380 .shaderSharedInt64Atomics = false, /* TODO */
381 /* TODO: Fp16 is currently busted on Turing and Volta due to instruction
382 * scheduling issues. Re-enable it once those are sorted.
383 */
384 .shaderFloat16 = info->sm >= 80 && nvk_use_nak(info),
385 .shaderInt8 = true,
386 .descriptorIndexing = true,
387 .shaderInputAttachmentArrayDynamicIndexing = true,
388 .shaderUniformTexelBufferArrayDynamicIndexing = true,
389 .shaderStorageTexelBufferArrayDynamicIndexing = true,
390 .shaderUniformBufferArrayNonUniformIndexing = true,
391 .shaderSampledImageArrayNonUniformIndexing = true,
392 .shaderStorageBufferArrayNonUniformIndexing = true,
393 .shaderStorageImageArrayNonUniformIndexing = true,
394 .shaderInputAttachmentArrayNonUniformIndexing = true,
395 .shaderUniformTexelBufferArrayNonUniformIndexing = true,
396 .shaderStorageTexelBufferArrayNonUniformIndexing = true,
397 .descriptorBindingUniformBufferUpdateAfterBind = true,
398 .descriptorBindingSampledImageUpdateAfterBind = true,
399 .descriptorBindingStorageImageUpdateAfterBind = true,
400 .descriptorBindingStorageBufferUpdateAfterBind = true,
401 .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
402 .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
403 .descriptorBindingUpdateUnusedWhilePending = true,
404 .descriptorBindingPartiallyBound = true,
405 .descriptorBindingVariableDescriptorCount = true,
406 .runtimeDescriptorArray = true,
407 .samplerFilterMinmax = info->cls_eng3d >= MAXWELL_B,
408 .scalarBlockLayout = nvk_use_nak(info),
409 .imagelessFramebuffer = true,
410 .uniformBufferStandardLayout = true,
411 .shaderSubgroupExtendedTypes = true,
412 .separateDepthStencilLayouts = true,
413 .hostQueryReset = true,
414 .timelineSemaphore = true,
415 .bufferDeviceAddress = true,
416 .bufferDeviceAddressCaptureReplay = true,
417 .bufferDeviceAddressMultiDevice = false,
418 .vulkanMemoryModel = nvk_use_nak(info),
419 .vulkanMemoryModelDeviceScope = nvk_use_nak(info),
420 .vulkanMemoryModelAvailabilityVisibilityChains = nvk_use_nak(info),
421 .shaderOutputViewportIndex = info->cls_eng3d >= MAXWELL_B,
422 .shaderOutputLayer = info->cls_eng3d >= MAXWELL_B,
423 .subgroupBroadcastDynamicId = nvk_use_nak(info),
424
425 /* Vulkan 1.3 */
426 .robustImageAccess = true,
427 .inlineUniformBlock = true,
428 .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
429 .pipelineCreationCacheControl = true,
430 .privateData = true,
431 .shaderDemoteToHelperInvocation = true,
432 .shaderTerminateInvocation = true,
433 .subgroupSizeControl = true,
434 .computeFullSubgroups = true,
435 .synchronization2 = true,
436 .shaderZeroInitializeWorkgroupMemory = true,
437 .dynamicRendering = true,
438 .shaderIntegerDotProduct = true,
439 .maintenance4 = true,
440
441 /* Vulkan 1.4 */
442 .globalPriorityQuery = true,
443 .shaderSubgroupRotate = nvk_use_nak(info),
444 .shaderSubgroupRotateClustered = nvk_use_nak(info),
445 .shaderFloatControls2 = true,
446 .shaderExpectAssume = true,
447 .rectangularLines = true,
448 .bresenhamLines = true,
449 .smoothLines = true,
450 .stippledRectangularLines = true,
451 .stippledBresenhamLines = true,
452 .stippledSmoothLines = true,
453 .vertexAttributeInstanceRateDivisor = true,
454 .vertexAttributeInstanceRateZeroDivisor = true,
455 .indexTypeUint8 = true,
456 .dynamicRenderingLocalRead = true,
457 .maintenance5 = true,
458 .maintenance6 = true,
459 .pipelineRobustness = true,
460 .hostImageCopy = info->cls_eng3d >= TURING_A,
461 .pushDescriptor = true,
462
463 /* VK_KHR_compute_shader_derivatives */
464 .computeDerivativeGroupQuads = info->cls_eng3d >= TURING_A,
465 .computeDerivativeGroupLinear = info->cls_eng3d >= TURING_A,
466
467 /* VK_KHR_fragment_shader_barycentric */
468 .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
469 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
470
471 /* VK_KHR_fragment_shading_rate */
472 .pipelineFragmentShadingRate = info->cls_eng3d >= TURING_A,
473 .primitiveFragmentShadingRate = info->cls_eng3d >= TURING_A,
474 .attachmentFragmentShadingRate = info->cls_eng3d >= TURING_A,
475
476 /* VK_KHR_maintenance7 */
477 .maintenance7 = true,
478
479 /* VK_KHR_pipeline_executable_properties */
480 .pipelineExecutableInfo = true,
481
482 /* VK_KHR_present_id */
483 .presentId = supported_extensions->KHR_present_id,
484
485 /* VK_KHR_present_wait */
486 .presentWait = supported_extensions->KHR_present_wait,
487
488 /* VK_KHR_shader_quad_control */
489 .shaderQuadControl = nvk_use_nak(info),
490
491 /* VK_KHR_shader_relaxed_extended_instruction */
492 .shaderRelaxedExtendedInstruction = true,
493
494 /* VK_KHR_shader_clock */
495 .shaderSubgroupClock = true,
496 .shaderDeviceClock = true,
497
498 /* VK_KHR_shader_maximal_reconvergence */
499 .shaderMaximalReconvergence = true,
500
501 /* VK_KHR_shader_subgroup_uniform_control_flow */
502 .shaderSubgroupUniformControlFlow = nvk_use_nak(info),
503
504 /* VK_KHR_workgroup_memory_explicit_layout */
505 .workgroupMemoryExplicitLayout = true,
506 .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
507 .workgroupMemoryExplicitLayout8BitAccess = nvk_use_nak(info),
508 .workgroupMemoryExplicitLayout16BitAccess = nvk_use_nak(info),
509
510 /* VK_EXT_4444_formats */
511 .formatA4R4G4B4 = true,
512 .formatA4B4G4R4 = true,
513
514 /* VK_EXT_attachment_feedback_loop_layout */
515 .attachmentFeedbackLoopLayout = true,
516
517 /* VK_EXT_border_color_swizzle */
518 .borderColorSwizzle = true,
519 .borderColorSwizzleFromImage = false,
520
521 /* VK_EXT_buffer_device_address */
522 .bufferDeviceAddressCaptureReplayEXT = true,
523
524 /* VK_EXT_color_write_enable */
525 .colorWriteEnable = true,
526
527 /* VK_EXT_conditional_rendering */
528 .conditionalRendering = true,
529 .inheritedConditionalRendering = true,
530
531 /* VK_EXT_custom_border_color */
532 .customBorderColors = true,
533 .customBorderColorWithoutFormat = true,
534
535 /* VK_EXT_depth_bias_control */
536 .depthBiasControl = true,
537 .leastRepresentableValueForceUnormRepresentation = true,
538 .floatRepresentation = false,
539 .depthBiasExact = true,
540
541 /* VK_EXT_depth_clamp_control */
542 .depthClampControl = true,
543
544 /* VK_EXT_depth_clamp_zero_one */
545 .depthClampZeroOne = true,
546
547 /* VK_EXT_depth_clip_control */
548 .depthClipControl = true,
549
550 /* VK_EXT_depth_clip_enable */
551 .depthClipEnable = true,
552
553 /* VK_EXT_descriptor_buffer */
554 .descriptorBuffer = true,
555 .descriptorBufferCaptureReplay = true,
556 .descriptorBufferImageLayoutIgnored = true,
557 .descriptorBufferPushDescriptors = true,
558
559 /* VK_EXT_device_generated_commands */
560 .deviceGeneratedCommands = true,
561 .dynamicGeneratedPipelineLayout = true,
562
563 /* VK_EXT_dynamic_rendering_unused_attachments */
564 .dynamicRenderingUnusedAttachments = true,
565
566 /* VK_EXT_extended_dynamic_state */
567 .extendedDynamicState = true,
568
569 /* VK_EXT_extended_dynamic_state2 */
570 .extendedDynamicState2 = true,
571 .extendedDynamicState2LogicOp = true,
572 .extendedDynamicState2PatchControlPoints = true,
573
574 /* VK_EXT_extended_dynamic_state3 */
575 .extendedDynamicState3TessellationDomainOrigin = true,
576 .extendedDynamicState3DepthClampEnable = true,
577 .extendedDynamicState3PolygonMode = true,
578 .extendedDynamicState3RasterizationSamples = true,
579 .extendedDynamicState3SampleMask = true,
580 .extendedDynamicState3AlphaToCoverageEnable = true,
581 .extendedDynamicState3AlphaToOneEnable = true,
582 .extendedDynamicState3LogicOpEnable = true,
583 .extendedDynamicState3ColorBlendEnable = true,
584 .extendedDynamicState3ColorBlendEquation = true,
585 .extendedDynamicState3ColorWriteMask = true,
586 .extendedDynamicState3RasterizationStream = true,
587 .extendedDynamicState3ConservativeRasterizationMode = false,
588 .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
589 .extendedDynamicState3DepthClipEnable = true,
590 .extendedDynamicState3SampleLocationsEnable = info->cls_eng3d >= MAXWELL_B,
591 .extendedDynamicState3ColorBlendAdvanced = false,
592 .extendedDynamicState3ProvokingVertexMode = true,
593 .extendedDynamicState3LineRasterizationMode = true,
594 .extendedDynamicState3LineStippleEnable = true,
595 .extendedDynamicState3DepthClipNegativeOneToOne = true,
596 .extendedDynamicState3ViewportWScalingEnable = false,
597 .extendedDynamicState3ViewportSwizzle = false,
598 .extendedDynamicState3CoverageToColorEnable = false,
599 .extendedDynamicState3CoverageToColorLocation = false,
600 .extendedDynamicState3CoverageModulationMode = false,
601 .extendedDynamicState3CoverageModulationTableEnable = false,
602 .extendedDynamicState3CoverageModulationTable = false,
603 .extendedDynamicState3CoverageReductionMode = false,
604 .extendedDynamicState3RepresentativeFragmentTestEnable = false,
605 .extendedDynamicState3ShadingRateImageEnable = false,
606
607 /* VK_EXT_graphics_pipeline_library */
608 .graphicsPipelineLibrary = true,
609
610 /* VK_EXT_image_2d_view_of_3d */
611 .image2DViewOf3D = true,
612 .sampler2DViewOf3D = true,
613
614 /* VK_EXT_image_sliced_view_of_3d */
615 .imageSlicedViewOf3D = true,
616
617 #ifdef NVK_USE_WSI_PLATFORM
618 /* VK_EXT_swapchain_maintenance1 */
619 .swapchainMaintenance1 = true,
620 #endif
621
622 /* VK_EXT_image_view_min_lod */
623 .minLod = true,
624
625 /* VK_EXT_legacy_vertex_attributes */
626 .legacyVertexAttributes = true,
627
628 /* VK_EXT_map_memory_placed */
629 .memoryMapPlaced = true,
630 .memoryMapRangePlaced = false,
631 .memoryUnmapReserve = true,
632
633 /* VK_EXT_multi_draw */
634 .multiDraw = true,
635
636 /* VK_EXT_mutable_descriptor_type */
637 .mutableDescriptorType = true,
638
639 /* VK_EXT_nested_command_buffer */
640 .nestedCommandBuffer = true,
641 .nestedCommandBufferRendering = true,
642 .nestedCommandBufferSimultaneousUse = true,
643
644 /* VK_EXT_non_seamless_cube_map */
645 .nonSeamlessCubeMap = true,
646
647 /* VK_EXT_primitive_topology_list_restart */
648 .primitiveTopologyListRestart = true,
649 .primitiveTopologyPatchListRestart = true,
650
651 /* VK_EXT_primitives_generated_query */
652 .primitivesGeneratedQuery = true,
653 .primitivesGeneratedQueryWithNonZeroStreams = true,
654 .primitivesGeneratedQueryWithRasterizerDiscard = true,
655
656 /* VK_EXT_provoking_vertex */
657 .provokingVertexLast = true,
658 .transformFeedbackPreservesProvokingVertex = true,
659
660 /* VK_EXT_robustness2 */
661 .robustBufferAccess2 = true,
662 .robustImageAccess2 = true,
663 .nullDescriptor = true,
664
665 /* VK_EXT_shader_image_atomic_int64 */
666 .shaderImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
667 nvk_use_nak(info),
668 .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
669 nvk_use_nak(info),
670
671 /* VK_EXT_shader_module_identifier */
672 .shaderModuleIdentifier = true,
673
674 /* VK_EXT_shader_object */
675 .shaderObject = true,
676
677 /* VK_EXT_shader_replicated_composites */
678 .shaderReplicatedComposites = true,
679
680 /* VK_EXT_texel_buffer_alignment */
681 .texelBufferAlignment = true,
682
683 /* VK_EXT_transform_feedback */
684 .transformFeedback = true,
685 .geometryStreams = true,
686
687 /* VK_EXT_vertex_input_dynamic_state */
688 .vertexInputDynamicState = true,
689
690 /* VK_EXT_ycbcr_2plane_444_formats */
691 .ycbcr2plane444Formats = true,
692
693 /* VK_EXT_ycbcr_image_arrays */
694 .ycbcrImageArrays = true,
695
696 /* VK_NV_shader_sm_builtins */
697 .shaderSMBuiltins = true,
698 };
699 }
700
701 static void
nvk_get_device_properties(const struct nvk_instance *instance,
                          const struct nv_device_info *info,
                          bool conformant,
                          struct vk_properties *properties)
706 {
707 const VkSampleCountFlagBits sample_counts = VK_SAMPLE_COUNT_1_BIT |
708 VK_SAMPLE_COUNT_2_BIT |
709 VK_SAMPLE_COUNT_4_BIT |
710 VK_SAMPLE_COUNT_8_BIT;
711
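   /* VkSampleCountFlagBits encodes each sample count as its own bit, so the
    * mask of every count up to NVK_MAX_SAMPLES is (NVK_MAX_SAMPLES << 1) - 1.
    */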
712 assert(sample_counts <= (NVK_MAX_SAMPLES << 1) - 1);
713
714 uint64_t os_page_size = 4096;
715 os_get_page_size(&os_page_size);
716
717 *properties = (struct vk_properties) {
718 .apiVersion = nvk_get_vk_version(info),
719 .driverVersion = vk_get_driver_version(),
720 .vendorID = instance->force_vk_vendor != 0 ?
721 instance->force_vk_vendor : NVIDIA_VENDOR_ID,
722 .deviceID = info->device_id,
723 .deviceType = info->type == NV_DEVICE_TYPE_DIS ?
724 VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
725 VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
726
727 /* Vulkan 1.0 limits */
728 .maxImageDimension1D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_1D),
729 .maxImageDimension2D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_2D),
730 .maxImageDimension3D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_3D),
731 .maxImageDimensionCube = 0x8000,
732 .maxImageArrayLayers = 2048,
733 .maxTexelBufferElements = 128 * 1024 * 1024,
734 .maxUniformBufferRange = 65536,
735 .maxStorageBufferRange = UINT32_MAX,
736 .maxPushConstantsSize = NVK_MAX_PUSH_SIZE,
737 .maxMemoryAllocationCount = 4096,
738 .maxSamplerAllocationCount = 4000,
739 .bufferImageGranularity = info->cls_eng3d >= MAXWELL_B ? 0x400 : 0x10000,
740 .sparseAddressSpaceSize = NVK_SPARSE_ADDR_SPACE_SIZE,
741 .maxBoundDescriptorSets = NVK_MAX_SETS,
742 .maxPerStageDescriptorSamplers = NVK_MAX_DESCRIPTORS,
743 .maxPerStageDescriptorUniformBuffers = NVK_MAX_DESCRIPTORS,
744 .maxPerStageDescriptorStorageBuffers = NVK_MAX_DESCRIPTORS,
745 .maxPerStageDescriptorSampledImages = NVK_MAX_DESCRIPTORS,
746 .maxPerStageDescriptorStorageImages = NVK_MAX_DESCRIPTORS,
747 .maxPerStageDescriptorInputAttachments = NVK_MAX_DESCRIPTORS,
748 .maxPerStageResources = UINT32_MAX,
749 .maxDescriptorSetSamplers = NVK_MAX_DESCRIPTORS,
750 .maxDescriptorSetUniformBuffers = NVK_MAX_DESCRIPTORS,
751 .maxDescriptorSetUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
752 .maxDescriptorSetStorageBuffers = NVK_MAX_DESCRIPTORS,
753 .maxDescriptorSetStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
754 .maxDescriptorSetSampledImages = NVK_MAX_DESCRIPTORS,
755 .maxDescriptorSetStorageImages = NVK_MAX_DESCRIPTORS,
756 .maxDescriptorSetInputAttachments = NVK_MAX_DESCRIPTORS,
757 .maxVertexInputAttributes = 32,
758 .maxVertexInputBindings = 32,
759 .maxVertexInputAttributeOffset = 2047,
760 .maxVertexInputBindingStride = 2048,
761 .maxVertexOutputComponents = 128,
762 .maxTessellationGenerationLevel = 64,
763 .maxTessellationPatchSize = 32,
764 .maxTessellationControlPerVertexInputComponents = 128,
765 .maxTessellationControlPerVertexOutputComponents = 128,
766 .maxTessellationControlPerPatchOutputComponents = 120,
767 .maxTessellationControlTotalOutputComponents = 4216,
768 .maxTessellationEvaluationInputComponents = 128,
769 .maxTessellationEvaluationOutputComponents = 128,
770 .maxGeometryShaderInvocations = 32,
771 .maxGeometryInputComponents = 128,
772 .maxGeometryOutputComponents = 128,
773 .maxGeometryOutputVertices = 1024,
774 .maxGeometryTotalOutputComponents = 1024,
775 .maxFragmentInputComponents = 128,
776 .maxFragmentOutputAttachments = NVK_MAX_RTS,
777 .maxFragmentDualSrcAttachments = 1,
778 .maxFragmentCombinedOutputResources = 16,
779 .maxComputeSharedMemorySize = NVK_MAX_SHARED_SIZE,
780 .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
781 .maxComputeWorkGroupInvocations = 1024,
782 .maxComputeWorkGroupSize = {1024, 1024, 64},
783 .subPixelPrecisionBits = 8,
784 .subTexelPrecisionBits = 8,
785 .mipmapPrecisionBits = 8,
786 .maxDrawIndexedIndexValue = UINT32_MAX,
787 .maxDrawIndirectCount = UINT32_MAX,
788 .maxSamplerLodBias = 15,
789 .maxSamplerAnisotropy = 16,
790 .maxViewports = NVK_MAX_VIEWPORTS,
791 .maxViewportDimensions = { 32768, 32768 },
792 .viewportBoundsRange = { -65536, 65536 },
793 .viewportSubPixelBits = 8,
794 .minMemoryMapAlignment = os_page_size,
795 .minTexelBufferOffsetAlignment = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
796 .minUniformBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
797 .minStorageBufferOffsetAlignment = NVK_MIN_SSBO_ALIGNMENT,
798 .minTexelOffset = -8,
799 .maxTexelOffset = 7,
800 .minTexelGatherOffset = -32,
801 .maxTexelGatherOffset = 31,
802 .minInterpolationOffset = -0.5,
803 .maxInterpolationOffset = 0.4375,
804 .subPixelInterpolationOffsetBits = 4,
805 .maxFramebufferHeight = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
806 .maxFramebufferWidth = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
807 .maxFramebufferLayers = 2048,
808 .framebufferColorSampleCounts = sample_counts,
809 .framebufferDepthSampleCounts = sample_counts,
810 .framebufferNoAttachmentsSampleCounts = sample_counts,
811 .framebufferStencilSampleCounts = sample_counts,
812 .maxColorAttachments = NVK_MAX_RTS,
813 .sampledImageColorSampleCounts = sample_counts,
814 .sampledImageIntegerSampleCounts = sample_counts,
815 .sampledImageDepthSampleCounts = sample_counts,
816 .sampledImageStencilSampleCounts = sample_counts,
817 .storageImageSampleCounts = sample_counts,
818 .maxSampleMaskWords = 1,
819 .timestampComputeAndGraphics = true,
820 /* FIXME: Is timestamp period actually 1? */
821 .timestampPeriod = 1.0f,
822 .maxClipDistances = 8,
823 .maxCullDistances = 8,
824 .maxCombinedClipAndCullDistances = 8,
825 .discreteQueuePriorities = 2,
826 .pointSizeRange = { 1.0, 2047.94 },
827 .lineWidthRange = { 1, 64 },
828 .pointSizeGranularity = 0.0625,
829 .lineWidthGranularity = 0.0625,
830 .strictLines = true,
831 .standardSampleLocations = true,
832 .optimalBufferCopyOffsetAlignment = 1,
833 .optimalBufferCopyRowPitchAlignment = 1,
834 .nonCoherentAtomSize = 64,
835
836 /* Vulkan 1.0 sparse properties */
837 .sparseResidencyNonResidentStrict = true,
838 .sparseResidencyAlignedMipSize = info->cls_eng3d < MAXWELL_B, /* DXVK/vkd3d-proton requires this to be advertised as VK_FALSE for FL12 */
839 .sparseResidencyStandard2DBlockShape = true,
840 .sparseResidencyStandard2DMultisampleBlockShape = true,
841 .sparseResidencyStandard3DBlockShape = true,
842
843 /* Vulkan 1.1 properties */
844 .subgroupSize = 32,
845 .subgroupSupportedStages = nvk_nak_stages(info),
846 .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
847 VK_SUBGROUP_FEATURE_BALLOT_BIT |
848 VK_SUBGROUP_FEATURE_BASIC_BIT |
849 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
850 VK_SUBGROUP_FEATURE_QUAD_BIT |
851 VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
852 VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
853 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
854 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
855 VK_SUBGROUP_FEATURE_VOTE_BIT,
856 .subgroupQuadOperationsInAllStages = false,
857 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
858 .maxMultiviewViewCount = NVK_MAX_MULTIVIEW_VIEW_COUNT,
859 .maxMultiviewInstanceIndex = UINT32_MAX,
860 .maxPerSetDescriptors = UINT32_MAX,
861 .maxMemoryAllocationSize = (1u << 31),
862
863 /* Vulkan 1.2 properties */
864 .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
865 VK_RESOLVE_MODE_AVERAGE_BIT |
866 VK_RESOLVE_MODE_MIN_BIT |
867 VK_RESOLVE_MODE_MAX_BIT,
868 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
869 VK_RESOLVE_MODE_MIN_BIT |
870 VK_RESOLVE_MODE_MAX_BIT,
871 .independentResolveNone = true,
872 .independentResolve = true,
873 .driverID = VK_DRIVER_ID_MESA_NVK,
874 .conformanceVersion =
875 conformant ? (VkConformanceVersion) { 1, 4, 0, 0 }
876 : (VkConformanceVersion) { 0, 0, 0, 0 },
877 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
878 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
879 .shaderSignedZeroInfNanPreserveFloat16 = true,
880 .shaderSignedZeroInfNanPreserveFloat32 = true,
881 .shaderSignedZeroInfNanPreserveFloat64 = true,
882 .shaderDenormPreserveFloat16 = true,
883 .shaderDenormPreserveFloat32 = true,
884 .shaderDenormPreserveFloat64 = true,
885 .shaderDenormFlushToZeroFloat16 = false,
886 .shaderDenormFlushToZeroFloat32 = true,
887 .shaderDenormFlushToZeroFloat64 = false,
888 .shaderRoundingModeRTEFloat16 = true,
889 .shaderRoundingModeRTEFloat32 = true,
890 .shaderRoundingModeRTEFloat64 = true,
891 .shaderRoundingModeRTZFloat16 = false,
892 .shaderRoundingModeRTZFloat32 = true,
893 .shaderRoundingModeRTZFloat64 = true,
894 .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
895 .shaderUniformBufferArrayNonUniformIndexingNative = false,
896 .shaderSampledImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
897 .shaderStorageBufferArrayNonUniformIndexingNative = true,
898 .shaderStorageImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
899 .shaderInputAttachmentArrayNonUniformIndexingNative = false,
900 .robustBufferAccessUpdateAfterBind = true,
901 .quadDivergentImplicitLod = info->cls_eng3d >= TURING_A,
902 .maxPerStageDescriptorUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
903 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
904 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
905 .maxPerStageDescriptorUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
906 .maxPerStageDescriptorUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
907 .maxPerStageDescriptorUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
908 .maxPerStageUpdateAfterBindResources = UINT32_MAX,
909 .maxDescriptorSetUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
910 .maxDescriptorSetUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
911 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
912 .maxDescriptorSetUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
913 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
914 .maxDescriptorSetUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
915 .maxDescriptorSetUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
916 .maxDescriptorSetUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
917 .filterMinmaxSingleComponentFormats = true,
918 .filterMinmaxImageComponentMapping = true,
919 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
920 .framebufferIntegerColorSampleCounts = sample_counts,
921
922 /* Vulkan 1.3 properties */
923 .minSubgroupSize = 32,
924 .maxSubgroupSize = 32,
925 .maxComputeWorkgroupSubgroups = 1024 / 32,
926 .requiredSubgroupSizeStages = 0,
927 .maxInlineUniformBlockSize = 1 << 16,
928 .maxPerStageDescriptorInlineUniformBlocks = 32,
929 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
930 .maxDescriptorSetInlineUniformBlocks = 6 * 32,
931 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
932 .maxInlineUniformTotalSize = 1 << 16,
933 .integerDotProduct4x8BitPackedUnsignedAccelerated
934 = info->cls_eng3d >= VOLTA_A,
935 .integerDotProduct4x8BitPackedSignedAccelerated
936 = info->cls_eng3d >= VOLTA_A,
937 .integerDotProduct4x8BitPackedMixedSignednessAccelerated
938 = info->cls_eng3d >= VOLTA_A,
939 .storageTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
940 .storageTexelBufferOffsetSingleTexelAlignment = true,
941 .uniformTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
942 .uniformTexelBufferOffsetSingleTexelAlignment = true,
943 .maxBufferSize = NVK_MAX_BUFFER_SIZE,
944
945 /* Vulkan 1.4 properties */
946 .lineSubPixelPrecisionBits = 8,
947 .maxVertexAttribDivisor = UINT32_MAX,
948 .supportsNonZeroFirstInstance = true,
949 .maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,
950 .dynamicRenderingLocalReadDepthStencilAttachments = true,
951 .dynamicRenderingLocalReadMultisampledAttachments = true,
952 .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
953 .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
954 .depthStencilSwizzleOneSupport = true,
955 .polygonModePointSize = true,
956 .nonStrictSinglePixelWideLinesUseParallelogram = false,
957 .nonStrictWideLinesUseParallelogram = false,
958 .blockTexelViewCompatibleMultipleLayers = true,
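      /* Multi-planar YCbCr formats may consume up to one descriptor per
       * plane.
       */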
959 .maxCombinedImageSamplerDescriptorCount = 3,
960 .fragmentShadingRateClampCombinerInputs = false, /* TODO */
961 .defaultRobustnessStorageBuffers =
962 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
963 .defaultRobustnessUniformBuffers =
964 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
965 .defaultRobustnessVertexInputs =
966 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT,
967 .defaultRobustnessImages =
968 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
969
970 /* VK_KHR_compute_shader_derivatives */
971 .meshAndTaskShaderDerivatives = false,
972
973 /* VK_EXT_conservative_rasterization */
974 .primitiveOverestimationSize = info->cls_eng3d >= VOLTA_A ? 1.0f / 512.0f : 0.0,
975 .maxExtraPrimitiveOverestimationSize = 0.75,
976 .extraPrimitiveOverestimationSizeGranularity = 0.25,
977 .primitiveUnderestimation = info->cls_eng3d >= VOLTA_A,
978 .conservativePointAndLineRasterization = true,
979 .degenerateLinesRasterized = info->cls_eng3d >= VOLTA_A,
980 .degenerateTrianglesRasterized = info->cls_eng3d >= PASCAL_A,
981 .fullyCoveredFragmentShaderInputVariable = false,
982 .conservativeRasterizationPostDepthCoverage = true,
983
984 /* VK_EXT_custom_border_color */
985 .maxCustomBorderColorSamplers = 4000,
986
987 /* VK_EXT_descriptor_buffer */
988 .combinedImageSamplerDescriptorSingleArray = true,
989 .bufferlessPushDescriptors = true,
990 .allowSamplerImageViewPostSubmitCreation = false,
991 .descriptorBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
992 .maxDescriptorBufferBindings = 32,
993 .maxResourceDescriptorBufferBindings = 32,
994 .maxSamplerDescriptorBufferBindings = 32,
995 .maxEmbeddedImmutableSamplerBindings = 32,
996 .maxEmbeddedImmutableSamplers = 4000,
997 .bufferCaptureReplayDescriptorDataSize = 0,
998 .imageCaptureReplayDescriptorDataSize = 0,
999 .imageViewCaptureReplayDescriptorDataSize =
1000 sizeof(struct nvk_image_view_capture),
1001 .samplerCaptureReplayDescriptorDataSize =
1002 sizeof(struct nvk_sampler_capture),
1003 .accelerationStructureCaptureReplayDescriptorDataSize = 0, // todo
1004 .samplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1005 .combinedImageSamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1006 .sampledImageDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1007 .storageImageDescriptorSize = sizeof(struct nvk_storage_image_descriptor),
1008 .uniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1009 .robustUniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1010 .storageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1011 .robustStorageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1012 .uniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1013 .robustUniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1014 .storageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1015 .robustStorageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1016 .inputAttachmentDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1017 .accelerationStructureDescriptorSize = 0,
1018 .maxSamplerDescriptorBufferRange = UINT32_MAX,
1019 .maxResourceDescriptorBufferRange = UINT32_MAX,
1020 .samplerDescriptorBufferAddressSpaceSize = UINT32_MAX,
1021 .resourceDescriptorBufferAddressSpaceSize = UINT32_MAX,
1022 .descriptorBufferAddressSpaceSize = UINT32_MAX,
1023
1024 /* VK_EXT_device_generated_commands */
1025 .maxIndirectPipelineCount = UINT32_MAX,
1026 .maxIndirectShaderObjectCount = UINT32_MAX,
1027 .maxIndirectSequenceCount = 1 << 20,
1028 .maxIndirectCommandsTokenCount = 16,
1029 .maxIndirectCommandsTokenOffset = 2047,
1030 .maxIndirectCommandsIndirectStride = 1 << 12,
1031 .supportedIndirectCommandsInputModes =
1032 VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT |
1033 VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT,
1034 .supportedIndirectCommandsShaderStages =
1035 NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1036 .supportedIndirectCommandsShaderStagesPipelineBinding =
1037 NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1038 .supportedIndirectCommandsShaderStagesShaderBinding =
1039 NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1040 .deviceGeneratedCommandsTransformFeedback = true,
1041 .deviceGeneratedCommandsMultiDrawIndirectCount = info->cls_eng3d >= TURING_A,
1042
1043 /* VK_EXT_extended_dynamic_state3 */
1044 .dynamicPrimitiveTopologyUnrestricted = true,
1045
1046 /* VK_EXT_graphics_pipeline_library */
1047 .graphicsPipelineLibraryFastLinking = true,
1048 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
1049
1050 /* VK_KHR_maintenance7 */
1051 .robustFragmentShadingRateAttachmentAccess = false,
1052 .separateDepthStencilAttachmentAccess = false,
1053 .maxDescriptorSetTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1054 .maxDescriptorSetTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1055 .maxDescriptorSetTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
1056 .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1057 .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1058 .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
1059
1060 /* VK_EXT_legacy_vertex_attributes */
1061 .nativeUnalignedPerformance = true,
1062
1063 /* VK_EXT_map_memory_placed */
1064 .minPlacedMemoryMapAlignment = os_page_size,
1065
1066 /* VK_EXT_multi_draw */
1067 .maxMultiDrawCount = UINT32_MAX,
1068
1069 /* VK_EXT_nested_command_buffer */
1070 .maxCommandBufferNestingLevel = UINT32_MAX,
1071
1072 /* VK_EXT_pci_bus_info */
1073 .pciDomain = info->pci.domain,
1074 .pciBus = info->pci.bus,
1075 .pciDevice = info->pci.dev,
1076 .pciFunction = info->pci.func,
1077
1078 /* VK_EXT_physical_device_drm gets populated later */
1079
1080 /* VK_EXT_provoking_vertex */
1081 .provokingVertexModePerPipeline = true,
1082 .transformFeedbackPreservesTriangleFanProvokingVertex = true,
1083
1084 /* VK_EXT_robustness2 */
1085 .robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
1086 .robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),
1087
1088 /* VK_EXT_sample_locations */
1089 .sampleLocationSampleCounts = sample_counts,
1090 .maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
1091 .sampleLocationCoordinateRange[0] = 0.0f,
1092 .sampleLocationCoordinateRange[1] = 0.9375f,
1093 .sampleLocationSubPixelBits = 4,
1094 .variableSampleLocations = true,
1095
1096 /* VK_EXT_shader_object */
1097 .shaderBinaryVersion = 0,
1098
1099 /* VK_EXT_transform_feedback */
1100 .maxTransformFeedbackStreams = 4,
1101 .maxTransformFeedbackBuffers = 4,
1102 .maxTransformFeedbackBufferSize = UINT32_MAX,
1103 .maxTransformFeedbackStreamDataSize = 2048,
1104 .maxTransformFeedbackBufferDataSize = 512,
1105 .maxTransformFeedbackBufferDataStride = 2048,
1106 .transformFeedbackQueries = true,
1107 .transformFeedbackStreamsLinesTriangles = false,
1108 .transformFeedbackRasterizationStreamSelect = true,
1109 .transformFeedbackDraw = true,
1110
1111 /* VK_KHR_fragment_shader_barycentric */
1112 .triStripVertexOrderIndependentOfProvokingVertex = false,
1113
1114 /* VK_KHR_fragment_shading_rate */
1115 .minFragmentShadingRateAttachmentTexelSize = { 16, 16 },
1116 .maxFragmentShadingRateAttachmentTexelSize = { 16, 16 },
1117 .maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1,
1118 .primitiveFragmentShadingRateWithMultipleViewports = info->cls_eng3d >= AMPERE_B,
1119 .layeredShadingRateAttachments = true,
1120 .fragmentShadingRateNonTrivialCombinerOps = true,
1121 .maxFragmentSize = { 4, 4 },
1122 .maxFragmentSizeAspectRatio = 2,
1123 .maxFragmentShadingRateCoverageSamples = 16,
1124 .maxFragmentShadingRateRasterizationSamples = 16,
1125 .fragmentShadingRateWithShaderDepthStencilWrites = true,
1126 .fragmentShadingRateWithSampleMask = true,
1127 .fragmentShadingRateWithShaderSampleMask = true,
1128 .fragmentShadingRateWithConservativeRasterization = true,
1129 //.fragmentShadingRateWithFragmentShaderInterlock = true,
1130 .fragmentShadingRateWithCustomSampleLocations = true,
1131 .fragmentShadingRateStrictMultiplyCombiner = true,
1132
1133 /* VK_NV_shader_sm_builtins */
1134 .shaderSMCount = (uint32_t)info->tpc_count * info->mp_per_tpc,
1135 .shaderWarpsPerSM = info->max_warps_per_mp,
1136 };
1137
1138 /* Add the driver to the device name (like other Mesa drivers do) */
1139 if (!strcmp(info->device_name, info->chipset_name)) {
1140 snprintf(properties->deviceName, sizeof(properties->deviceName),
1141 "NVK %s", info->device_name);
1142 } else {
1143 snprintf(properties->deviceName, sizeof(properties->deviceName),
1144 "%s (NVK %s)", info->device_name, info->chipset_name);
1145 }
1146
1147 /* VK_EXT_host_image_copy */
1148
   /* Not sure if there are layout-specific restrictions, so for now just
    * report all the layouts from core and the extensions we support.
    */
1152 static const VkImageLayout supported_layouts[] = {
1153 VK_IMAGE_LAYOUT_GENERAL, /* this one is required by spec */
1154 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1155 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1156 VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1157 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1158 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1159 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1160 VK_IMAGE_LAYOUT_PREINITIALIZED,
1161 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1162 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1163 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1164 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1165 VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1166 VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1167 VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1168 VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1169 VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
1170 VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1171 };
1172
1173 properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
1174 properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1175 properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
1176 properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1177
1178 STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1179 memcpy(properties->optimalTilingLayoutUUID,
1180 instance->driver_build_sha, VK_UUID_SIZE);
1181
1182 properties->identicalMemoryTypeRequirements = false;
1183
1184 /* VK_EXT_shader_module_identifier */
1185 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1186 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1187 memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
1188 vk_shaderModuleIdentifierAlgorithmUUID,
1189 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1190
1191 nv_device_uuid(info, properties->deviceUUID, VK_UUID_SIZE, true);
1192 STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1193 memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
1194
1195 snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK");
1196 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1197 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
1198 }
1199
1200 static void
nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
1202 {
1203 struct nvk_instance *instance = nvk_physical_device_instance(pdev);
1204
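   /* Hash the driver build SHA together with the compiler flags so that
    * cached pipelines and shaders are invalidated whenever either changes.
    */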
1205 struct mesa_sha1 sha_ctx;
1206 _mesa_sha1_init(&sha_ctx);
1207
1208 _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
1209 sizeof(instance->driver_build_sha));
1210
1211 const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
1212 _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1213
1214 unsigned char sha[SHA1_DIGEST_LENGTH];
1215 _mesa_sha1_final(&sha_ctx, sha);
1216
1217 STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1218 memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1219 memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1220
1221 #ifdef ENABLE_SHADER_CACHE
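   /* Key the on-disk shader cache by chipset, with the driver build SHA as
    * the timestamp and the compiler flags as the driver flags.
    */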
1222 char renderer[10];
1223 ASSERTED int len = snprintf(renderer, sizeof(renderer), "nvk_%04x",
1224 pdev->info.chipset);
1225 assert(len == sizeof(renderer) - 2);
1226
1227 char timestamp[41];
1228 _mesa_sha1_format(timestamp, instance->driver_build_sha);
1229
1230 const uint64_t driver_flags = nvk_physical_device_compiler_flags(pdev);
1231 pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1232 #endif
1233 }
1234
1235 static void
nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev)
1237 {
1238 #ifdef ENABLE_SHADER_CACHE
1239 if (pdev->vk.disk_cache) {
1240 disk_cache_destroy(pdev->vk.disk_cache);
1241 pdev->vk.disk_cache = NULL;
1242 }
1243 #else
1244 assert(pdev->vk.disk_cache == NULL);
1245 #endif
1246 }
1247
1248 static uint64_t
nvk_get_sysmem_heap_size(void)
1250 {
1251 uint64_t sysmem_size_B = 0;
1252 if (!os_get_total_physical_memory(&sysmem_size_B))
1253 return 0;
1254
1255 /* Use 3/4 of total size to avoid swapping */
1256 return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20);
1257 }
1258
1259 static uint64_t
nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev)
1261 {
1262 uint64_t sysmem_size_B = 0;
1263 if (!os_get_available_system_memory(&sysmem_size_B)) {
1264 vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
1265 return 0;
1266 }
1267
1268 /* Use 3/4 of available to avoid swapping */
1269 return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20);
1270 }
1271
1272 static uint64_t
nvk_get_vram_heap_available(struct nvk_physical_device *pdev)
1274 {
1275 const uint64_t used = nvkmd_pdev_get_vram_used(pdev->nvkmd);
1276 if (used > pdev->info.vram_size_B)
1277 return 0;
1278
1279 return pdev->info.vram_size_B - used;
1280 }
1281
1282 VkResult
nvk_create_drm_physical_device(struct vk_instance *_instance,
                               struct _drmDevice *drm_device,
                               struct vk_physical_device **pdev_out)
1286 {
1287 struct nvk_instance *instance = (struct nvk_instance *)_instance;
1288 VkResult result;
1289
1290 struct nvkmd_pdev *nvkmd;
1291 result = nvkmd_try_create_pdev_for_drm(drm_device, &instance->vk.base,
1292 instance->debug_flags, &nvkmd);
1293 if (result != VK_SUCCESS)
1294 return result;
1295
1296 /* We don't support anything pre-Kepler */
1297 if (nvkmd->dev_info.cls_eng3d < KEPLER_A) {
1298 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1299 goto fail_nvkmd;
1300 }
1301
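   /* Only discrete Turing through Ada GPUs are treated as conformant;
    * anything else requires an explicit opt-in below.
    */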
1302 bool conformant =
1303 nvkmd->dev_info.type == NV_DEVICE_TYPE_DIS &&
1304 nvkmd->dev_info.cls_eng3d >= TURING_A &&
1305 nvkmd->dev_info.cls_eng3d <= ADA_A;
1306
1307 if (!conformant &&
1308 !debug_get_bool_option("NVK_I_WANT_A_BROKEN_VULKAN_DRIVER", false)) {
1309 #ifdef NDEBUG
1310 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1311 #else
1312 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1313 "WARNING: NVK is not well-tested on %s, pass "
1314 "NVK_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
1315 "if you know what you're doing.",
1316 nvkmd->dev_info.device_name);
1317 #endif
1318 goto fail_nvkmd;
1319 }
1320
1321 if (!conformant)
1322 vk_warn_non_conformant_implementation("NVK");
1323
1324 struct nvk_physical_device *pdev =
1325 vk_zalloc(&instance->vk.alloc, sizeof(*pdev),
1326 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1327
1328 if (pdev == NULL) {
1329 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1330 goto fail_nvkmd;
1331 }
1332
1333 struct vk_physical_device_dispatch_table dispatch_table;
1334 vk_physical_device_dispatch_table_from_entrypoints(
1335 &dispatch_table, &nvk_physical_device_entrypoints, true);
1336 vk_physical_device_dispatch_table_from_entrypoints(
1337 &dispatch_table, &wsi_physical_device_entrypoints, false);
1338
1339 struct vk_device_extension_table supported_extensions;
1340 nvk_get_device_extensions(instance, &nvkmd->dev_info,
1341 nvkmd->kmd_info.has_alloc_tiled,
1342 &supported_extensions);
1343
1344 struct vk_features supported_features;
1345 nvk_get_device_features(&nvkmd->dev_info, &supported_extensions,
1346 &supported_features);
1347
1348 struct vk_properties properties;
1349 nvk_get_device_properties(instance, &nvkmd->dev_info, conformant,
1350 &properties);
1351
1352 if (nvkmd->drm.render_dev) {
1353 properties.drmHasRender = true;
1354 properties.drmRenderMajor = major(nvkmd->drm.render_dev);
1355 properties.drmRenderMinor = minor(nvkmd->drm.render_dev);
1356 }
1357
1358 if (nvkmd->drm.primary_dev) {
1359 properties.drmHasPrimary = true;
1360 properties.drmPrimaryMajor = major(nvkmd->drm.primary_dev);
1361 properties.drmPrimaryMinor = minor(nvkmd->drm.primary_dev);
1362 }
1363
1364 result = vk_physical_device_init(&pdev->vk, &instance->vk,
1365 &supported_extensions,
1366 &supported_features,
1367 &properties,
1368 &dispatch_table);
1369 if (result != VK_SUCCESS)
1370 goto fail_alloc;
1371
1372 pdev->nvkmd = nvkmd;
1373 pdev->info = nvkmd->dev_info;
1374 pdev->debug_flags = instance->debug_flags;
1375
1376 pdev->nak = nak_compiler_create(&pdev->info);
1377 if (pdev->nak == NULL) {
1378 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1379 goto fail_init;
1380 }
1381
1382 nvk_physical_device_init_pipeline_cache(pdev);
1383
1384 uint64_t sysmem_size_B = nvk_get_sysmem_heap_size();
1385 if (sysmem_size_B == 0) {
1386 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1387 "Failed to query total system memory");
1388 goto fail_disk_cache;
1389 }
1390
1391 if (pdev->info.vram_size_B > 0) {
1392 uint32_t vram_heap_idx = pdev->mem_heap_count++;
1393 uint32_t bar_heap_idx = vram_heap_idx;
1394 pdev->mem_heaps[vram_heap_idx] = (struct nvk_memory_heap) {
1395 .size = pdev->info.vram_size_B,
1396 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1397 };
1398
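      /* If the BAR is smaller than VRAM, expose it as a separate device-local
       * heap so that host-visible VRAM allocations are accounted against the
       * BAR size rather than all of VRAM.
       */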
1399 if (pdev->info.bar_size_B > 0 &&
1400 pdev->info.bar_size_B < pdev->info.vram_size_B) {
1401 bar_heap_idx = pdev->mem_heap_count++;
1402 pdev->mem_heaps[bar_heap_idx] = (struct nvk_memory_heap) {
1403 .size = pdev->info.bar_size_B,
1404 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1405 };
1406 }
1407
1408 /* Only set available if we have the ioctl. */
1409 if (nvkmd->kmd_info.has_get_vram_used)
1410 pdev->mem_heaps[vram_heap_idx].available = nvk_get_vram_heap_available;
1411
1412 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1413 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1414 .heapIndex = vram_heap_idx,
1415 };
1416
      if (pdev->info.cls_eng3d >= MAXWELL_A) {
         pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
            .heapIndex = bar_heap_idx,
         };
      }
   }

   uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
   pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) {
      .size = sysmem_size_B,
      /* If we don't have any VRAM (iGPU), claim sysmem as DEVICE_LOCAL */
      .flags = pdev->info.vram_size_B == 0
               ? VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
               : 0,
      .available = nvk_get_sysmem_heap_available,
   };

   pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
      /* TODO: What's the right thing to do here on Tegra? */
      .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                       VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
      .heapIndex = sysmem_heap_idx,
   };

   assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
   assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
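
   /* Illustrative example (not taken from any particular device): on a
    * discrete Maxwell-or-later GPU with 8 GiB of VRAM and a 256 MiB BAR,
    * this yields heap 0 = VRAM (8 GiB, DEVICE_LOCAL), heap 1 = BAR
    * (256 MiB, DEVICE_LOCAL), and heap 2 = sysmem, with type 0
    * (DEVICE_LOCAL) on heap 0, type 1 (DEVICE_LOCAL | HOST_VISIBLE |
    * HOST_COHERENT) on heap 1, and type 2 (HOST_VISIBLE | HOST_COHERENT |
    * HOST_CACHED) on heap 2. On an iGPU with no VRAM, only the sysmem
    * heap exists, flagged DEVICE_LOCAL, with the single host-visible type.
    */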

   pdev->queue_families[pdev->queue_family_count++] = (struct nvk_queue_family) {
      .queue_flags = VK_QUEUE_GRAPHICS_BIT |
                     VK_QUEUE_COMPUTE_BIT |
                     VK_QUEUE_TRANSFER_BIT |
                     VK_QUEUE_SPARSE_BINDING_BIT,
      .queue_count = 1,
   };
   assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));

   pdev->vk.supported_sync_types = nvkmd->sync_types;

#ifdef NVK_USE_WSI_PLATFORM
   result = nvk_init_wsi(pdev);
   if (result != VK_SUCCESS)
      goto fail_disk_cache;
#endif

   *pdev_out = &pdev->vk;

   return VK_SUCCESS;

fail_disk_cache:
   nvk_physical_device_free_disk_cache(pdev);
   nak_compiler_destroy(pdev->nak);
fail_init:
   vk_physical_device_finish(&pdev->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, pdev);
fail_nvkmd:
   nvkmd_pdev_destroy(nvkmd);
   return result;
}

void
nvk_physical_device_destroy(struct vk_physical_device *vk_pdev)
{
   struct nvk_physical_device *pdev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

#ifdef NVK_USE_WSI_PLATFORM
   nvk_finish_wsi(pdev);
#endif
   nvk_physical_device_free_disk_cache(pdev);
   nak_compiler_destroy(pdev->nak);
   nvkmd_pdev_destroy(pdev->nvkmd);
   vk_physical_device_finish(&pdev->vk);
   vk_free(&pdev->vk.instance->alloc, pdev);
}

VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);

   pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
   for (int i = 0; i < pdev->mem_heap_count; i++) {
      pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap) {
         .size = pdev->mem_heaps[i].size,
         .flags = pdev->mem_heaps[i].flags,
      };
   }

   pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
   for (int i = 0; i < pdev->mem_type_count; i++) {
      pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
   }

   vk_foreach_struct(ext, pMemoryProperties->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;

         for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
            const struct nvk_memory_heap *heap = &pdev->mem_heaps[i];
            uint64_t used = p_atomic_read(&heap->used);

            /* From the Vulkan 1.3.278 spec:
             *
             *    "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
             *    values in which memory usages are returned, with one element
             *    for each memory heap. A heap’s usage is an estimate of how
             *    much memory the process is currently using in that heap."
             *
             * TODO: Include internal allocations?
             */
            p->heapUsage[i] = used;

            uint64_t available = heap->size;
            if (heap->available)
               available = heap->available(pdev);

            /* From the Vulkan 1.3.278 spec:
             *
             *    "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
             *    values in which memory budgets are returned, with one
             *    element for each memory heap. A heap’s budget is a rough
             *    estimate of how much memory the process can allocate from
             *    that heap before allocations may fail or cause performance
             *    degradation. The budget includes any currently allocated
             *    device memory."
             *
             * and
             *
             *    "The heapBudget value must be less than or equal to
             *    VkMemoryHeap::size for each heap."
             *
             * available (queried above) is the total amount of free memory
             * system-wide and does not include our own allocations, so we
             * need to add those back in before clamping to the heap size.
             */
            uint64_t budget = MIN2(available + used, heap->size);

            /* Only report 90% of the budget to avoid thrashing */
            p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
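            /* Worked example with made-up numbers: for an 8 GiB heap with
             * 2 GiB reported available system-wide and 1 GiB already used
             * by this process, budget = MIN2(2 GiB + 1 GiB, 8 GiB) = 3 GiB,
             * and the advertised heapBudget is 90% of that rounded down to
             * a 1 MiB boundary, i.e. roughly 2.7 GiB.
             */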
         }

         /* From the Vulkan 1.3.278 spec:
          *
          *    "The heapBudget and heapUsage values must be zero for array
          *    elements greater than or equal to
          *    VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
          *    heapBudget value must be non-zero for array elements less than
          *    VkPhysicalDeviceMemoryProperties::memoryHeapCount."
          */
         for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
            p->heapBudget[i] = 0u;
            p->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
      const struct nvk_queue_family *queue_family = &pdev->queue_families[i];

      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
         p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
         p->queueFamilyProperties.queueCount = queue_family->queue_count;
         p->queueFamilyProperties.timestampValidBits = 64;
         p->queueFamilyProperties.minImageTransferGranularity =
            (VkExtent3D){1, 1, 1};

         vk_foreach_struct(ext, p->pNext) {
            switch (ext->sType) {
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES: {
               VkQueueFamilyGlobalPriorityProperties *props = (void *)ext;
               props->priorityCount = 1;
               props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM;
               break;
            }

            default:
               vk_debug_ignored_stype(ext->sType);
               break;
            }
         }
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMultisamplePropertiesEXT(
   VkPhysicalDevice physicalDevice,
   VkSampleCountFlagBits samples,
   VkMultisamplePropertiesEXT *pMultisampleProperties)
{
   VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);

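   /* NVK only exposes a 1x1 sample-location grid: one set of custom sample
    * locations applies uniformly to every pixel rather than varying over a
    * larger pixel grid.
    */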
   if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
   } else {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
   }
}

VkExtent2D
nvk_max_shading_rate(const struct nvk_physical_device *pdev,
                     VkSampleCountFlagBits samples)
{
   const struct nil_Extent4D_Samples px_extent_sa =
      nil_px_extent_sa(nil_choose_sample_layout(samples));

   assert(px_extent_sa.width <= 4);
   assert(px_extent_sa.height <= 4);
   assert(px_extent_sa.depth == 1);
   assert(px_extent_sa.array_len == 1);

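   /* The maximum coarse-fragment footprint works out to 4x4 samples, so the
    * maximum rate in pixels shrinks as the per-pixel sample layout grows.
    * For example, assuming nil_choose_sample_layout() picks a 2x2 layout
    * for 4x MSAA, the maximum shading rate at 4 samples is 2x2 pixels,
    * while at 1 sample it is 4x4 pixels.
    */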
   return (VkExtent2D) {
      .width = 4 / px_extent_sa.width,
      .height = 4 / px_extent_sa.height,
   };
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetPhysicalDeviceFragmentShadingRatesKHR(
   VkPhysicalDevice physicalDevice,
   uint32_t *pFragmentShadingRateCount,
   VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

   /* From the Vulkan 1.3.297 spec:
    *
    *    "The returned array of fragment shading rates must be ordered from
    *    largest fragmentSize.width value to smallest, and each set of
    *    fragment shading rates with the same fragmentSize.width value must be
    *    ordered from largest fragmentSize.height to smallest. Any two entries
    *    in the array must not have the same fragmentSize values."
    */
   VkExtent2D shading_rates[] = {
      { 4, 4 },
      { 4, 2 },
      { 2, 4 },
      { 2, 2 },
      { 2, 1 },
      { 1, 2 },
      { 1, 1 },
   };

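   /* A rate is advertised for a given sample count only if it fits within
    * nvk_max_shading_rate() for that count. Since the maximum rate only
    * shrinks as the sample count grows, we can stop at the first sample
    * count that no longer fits.
    */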
   for (uint32_t i = 0; i < ARRAY_SIZE(shading_rates); i++) {
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, p) {
         p->fragmentSize = shading_rates[i];
         if (shading_rates[i].width == 1 && shading_rates[i].height == 1) {
            /* The Vulkan spec requires us to set ~0 for 1x1. */
            p->sampleCounts = ~0;
         } else {
            p->sampleCounts = 0;
            for (uint32_t samples = 1; samples <= 16; samples <<= 1) {
               VkExtent2D max_rate = nvk_max_shading_rate(pdev, samples);
               if (shading_rates[i].width > max_rate.width ||
                   shading_rates[i].height > max_rate.height)
                  break;

               p->sampleCounts |= samples;
            }
         }
      }
   }

   return vk_outarray_status(&out);
}