1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "nvk_physical_device.h"
6
7 #include "nak.h"
8 #include "nvk_buffer.h"
9 #include "nvk_descriptor_types.h"
10 #include "nvk_entrypoints.h"
11 #include "nvk_format.h"
12 #include "nvk_image.h"
13 #include "nvk_image_view.h"
14 #include "nvk_instance.h"
15 #include "nvk_sampler.h"
16 #include "nvk_shader.h"
17 #include "nvk_wsi.h"
18 #include "nvkmd/nvkmd.h"
19 #include "nvkmd/nouveau/nvkmd_nouveau.h"
20 #include "git_sha1.h"
21 #include "util/detect_os.h"
22 #include "util/disk_cache.h"
23 #include "util/mesa-sha1.h"
24
25 #if DETECT_OS_ANDROID
26 #include <vulkan/vk_android_native_buffer.h>
27 #include "util/u_gralloc/u_gralloc.h"
28 #endif
29
30 #include "vk_android.h"
31 #include "vk_device.h"
32 #include "vk_drm_syncobj.h"
33 #include "vk_shader_module.h"
34 #include "vulkan/wsi/wsi_common.h"
35
36 #include <sys/sysmacros.h>
37
38 #include "nv_push.h"
39 #include "cl90c0.h"
40 #include "cl91c0.h"
41 #include "cla097.h"
42 #include "cla0c0.h"
43 #include "cla1c0.h"
44 #include "clb097.h"
45 #include "clb0c0.h"
46 #include "clb197.h"
47 #include "clb1c0.h"
48 #include "clc097.h"
49 #include "clc0c0.h"
50 #include "clc1c0.h"
51 #include "clc397.h"
52 #include "clc3c0.h"
53 #include "clc597.h"
54 #include "clc5c0.h"
55 #include "clc797.h"
56 #include "clc997.h"
57
58 static bool
nvk_use_nak(const struct nv_device_info * info)59 nvk_use_nak(const struct nv_device_info *info)
60 {
61 const VkShaderStageFlags vk10_stages =
62 VK_SHADER_STAGE_VERTEX_BIT |
63 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
64 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
65 VK_SHADER_STAGE_GEOMETRY_BIT |
66 VK_SHADER_STAGE_FRAGMENT_BIT |
67 VK_SHADER_STAGE_COMPUTE_BIT;
68
69 return !(vk10_stages & ~nvk_nak_stages(info));
70 }
71
72 static uint32_t
nvk_get_vk_version(const struct nv_device_info * info)73 nvk_get_vk_version(const struct nv_device_info *info)
74 {
75 /* Version override takes priority */
76 const uint32_t version_override = vk_get_version_override();
77 if (version_override)
78 return version_override;
79
80 /* If we're using codegen for anything, lock to version 1.0 */
81 if (!nvk_use_nak(info))
82 return VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
83
84 #if defined(ANDROID_STRICT) && ANDROID_API_LEVEL <= 32
85 return VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION);
86 #endif
87
88 /* Vulkan 1.4 requires hostImageCopy which is currently only supported on
89 * Turing+.
90 */
91 if (info->cls_eng3d < TURING_A)
92 return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
93
94 return VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION);
95 }
96
97 static void
nvk_get_device_extensions(const struct nvk_instance * instance,const struct nv_device_info * info,bool has_tiled_bos,struct vk_device_extension_table * ext)98 nvk_get_device_extensions(const struct nvk_instance *instance,
99 const struct nv_device_info *info,
100 bool has_tiled_bos,
101 struct vk_device_extension_table *ext)
102 {
103 *ext = (struct vk_device_extension_table) {
104 .KHR_8bit_storage = true,
105 .KHR_16bit_storage = true,
106 .KHR_bind_memory2 = true,
107 .KHR_buffer_device_address = true,
108 .KHR_calibrated_timestamps = true,
109 .KHR_compute_shader_derivatives = nvk_use_nak(info),
110 .KHR_copy_commands2 = true,
111 .KHR_create_renderpass2 = true,
112 .KHR_dedicated_allocation = true,
113 .KHR_depth_stencil_resolve = true,
114 .KHR_descriptor_update_template = true,
115 .KHR_device_group = true,
116 .KHR_draw_indirect_count = info->cls_eng3d >= TURING_A,
117 .KHR_driver_properties = true,
118 .KHR_dynamic_rendering = true,
119 .KHR_dynamic_rendering_local_read = true,
120 .KHR_external_fence = true,
121 .KHR_external_fence_fd = true,
122 .KHR_external_memory = true,
123 .KHR_external_memory_fd = true,
124 .KHR_external_semaphore = true,
125 .KHR_external_semaphore_fd = true,
126 .KHR_format_feature_flags2 = true,
127 .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
128 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
129 .KHR_fragment_shading_rate = info->cls_eng3d >= TURING_A,
130 .KHR_get_memory_requirements2 = true,
131 .KHR_global_priority = true,
132 .KHR_image_format_list = true,
133 .KHR_imageless_framebuffer = true,
134 #ifdef NVK_USE_WSI_PLATFORM
135 .KHR_incremental_present = true,
136 #endif
137 .KHR_index_type_uint8 = true,
138 .KHR_line_rasterization = true,
139 .KHR_load_store_op_none = true,
140 .KHR_maintenance1 = true,
141 .KHR_maintenance2 = true,
142 .KHR_maintenance3 = true,
143 .KHR_maintenance4 = true,
144 .KHR_maintenance5 = true,
145 .KHR_maintenance6 = true,
146 .KHR_maintenance7 = true,
147 .KHR_map_memory2 = true,
148 .KHR_multiview = true,
149 .KHR_pipeline_executable_properties = true,
150 .KHR_pipeline_library = true,
151 #ifdef NVK_USE_WSI_PLATFORM
152 /* Hide these behind dri configs for now since we cannot implement it
153 * reliably on all surfaces yet. There is no surface capability query
154 * for present wait/id, but the feature is useful enough to hide behind
155 * an opt-in mechanism for now. If the instance only enables surface
156 * extensions that unconditionally support present wait, we can also
157 * expose the extension that way.
158 */
159 .KHR_present_id = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
160 wsi_common_vk_instance_supports_present_wait(&instance->vk),
161 .KHR_present_wait = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
162 wsi_common_vk_instance_supports_present_wait(&instance->vk),
163 #endif
164 .KHR_push_descriptor = true,
165 .KHR_relaxed_block_layout = true,
166 .KHR_sampler_mirror_clamp_to_edge = true,
167 .KHR_sampler_ycbcr_conversion = true,
168 .KHR_separate_depth_stencil_layouts = true,
169 .KHR_shader_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
170 nvk_use_nak(info),
171 .KHR_shader_clock = true,
172 .KHR_shader_draw_parameters = true,
173 .KHR_shader_expect_assume = true,
174 .KHR_shader_float_controls = true,
175 .KHR_shader_float_controls2 = true,
176 .KHR_shader_float16_int8 = true,
177 .KHR_shader_integer_dot_product = true,
178 .KHR_shader_maximal_reconvergence = true,
179 .KHR_shader_non_semantic_info = true,
180 .KHR_shader_quad_control = true,
181 .KHR_shader_relaxed_extended_instruction = true,
182 .KHR_shader_subgroup_extended_types = true,
183 .KHR_shader_subgroup_rotate = nvk_use_nak(info),
184 .KHR_shader_subgroup_uniform_control_flow = nvk_use_nak(info),
185 .KHR_shader_terminate_invocation =
186 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
187 .KHR_spirv_1_4 = true,
188 .KHR_storage_buffer_storage_class = true,
189 .KHR_timeline_semaphore = true,
190 #ifdef NVK_USE_WSI_PLATFORM
191 .KHR_swapchain = true,
192 .KHR_swapchain_mutable_format = true,
193 #endif
194 .KHR_synchronization2 = true,
195 .KHR_uniform_buffer_standard_layout = true,
196 .KHR_variable_pointers = true,
197 .KHR_vertex_attribute_divisor = true,
198 .KHR_vulkan_memory_model = nvk_use_nak(info),
199 .KHR_workgroup_memory_explicit_layout = true,
200 .KHR_zero_initialize_workgroup_memory = true,
201 .EXT_4444_formats = true,
202 .EXT_attachment_feedback_loop_layout = true,
203 .EXT_border_color_swizzle = true,
204 .EXT_buffer_device_address = true,
205 .EXT_calibrated_timestamps = true,
206 .EXT_conditional_rendering = true,
207 .EXT_conservative_rasterization = info->cls_eng3d >= MAXWELL_B,
208 .EXT_color_write_enable = true,
209 .EXT_custom_border_color = true,
210 .EXT_depth_bias_control = true,
211 .EXT_depth_clamp_control = true,
212 .EXT_depth_clamp_zero_one = true,
213 .EXT_depth_clip_control = true,
214 .EXT_depth_clip_enable = true,
215 .EXT_depth_range_unrestricted = info->cls_eng3d >= VOLTA_A,
216 .EXT_descriptor_buffer = true,
217 .EXT_descriptor_indexing = true,
218 .EXT_device_generated_commands = true,
219 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
220 .EXT_display_control = true,
221 #endif
222 .EXT_image_drm_format_modifier = has_tiled_bos,
223 .EXT_dynamic_rendering_unused_attachments = true,
224 .EXT_extended_dynamic_state = true,
225 .EXT_extended_dynamic_state2 = true,
226 .EXT_extended_dynamic_state3 = true,
227 .EXT_external_memory_dma_buf = true,
228 .EXT_global_priority = true,
229 .EXT_global_priority_query = true,
230 .EXT_graphics_pipeline_library = true,
231 .EXT_host_query_reset = true,
232 .EXT_host_image_copy = info->cls_eng3d >= TURING_A,
233 .EXT_image_2d_view_of_3d = true,
234 .EXT_image_robustness = true,
235 .EXT_image_sliced_view_of_3d = true,
236 .EXT_image_view_min_lod = true,
237 .EXT_index_type_uint8 = true,
238 .EXT_inline_uniform_block = true,
239 .EXT_legacy_vertex_attributes = true,
240 .EXT_line_rasterization = true,
241 .EXT_load_store_op_none = true,
242 .EXT_map_memory_placed = true,
243 .EXT_memory_budget = true,
244 .EXT_multi_draw = true,
245 .EXT_mutable_descriptor_type = true,
246 .EXT_nested_command_buffer = true,
247 .EXT_non_seamless_cube_map = true,
248 .EXT_pci_bus_info = info->type == NV_DEVICE_TYPE_DIS,
249 .EXT_pipeline_creation_cache_control = true,
250 .EXT_pipeline_creation_feedback = true,
251 .EXT_pipeline_robustness = true,
252 .EXT_physical_device_drm = true,
253 .EXT_post_depth_coverage = true,
254 .EXT_primitive_topology_list_restart = true,
255 .EXT_private_data = true,
256 .EXT_primitives_generated_query = true,
257 .EXT_provoking_vertex = true,
258 .EXT_queue_family_foreign = true,
259 .EXT_robustness2 = true,
260 .EXT_sample_locations = info->cls_eng3d >= MAXWELL_B,
261 .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B,
262 .EXT_scalar_block_layout = nvk_use_nak(info),
263 .EXT_separate_stencil_usage = true,
264 .EXT_shader_image_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
265 nvk_use_nak(info),
266 .EXT_shader_demote_to_helper_invocation = true,
267 .EXT_shader_module_identifier = true,
268 .EXT_shader_object = true,
269 .EXT_shader_replicated_composites = true,
270 .EXT_shader_subgroup_ballot = true,
271 .EXT_shader_subgroup_vote = true,
272 .EXT_shader_viewport_index_layer = info->cls_eng3d >= MAXWELL_B,
273 .EXT_subgroup_size_control = true,
274 #ifdef NVK_USE_WSI_PLATFORM
275 .EXT_swapchain_maintenance1 = true,
276 #endif
277 .EXT_texel_buffer_alignment = true,
278 .EXT_tooling_info = true,
279 .EXT_transform_feedback = true,
280 .EXT_vertex_attribute_divisor = true,
281 .EXT_vertex_input_dynamic_state = true,
282 .EXT_ycbcr_2plane_444_formats = true,
283 .EXT_ycbcr_image_arrays = true,
284 #if DETECT_OS_ANDROID
285 .ANDROID_native_buffer = vk_android_get_ugralloc() != NULL,
286 #endif
287 .GOOGLE_decorate_string = true,
288 .GOOGLE_hlsl_functionality1 = true,
289 .GOOGLE_user_type = true,
290 .NV_compute_shader_derivatives = nvk_use_nak(info),
291 .NV_shader_sm_builtins = true,
292 .VALVE_mutable_descriptor_type = true,
293 };
294 }
295
296 static void
nvk_get_device_features(const struct nv_device_info * info,const struct vk_device_extension_table * supported_extensions,struct vk_features * features)297 nvk_get_device_features(const struct nv_device_info *info,
298 const struct vk_device_extension_table *supported_extensions,
299 struct vk_features *features)
300 {
301 *features = (struct vk_features) {
302 /* Vulkan 1.0 */
303 .robustBufferAccess = true,
304 .fullDrawIndexUint32 = true,
305 .imageCubeArray = true,
306 .independentBlend = true,
307 .geometryShader = true,
308 .tessellationShader = true,
309 .sampleRateShading = true,
310 .dualSrcBlend = true,
311 .logicOp = true,
312 .multiDrawIndirect = true,
313 .drawIndirectFirstInstance = true,
314 .depthClamp = true,
315 .depthBiasClamp = true,
316 .fillModeNonSolid = true,
317 .depthBounds = true,
318 .wideLines = true,
319 .largePoints = true,
320 .alphaToOne = true,
321 .multiViewport = true,
322 .samplerAnisotropy = true,
323 .textureCompressionETC2 = false,
324 .textureCompressionBC = true,
325 .textureCompressionASTC_LDR = false,
326 .occlusionQueryPrecise = true,
327 .pipelineStatisticsQuery = true,
328 .vertexPipelineStoresAndAtomics = true,
329 .fragmentStoresAndAtomics = true,
330 .shaderTessellationAndGeometryPointSize = true,
331 .shaderImageGatherExtended = true,
332 .shaderStorageImageExtendedFormats = true,
333 .shaderStorageImageMultisample = true,
334 .shaderStorageImageReadWithoutFormat = info->cls_eng3d >= MAXWELL_A,
335 .shaderStorageImageWriteWithoutFormat = true,
336 .shaderUniformBufferArrayDynamicIndexing = true,
337 .shaderSampledImageArrayDynamicIndexing = true,
338 .shaderStorageBufferArrayDynamicIndexing = true,
339 .shaderStorageImageArrayDynamicIndexing = true,
340 .shaderClipDistance = true,
341 .shaderCullDistance = true,
342 .shaderFloat64 = true,
343 .shaderInt64 = true,
344 .shaderInt16 = true,
345 .shaderResourceResidency = info->cls_eng3d >= VOLTA_A,
346 .shaderResourceMinLod = info->cls_eng3d >= VOLTA_A,
347 .sparseBinding = true,
348 .sparseResidency2Samples = info->cls_eng3d >= MAXWELL_B,
349 .sparseResidency4Samples = info->cls_eng3d >= MAXWELL_B,
350 .sparseResidency8Samples = info->cls_eng3d >= MAXWELL_B,
351 .sparseResidencyAliased = info->cls_eng3d >= MAXWELL_B,
352 .sparseResidencyBuffer = info->cls_eng3d >= MAXWELL_B,
353 .sparseResidencyImage2D = info->cls_eng3d >= MAXWELL_B,
354 .sparseResidencyImage3D = info->cls_eng3d >= MAXWELL_B,
355 .variableMultisampleRate = true,
356 .inheritedQueries = true,
357
358 /* Vulkan 1.1 */
359 .storageBuffer16BitAccess = true,
360 .uniformAndStorageBuffer16BitAccess = true,
361 .storagePushConstant16 = true,
362 .multiview = true,
363 .multiviewGeometryShader = true,
364 .multiviewTessellationShader = true,
365 .variablePointersStorageBuffer = true,
366 .variablePointers = true,
367 .shaderDrawParameters = true,
368 .samplerYcbcrConversion = true,
369
370 /* Vulkan 1.2 */
371 .samplerMirrorClampToEdge = true,
372 .drawIndirectCount = info->cls_eng3d >= TURING_A,
373 .storageBuffer8BitAccess = true,
374 .uniformAndStorageBuffer8BitAccess = true,
375 .storagePushConstant8 = true,
376 .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
377 nvk_use_nak(info),
378 .shaderSharedInt64Atomics = false, /* TODO */
379 /* TODO: Fp16 is currently busted on Turing and Volta due to instruction
380 * scheduling issues. Re-enable it once those are sorted.
381 */
382 .shaderFloat16 = info->sm >= 80 && nvk_use_nak(info),
383 .shaderInt8 = true,
384 .descriptorIndexing = true,
385 .shaderInputAttachmentArrayDynamicIndexing = true,
386 .shaderUniformTexelBufferArrayDynamicIndexing = true,
387 .shaderStorageTexelBufferArrayDynamicIndexing = true,
388 .shaderUniformBufferArrayNonUniformIndexing = true,
389 .shaderSampledImageArrayNonUniformIndexing = true,
390 .shaderStorageBufferArrayNonUniformIndexing = true,
391 .shaderStorageImageArrayNonUniformIndexing = true,
392 .shaderInputAttachmentArrayNonUniformIndexing = true,
393 .shaderUniformTexelBufferArrayNonUniformIndexing = true,
394 .shaderStorageTexelBufferArrayNonUniformIndexing = true,
395 .descriptorBindingUniformBufferUpdateAfterBind = true,
396 .descriptorBindingSampledImageUpdateAfterBind = true,
397 .descriptorBindingStorageImageUpdateAfterBind = true,
398 .descriptorBindingStorageBufferUpdateAfterBind = true,
399 .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
400 .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
401 .descriptorBindingUpdateUnusedWhilePending = true,
402 .descriptorBindingPartiallyBound = true,
403 .descriptorBindingVariableDescriptorCount = true,
404 .runtimeDescriptorArray = true,
405 .samplerFilterMinmax = info->cls_eng3d >= MAXWELL_B,
406 .scalarBlockLayout = nvk_use_nak(info),
407 .imagelessFramebuffer = true,
408 .uniformBufferStandardLayout = true,
409 .shaderSubgroupExtendedTypes = true,
410 .separateDepthStencilLayouts = true,
411 .hostQueryReset = true,
412 .timelineSemaphore = true,
413 .bufferDeviceAddress = true,
414 .bufferDeviceAddressCaptureReplay = true,
415 .bufferDeviceAddressMultiDevice = false,
416 .vulkanMemoryModel = nvk_use_nak(info),
417 .vulkanMemoryModelDeviceScope = nvk_use_nak(info),
418 .vulkanMemoryModelAvailabilityVisibilityChains = nvk_use_nak(info),
419 .shaderOutputViewportIndex = info->cls_eng3d >= MAXWELL_B,
420 .shaderOutputLayer = info->cls_eng3d >= MAXWELL_B,
421 .subgroupBroadcastDynamicId = nvk_use_nak(info),
422
423 /* Vulkan 1.3 */
424 .robustImageAccess = true,
425 .inlineUniformBlock = true,
426 .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
427 .pipelineCreationCacheControl = true,
428 .privateData = true,
429 .shaderDemoteToHelperInvocation = true,
430 .shaderTerminateInvocation = true,
431 .subgroupSizeControl = true,
432 .computeFullSubgroups = true,
433 .synchronization2 = true,
434 .shaderZeroInitializeWorkgroupMemory = true,
435 .dynamicRendering = true,
436 .shaderIntegerDotProduct = true,
437 .maintenance4 = true,
438
439 /* Vulkan 1.4 */
440 .globalPriorityQuery = true,
441 .shaderSubgroupRotate = nvk_use_nak(info),
442 .shaderSubgroupRotateClustered = nvk_use_nak(info),
443 .shaderFloatControls2 = true,
444 .shaderExpectAssume = true,
445 .rectangularLines = true,
446 .bresenhamLines = true,
447 .smoothLines = true,
448 .stippledRectangularLines = true,
449 .stippledBresenhamLines = true,
450 .stippledSmoothLines = true,
451 .vertexAttributeInstanceRateDivisor = true,
452 .vertexAttributeInstanceRateZeroDivisor = true,
453 .indexTypeUint8 = true,
454 .dynamicRenderingLocalRead = true,
455 .maintenance5 = true,
456 .maintenance6 = true,
457 .pipelineRobustness = true,
458 .hostImageCopy = info->cls_eng3d >= TURING_A,
459 .pushDescriptor = true,
460
461 /* VK_KHR_compute_shader_derivatives */
462 .computeDerivativeGroupQuads = true,
463 .computeDerivativeGroupLinear = true,
464
465 /* VK_KHR_fragment_shader_barycentric */
466 .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
467 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
468
469 /* VK_KHR_fragment_shading_rate */
470 .pipelineFragmentShadingRate = info->cls_eng3d >= TURING_A,
471 .primitiveFragmentShadingRate = info->cls_eng3d >= TURING_A,
472 .attachmentFragmentShadingRate = info->cls_eng3d >= TURING_A,
473
474 /* VK_KHR_maintenance7 */
475 .maintenance7 = true,
476
477 /* VK_KHR_pipeline_executable_properties */
478 .pipelineExecutableInfo = true,
479
480 /* VK_KHR_present_id */
481 .presentId = supported_extensions->KHR_present_id,
482
483 /* VK_KHR_present_wait */
484 .presentWait = supported_extensions->KHR_present_wait,
485
486 /* VK_KHR_shader_quad_control */
487 .shaderQuadControl = nvk_use_nak(info),
488
489 /* VK_KHR_shader_relaxed_extended_instruction */
490 .shaderRelaxedExtendedInstruction = true,
491
492 /* VK_KHR_shader_clock */
493 .shaderSubgroupClock = true,
494 .shaderDeviceClock = true,
495
496 /* VK_KHR_shader_maximal_reconvergence */
497 .shaderMaximalReconvergence = true,
498
499 /* VK_KHR_shader_subgroup_uniform_control_flow */
500 .shaderSubgroupUniformControlFlow = nvk_use_nak(info),
501
502 /* VK_KHR_workgroup_memory_explicit_layout */
503 .workgroupMemoryExplicitLayout = true,
504 .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
505 .workgroupMemoryExplicitLayout8BitAccess = nvk_use_nak(info),
506 .workgroupMemoryExplicitLayout16BitAccess = nvk_use_nak(info),
507
508 /* VK_EXT_4444_formats */
509 .formatA4R4G4B4 = true,
510 .formatA4B4G4R4 = true,
511
512 /* VK_EXT_attachment_feedback_loop_layout */
513 .attachmentFeedbackLoopLayout = true,
514
515 /* VK_EXT_border_color_swizzle */
516 .borderColorSwizzle = true,
517 .borderColorSwizzleFromImage = false,
518
519 /* VK_EXT_buffer_device_address */
520 .bufferDeviceAddressCaptureReplayEXT = true,
521
522 /* VK_EXT_color_write_enable */
523 .colorWriteEnable = true,
524
525 /* VK_EXT_conditional_rendering */
526 .conditionalRendering = true,
527 .inheritedConditionalRendering = true,
528
529 /* VK_EXT_custom_border_color */
530 .customBorderColors = true,
531 .customBorderColorWithoutFormat = true,
532
533 /* VK_EXT_depth_bias_control */
534 .depthBiasControl = true,
535 .leastRepresentableValueForceUnormRepresentation = true,
536 .floatRepresentation = false,
537 .depthBiasExact = true,
538
539 /* VK_EXT_depth_clamp_control */
540 .depthClampControl = true,
541
542 /* VK_EXT_depth_clamp_zero_one */
543 .depthClampZeroOne = true,
544
545 /* VK_EXT_depth_clip_control */
546 .depthClipControl = true,
547
548 /* VK_EXT_depth_clip_enable */
549 .depthClipEnable = true,
550
551 /* VK_EXT_descriptor_buffer */
552 .descriptorBuffer = true,
553 .descriptorBufferCaptureReplay = true,
554 .descriptorBufferImageLayoutIgnored = true,
555 .descriptorBufferPushDescriptors = false,
556
557 /* VK_EXT_device_generated_commands */
558 .deviceGeneratedCommands = true,
559 .dynamicGeneratedPipelineLayout = true,
560
561 /* VK_EXT_dynamic_rendering_unused_attachments */
562 .dynamicRenderingUnusedAttachments = true,
563
564 /* VK_EXT_extended_dynamic_state */
565 .extendedDynamicState = true,
566
567 /* VK_EXT_extended_dynamic_state2 */
568 .extendedDynamicState2 = true,
569 .extendedDynamicState2LogicOp = true,
570 .extendedDynamicState2PatchControlPoints = true,
571
572 /* VK_EXT_extended_dynamic_state3 */
573 .extendedDynamicState3TessellationDomainOrigin = true,
574 .extendedDynamicState3DepthClampEnable = true,
575 .extendedDynamicState3PolygonMode = true,
576 .extendedDynamicState3RasterizationSamples = true,
577 .extendedDynamicState3SampleMask = true,
578 .extendedDynamicState3AlphaToCoverageEnable = true,
579 .extendedDynamicState3AlphaToOneEnable = true,
580 .extendedDynamicState3LogicOpEnable = true,
581 .extendedDynamicState3ColorBlendEnable = true,
582 .extendedDynamicState3ColorBlendEquation = true,
583 .extendedDynamicState3ColorWriteMask = true,
584 .extendedDynamicState3RasterizationStream = true,
585 .extendedDynamicState3ConservativeRasterizationMode = false,
586 .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
587 .extendedDynamicState3DepthClipEnable = true,
588 .extendedDynamicState3SampleLocationsEnable = info->cls_eng3d >= MAXWELL_B,
589 .extendedDynamicState3ColorBlendAdvanced = false,
590 .extendedDynamicState3ProvokingVertexMode = true,
591 .extendedDynamicState3LineRasterizationMode = true,
592 .extendedDynamicState3LineStippleEnable = true,
593 .extendedDynamicState3DepthClipNegativeOneToOne = true,
594 .extendedDynamicState3ViewportWScalingEnable = false,
595 .extendedDynamicState3ViewportSwizzle = false,
596 .extendedDynamicState3CoverageToColorEnable = false,
597 .extendedDynamicState3CoverageToColorLocation = false,
598 .extendedDynamicState3CoverageModulationMode = false,
599 .extendedDynamicState3CoverageModulationTableEnable = false,
600 .extendedDynamicState3CoverageModulationTable = false,
601 .extendedDynamicState3CoverageReductionMode = false,
602 .extendedDynamicState3RepresentativeFragmentTestEnable = false,
603 .extendedDynamicState3ShadingRateImageEnable = false,
604
605 /* VK_EXT_graphics_pipeline_library */
606 .graphicsPipelineLibrary = true,
607
608 /* VK_EXT_image_2d_view_of_3d */
609 .image2DViewOf3D = true,
610 .sampler2DViewOf3D = true,
611
612 /* VK_EXT_image_sliced_view_of_3d */
613 .imageSlicedViewOf3D = true,
614
615 #ifdef NVK_USE_WSI_PLATFORM
616 /* VK_EXT_swapchain_maintenance1 */
617 .swapchainMaintenance1 = true,
618 #endif
619
620 /* VK_EXT_image_view_min_lod */
621 .minLod = true,
622
623 /* VK_EXT_legacy_vertex_attributes */
624 .legacyVertexAttributes = true,
625
626 /* VK_EXT_map_memory_placed */
627 .memoryMapPlaced = true,
628 .memoryMapRangePlaced = false,
629 .memoryUnmapReserve = true,
630
631 /* VK_EXT_multi_draw */
632 .multiDraw = true,
633
634 /* VK_EXT_mutable_descriptor_type */
635 .mutableDescriptorType = true,
636
637 /* VK_EXT_nested_command_buffer */
638 .nestedCommandBuffer = true,
639 .nestedCommandBufferRendering = true,
640 .nestedCommandBufferSimultaneousUse = true,
641
642 /* VK_EXT_non_seamless_cube_map */
643 .nonSeamlessCubeMap = true,
644
645 /* VK_EXT_primitive_topology_list_restart */
646 .primitiveTopologyListRestart = true,
647 .primitiveTopologyPatchListRestart = true,
648
649 /* VK_EXT_primitives_generated_query */
650 .primitivesGeneratedQuery = true,
651 .primitivesGeneratedQueryWithNonZeroStreams = true,
652 .primitivesGeneratedQueryWithRasterizerDiscard = true,
653
654 /* VK_EXT_provoking_vertex */
655 .provokingVertexLast = true,
656 .transformFeedbackPreservesProvokingVertex = true,
657
658 /* VK_EXT_robustness2 */
659 .robustBufferAccess2 = true,
660 .robustImageAccess2 = true,
661 .nullDescriptor = true,
662
663 /* VK_EXT_shader_image_atomic_int64 */
664 .shaderImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
665 nvk_use_nak(info),
666 .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
667 nvk_use_nak(info),
668
669 /* VK_EXT_shader_module_identifier */
670 .shaderModuleIdentifier = true,
671
672 /* VK_EXT_shader_object */
673 .shaderObject = true,
674
675 /* VK_EXT_shader_replicated_composites */
676 .shaderReplicatedComposites = true,
677
678 /* VK_EXT_texel_buffer_alignment */
679 .texelBufferAlignment = true,
680
681 /* VK_EXT_transform_feedback */
682 .transformFeedback = true,
683 .geometryStreams = true,
684
685 /* VK_EXT_vertex_input_dynamic_state */
686 .vertexInputDynamicState = true,
687
688 /* VK_EXT_ycbcr_2plane_444_formats */
689 .ycbcr2plane444Formats = true,
690
691 /* VK_EXT_ycbcr_image_arrays */
692 .ycbcrImageArrays = true,
693
694 /* VK_NV_shader_sm_builtins */
695 .shaderSMBuiltins = true,
696 };
697 }
698
699 static void
nvk_get_device_properties(const struct nvk_instance * instance,const struct nv_device_info * info,bool conformant,struct vk_properties * properties)700 nvk_get_device_properties(const struct nvk_instance *instance,
701 const struct nv_device_info *info,
702 bool conformant,
703 struct vk_properties *properties)
704 {
705 const VkSampleCountFlagBits sample_counts = VK_SAMPLE_COUNT_1_BIT |
706 VK_SAMPLE_COUNT_2_BIT |
707 VK_SAMPLE_COUNT_4_BIT |
708 VK_SAMPLE_COUNT_8_BIT;
709
710 assert(sample_counts <= (NVK_MAX_SAMPLES << 1) - 1);
711
712 uint64_t os_page_size = 4096;
713 os_get_page_size(&os_page_size);
714
715 *properties = (struct vk_properties) {
716 .apiVersion = nvk_get_vk_version(info),
717 .driverVersion = vk_get_driver_version(),
718 .vendorID = instance->force_vk_vendor != 0 ?
719 instance->force_vk_vendor : NVIDIA_VENDOR_ID,
720 .deviceID = info->device_id,
721 .deviceType = info->type == NV_DEVICE_TYPE_DIS ?
722 VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
723 VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
724
725 /* Vulkan 1.0 limits */
726 .maxImageDimension1D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_1D),
727 .maxImageDimension2D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_2D),
728 .maxImageDimension3D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_3D),
729 .maxImageDimensionCube = 0x8000,
730 .maxImageArrayLayers = 2048,
731 .maxTexelBufferElements = 128 * 1024 * 1024,
732 .maxUniformBufferRange = 65536,
733 .maxStorageBufferRange = UINT32_MAX,
734 .maxPushConstantsSize = NVK_MAX_PUSH_SIZE,
735 .maxMemoryAllocationCount = 4096,
736 .maxSamplerAllocationCount = 4000,
737 .bufferImageGranularity = info->cls_eng3d >= MAXWELL_B ? 0x400 : 0x10000,
738 .sparseAddressSpaceSize = NVK_SPARSE_ADDR_SPACE_SIZE,
739 .maxBoundDescriptorSets = NVK_MAX_SETS,
740 .maxPerStageDescriptorSamplers = NVK_MAX_DESCRIPTORS,
741 .maxPerStageDescriptorUniformBuffers = NVK_MAX_DESCRIPTORS,
742 .maxPerStageDescriptorStorageBuffers = NVK_MAX_DESCRIPTORS,
743 .maxPerStageDescriptorSampledImages = NVK_MAX_DESCRIPTORS,
744 .maxPerStageDescriptorStorageImages = NVK_MAX_DESCRIPTORS,
745 .maxPerStageDescriptorInputAttachments = NVK_MAX_DESCRIPTORS,
746 .maxPerStageResources = UINT32_MAX,
747 .maxDescriptorSetSamplers = NVK_MAX_DESCRIPTORS,
748 .maxDescriptorSetUniformBuffers = NVK_MAX_DESCRIPTORS,
749 .maxDescriptorSetUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
750 .maxDescriptorSetStorageBuffers = NVK_MAX_DESCRIPTORS,
751 .maxDescriptorSetStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
752 .maxDescriptorSetSampledImages = NVK_MAX_DESCRIPTORS,
753 .maxDescriptorSetStorageImages = NVK_MAX_DESCRIPTORS,
754 .maxDescriptorSetInputAttachments = NVK_MAX_DESCRIPTORS,
755 .maxVertexInputAttributes = 32,
756 .maxVertexInputBindings = 32,
757 .maxVertexInputAttributeOffset = 2047,
758 .maxVertexInputBindingStride = 2048,
759 .maxVertexOutputComponents = 128,
760 .maxTessellationGenerationLevel = 64,
761 .maxTessellationPatchSize = 32,
762 .maxTessellationControlPerVertexInputComponents = 128,
763 .maxTessellationControlPerVertexOutputComponents = 128,
764 .maxTessellationControlPerPatchOutputComponents = 120,
765 .maxTessellationControlTotalOutputComponents = 4216,
766 .maxTessellationEvaluationInputComponents = 128,
767 .maxTessellationEvaluationOutputComponents = 128,
768 .maxGeometryShaderInvocations = 32,
769 .maxGeometryInputComponents = 128,
770 .maxGeometryOutputComponents = 128,
771 .maxGeometryOutputVertices = 1024,
772 .maxGeometryTotalOutputComponents = 1024,
773 .maxFragmentInputComponents = 128,
774 .maxFragmentOutputAttachments = NVK_MAX_RTS,
775 .maxFragmentDualSrcAttachments = 1,
776 .maxFragmentCombinedOutputResources = 16,
777 .maxComputeSharedMemorySize = NVK_MAX_SHARED_SIZE,
778 .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
779 .maxComputeWorkGroupInvocations = 1024,
780 .maxComputeWorkGroupSize = {1024, 1024, 64},
781 .subPixelPrecisionBits = 8,
782 .subTexelPrecisionBits = 8,
783 .mipmapPrecisionBits = 8,
784 .maxDrawIndexedIndexValue = UINT32_MAX,
785 .maxDrawIndirectCount = UINT32_MAX,
786 .maxSamplerLodBias = 15,
787 .maxSamplerAnisotropy = 16,
788 .maxViewports = NVK_MAX_VIEWPORTS,
789 .maxViewportDimensions = { 32768, 32768 },
790 .viewportBoundsRange = { -65536, 65536 },
791 .viewportSubPixelBits = 8,
792 .minMemoryMapAlignment = os_page_size,
793 .minTexelBufferOffsetAlignment = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
794 .minUniformBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
795 .minStorageBufferOffsetAlignment = NVK_MIN_SSBO_ALIGNMENT,
796 .minTexelOffset = -8,
797 .maxTexelOffset = 7,
798 .minTexelGatherOffset = -32,
799 .maxTexelGatherOffset = 31,
800 .minInterpolationOffset = -0.5,
801 .maxInterpolationOffset = 0.4375,
802 .subPixelInterpolationOffsetBits = 4,
803 .maxFramebufferHeight = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
804 .maxFramebufferWidth = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
805 .maxFramebufferLayers = 2048,
806 .framebufferColorSampleCounts = sample_counts,
807 .framebufferDepthSampleCounts = sample_counts,
808 .framebufferNoAttachmentsSampleCounts = sample_counts,
809 .framebufferStencilSampleCounts = sample_counts,
810 .maxColorAttachments = NVK_MAX_RTS,
811 .sampledImageColorSampleCounts = sample_counts,
812 .sampledImageIntegerSampleCounts = sample_counts,
813 .sampledImageDepthSampleCounts = sample_counts,
814 .sampledImageStencilSampleCounts = sample_counts,
815 .storageImageSampleCounts = sample_counts,
816 .maxSampleMaskWords = 1,
817 .timestampComputeAndGraphics = true,
818 /* FIXME: Is timestamp period actually 1? */
819 .timestampPeriod = 1.0f,
820 .maxClipDistances = 8,
821 .maxCullDistances = 8,
822 .maxCombinedClipAndCullDistances = 8,
823 .discreteQueuePriorities = 2,
824 .pointSizeRange = { 1.0, 2047.94 },
825 .lineWidthRange = { 1, 64 },
826 .pointSizeGranularity = 0.0625,
827 .lineWidthGranularity = 0.0625,
828 .strictLines = true,
829 .standardSampleLocations = true,
830 .optimalBufferCopyOffsetAlignment = 1,
831 .optimalBufferCopyRowPitchAlignment = 1,
832 .nonCoherentAtomSize = 64,
833
834 /* Vulkan 1.0 sparse properties */
835 .sparseResidencyNonResidentStrict = true,
836 .sparseResidencyAlignedMipSize = info->cls_eng3d < MAXWELL_B, /* DXVK/vkd3d-proton requires this to be advertised as VK_FALSE for FL12 */
837 .sparseResidencyStandard2DBlockShape = true,
838 .sparseResidencyStandard2DMultisampleBlockShape = true,
839 .sparseResidencyStandard3DBlockShape = true,
840
841 /* Vulkan 1.1 properties */
842 .subgroupSize = 32,
843 .subgroupSupportedStages = nvk_nak_stages(info),
844 .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
845 VK_SUBGROUP_FEATURE_BALLOT_BIT |
846 VK_SUBGROUP_FEATURE_BASIC_BIT |
847 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
848 VK_SUBGROUP_FEATURE_QUAD_BIT |
849 VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
850 VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
851 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
852 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
853 VK_SUBGROUP_FEATURE_VOTE_BIT,
854 .subgroupQuadOperationsInAllStages = false,
855 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
856 .maxMultiviewViewCount = NVK_MAX_MULTIVIEW_VIEW_COUNT,
857 .maxMultiviewInstanceIndex = UINT32_MAX,
858 .maxPerSetDescriptors = UINT32_MAX,
859 .maxMemoryAllocationSize = (1u << 31),
860
861 /* Vulkan 1.2 properties */
862 .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
863 VK_RESOLVE_MODE_AVERAGE_BIT |
864 VK_RESOLVE_MODE_MIN_BIT |
865 VK_RESOLVE_MODE_MAX_BIT,
866 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
867 VK_RESOLVE_MODE_MIN_BIT |
868 VK_RESOLVE_MODE_MAX_BIT,
869 .independentResolveNone = true,
870 .independentResolve = true,
871 .driverID = VK_DRIVER_ID_MESA_NVK,
872 .conformanceVersion =
873 conformant ? (VkConformanceVersion) { 1, 4, 0, 0 }
874 : (VkConformanceVersion) { 0, 0, 0, 0 },
875 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
876 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
877 .shaderSignedZeroInfNanPreserveFloat16 = true,
878 .shaderSignedZeroInfNanPreserveFloat32 = true,
879 .shaderSignedZeroInfNanPreserveFloat64 = true,
880 .shaderDenormPreserveFloat16 = true,
881 .shaderDenormPreserveFloat32 = true,
882 .shaderDenormPreserveFloat64 = true,
883 .shaderDenormFlushToZeroFloat16 = false,
884 .shaderDenormFlushToZeroFloat32 = true,
885 .shaderDenormFlushToZeroFloat64 = false,
886 .shaderRoundingModeRTEFloat16 = true,
887 .shaderRoundingModeRTEFloat32 = true,
888 .shaderRoundingModeRTEFloat64 = true,
889 .shaderRoundingModeRTZFloat16 = false,
890 .shaderRoundingModeRTZFloat32 = true,
891 .shaderRoundingModeRTZFloat64 = true,
892 .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
893 .shaderUniformBufferArrayNonUniformIndexingNative = false,
894 .shaderSampledImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
895 .shaderStorageBufferArrayNonUniformIndexingNative = true,
896 .shaderStorageImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
897 .shaderInputAttachmentArrayNonUniformIndexingNative = false,
898 .robustBufferAccessUpdateAfterBind = true,
899 .quadDivergentImplicitLod = info->cls_eng3d >= TURING_A,
900 .maxPerStageDescriptorUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
901 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
902 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
903 .maxPerStageDescriptorUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
904 .maxPerStageDescriptorUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
905 .maxPerStageDescriptorUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
906 .maxPerStageUpdateAfterBindResources = UINT32_MAX,
907 .maxDescriptorSetUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
908 .maxDescriptorSetUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
909 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
910 .maxDescriptorSetUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
911 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
912 .maxDescriptorSetUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
913 .maxDescriptorSetUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
914 .maxDescriptorSetUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
915 .filterMinmaxSingleComponentFormats = true,
916 .filterMinmaxImageComponentMapping = true,
917 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
918 .framebufferIntegerColorSampleCounts = sample_counts,
919
920 /* Vulkan 1.3 properties */
921 .minSubgroupSize = 32,
922 .maxSubgroupSize = 32,
923 .maxComputeWorkgroupSubgroups = 1024 / 32,
924 .requiredSubgroupSizeStages = 0,
925 .maxInlineUniformBlockSize = 1 << 16,
926 .maxPerStageDescriptorInlineUniformBlocks = 32,
927 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
928 .maxDescriptorSetInlineUniformBlocks = 6 * 32,
929 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
930 .maxInlineUniformTotalSize = 1 << 16,
931 .integerDotProduct4x8BitPackedUnsignedAccelerated
932 = info->cls_eng3d >= VOLTA_A,
933 .integerDotProduct4x8BitPackedSignedAccelerated
934 = info->cls_eng3d >= VOLTA_A,
935 .integerDotProduct4x8BitPackedMixedSignednessAccelerated
936 = info->cls_eng3d >= VOLTA_A,
937 .storageTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
938 .storageTexelBufferOffsetSingleTexelAlignment = true,
939 .uniformTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
940 .uniformTexelBufferOffsetSingleTexelAlignment = true,
941 .maxBufferSize = NVK_MAX_BUFFER_SIZE,
942
943 /* Vulkan 1.4 properties */
944 .lineSubPixelPrecisionBits = 8,
945 .maxVertexAttribDivisor = UINT32_MAX,
946 .supportsNonZeroFirstInstance = true,
947 .maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,
948 .dynamicRenderingLocalReadDepthStencilAttachments = true,
949 .dynamicRenderingLocalReadMultisampledAttachments = true,
950 .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
951 .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
952 .depthStencilSwizzleOneSupport = true,
953 .polygonModePointSize = true,
954 .nonStrictSinglePixelWideLinesUseParallelogram = false,
955 .nonStrictWideLinesUseParallelogram = false,
956 .blockTexelViewCompatibleMultipleLayers = true,
957 .maxCombinedImageSamplerDescriptorCount = 3,
958 .fragmentShadingRateClampCombinerInputs = false, /* TODO */
959 .defaultRobustnessStorageBuffers =
960 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
961 .defaultRobustnessUniformBuffers =
962 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
963 .defaultRobustnessVertexInputs =
964 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT,
965 .defaultRobustnessImages =
966 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
967
968 /* VK_KHR_compute_shader_derivatives */
969 .meshAndTaskShaderDerivatives = false,
970
971 /* VK_EXT_conservative_rasterization */
972 .primitiveOverestimationSize = info->cls_eng3d >= VOLTA_A ? 1.0f / 512.0f : 0.0,
973 .maxExtraPrimitiveOverestimationSize = 0.75,
974 .extraPrimitiveOverestimationSizeGranularity = 0.25,
975 .primitiveUnderestimation = info->cls_eng3d >= VOLTA_A,
976 .conservativePointAndLineRasterization = true,
977 .degenerateLinesRasterized = info->cls_eng3d >= VOLTA_A,
978 .degenerateTrianglesRasterized = info->cls_eng3d >= PASCAL_A,
979 .fullyCoveredFragmentShaderInputVariable = false,
980 .conservativeRasterizationPostDepthCoverage = true,
981
982 /* VK_EXT_custom_border_color */
983 .maxCustomBorderColorSamplers = 4000,
984
985 /* VK_EXT_descriptor_buffer */
986 .combinedImageSamplerDescriptorSingleArray = true,
987 .bufferlessPushDescriptors = false,
988 .allowSamplerImageViewPostSubmitCreation = false,
989 .descriptorBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
990 .maxDescriptorBufferBindings = 32,
991 .maxResourceDescriptorBufferBindings = 32,
992 .maxSamplerDescriptorBufferBindings = 32,
993 .maxEmbeddedImmutableSamplerBindings = 32,
994 .maxEmbeddedImmutableSamplers = 4000,
995 .bufferCaptureReplayDescriptorDataSize = 0,
996 .imageCaptureReplayDescriptorDataSize = 0,
997 .imageViewCaptureReplayDescriptorDataSize =
998 sizeof(struct nvk_image_view_capture),
999 .samplerCaptureReplayDescriptorDataSize =
1000 sizeof(struct nvk_sampler_capture),
1001 .accelerationStructureCaptureReplayDescriptorDataSize = 0, // todo
1002 .samplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1003 .combinedImageSamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1004 .sampledImageDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1005 .storageImageDescriptorSize = sizeof(struct nvk_storage_image_descriptor),
1006 .uniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1007 .robustUniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1008 .storageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1009 .robustStorageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1010 .uniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1011 .robustUniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1012 .storageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1013 .robustStorageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1014 .inputAttachmentDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1015 .accelerationStructureDescriptorSize = 0,
1016 .maxSamplerDescriptorBufferRange = UINT32_MAX,
1017 .maxResourceDescriptorBufferRange = UINT32_MAX,
1018 .samplerDescriptorBufferAddressSpaceSize = UINT32_MAX,
1019 .resourceDescriptorBufferAddressSpaceSize = UINT32_MAX,
1020 .descriptorBufferAddressSpaceSize = UINT32_MAX,
1021
1022 /* VK_EXT_device_generated_commands */
1023 .maxIndirectPipelineCount = UINT32_MAX,
1024 .maxIndirectShaderObjectCount = UINT32_MAX,
1025 .maxIndirectSequenceCount = 1 << 20,
1026 .maxIndirectCommandsTokenCount = 16,
1027 .maxIndirectCommandsTokenOffset = 2047,
1028 .maxIndirectCommandsIndirectStride = 1 << 12,
1029 .supportedIndirectCommandsInputModes =
1030 VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT |
1031 VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT,
1032 .supportedIndirectCommandsShaderStages =
1033 NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1034 .supportedIndirectCommandsShaderStagesPipelineBinding =
1035 NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1036 .supportedIndirectCommandsShaderStagesShaderBinding =
1037 NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1038 .deviceGeneratedCommandsTransformFeedback = true,
1039 .deviceGeneratedCommandsMultiDrawIndirectCount = true,
1040
1041 /* VK_EXT_extended_dynamic_state3 */
1042 .dynamicPrimitiveTopologyUnrestricted = true,
1043
1044 /* VK_EXT_graphics_pipeline_library */
1045 .graphicsPipelineLibraryFastLinking = true,
1046 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
1047
1048 /* VK_KHR_maintenance7 */
1049 .robustFragmentShadingRateAttachmentAccess = false,
1050 .separateDepthStencilAttachmentAccess = false,
1051 .maxDescriptorSetTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1052 .maxDescriptorSetTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1053 .maxDescriptorSetTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
1054 .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1055 .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1056 .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
1057
1058 /* VK_EXT_legacy_vertex_attributes */
1059 .nativeUnalignedPerformance = true,
1060
1061 /* VK_EXT_map_memory_placed */
1062 .minPlacedMemoryMapAlignment = os_page_size,
1063
1064 /* VK_EXT_multi_draw */
1065 .maxMultiDrawCount = UINT32_MAX,
1066
1067 /* VK_EXT_nested_command_buffer */
1068 .maxCommandBufferNestingLevel = UINT32_MAX,
1069
1070 /* VK_EXT_pci_bus_info */
1071 .pciDomain = info->pci.domain,
1072 .pciBus = info->pci.bus,
1073 .pciDevice = info->pci.dev,
1074 .pciFunction = info->pci.func,
1075
1076 /* VK_EXT_physical_device_drm gets populated later */
1077
1078 /* VK_EXT_provoking_vertex */
1079 .provokingVertexModePerPipeline = true,
1080 .transformFeedbackPreservesTriangleFanProvokingVertex = true,
1081
1082 /* VK_EXT_robustness2 */
1083 .robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
1084 .robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),
1085
1086 /* VK_EXT_sample_locations */
1087 .sampleLocationSampleCounts = sample_counts,
1088 .maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
1089 .sampleLocationCoordinateRange[0] = 0.0f,
1090 .sampleLocationCoordinateRange[1] = 0.9375f,
1091 .sampleLocationSubPixelBits = 4,
1092 .variableSampleLocations = true,
1093
1094 /* VK_EXT_shader_object */
1095 .shaderBinaryVersion = 0,
1096
1097 /* VK_EXT_transform_feedback */
1098 .maxTransformFeedbackStreams = 4,
1099 .maxTransformFeedbackBuffers = 4,
1100 .maxTransformFeedbackBufferSize = UINT32_MAX,
1101 .maxTransformFeedbackStreamDataSize = 2048,
1102 .maxTransformFeedbackBufferDataSize = 512,
1103 .maxTransformFeedbackBufferDataStride = 2048,
1104 .transformFeedbackQueries = true,
1105 .transformFeedbackStreamsLinesTriangles = false,
1106 .transformFeedbackRasterizationStreamSelect = true,
1107 .transformFeedbackDraw = true,
1108
1109 /* VK_KHR_fragment_shader_barycentric */
1110 .triStripVertexOrderIndependentOfProvokingVertex = false,
1111
1112 /* VK_KHR_fragment_shading_rate */
1113 .minFragmentShadingRateAttachmentTexelSize = { 16, 16 },
1114 .maxFragmentShadingRateAttachmentTexelSize = { 16, 16 },
1115 .maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1,
1116 .primitiveFragmentShadingRateWithMultipleViewports = info->cls_eng3d >= AMPERE_B,
1117 .layeredShadingRateAttachments = true,
1118 .fragmentShadingRateNonTrivialCombinerOps = true,
1119 .maxFragmentSize = { 4, 4 },
1120 .maxFragmentSizeAspectRatio = 2,
1121 .maxFragmentShadingRateCoverageSamples = 16,
1122 .maxFragmentShadingRateRasterizationSamples = 16,
1123 .fragmentShadingRateWithShaderDepthStencilWrites = true,
1124 .fragmentShadingRateWithSampleMask = true,
1125 .fragmentShadingRateWithShaderSampleMask = true,
1126 .fragmentShadingRateWithConservativeRasterization = true,
1127 //.fragmentShadingRateWithFragmentShaderInterlock = true,
1128 .fragmentShadingRateWithCustomSampleLocations = true,
1129 .fragmentShadingRateStrictMultiplyCombiner = true,
1130
1131 /* VK_NV_shader_sm_builtins */
1132 .shaderSMCount = (uint32_t)info->tpc_count * info->mp_per_tpc,
1133 .shaderWarpsPerSM = info->max_warps_per_mp,
1134 };
1135
1136 /* Add the driver to the device name (like other Mesa drivers do) */
1137 if (!strcmp(info->device_name, info->chipset_name)) {
1138 snprintf(properties->deviceName, sizeof(properties->deviceName),
1139 "NVK %s", info->device_name);
1140 } else {
1141 snprintf(properties->deviceName, sizeof(properties->deviceName),
1142 "%s (NVK %s)", info->device_name, info->chipset_name);
1143 }
1144
1145 /* VK_EXT_host_image_copy */
1146
1147 /* Not sure if there are layout specific things, so for now just reporting
1148 * all layouts from extensions.
1149 */
1150 static const VkImageLayout supported_layouts[] = {
1151 VK_IMAGE_LAYOUT_GENERAL, /* this one is required by spec */
1152 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1153 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1154 VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1155 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1156 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1157 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1158 VK_IMAGE_LAYOUT_PREINITIALIZED,
1159 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1160 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1161 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1162 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1163 VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1164 VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1165 VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1166 VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1167 VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
1168 VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1169 };
1170
1171 properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
1172 properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1173 properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
1174 properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1175
1176 STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1177 memcpy(properties->optimalTilingLayoutUUID,
1178 instance->driver_build_sha, VK_UUID_SIZE);
1179
1180 properties->identicalMemoryTypeRequirements = false;
1181
1182 /* VK_EXT_shader_module_identifier */
1183 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1184 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1185 memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
1186 vk_shaderModuleIdentifierAlgorithmUUID,
1187 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1188
1189 nv_device_uuid(info, properties->deviceUUID, VK_UUID_SIZE, true);
1190 STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1191 memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
1192
1193 snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK");
1194 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1195 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
1196 }
1197
1198 static void
nvk_physical_device_init_pipeline_cache(struct nvk_physical_device * pdev)1199 nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
1200 {
1201 struct nvk_instance *instance = nvk_physical_device_instance(pdev);
1202
1203 struct mesa_sha1 sha_ctx;
1204 _mesa_sha1_init(&sha_ctx);
1205
1206 _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
1207 sizeof(instance->driver_build_sha));
1208
1209 const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
1210 _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1211
1212 unsigned char sha[SHA1_DIGEST_LENGTH];
1213 _mesa_sha1_final(&sha_ctx, sha);
1214
1215 STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1216 memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1217 memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1218
1219 #ifdef ENABLE_SHADER_CACHE
1220 char renderer[10];
1221 ASSERTED int len = snprintf(renderer, sizeof(renderer), "nvk_%04x",
1222 pdev->info.chipset);
1223 assert(len == sizeof(renderer) - 2);
1224
1225 char timestamp[41];
1226 _mesa_sha1_format(timestamp, instance->driver_build_sha);
1227
1228 const uint64_t driver_flags = nvk_physical_device_compiler_flags(pdev);
1229 pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1230 #endif
1231 }
1232
1233 static void
nvk_physical_device_free_disk_cache(struct nvk_physical_device * pdev)1234 nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev)
1235 {
1236 #ifdef ENABLE_SHADER_CACHE
1237 if (pdev->vk.disk_cache) {
1238 disk_cache_destroy(pdev->vk.disk_cache);
1239 pdev->vk.disk_cache = NULL;
1240 }
1241 #else
1242 assert(pdev->vk.disk_cache == NULL);
1243 #endif
1244 }
1245
/*
 * Size, in bytes, to report for the system-memory heap: 3/4 of the total
 * physical memory, rounded down to a multiple of 1 << 20.
 *
 * Returns 0 if the total physical memory cannot be queried.
 */
static uint64_t
nvk_get_sysmem_heap_size(void)
{
   uint64_t total_B = 0;

   if (os_get_total_physical_memory(&total_B)) {
      /* Use 3/4 of total size to avoid swapping */
      return ROUND_DOWN_TO(total_B * 3 / 4, 1 << 20);
   }

   return 0;
}
1256
/*
 * Currently available system memory for the system-memory heap: 3/4 of what
 * the OS reports as available, rounded down to a multiple of 1 << 20.
 *
 * If the query fails, an error is logged against pdev and 0 is returned.
 */
static uint64_t
nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev)
{
   uint64_t avail_B = 0;

   if (os_get_available_system_memory(&avail_B)) {
      /* Use 3/4 of available to avoid swapping */
      return ROUND_DOWN_TO(avail_B * 3 / 4, 1 << 20);
   }

   vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
   return 0;
}
1269
1270 static uint64_t
nvk_get_vram_heap_available(struct nvk_physical_device * pdev)1271 nvk_get_vram_heap_available(struct nvk_physical_device *pdev)
1272 {
1273 const uint64_t used = nvkmd_pdev_get_vram_used(pdev->nvkmd);
1274 if (used > pdev->info.vram_size_B)
1275 return 0;
1276
1277 return pdev->info.vram_size_B - used;
1278 }
1279
1280 VkResult
nvk_create_drm_physical_device(struct vk_instance * _instance,struct _drmDevice * drm_device,struct vk_physical_device ** pdev_out)1281 nvk_create_drm_physical_device(struct vk_instance *_instance,
1282 struct _drmDevice *drm_device,
1283 struct vk_physical_device **pdev_out)
1284 {
1285 struct nvk_instance *instance = (struct nvk_instance *)_instance;
1286 VkResult result;
1287
1288 struct nvkmd_pdev *nvkmd;
1289 result = nvkmd_try_create_pdev_for_drm(drm_device, &instance->vk.base,
1290 instance->debug_flags, &nvkmd);
1291 if (result != VK_SUCCESS)
1292 return result;
1293
1294 /* We don't support anything pre-Kepler */
1295 if (nvkmd->dev_info.cls_eng3d < KEPLER_A) {
1296 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1297 goto fail_nvkmd;
1298 }
1299
1300 bool conformant =
1301 nvkmd->dev_info.type == NV_DEVICE_TYPE_DIS &&
1302 nvkmd->dev_info.cls_eng3d >= TURING_A &&
1303 nvkmd->dev_info.cls_eng3d <= ADA_A;
1304
1305 if (!conformant &&
1306 !debug_get_bool_option("NVK_I_WANT_A_BROKEN_VULKAN_DRIVER", false)) {
1307 #ifdef NDEBUG
1308 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1309 #else
1310 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1311 "WARNING: NVK is not well-tested on %s, pass "
1312 "NVK_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
1313 "if you know what you're doing.",
1314 nvkmd->dev_info.device_name);
1315 #endif
1316 goto fail_nvkmd;
1317 }
1318
1319 if (!conformant)
1320 vk_warn_non_conformant_implementation("NVK");
1321
1322 struct nvk_physical_device *pdev =
1323 vk_zalloc(&instance->vk.alloc, sizeof(*pdev),
1324 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1325
1326 if (pdev == NULL) {
1327 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1328 goto fail_nvkmd;
1329 }
1330
1331 struct vk_physical_device_dispatch_table dispatch_table;
1332 vk_physical_device_dispatch_table_from_entrypoints(
1333 &dispatch_table, &nvk_physical_device_entrypoints, true);
1334 vk_physical_device_dispatch_table_from_entrypoints(
1335 &dispatch_table, &wsi_physical_device_entrypoints, false);
1336
1337 struct vk_device_extension_table supported_extensions;
1338 nvk_get_device_extensions(instance, &nvkmd->dev_info,
1339 nvkmd->kmd_info.has_alloc_tiled,
1340 &supported_extensions);
1341
1342 struct vk_features supported_features;
1343 nvk_get_device_features(&nvkmd->dev_info, &supported_extensions,
1344 &supported_features);
1345
1346 struct vk_properties properties;
1347 nvk_get_device_properties(instance, &nvkmd->dev_info, conformant,
1348 &properties);
1349
1350 if (nvkmd->drm.render_dev) {
1351 properties.drmHasRender = true;
1352 properties.drmRenderMajor = major(nvkmd->drm.render_dev);
1353 properties.drmRenderMinor = minor(nvkmd->drm.render_dev);
1354 }
1355
1356 if (nvkmd->drm.primary_dev) {
1357 properties.drmHasPrimary = true;
1358 properties.drmPrimaryMajor = major(nvkmd->drm.primary_dev);
1359 properties.drmPrimaryMinor = minor(nvkmd->drm.primary_dev);
1360 }
1361
1362 result = vk_physical_device_init(&pdev->vk, &instance->vk,
1363 &supported_extensions,
1364 &supported_features,
1365 &properties,
1366 &dispatch_table);
1367 if (result != VK_SUCCESS)
1368 goto fail_alloc;
1369
1370 pdev->nvkmd = nvkmd;
1371 pdev->info = nvkmd->dev_info;
1372 pdev->debug_flags = instance->debug_flags;
1373
1374 pdev->nak = nak_compiler_create(&pdev->info);
1375 if (pdev->nak == NULL) {
1376 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1377 goto fail_init;
1378 }
1379
1380 nvk_physical_device_init_pipeline_cache(pdev);
1381
1382 uint64_t sysmem_size_B = nvk_get_sysmem_heap_size();
1383 if (sysmem_size_B == 0) {
1384 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1385 "Failed to query total system memory");
1386 goto fail_disk_cache;
1387 }
1388
1389 if (pdev->info.vram_size_B > 0) {
1390 uint32_t vram_heap_idx = pdev->mem_heap_count++;
1391 uint32_t bar_heap_idx = vram_heap_idx;
1392 pdev->mem_heaps[vram_heap_idx] = (struct nvk_memory_heap) {
1393 .size = pdev->info.vram_size_B,
1394 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1395 };
1396
1397 if (pdev->info.bar_size_B > 0 &&
1398 pdev->info.bar_size_B < pdev->info.vram_size_B) {
1399 bar_heap_idx = pdev->mem_heap_count++;
1400 pdev->mem_heaps[bar_heap_idx] = (struct nvk_memory_heap) {
1401 .size = pdev->info.bar_size_B,
1402 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1403 };
1404 }
1405
1406 /* Only set available if we have the ioctl. */
1407 if (nvkmd->kmd_info.has_get_vram_used)
1408 pdev->mem_heaps[vram_heap_idx].available = nvk_get_vram_heap_available;
1409
1410 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1411 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1412 .heapIndex = vram_heap_idx,
1413 };
1414
1415 if (pdev->info.cls_eng3d >= MAXWELL_A) {
1416 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1417 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1418 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1419 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1420 .heapIndex = bar_heap_idx,
1421 };
1422 }
1423 }
1424
1425 uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1426 pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) {
1427 .size = sysmem_size_B,
1428 /* If we don't have any VRAM (iGPU), claim sysmem as DEVICE_LOCAL */
1429 .flags = pdev->info.vram_size_B == 0
1430 ? VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
1431 : 0,
1432 .available = nvk_get_sysmem_heap_available,
1433 };
1434
1435 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1436 /* TODO: What's the right thing to do here on Tegra? */
1437 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1438 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1439 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1440 .heapIndex = sysmem_heap_idx,
1441 };
1442
1443 assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1444 assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1445
1446 pdev->queue_families[pdev->queue_family_count++] = (struct nvk_queue_family) {
1447 .queue_flags = VK_QUEUE_GRAPHICS_BIT |
1448 VK_QUEUE_COMPUTE_BIT |
1449 VK_QUEUE_TRANSFER_BIT |
1450 VK_QUEUE_SPARSE_BINDING_BIT,
1451 .queue_count = 1,
1452 };
1453 assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1454
1455 pdev->vk.supported_sync_types = nvkmd->sync_types;
1456
1457 #ifdef NVK_USE_WSI_PLATFORM
1458 result = nvk_init_wsi(pdev);
1459 if (result != VK_SUCCESS)
1460 goto fail_disk_cache;
1461 #endif
1462
1463 *pdev_out = &pdev->vk;
1464
1465 return VK_SUCCESS;
1466
1467 fail_disk_cache:
1468 nvk_physical_device_free_disk_cache(pdev);
1469 nak_compiler_destroy(pdev->nak);
1470 fail_init:
1471 vk_physical_device_finish(&pdev->vk);
1472 fail_alloc:
1473 vk_free(&instance->vk.alloc, pdev);
1474 fail_nvkmd:
1475 nvkmd_pdev_destroy(nvkmd);
1476 return result;
1477 }
1478
1479 void
nvk_physical_device_destroy(struct vk_physical_device * vk_pdev)1480 nvk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1481 {
1482 struct nvk_physical_device *pdev =
1483 container_of(vk_pdev, struct nvk_physical_device, vk);
1484
1485 #ifdef NVK_USE_WSI_PLATFORM
1486 nvk_finish_wsi(pdev);
1487 #endif
1488 nvk_physical_device_free_disk_cache(pdev);
1489 nak_compiler_destroy(pdev->nak);
1490 nvkmd_pdev_destroy(pdev->nvkmd);
1491 vk_physical_device_finish(&pdev->vk);
1492 vk_free(&pdev->vk.instance->alloc, pdev);
1493 }
1494
1495 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1496 nvk_GetPhysicalDeviceMemoryProperties2(
1497 VkPhysicalDevice physicalDevice,
1498 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1499 {
1500 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1501
1502 pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1503 for (int i = 0; i < pdev->mem_heap_count; i++) {
1504 pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap) {
1505 .size = pdev->mem_heaps[i].size,
1506 .flags = pdev->mem_heaps[i].flags,
1507 };
1508 }
1509
1510 pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1511 for (int i = 0; i < pdev->mem_type_count; i++) {
1512 pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1513 }
1514
1515 vk_foreach_struct(ext, pMemoryProperties->pNext)
1516 {
1517 switch (ext->sType) {
1518 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1519 VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1520
1521 for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1522 const struct nvk_memory_heap *heap = &pdev->mem_heaps[i];
1523 uint64_t used = p_atomic_read(&heap->used);
1524
1525 /* From the Vulkan 1.3.278 spec:
1526 *
1527 * "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1528 * values in which memory usages are returned, with one element
1529 * for each memory heap. A heap’s usage is an estimate of how
1530 * much memory the process is currently using in that heap."
1531 *
1532 * TODO: Include internal allocations?
1533 */
1534 p->heapUsage[i] = used;
1535
1536 uint64_t available = heap->size;
1537 if (heap->available)
1538 available = heap->available(pdev);
1539
1540 /* From the Vulkan 1.3.278 spec:
1541 *
1542 * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1543 * values in which memory budgets are returned, with one
1544 * element for each memory heap. A heap’s budget is a rough
1545 * estimate of how much memory the process can allocate from
1546 * that heap before allocations may fail or cause performance
1547 * degradation. The budget includes any currently allocated
1548 * device memory."
1549 *
1550 * and
1551 *
1552 * "The heapBudget value must be less than or equal to
1553 * VkMemoryHeap::size for each heap."
1554 *
1555 * available (queried above) is the total amount free memory
1556 * system-wide and does not include our allocations so we need
1557 * to add that in.
1558 */
1559 uint64_t budget = MIN2(available + used, heap->size);
1560
1561 /* Set the budget at 90% of available to avoid thrashing */
1562 p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1563 }
1564
1565 /* From the Vulkan 1.3.278 spec:
1566 *
1567 * "The heapBudget and heapUsage values must be zero for array
1568 * elements greater than or equal to
1569 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1570 * heapBudget value must be non-zero for array elements less than
1571 * VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1572 */
1573 for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1574 p->heapBudget[i] = 0u;
1575 p->heapUsage[i] = 0u;
1576 }
1577 break;
1578 }
1579 default:
1580 vk_debug_ignored_stype(ext->sType);
1581 break;
1582 }
1583 }
1584 }
1585
1586 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1587 nvk_GetPhysicalDeviceQueueFamilyProperties2(
1588 VkPhysicalDevice physicalDevice,
1589 uint32_t *pQueueFamilyPropertyCount,
1590 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1591 {
1592 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1593 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1594 pQueueFamilyPropertyCount);
1595
1596 for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1597 const struct nvk_queue_family *queue_family = &pdev->queue_families[i];
1598
1599 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1600 p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1601 p->queueFamilyProperties.queueCount = queue_family->queue_count;
1602 p->queueFamilyProperties.timestampValidBits = 64;
1603 p->queueFamilyProperties.minImageTransferGranularity =
1604 (VkExtent3D){1, 1, 1};
1605
1606 vk_foreach_struct(ext, p->pNext) {
1607 switch (ext->sType) {
1608 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES: {
1609 VkQueueFamilyGlobalPriorityProperties *p = (void *)ext;
1610 p->priorityCount = 1;
1611 p->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM;
1612 break;
1613 }
1614
1615 default:
1616 vk_debug_ignored_stype(ext->sType);
1617 break;
1618 }
1619 }
1620 }
1621 }
1622 }
1623
1624 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1625 nvk_GetPhysicalDeviceMultisamplePropertiesEXT(
1626 VkPhysicalDevice physicalDevice,
1627 VkSampleCountFlagBits samples,
1628 VkMultisamplePropertiesEXT *pMultisampleProperties)
1629 {
1630 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1631
1632 if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1633 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1634 } else {
1635 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1636 }
1637 }
1638
1639 VkExtent2D
nvk_max_shading_rate(const struct nvk_physical_device * pdev,VkSampleCountFlagBits samples)1640 nvk_max_shading_rate(const struct nvk_physical_device *pdev,
1641 VkSampleCountFlagBits samples)
1642 {
1643 const struct nil_Extent4D_Samples px_extent_sa =
1644 nil_px_extent_sa(nil_choose_sample_layout(samples));
1645
1646 assert(px_extent_sa.width <= 4);
1647 assert(px_extent_sa.height <= 4);
1648 assert(px_extent_sa.depth == 1);
1649 assert(px_extent_sa.array_len == 1);
1650
1651 return (VkExtent2D) {
1652 .width = 4 / px_extent_sa.width,
1653 .height = 4 / px_extent_sa.height,
1654 };
1655 }
1656
1657 VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice,uint32_t * pFragmentShadingRateCount,VkPhysicalDeviceFragmentShadingRateKHR * pFragmentShadingRates)1658 nvk_GetPhysicalDeviceFragmentShadingRatesKHR(
1659 VkPhysicalDevice physicalDevice,
1660 uint32_t *pFragmentShadingRateCount,
1661 VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
1662 {
1663 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1664 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
1665 pFragmentShadingRates, pFragmentShadingRateCount);
1666
1667
1668 /* From the Vulkan 1.3.297 spec:
1669 *
1670 * "The returned array of fragment shading rates must be ordered from
1671 * largest fragmentSize.width value to smallest, and each set of
1672 * fragment shading rates with the same fragmentSize.width value must be
1673 * ordered from largest fragmentSize.height to smallest. Any two entries
1674 * in the array must not have the same fragmentSize values."
1675 */
1676 VkExtent2D shading_rates[] = {
1677 { 4, 4 },
1678 { 4, 2 },
1679 { 2, 4 },
1680 { 2, 2 },
1681 { 2, 1 },
1682 { 1, 2 },
1683 { 1, 1 },
1684 };
1685
1686 for (uint32_t i = 0; i < ARRAY_SIZE(shading_rates); i++) {
1687 vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, p) {
1688 p->fragmentSize = shading_rates[i];
1689 if (shading_rates[i].width == 1 && shading_rates[i].height == 1) {
1690 /* The Vulkan spec requires us to set ~0 for 1x1. */
1691 p->sampleCounts = ~0;
1692 } else {
1693 for (uint32_t samples = 1; samples <= 16; samples <<= 1) {
1694 VkExtent2D max_rate = nvk_max_shading_rate(pdev, samples);
1695 if (shading_rates[i].width > max_rate.width ||
1696 shading_rates[i].height > max_rate.height)
1697 break;
1698
1699 p->sampleCounts |= samples;
1700 }
1701 }
1702 }
1703 }
1704
1705 return vk_outarray_status(&out);
1706 }
1707