• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
5 #include "nvk_physical_device.h"
6 
7 #include "nak.h"
8 #include "nvk_buffer.h"
9 #include "nvk_descriptor_types.h"
10 #include "nvk_entrypoints.h"
11 #include "nvk_format.h"
12 #include "nvk_image.h"
13 #include "nvk_image_view.h"
14 #include "nvk_instance.h"
15 #include "nvk_sampler.h"
16 #include "nvk_shader.h"
17 #include "nvk_wsi.h"
18 #include "nvkmd/nvkmd.h"
19 #include "nvkmd/nouveau/nvkmd_nouveau.h"
20 #include "git_sha1.h"
21 #include "util/detect_os.h"
22 #include "util/disk_cache.h"
23 #include "util/mesa-sha1.h"
24 
25 #if DETECT_OS_ANDROID
26 #include <vulkan/vk_android_native_buffer.h>
27 #include "util/u_gralloc/u_gralloc.h"
28 #endif
29 
30 #include "vk_android.h"
31 #include "vk_device.h"
32 #include "vk_drm_syncobj.h"
33 #include "vk_shader_module.h"
34 #include "vulkan/wsi/wsi_common.h"
35 
36 #include <sys/sysmacros.h>
37 
38 #include "nv_push.h"
39 #include "cl90c0.h"
40 #include "cl91c0.h"
41 #include "cla097.h"
42 #include "cla0c0.h"
43 #include "cla1c0.h"
44 #include "clb097.h"
45 #include "clb0c0.h"
46 #include "clb197.h"
47 #include "clb1c0.h"
48 #include "clc097.h"
49 #include "clc0c0.h"
50 #include "clc1c0.h"
51 #include "clc397.h"
52 #include "clc3c0.h"
53 #include "clc597.h"
54 #include "clc5c0.h"
55 #include "clc797.h"
56 #include "clc997.h"
57 
58 static bool
nvk_use_nak(const struct nv_device_info * info)59 nvk_use_nak(const struct nv_device_info *info)
60 {
61    const VkShaderStageFlags vk10_stages =
62       VK_SHADER_STAGE_VERTEX_BIT |
63       VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
64       VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
65       VK_SHADER_STAGE_GEOMETRY_BIT |
66       VK_SHADER_STAGE_FRAGMENT_BIT |
67       VK_SHADER_STAGE_COMPUTE_BIT;
68 
69    return !(vk10_stages & ~nvk_nak_stages(info));
70 }
71 
72 static uint32_t
nvk_get_vk_version(const struct nv_device_info * info)73 nvk_get_vk_version(const struct nv_device_info *info)
74 {
75    /* Version override takes priority */
76    const uint32_t version_override = vk_get_version_override();
77    if (version_override)
78       return version_override;
79 
80    /* If we're using codegen for anything, lock to version 1.0 */
81    if (!nvk_use_nak(info))
82       return VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
83 
84 #if defined(ANDROID_STRICT) && ANDROID_API_LEVEL <= 32
85    return VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION);
86 #endif
87 
88    /* Vulkan 1.4 requires hostImageCopy which is currently only supported on
89     * Turing+.
90     */
91    if (info->cls_eng3d < TURING_A)
92       return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
93 
94    return VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION);
95 }
96 
97 static void
nvk_get_device_extensions(const struct nvk_instance * instance,const struct nv_device_info * info,bool has_tiled_bos,struct vk_device_extension_table * ext)98 nvk_get_device_extensions(const struct nvk_instance *instance,
99                           const struct nv_device_info *info,
100                           bool has_tiled_bos,
101                           struct vk_device_extension_table *ext)
102 {
103    *ext = (struct vk_device_extension_table) {
104       .KHR_8bit_storage = true,
105       .KHR_16bit_storage = true,
106       .KHR_bind_memory2 = true,
107       .KHR_buffer_device_address = true,
108       .KHR_calibrated_timestamps = true,
109       .KHR_compute_shader_derivatives = nvk_use_nak(info),
110       .KHR_copy_commands2 = true,
111       .KHR_create_renderpass2 = true,
112       .KHR_dedicated_allocation = true,
113       .KHR_depth_stencil_resolve = true,
114       .KHR_descriptor_update_template = true,
115       .KHR_device_group = true,
116       .KHR_draw_indirect_count = info->cls_eng3d >= TURING_A,
117       .KHR_driver_properties = true,
118       .KHR_dynamic_rendering = true,
119       .KHR_dynamic_rendering_local_read = true,
120       .KHR_external_fence = true,
121       .KHR_external_fence_fd = true,
122       .KHR_external_memory = true,
123       .KHR_external_memory_fd = true,
124       .KHR_external_semaphore = true,
125       .KHR_external_semaphore_fd = true,
126       .KHR_format_feature_flags2 = true,
127       .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
128          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
129       .KHR_fragment_shading_rate = info->cls_eng3d >= TURING_A,
130       .KHR_get_memory_requirements2 = true,
131       .KHR_global_priority = true,
132       .KHR_image_format_list = true,
133       .KHR_imageless_framebuffer = true,
134 #ifdef NVK_USE_WSI_PLATFORM
135       .KHR_incremental_present = true,
136 #endif
137       .KHR_index_type_uint8 = true,
138       .KHR_line_rasterization = true,
139       .KHR_load_store_op_none = true,
140       .KHR_maintenance1 = true,
141       .KHR_maintenance2 = true,
142       .KHR_maintenance3 = true,
143       .KHR_maintenance4 = true,
144       .KHR_maintenance5 = true,
145       .KHR_maintenance6 = true,
146       .KHR_maintenance7 = true,
147       .KHR_map_memory2 = true,
148       .KHR_multiview = true,
149       .KHR_pipeline_executable_properties = true,
150       .KHR_pipeline_library = true,
151 #ifdef NVK_USE_WSI_PLATFORM
152       /* Hide these behind dri configs for now since we cannot implement it
153        * reliably on all surfaces yet. There is no surface capability query
154        * for present wait/id, but the feature is useful enough to hide behind
155        * an opt-in mechanism for now.  If the instance only enables surface
156        * extensions that unconditionally support present wait, we can also
157        * expose the extension that way.
158        */
159       .KHR_present_id = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
160                         wsi_common_vk_instance_supports_present_wait(&instance->vk),
161       .KHR_present_wait = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
162                           wsi_common_vk_instance_supports_present_wait(&instance->vk),
163 #endif
164       .KHR_push_descriptor = true,
165       .KHR_relaxed_block_layout = true,
166       .KHR_sampler_mirror_clamp_to_edge = true,
167       .KHR_sampler_ycbcr_conversion = true,
168       .KHR_separate_depth_stencil_layouts = true,
169       .KHR_shader_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
170                                  nvk_use_nak(info),
171       .KHR_shader_clock = true,
172       .KHR_shader_draw_parameters = true,
173       .KHR_shader_expect_assume = true,
174       .KHR_shader_float_controls = true,
175       .KHR_shader_float_controls2 = true,
176       .KHR_shader_float16_int8 = true,
177       .KHR_shader_integer_dot_product = true,
178       .KHR_shader_maximal_reconvergence = true,
179       .KHR_shader_non_semantic_info = true,
180       .KHR_shader_quad_control = true,
181       .KHR_shader_relaxed_extended_instruction = true,
182       .KHR_shader_subgroup_extended_types = true,
183       .KHR_shader_subgroup_rotate = nvk_use_nak(info),
184       .KHR_shader_subgroup_uniform_control_flow = nvk_use_nak(info),
185       .KHR_shader_terminate_invocation =
186          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
187       .KHR_spirv_1_4 = true,
188       .KHR_storage_buffer_storage_class = true,
189       .KHR_timeline_semaphore = true,
190 #ifdef NVK_USE_WSI_PLATFORM
191       .KHR_swapchain = true,
192       .KHR_swapchain_mutable_format = true,
193 #endif
194       .KHR_synchronization2 = true,
195       .KHR_uniform_buffer_standard_layout = true,
196       .KHR_variable_pointers = true,
197       .KHR_vertex_attribute_divisor = true,
198       .KHR_vulkan_memory_model = nvk_use_nak(info),
199       .KHR_workgroup_memory_explicit_layout = true,
200       .KHR_zero_initialize_workgroup_memory = true,
201       .EXT_4444_formats = true,
202       .EXT_attachment_feedback_loop_layout = true,
203       .EXT_border_color_swizzle = true,
204       .EXT_buffer_device_address = true,
205       .EXT_calibrated_timestamps = true,
206       .EXT_conditional_rendering = true,
207       .EXT_conservative_rasterization = info->cls_eng3d >= MAXWELL_B,
208       .EXT_color_write_enable = true,
209       .EXT_custom_border_color = true,
210       .EXT_depth_bias_control = true,
211       .EXT_depth_clamp_control = true,
212       .EXT_depth_clamp_zero_one = true,
213       .EXT_depth_clip_control = true,
214       .EXT_depth_clip_enable = true,
215       .EXT_depth_range_unrestricted = info->cls_eng3d >= VOLTA_A,
216       .EXT_descriptor_buffer = true,
217       .EXT_descriptor_indexing = true,
218       .EXT_device_generated_commands = true,
219 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
220       .EXT_display_control = true,
221 #endif
222       .EXT_image_drm_format_modifier = has_tiled_bos,
223       .EXT_dynamic_rendering_unused_attachments = true,
224       .EXT_extended_dynamic_state = true,
225       .EXT_extended_dynamic_state2 = true,
226       .EXT_extended_dynamic_state3 = true,
227       .EXT_external_memory_dma_buf = true,
228       .EXT_global_priority = true,
229       .EXT_global_priority_query = true,
230       .EXT_graphics_pipeline_library = true,
231       .EXT_host_query_reset = true,
232       .EXT_host_image_copy = info->cls_eng3d >= TURING_A,
233       .EXT_image_2d_view_of_3d = true,
234       .EXT_image_robustness = true,
235       .EXT_image_sliced_view_of_3d = true,
236       .EXT_image_view_min_lod = true,
237       .EXT_index_type_uint8 = true,
238       .EXT_inline_uniform_block = true,
239       .EXT_legacy_vertex_attributes = true,
240       .EXT_line_rasterization = true,
241       .EXT_load_store_op_none = true,
242       .EXT_map_memory_placed = true,
243       .EXT_memory_budget = true,
244       .EXT_multi_draw = true,
245       .EXT_mutable_descriptor_type = true,
246       .EXT_nested_command_buffer = true,
247       .EXT_non_seamless_cube_map = true,
248       .EXT_pci_bus_info = info->type == NV_DEVICE_TYPE_DIS,
249       .EXT_pipeline_creation_cache_control = true,
250       .EXT_pipeline_creation_feedback = true,
251       .EXT_pipeline_robustness = true,
252       .EXT_physical_device_drm = true,
253       .EXT_post_depth_coverage = true,
254       .EXT_primitive_topology_list_restart = true,
255       .EXT_private_data = true,
256       .EXT_primitives_generated_query = true,
257       .EXT_provoking_vertex = true,
258       .EXT_queue_family_foreign = true,
259       .EXT_robustness2 = true,
260       .EXT_sample_locations = info->cls_eng3d >= MAXWELL_B,
261       .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B,
262       .EXT_scalar_block_layout = nvk_use_nak(info),
263       .EXT_separate_stencil_usage = true,
264       .EXT_shader_image_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
265                                        nvk_use_nak(info),
266       .EXT_shader_demote_to_helper_invocation = true,
267       .EXT_shader_module_identifier = true,
268       .EXT_shader_object = true,
269       .EXT_shader_replicated_composites = true,
270       .EXT_shader_subgroup_ballot = true,
271       .EXT_shader_subgroup_vote = true,
272       .EXT_shader_viewport_index_layer = info->cls_eng3d >= MAXWELL_B,
273       .EXT_subgroup_size_control = true,
274 #ifdef NVK_USE_WSI_PLATFORM
275       .EXT_swapchain_maintenance1 = true,
276 #endif
277       .EXT_texel_buffer_alignment = true,
278       .EXT_tooling_info = true,
279       .EXT_transform_feedback = true,
280       .EXT_vertex_attribute_divisor = true,
281       .EXT_vertex_input_dynamic_state = true,
282       .EXT_ycbcr_2plane_444_formats = true,
283       .EXT_ycbcr_image_arrays = true,
284 #if DETECT_OS_ANDROID
285       .ANDROID_native_buffer = vk_android_get_ugralloc() != NULL,
286 #endif
287       .GOOGLE_decorate_string = true,
288       .GOOGLE_hlsl_functionality1 = true,
289       .GOOGLE_user_type = true,
290       .NV_compute_shader_derivatives = nvk_use_nak(info),
291       .NV_shader_sm_builtins = true,
292       .VALVE_mutable_descriptor_type = true,
293    };
294 }
295 
296 static void
nvk_get_device_features(const struct nv_device_info * info,const struct vk_device_extension_table * supported_extensions,struct vk_features * features)297 nvk_get_device_features(const struct nv_device_info *info,
298                         const struct vk_device_extension_table *supported_extensions,
299                         struct vk_features *features)
300 {
301    *features = (struct vk_features) {
302       /* Vulkan 1.0 */
303       .robustBufferAccess = true,
304       .fullDrawIndexUint32 = true,
305       .imageCubeArray = true,
306       .independentBlend = true,
307       .geometryShader = true,
308       .tessellationShader = true,
309       .sampleRateShading = true,
310       .dualSrcBlend = true,
311       .logicOp = true,
312       .multiDrawIndirect = true,
313       .drawIndirectFirstInstance = true,
314       .depthClamp = true,
315       .depthBiasClamp = true,
316       .fillModeNonSolid = true,
317       .depthBounds = true,
318       .wideLines = true,
319       .largePoints = true,
320       .alphaToOne = true,
321       .multiViewport = true,
322       .samplerAnisotropy = true,
323       .textureCompressionETC2 = false,
324       .textureCompressionBC = true,
325       .textureCompressionASTC_LDR = false,
326       .occlusionQueryPrecise = true,
327       .pipelineStatisticsQuery = true,
328       .vertexPipelineStoresAndAtomics = true,
329       .fragmentStoresAndAtomics = true,
330       .shaderTessellationAndGeometryPointSize = true,
331       .shaderImageGatherExtended = true,
332       .shaderStorageImageExtendedFormats = true,
333       .shaderStorageImageMultisample = true,
334       .shaderStorageImageReadWithoutFormat = info->cls_eng3d >= MAXWELL_A,
335       .shaderStorageImageWriteWithoutFormat = true,
336       .shaderUniformBufferArrayDynamicIndexing = true,
337       .shaderSampledImageArrayDynamicIndexing = true,
338       .shaderStorageBufferArrayDynamicIndexing = true,
339       .shaderStorageImageArrayDynamicIndexing = true,
340       .shaderClipDistance = true,
341       .shaderCullDistance = true,
342       .shaderFloat64 = true,
343       .shaderInt64 = true,
344       .shaderInt16 = true,
345       .shaderResourceResidency = info->cls_eng3d >= VOLTA_A,
346       .shaderResourceMinLod = info->cls_eng3d >= VOLTA_A,
347       .sparseBinding = true,
348       .sparseResidency2Samples = info->cls_eng3d >= MAXWELL_B,
349       .sparseResidency4Samples = info->cls_eng3d >= MAXWELL_B,
350       .sparseResidency8Samples = info->cls_eng3d >= MAXWELL_B,
351       .sparseResidencyAliased = info->cls_eng3d >= MAXWELL_B,
352       .sparseResidencyBuffer = info->cls_eng3d >= MAXWELL_B,
353       .sparseResidencyImage2D = info->cls_eng3d >= MAXWELL_B,
354       .sparseResidencyImage3D = info->cls_eng3d >= MAXWELL_B,
355       .variableMultisampleRate = true,
356       .inheritedQueries = true,
357 
358       /* Vulkan 1.1 */
359       .storageBuffer16BitAccess = true,
360       .uniformAndStorageBuffer16BitAccess = true,
361       .storagePushConstant16 = true,
362       .multiview = true,
363       .multiviewGeometryShader = true,
364       .multiviewTessellationShader = true,
365       .variablePointersStorageBuffer = true,
366       .variablePointers = true,
367       .shaderDrawParameters = true,
368       .samplerYcbcrConversion = true,
369 
370       /* Vulkan 1.2 */
371       .samplerMirrorClampToEdge = true,
372       .drawIndirectCount = info->cls_eng3d >= TURING_A,
373       .storageBuffer8BitAccess = true,
374       .uniformAndStorageBuffer8BitAccess = true,
375       .storagePushConstant8 = true,
376       .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
377                                   nvk_use_nak(info),
378       .shaderSharedInt64Atomics = false, /* TODO */
379       /* TODO: Fp16 is currently busted on Turing and Volta due to instruction
380        * scheduling issues.  Re-enable it once those are sorted.
381        */
382       .shaderFloat16 = info->sm >= 80 && nvk_use_nak(info),
383       .shaderInt8 = true,
384       .descriptorIndexing = true,
385       .shaderInputAttachmentArrayDynamicIndexing = true,
386       .shaderUniformTexelBufferArrayDynamicIndexing = true,
387       .shaderStorageTexelBufferArrayDynamicIndexing = true,
388       .shaderUniformBufferArrayNonUniformIndexing = true,
389       .shaderSampledImageArrayNonUniformIndexing = true,
390       .shaderStorageBufferArrayNonUniformIndexing = true,
391       .shaderStorageImageArrayNonUniformIndexing = true,
392       .shaderInputAttachmentArrayNonUniformIndexing = true,
393       .shaderUniformTexelBufferArrayNonUniformIndexing = true,
394       .shaderStorageTexelBufferArrayNonUniformIndexing = true,
395       .descriptorBindingUniformBufferUpdateAfterBind = true,
396       .descriptorBindingSampledImageUpdateAfterBind = true,
397       .descriptorBindingStorageImageUpdateAfterBind = true,
398       .descriptorBindingStorageBufferUpdateAfterBind = true,
399       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
400       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
401       .descriptorBindingUpdateUnusedWhilePending = true,
402       .descriptorBindingPartiallyBound = true,
403       .descriptorBindingVariableDescriptorCount = true,
404       .runtimeDescriptorArray = true,
405       .samplerFilterMinmax = info->cls_eng3d >= MAXWELL_B,
406       .scalarBlockLayout = nvk_use_nak(info),
407       .imagelessFramebuffer = true,
408       .uniformBufferStandardLayout = true,
409       .shaderSubgroupExtendedTypes = true,
410       .separateDepthStencilLayouts = true,
411       .hostQueryReset = true,
412       .timelineSemaphore = true,
413       .bufferDeviceAddress = true,
414       .bufferDeviceAddressCaptureReplay = true,
415       .bufferDeviceAddressMultiDevice = false,
416       .vulkanMemoryModel = nvk_use_nak(info),
417       .vulkanMemoryModelDeviceScope = nvk_use_nak(info),
418       .vulkanMemoryModelAvailabilityVisibilityChains = nvk_use_nak(info),
419       .shaderOutputViewportIndex = info->cls_eng3d >= MAXWELL_B,
420       .shaderOutputLayer = info->cls_eng3d >= MAXWELL_B,
421       .subgroupBroadcastDynamicId = nvk_use_nak(info),
422 
423       /* Vulkan 1.3 */
424       .robustImageAccess = true,
425       .inlineUniformBlock = true,
426       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
427       .pipelineCreationCacheControl = true,
428       .privateData = true,
429       .shaderDemoteToHelperInvocation = true,
430       .shaderTerminateInvocation = true,
431       .subgroupSizeControl = true,
432       .computeFullSubgroups = true,
433       .synchronization2 = true,
434       .shaderZeroInitializeWorkgroupMemory = true,
435       .dynamicRendering = true,
436       .shaderIntegerDotProduct = true,
437       .maintenance4 = true,
438 
439       /* Vulkan 1.4 */
440       .globalPriorityQuery = true,
441       .shaderSubgroupRotate = nvk_use_nak(info),
442       .shaderSubgroupRotateClustered = nvk_use_nak(info),
443       .shaderFloatControls2 = true,
444       .shaderExpectAssume = true,
445       .rectangularLines = true,
446       .bresenhamLines = true,
447       .smoothLines = true,
448       .stippledRectangularLines = true,
449       .stippledBresenhamLines = true,
450       .stippledSmoothLines = true,
451       .vertexAttributeInstanceRateDivisor = true,
452       .vertexAttributeInstanceRateZeroDivisor = true,
453       .indexTypeUint8 = true,
454       .dynamicRenderingLocalRead = true,
455       .maintenance5 = true,
456       .maintenance6 = true,
457       .pipelineRobustness = true,
458       .hostImageCopy = info->cls_eng3d >= TURING_A,
459       .pushDescriptor = true,
460 
461       /* VK_KHR_compute_shader_derivatives */
462       .computeDerivativeGroupQuads = true,
463       .computeDerivativeGroupLinear = true,
464 
465       /* VK_KHR_fragment_shader_barycentric */
466       .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
467          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
468 
469       /* VK_KHR_fragment_shading_rate */
470       .pipelineFragmentShadingRate = info->cls_eng3d >= TURING_A,
471       .primitiveFragmentShadingRate = info->cls_eng3d >= TURING_A,
472       .attachmentFragmentShadingRate = info->cls_eng3d >= TURING_A,
473 
474       /* VK_KHR_maintenance7 */
475       .maintenance7 = true,
476 
477       /* VK_KHR_pipeline_executable_properties */
478       .pipelineExecutableInfo = true,
479 
480       /* VK_KHR_present_id */
481       .presentId = supported_extensions->KHR_present_id,
482 
483       /* VK_KHR_present_wait */
484       .presentWait = supported_extensions->KHR_present_wait,
485 
486       /* VK_KHR_shader_quad_control */
487       .shaderQuadControl = nvk_use_nak(info),
488 
489       /* VK_KHR_shader_relaxed_extended_instruction */
490       .shaderRelaxedExtendedInstruction = true,
491 
492       /* VK_KHR_shader_clock */
493       .shaderSubgroupClock = true,
494       .shaderDeviceClock = true,
495 
496       /* VK_KHR_shader_maximal_reconvergence */
497       .shaderMaximalReconvergence = true,
498 
499       /* VK_KHR_shader_subgroup_uniform_control_flow */
500       .shaderSubgroupUniformControlFlow = nvk_use_nak(info),
501 
502       /* VK_KHR_workgroup_memory_explicit_layout */
503       .workgroupMemoryExplicitLayout = true,
504       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
505       .workgroupMemoryExplicitLayout8BitAccess = nvk_use_nak(info),
506       .workgroupMemoryExplicitLayout16BitAccess = nvk_use_nak(info),
507 
508       /* VK_EXT_4444_formats */
509       .formatA4R4G4B4 = true,
510       .formatA4B4G4R4 = true,
511 
512       /* VK_EXT_attachment_feedback_loop_layout */
513       .attachmentFeedbackLoopLayout = true,
514 
515       /* VK_EXT_border_color_swizzle */
516       .borderColorSwizzle = true,
517       .borderColorSwizzleFromImage = false,
518 
519       /* VK_EXT_buffer_device_address */
520       .bufferDeviceAddressCaptureReplayEXT = true,
521 
522       /* VK_EXT_color_write_enable */
523       .colorWriteEnable = true,
524 
525       /* VK_EXT_conditional_rendering */
526       .conditionalRendering = true,
527       .inheritedConditionalRendering = true,
528 
529       /* VK_EXT_custom_border_color */
530       .customBorderColors = true,
531       .customBorderColorWithoutFormat = true,
532 
533       /* VK_EXT_depth_bias_control */
534       .depthBiasControl = true,
535       .leastRepresentableValueForceUnormRepresentation = true,
536       .floatRepresentation = false,
537       .depthBiasExact = true,
538 
539       /* VK_EXT_depth_clamp_control */
540       .depthClampControl = true,
541 
542       /* VK_EXT_depth_clamp_zero_one */
543       .depthClampZeroOne = true,
544 
545       /* VK_EXT_depth_clip_control */
546       .depthClipControl = true,
547 
548       /* VK_EXT_depth_clip_enable */
549       .depthClipEnable = true,
550 
551       /* VK_EXT_descriptor_buffer */
552       .descriptorBuffer = true,
553       .descriptorBufferCaptureReplay = true,
554       .descriptorBufferImageLayoutIgnored = true,
555       .descriptorBufferPushDescriptors = false,
556 
557       /* VK_EXT_device_generated_commands */
558       .deviceGeneratedCommands = true,
559       .dynamicGeneratedPipelineLayout = true,
560 
561       /* VK_EXT_dynamic_rendering_unused_attachments */
562       .dynamicRenderingUnusedAttachments = true,
563 
564       /* VK_EXT_extended_dynamic_state */
565       .extendedDynamicState = true,
566 
567       /* VK_EXT_extended_dynamic_state2 */
568       .extendedDynamicState2 = true,
569       .extendedDynamicState2LogicOp = true,
570       .extendedDynamicState2PatchControlPoints = true,
571 
572       /* VK_EXT_extended_dynamic_state3 */
573       .extendedDynamicState3TessellationDomainOrigin = true,
574       .extendedDynamicState3DepthClampEnable = true,
575       .extendedDynamicState3PolygonMode = true,
576       .extendedDynamicState3RasterizationSamples = true,
577       .extendedDynamicState3SampleMask = true,
578       .extendedDynamicState3AlphaToCoverageEnable = true,
579       .extendedDynamicState3AlphaToOneEnable = true,
580       .extendedDynamicState3LogicOpEnable = true,
581       .extendedDynamicState3ColorBlendEnable = true,
582       .extendedDynamicState3ColorBlendEquation = true,
583       .extendedDynamicState3ColorWriteMask = true,
584       .extendedDynamicState3RasterizationStream = true,
585       .extendedDynamicState3ConservativeRasterizationMode = false,
586       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
587       .extendedDynamicState3DepthClipEnable = true,
588       .extendedDynamicState3SampleLocationsEnable = info->cls_eng3d >= MAXWELL_B,
589       .extendedDynamicState3ColorBlendAdvanced = false,
590       .extendedDynamicState3ProvokingVertexMode = true,
591       .extendedDynamicState3LineRasterizationMode = true,
592       .extendedDynamicState3LineStippleEnable = true,
593       .extendedDynamicState3DepthClipNegativeOneToOne = true,
594       .extendedDynamicState3ViewportWScalingEnable = false,
595       .extendedDynamicState3ViewportSwizzle = false,
596       .extendedDynamicState3CoverageToColorEnable = false,
597       .extendedDynamicState3CoverageToColorLocation = false,
598       .extendedDynamicState3CoverageModulationMode = false,
599       .extendedDynamicState3CoverageModulationTableEnable = false,
600       .extendedDynamicState3CoverageModulationTable = false,
601       .extendedDynamicState3CoverageReductionMode = false,
602       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
603       .extendedDynamicState3ShadingRateImageEnable = false,
604 
605       /* VK_EXT_graphics_pipeline_library */
606       .graphicsPipelineLibrary = true,
607 
608       /* VK_EXT_image_2d_view_of_3d */
609       .image2DViewOf3D = true,
610       .sampler2DViewOf3D = true,
611 
612       /* VK_EXT_image_sliced_view_of_3d */
613       .imageSlicedViewOf3D = true,
614 
615 #ifdef NVK_USE_WSI_PLATFORM
616       /* VK_EXT_swapchain_maintenance1 */
617       .swapchainMaintenance1 = true,
618 #endif
619 
620       /* VK_EXT_image_view_min_lod */
621       .minLod = true,
622 
623       /* VK_EXT_legacy_vertex_attributes */
624       .legacyVertexAttributes = true,
625 
626       /* VK_EXT_map_memory_placed */
627       .memoryMapPlaced = true,
628       .memoryMapRangePlaced = false,
629       .memoryUnmapReserve = true,
630 
631       /* VK_EXT_multi_draw */
632       .multiDraw = true,
633 
634       /* VK_EXT_mutable_descriptor_type */
635       .mutableDescriptorType = true,
636 
637       /* VK_EXT_nested_command_buffer */
638       .nestedCommandBuffer = true,
639       .nestedCommandBufferRendering = true,
640       .nestedCommandBufferSimultaneousUse = true,
641 
642       /* VK_EXT_non_seamless_cube_map */
643       .nonSeamlessCubeMap = true,
644 
645       /* VK_EXT_primitive_topology_list_restart */
646       .primitiveTopologyListRestart = true,
647       .primitiveTopologyPatchListRestart = true,
648 
649       /* VK_EXT_primitives_generated_query */
650       .primitivesGeneratedQuery = true,
651       .primitivesGeneratedQueryWithNonZeroStreams = true,
652       .primitivesGeneratedQueryWithRasterizerDiscard = true,
653 
654       /* VK_EXT_provoking_vertex */
655       .provokingVertexLast = true,
656       .transformFeedbackPreservesProvokingVertex = true,
657 
658       /* VK_EXT_robustness2 */
659       .robustBufferAccess2 = true,
660       .robustImageAccess2 = true,
661       .nullDescriptor = true,
662 
663       /* VK_EXT_shader_image_atomic_int64 */
664       .shaderImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
665                                  nvk_use_nak(info),
666       .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
667                                  nvk_use_nak(info),
668 
669       /* VK_EXT_shader_module_identifier */
670       .shaderModuleIdentifier = true,
671 
672       /* VK_EXT_shader_object */
673       .shaderObject = true,
674 
675       /* VK_EXT_shader_replicated_composites */
676       .shaderReplicatedComposites = true,
677 
678       /* VK_EXT_texel_buffer_alignment */
679       .texelBufferAlignment = true,
680 
681       /* VK_EXT_transform_feedback */
682       .transformFeedback = true,
683       .geometryStreams = true,
684 
685       /* VK_EXT_vertex_input_dynamic_state */
686       .vertexInputDynamicState = true,
687 
688       /* VK_EXT_ycbcr_2plane_444_formats */
689       .ycbcr2plane444Formats = true,
690 
691       /* VK_EXT_ycbcr_image_arrays */
692       .ycbcrImageArrays = true,
693 
694       /* VK_NV_shader_sm_builtins */
695       .shaderSMBuiltins = true,
696    };
697 }
698 
699 static void
nvk_get_device_properties(const struct nvk_instance * instance,const struct nv_device_info * info,bool conformant,struct vk_properties * properties)700 nvk_get_device_properties(const struct nvk_instance *instance,
701                           const struct nv_device_info *info,
702                           bool conformant,
703                           struct vk_properties *properties)
704 {
705    const VkSampleCountFlagBits sample_counts = VK_SAMPLE_COUNT_1_BIT |
706                                                VK_SAMPLE_COUNT_2_BIT |
707                                                VK_SAMPLE_COUNT_4_BIT |
708                                                VK_SAMPLE_COUNT_8_BIT;
709 
710    assert(sample_counts <= (NVK_MAX_SAMPLES << 1) - 1);
711 
712    uint64_t os_page_size = 4096;
713    os_get_page_size(&os_page_size);
714 
715    *properties = (struct vk_properties) {
716       .apiVersion = nvk_get_vk_version(info),
717       .driverVersion = vk_get_driver_version(),
718       .vendorID = instance->force_vk_vendor != 0 ?
719                   instance->force_vk_vendor : NVIDIA_VENDOR_ID,
720       .deviceID = info->device_id,
721       .deviceType = info->type == NV_DEVICE_TYPE_DIS ?
722                     VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
723                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
724 
725       /* Vulkan 1.0 limits */
726       .maxImageDimension1D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_1D),
727       .maxImageDimension2D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_2D),
728       .maxImageDimension3D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_3D),
729       .maxImageDimensionCube = 0x8000,
730       .maxImageArrayLayers = 2048,
731       .maxTexelBufferElements = 128 * 1024 * 1024,
732       .maxUniformBufferRange = 65536,
733       .maxStorageBufferRange = UINT32_MAX,
734       .maxPushConstantsSize = NVK_MAX_PUSH_SIZE,
735       .maxMemoryAllocationCount = 4096,
736       .maxSamplerAllocationCount = 4000,
737       .bufferImageGranularity = info->cls_eng3d >= MAXWELL_B ? 0x400 : 0x10000,
738       .sparseAddressSpaceSize = NVK_SPARSE_ADDR_SPACE_SIZE,
739       .maxBoundDescriptorSets = NVK_MAX_SETS,
740       .maxPerStageDescriptorSamplers = NVK_MAX_DESCRIPTORS,
741       .maxPerStageDescriptorUniformBuffers = NVK_MAX_DESCRIPTORS,
742       .maxPerStageDescriptorStorageBuffers = NVK_MAX_DESCRIPTORS,
743       .maxPerStageDescriptorSampledImages = NVK_MAX_DESCRIPTORS,
744       .maxPerStageDescriptorStorageImages = NVK_MAX_DESCRIPTORS,
745       .maxPerStageDescriptorInputAttachments = NVK_MAX_DESCRIPTORS,
746       .maxPerStageResources = UINT32_MAX,
747       .maxDescriptorSetSamplers = NVK_MAX_DESCRIPTORS,
748       .maxDescriptorSetUniformBuffers = NVK_MAX_DESCRIPTORS,
749       .maxDescriptorSetUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
750       .maxDescriptorSetStorageBuffers = NVK_MAX_DESCRIPTORS,
751       .maxDescriptorSetStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
752       .maxDescriptorSetSampledImages = NVK_MAX_DESCRIPTORS,
753       .maxDescriptorSetStorageImages = NVK_MAX_DESCRIPTORS,
754       .maxDescriptorSetInputAttachments = NVK_MAX_DESCRIPTORS,
755       .maxVertexInputAttributes = 32,
756       .maxVertexInputBindings = 32,
757       .maxVertexInputAttributeOffset = 2047,
758       .maxVertexInputBindingStride = 2048,
759       .maxVertexOutputComponents = 128,
760       .maxTessellationGenerationLevel = 64,
761       .maxTessellationPatchSize = 32,
762       .maxTessellationControlPerVertexInputComponents = 128,
763       .maxTessellationControlPerVertexOutputComponents = 128,
764       .maxTessellationControlPerPatchOutputComponents = 120,
765       .maxTessellationControlTotalOutputComponents = 4216,
766       .maxTessellationEvaluationInputComponents = 128,
767       .maxTessellationEvaluationOutputComponents = 128,
768       .maxGeometryShaderInvocations = 32,
769       .maxGeometryInputComponents = 128,
770       .maxGeometryOutputComponents = 128,
771       .maxGeometryOutputVertices = 1024,
772       .maxGeometryTotalOutputComponents = 1024,
773       .maxFragmentInputComponents = 128,
774       .maxFragmentOutputAttachments = NVK_MAX_RTS,
775       .maxFragmentDualSrcAttachments = 1,
776       .maxFragmentCombinedOutputResources = 16,
777       .maxComputeSharedMemorySize = NVK_MAX_SHARED_SIZE,
778       .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
779       .maxComputeWorkGroupInvocations = 1024,
780       .maxComputeWorkGroupSize = {1024, 1024, 64},
781       .subPixelPrecisionBits = 8,
782       .subTexelPrecisionBits = 8,
783       .mipmapPrecisionBits = 8,
784       .maxDrawIndexedIndexValue = UINT32_MAX,
785       .maxDrawIndirectCount = UINT32_MAX,
786       .maxSamplerLodBias = 15,
787       .maxSamplerAnisotropy = 16,
788       .maxViewports = NVK_MAX_VIEWPORTS,
789       .maxViewportDimensions = { 32768, 32768 },
790       .viewportBoundsRange = { -65536, 65536 },
791       .viewportSubPixelBits = 8,
792       .minMemoryMapAlignment = os_page_size,
793       .minTexelBufferOffsetAlignment = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
794       .minUniformBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
795       .minStorageBufferOffsetAlignment = NVK_MIN_SSBO_ALIGNMENT,
796       .minTexelOffset = -8,
797       .maxTexelOffset = 7,
798       .minTexelGatherOffset = -32,
799       .maxTexelGatherOffset = 31,
800       .minInterpolationOffset = -0.5,
801       .maxInterpolationOffset = 0.4375,
802       .subPixelInterpolationOffsetBits = 4,
803       .maxFramebufferHeight = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
804       .maxFramebufferWidth = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
805       .maxFramebufferLayers = 2048,
806       .framebufferColorSampleCounts = sample_counts,
807       .framebufferDepthSampleCounts = sample_counts,
808       .framebufferNoAttachmentsSampleCounts = sample_counts,
809       .framebufferStencilSampleCounts = sample_counts,
810       .maxColorAttachments = NVK_MAX_RTS,
811       .sampledImageColorSampleCounts = sample_counts,
812       .sampledImageIntegerSampleCounts = sample_counts,
813       .sampledImageDepthSampleCounts = sample_counts,
814       .sampledImageStencilSampleCounts = sample_counts,
815       .storageImageSampleCounts = sample_counts,
816       .maxSampleMaskWords = 1,
817       .timestampComputeAndGraphics = true,
818       /* FIXME: Is timestamp period actually 1? */
819       .timestampPeriod = 1.0f,
820       .maxClipDistances = 8,
821       .maxCullDistances = 8,
822       .maxCombinedClipAndCullDistances = 8,
823       .discreteQueuePriorities = 2,
824       .pointSizeRange = { 1.0, 2047.94 },
825       .lineWidthRange = { 1, 64 },
826       .pointSizeGranularity = 0.0625,
827       .lineWidthGranularity = 0.0625,
828       .strictLines = true,
829       .standardSampleLocations = true,
830       .optimalBufferCopyOffsetAlignment = 1,
831       .optimalBufferCopyRowPitchAlignment = 1,
832       .nonCoherentAtomSize = 64,
833 
834       /* Vulkan 1.0 sparse properties */
835       .sparseResidencyNonResidentStrict = true,
836       .sparseResidencyAlignedMipSize = info->cls_eng3d < MAXWELL_B, /* DXVK/vkd3d-proton requires this to be advertised as VK_FALSE for FL12 */
837       .sparseResidencyStandard2DBlockShape = true,
838       .sparseResidencyStandard2DMultisampleBlockShape = true,
839       .sparseResidencyStandard3DBlockShape = true,
840 
841       /* Vulkan 1.1 properties */
842       .subgroupSize = 32,
843       .subgroupSupportedStages = nvk_nak_stages(info),
844       .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
845                                      VK_SUBGROUP_FEATURE_BALLOT_BIT |
846                                      VK_SUBGROUP_FEATURE_BASIC_BIT |
847                                      VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
848                                      VK_SUBGROUP_FEATURE_QUAD_BIT |
849                                      VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
850                                      VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
851                                      VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
852                                      VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
853                                      VK_SUBGROUP_FEATURE_VOTE_BIT,
854       .subgroupQuadOperationsInAllStages = false,
855       .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
856       .maxMultiviewViewCount = NVK_MAX_MULTIVIEW_VIEW_COUNT,
857       .maxMultiviewInstanceIndex = UINT32_MAX,
858       .maxPerSetDescriptors = UINT32_MAX,
859       .maxMemoryAllocationSize = (1u << 31),
860 
861       /* Vulkan 1.2 properties */
862       .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
863                                     VK_RESOLVE_MODE_AVERAGE_BIT |
864                                     VK_RESOLVE_MODE_MIN_BIT |
865                                     VK_RESOLVE_MODE_MAX_BIT,
866       .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
867                                       VK_RESOLVE_MODE_MIN_BIT |
868                                       VK_RESOLVE_MODE_MAX_BIT,
869       .independentResolveNone = true,
870       .independentResolve = true,
871       .driverID = VK_DRIVER_ID_MESA_NVK,
872       .conformanceVersion =
873          conformant ? (VkConformanceVersion) { 1, 4, 0, 0 }
874                     : (VkConformanceVersion) { 0, 0, 0, 0 },
875       .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
876       .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
877       .shaderSignedZeroInfNanPreserveFloat16 = true,
878       .shaderSignedZeroInfNanPreserveFloat32 = true,
879       .shaderSignedZeroInfNanPreserveFloat64 = true,
880       .shaderDenormPreserveFloat16 = true,
881       .shaderDenormPreserveFloat32 = true,
882       .shaderDenormPreserveFloat64 = true,
883       .shaderDenormFlushToZeroFloat16 = false,
884       .shaderDenormFlushToZeroFloat32 = true,
885       .shaderDenormFlushToZeroFloat64 = false,
886       .shaderRoundingModeRTEFloat16 = true,
887       .shaderRoundingModeRTEFloat32 = true,
888       .shaderRoundingModeRTEFloat64 = true,
889       .shaderRoundingModeRTZFloat16 = false,
890       .shaderRoundingModeRTZFloat32 = true,
891       .shaderRoundingModeRTZFloat64 = true,
892       .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
893       .shaderUniformBufferArrayNonUniformIndexingNative = false,
894       .shaderSampledImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
895       .shaderStorageBufferArrayNonUniformIndexingNative = true,
896       .shaderStorageImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
897       .shaderInputAttachmentArrayNonUniformIndexingNative = false,
898       .robustBufferAccessUpdateAfterBind = true,
899       .quadDivergentImplicitLod = info->cls_eng3d >= TURING_A,
900       .maxPerStageDescriptorUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
901       .maxPerStageDescriptorUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
902       .maxPerStageDescriptorUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
903       .maxPerStageDescriptorUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
904       .maxPerStageDescriptorUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
905       .maxPerStageDescriptorUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
906       .maxPerStageUpdateAfterBindResources = UINT32_MAX,
907       .maxDescriptorSetUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
908       .maxDescriptorSetUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
909       .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
910       .maxDescriptorSetUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
911       .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
912       .maxDescriptorSetUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
913       .maxDescriptorSetUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
914       .maxDescriptorSetUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
915       .filterMinmaxSingleComponentFormats = true,
916       .filterMinmaxImageComponentMapping = true,
917       .maxTimelineSemaphoreValueDifference = UINT64_MAX,
918       .framebufferIntegerColorSampleCounts = sample_counts,
919 
920       /* Vulkan 1.3 properties */
921       .minSubgroupSize = 32,
922       .maxSubgroupSize = 32,
923       .maxComputeWorkgroupSubgroups = 1024 / 32,
924       .requiredSubgroupSizeStages = 0,
925       .maxInlineUniformBlockSize = 1 << 16,
926       .maxPerStageDescriptorInlineUniformBlocks = 32,
927       .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
928       .maxDescriptorSetInlineUniformBlocks = 6 * 32,
929       .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
930       .maxInlineUniformTotalSize = 1 << 16,
931       .integerDotProduct4x8BitPackedUnsignedAccelerated
932          = info->cls_eng3d >= VOLTA_A,
933       .integerDotProduct4x8BitPackedSignedAccelerated
934          = info->cls_eng3d >= VOLTA_A,
935       .integerDotProduct4x8BitPackedMixedSignednessAccelerated
936          = info->cls_eng3d >= VOLTA_A,
937       .storageTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
938       .storageTexelBufferOffsetSingleTexelAlignment = true,
939       .uniformTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
940       .uniformTexelBufferOffsetSingleTexelAlignment = true,
941       .maxBufferSize = NVK_MAX_BUFFER_SIZE,
942 
943       /* Vulkan 1.4 properties */
944       .lineSubPixelPrecisionBits = 8,
945       .maxVertexAttribDivisor = UINT32_MAX,
946       .supportsNonZeroFirstInstance = true,
947       .maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,
948       .dynamicRenderingLocalReadDepthStencilAttachments = true,
949       .dynamicRenderingLocalReadMultisampledAttachments = true,
950       .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
951       .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
952       .depthStencilSwizzleOneSupport = true,
953       .polygonModePointSize = true,
954       .nonStrictSinglePixelWideLinesUseParallelogram = false,
955       .nonStrictWideLinesUseParallelogram = false,
956       .blockTexelViewCompatibleMultipleLayers = true,
957       .maxCombinedImageSamplerDescriptorCount = 3,
958       .fragmentShadingRateClampCombinerInputs = false, /* TODO */
959       .defaultRobustnessStorageBuffers =
960          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
961       .defaultRobustnessUniformBuffers =
962          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
963       .defaultRobustnessVertexInputs =
964          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT,
965       .defaultRobustnessImages =
966          VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
967 
968       /* VK_KHR_compute_shader_derivatives */
969       .meshAndTaskShaderDerivatives = false,
970 
971       /* VK_EXT_conservative_rasterization */
972       .primitiveOverestimationSize = info->cls_eng3d >= VOLTA_A ? 1.0f / 512.0f : 0.0,
973       .maxExtraPrimitiveOverestimationSize = 0.75,
974       .extraPrimitiveOverestimationSizeGranularity = 0.25,
975       .primitiveUnderestimation = info->cls_eng3d >= VOLTA_A,
976       .conservativePointAndLineRasterization = true,
977       .degenerateLinesRasterized = info->cls_eng3d >= VOLTA_A,
978       .degenerateTrianglesRasterized = info->cls_eng3d >= PASCAL_A,
979       .fullyCoveredFragmentShaderInputVariable = false,
980       .conservativeRasterizationPostDepthCoverage = true,
981 
982       /* VK_EXT_custom_border_color */
983       .maxCustomBorderColorSamplers = 4000,
984 
985       /* VK_EXT_descriptor_buffer */
986       .combinedImageSamplerDescriptorSingleArray = true,
987       .bufferlessPushDescriptors = false,
988       .allowSamplerImageViewPostSubmitCreation = false,
989       .descriptorBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
990       .maxDescriptorBufferBindings = 32,
991       .maxResourceDescriptorBufferBindings = 32,
992       .maxSamplerDescriptorBufferBindings = 32,
993       .maxEmbeddedImmutableSamplerBindings = 32,
994       .maxEmbeddedImmutableSamplers = 4000,
995       .bufferCaptureReplayDescriptorDataSize = 0,
996       .imageCaptureReplayDescriptorDataSize = 0,
997       .imageViewCaptureReplayDescriptorDataSize =
998          sizeof(struct nvk_image_view_capture),
999       .samplerCaptureReplayDescriptorDataSize =
1000          sizeof(struct nvk_sampler_capture),
1001       .accelerationStructureCaptureReplayDescriptorDataSize = 0, // todo
1002       .samplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1003       .combinedImageSamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1004       .sampledImageDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1005       .storageImageDescriptorSize = sizeof(struct nvk_storage_image_descriptor),
1006       .uniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1007       .robustUniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1008       .storageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1009       .robustStorageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1010       .uniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1011       .robustUniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1012       .storageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1013       .robustStorageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1014       .inputAttachmentDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1015       .accelerationStructureDescriptorSize = 0,
1016       .maxSamplerDescriptorBufferRange = UINT32_MAX,
1017       .maxResourceDescriptorBufferRange = UINT32_MAX,
1018       .samplerDescriptorBufferAddressSpaceSize = UINT32_MAX,
1019       .resourceDescriptorBufferAddressSpaceSize = UINT32_MAX,
1020       .descriptorBufferAddressSpaceSize = UINT32_MAX,
1021 
1022       /* VK_EXT_device_generated_commands */
1023       .maxIndirectPipelineCount = UINT32_MAX,
1024       .maxIndirectShaderObjectCount = UINT32_MAX,
1025       .maxIndirectSequenceCount = 1 << 20,
1026       .maxIndirectCommandsTokenCount = 16,
1027       .maxIndirectCommandsTokenOffset = 2047,
1028       .maxIndirectCommandsIndirectStride = 1 << 12,
1029       .supportedIndirectCommandsInputModes =
1030          VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT |
1031          VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT,
1032       .supportedIndirectCommandsShaderStages =
1033          NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1034       .supportedIndirectCommandsShaderStagesPipelineBinding =
1035          NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1036       .supportedIndirectCommandsShaderStagesShaderBinding =
1037          NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1038       .deviceGeneratedCommandsTransformFeedback = true,
1039       .deviceGeneratedCommandsMultiDrawIndirectCount = true,
1040 
1041       /* VK_EXT_extended_dynamic_state3 */
1042       .dynamicPrimitiveTopologyUnrestricted = true,
1043 
1044       /* VK_EXT_graphics_pipeline_library */
1045       .graphicsPipelineLibraryFastLinking = true,
1046       .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
1047 
1048       /* VK_KHR_maintenance7 */
1049       .robustFragmentShadingRateAttachmentAccess = false,
1050       .separateDepthStencilAttachmentAccess = false,
1051       .maxDescriptorSetTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1052       .maxDescriptorSetTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1053       .maxDescriptorSetTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
1054       .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1055       .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1056       .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
1057 
1058       /* VK_EXT_legacy_vertex_attributes */
1059       .nativeUnalignedPerformance = true,
1060 
1061       /* VK_EXT_map_memory_placed */
1062       .minPlacedMemoryMapAlignment = os_page_size,
1063 
1064       /* VK_EXT_multi_draw */
1065       .maxMultiDrawCount = UINT32_MAX,
1066 
1067       /* VK_EXT_nested_command_buffer */
1068       .maxCommandBufferNestingLevel = UINT32_MAX,
1069 
1070       /* VK_EXT_pci_bus_info */
1071       .pciDomain   = info->pci.domain,
1072       .pciBus      = info->pci.bus,
1073       .pciDevice   = info->pci.dev,
1074       .pciFunction = info->pci.func,
1075 
1076       /* VK_EXT_physical_device_drm gets populated later */
1077 
1078       /* VK_EXT_provoking_vertex */
1079       .provokingVertexModePerPipeline = true,
1080       .transformFeedbackPreservesTriangleFanProvokingVertex = true,
1081 
1082       /* VK_EXT_robustness2 */
1083       .robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
1084       .robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),
1085 
1086       /* VK_EXT_sample_locations */
1087       .sampleLocationSampleCounts = sample_counts,
1088       .maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
1089       .sampleLocationCoordinateRange[0] = 0.0f,
1090       .sampleLocationCoordinateRange[1] = 0.9375f,
1091       .sampleLocationSubPixelBits = 4,
1092       .variableSampleLocations = true,
1093 
1094       /* VK_EXT_shader_object */
1095       .shaderBinaryVersion = 0,
1096 
1097       /* VK_EXT_transform_feedback */
1098       .maxTransformFeedbackStreams = 4,
1099       .maxTransformFeedbackBuffers = 4,
1100       .maxTransformFeedbackBufferSize = UINT32_MAX,
1101       .maxTransformFeedbackStreamDataSize = 2048,
1102       .maxTransformFeedbackBufferDataSize = 512,
1103       .maxTransformFeedbackBufferDataStride = 2048,
1104       .transformFeedbackQueries = true,
1105       .transformFeedbackStreamsLinesTriangles = false,
1106       .transformFeedbackRasterizationStreamSelect = true,
1107       .transformFeedbackDraw = true,
1108 
1109       /* VK_KHR_fragment_shader_barycentric */
1110       .triStripVertexOrderIndependentOfProvokingVertex = false,
1111 
1112       /* VK_KHR_fragment_shading_rate */
1113       .minFragmentShadingRateAttachmentTexelSize = { 16, 16 },
1114       .maxFragmentShadingRateAttachmentTexelSize = { 16, 16 },
1115       .maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1,
1116       .primitiveFragmentShadingRateWithMultipleViewports = info->cls_eng3d >= AMPERE_B,
1117       .layeredShadingRateAttachments = true,
1118       .fragmentShadingRateNonTrivialCombinerOps = true,
1119       .maxFragmentSize = { 4, 4 },
1120       .maxFragmentSizeAspectRatio = 2,
1121       .maxFragmentShadingRateCoverageSamples = 16,
1122       .maxFragmentShadingRateRasterizationSamples = 16,
1123       .fragmentShadingRateWithShaderDepthStencilWrites = true,
1124       .fragmentShadingRateWithSampleMask = true,
1125       .fragmentShadingRateWithShaderSampleMask = true,
1126       .fragmentShadingRateWithConservativeRasterization = true,
1127       //.fragmentShadingRateWithFragmentShaderInterlock = true,
1128       .fragmentShadingRateWithCustomSampleLocations = true,
1129       .fragmentShadingRateStrictMultiplyCombiner = true,
1130 
1131       /* VK_NV_shader_sm_builtins */
1132       .shaderSMCount = (uint32_t)info->tpc_count * info->mp_per_tpc,
1133       .shaderWarpsPerSM = info->max_warps_per_mp,
1134    };
1135 
1136    /* Add the driver to the device name (like other Mesa drivers do) */
1137    if (!strcmp(info->device_name, info->chipset_name)) {
1138       snprintf(properties->deviceName, sizeof(properties->deviceName),
1139                "NVK %s", info->device_name);
1140    } else {
1141       snprintf(properties->deviceName, sizeof(properties->deviceName),
1142                "%s (NVK %s)", info->device_name, info->chipset_name);
1143    }
1144 
1145    /* VK_EXT_host_image_copy */
1146 
1147    /* Not sure if there are layout specific things, so for now just reporting
1148     * all layouts from extensions.
1149     */
1150    static const VkImageLayout supported_layouts[] = {
1151       VK_IMAGE_LAYOUT_GENERAL, /* this one is required by spec */
1152       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1153       VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1154       VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1155       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1156       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1157       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1158       VK_IMAGE_LAYOUT_PREINITIALIZED,
1159       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1160       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1161       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1162       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1163       VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1164       VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1165       VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1166       VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1167       VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
1168       VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1169    };
1170 
1171    properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
1172    properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1173    properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
1174    properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1175 
1176    STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1177    memcpy(properties->optimalTilingLayoutUUID,
1178           instance->driver_build_sha, VK_UUID_SIZE);
1179 
1180    properties->identicalMemoryTypeRequirements = false;
1181 
1182    /* VK_EXT_shader_module_identifier */
1183    STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1184       sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1185    memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
1186             vk_shaderModuleIdentifierAlgorithmUUID,
1187             sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1188 
1189    nv_device_uuid(info, properties->deviceUUID, VK_UUID_SIZE, true);
1190    STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1191    memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
1192 
1193    snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK");
1194    snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1195             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
1196 }
1197 
1198 static void
nvk_physical_device_init_pipeline_cache(struct nvk_physical_device * pdev)1199 nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
1200 {
1201    struct nvk_instance *instance = nvk_physical_device_instance(pdev);
1202 
1203    struct mesa_sha1 sha_ctx;
1204    _mesa_sha1_init(&sha_ctx);
1205 
1206    _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
1207                      sizeof(instance->driver_build_sha));
1208 
1209    const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
1210    _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1211 
1212    unsigned char sha[SHA1_DIGEST_LENGTH];
1213    _mesa_sha1_final(&sha_ctx, sha);
1214 
1215    STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1216    memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1217    memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1218 
1219 #ifdef ENABLE_SHADER_CACHE
1220    char renderer[10];
1221    ASSERTED int len = snprintf(renderer, sizeof(renderer), "nvk_%04x",
1222                                pdev->info.chipset);
1223    assert(len == sizeof(renderer) - 2);
1224 
1225    char timestamp[41];
1226    _mesa_sha1_format(timestamp, instance->driver_build_sha);
1227 
1228    const uint64_t driver_flags = nvk_physical_device_compiler_flags(pdev);
1229    pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1230 #endif
1231 }
1232 
1233 static void
nvk_physical_device_free_disk_cache(struct nvk_physical_device * pdev)1234 nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev)
1235 {
1236 #ifdef ENABLE_SHADER_CACHE
1237    if (pdev->vk.disk_cache) {
1238       disk_cache_destroy(pdev->vk.disk_cache);
1239       pdev->vk.disk_cache = NULL;
1240    }
1241 #else
1242    assert(pdev->vk.disk_cache == NULL);
1243 #endif
1244 }
1245 
/**
 * Compute the size, in bytes, to report for the system-memory heap.
 *
 * \return 3/4 of the total physical memory reported by
 *         os_get_total_physical_memory(), passed through
 *         ROUND_DOWN_TO(..., 1 << 20), or 0 if the query fails.
 */
static uint64_t
nvk_get_sysmem_heap_size(void)
{
   uint64_t total_B = 0;
   const bool have_total = os_get_total_physical_memory(&total_B);
   if (!have_total)
      return 0;

   /* Use 3/4 of total size to avoid swapping */
   const uint64_t usable_B = total_B * 3 / 4;

   return ROUND_DOWN_TO(usable_B, 1 << 20);
}
1256 
/**
 * Compute how many bytes of the system-memory heap are currently available.
 *
 * \param pdev  Physical device; used only as the log object when the query
 *              fails.
 * \return 3/4 of the amount reported by os_get_available_system_memory(),
 *         passed through ROUND_DOWN_TO(..., 1 << 20), or 0 (after logging
 *         an error) if the query fails.
 */
static uint64_t
nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev)
{
   uint64_t avail_B = 0;

   if (os_get_available_system_memory(&avail_B)) {
      /* Use 3/4 of available to avoid swapping */
      const uint64_t usable_B = avail_B * 3 / 4;
      return ROUND_DOWN_TO(usable_B, 1 << 20);
   }

   vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
   return 0;
}
1269 
1270 static uint64_t
nvk_get_vram_heap_available(struct nvk_physical_device * pdev)1271 nvk_get_vram_heap_available(struct nvk_physical_device *pdev)
1272 {
1273    const uint64_t used = nvkmd_pdev_get_vram_used(pdev->nvkmd);
1274    if (used > pdev->info.vram_size_B)
1275       return 0;
1276 
1277    return pdev->info.vram_size_B - used;
1278 }
1279 
1280 VkResult
nvk_create_drm_physical_device(struct vk_instance * _instance,struct _drmDevice * drm_device,struct vk_physical_device ** pdev_out)1281 nvk_create_drm_physical_device(struct vk_instance *_instance,
1282                                struct _drmDevice *drm_device,
1283                                struct vk_physical_device **pdev_out)
1284 {
1285    struct nvk_instance *instance = (struct nvk_instance *)_instance;
1286    VkResult result;
1287 
1288    struct nvkmd_pdev *nvkmd;
1289    result = nvkmd_try_create_pdev_for_drm(drm_device, &instance->vk.base,
1290                                           instance->debug_flags, &nvkmd);
1291    if (result != VK_SUCCESS)
1292       return result;
1293 
1294    /* We don't support anything pre-Kepler */
1295    if (nvkmd->dev_info.cls_eng3d < KEPLER_A) {
1296       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1297       goto fail_nvkmd;
1298    }
1299 
1300    bool conformant =
1301       nvkmd->dev_info.type == NV_DEVICE_TYPE_DIS &&
1302       nvkmd->dev_info.cls_eng3d >= TURING_A &&
1303       nvkmd->dev_info.cls_eng3d <= ADA_A;
1304 
1305    if (!conformant &&
1306        !debug_get_bool_option("NVK_I_WANT_A_BROKEN_VULKAN_DRIVER", false)) {
1307 #ifdef NDEBUG
1308       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1309 #else
1310       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1311                          "WARNING: NVK is not well-tested on %s, pass "
1312                          "NVK_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
1313                          "if you know what you're doing.",
1314                          nvkmd->dev_info.device_name);
1315 #endif
1316       goto fail_nvkmd;
1317    }
1318 
1319    if (!conformant)
1320       vk_warn_non_conformant_implementation("NVK");
1321 
1322    struct nvk_physical_device *pdev =
1323       vk_zalloc(&instance->vk.alloc, sizeof(*pdev),
1324                 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1325 
1326    if (pdev == NULL) {
1327       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1328       goto fail_nvkmd;
1329    }
1330 
1331    struct vk_physical_device_dispatch_table dispatch_table;
1332    vk_physical_device_dispatch_table_from_entrypoints(
1333       &dispatch_table, &nvk_physical_device_entrypoints, true);
1334    vk_physical_device_dispatch_table_from_entrypoints(
1335       &dispatch_table, &wsi_physical_device_entrypoints, false);
1336 
1337    struct vk_device_extension_table supported_extensions;
1338    nvk_get_device_extensions(instance, &nvkmd->dev_info,
1339                              nvkmd->kmd_info.has_alloc_tiled,
1340                              &supported_extensions);
1341 
1342    struct vk_features supported_features;
1343    nvk_get_device_features(&nvkmd->dev_info, &supported_extensions,
1344                            &supported_features);
1345 
1346    struct vk_properties properties;
1347    nvk_get_device_properties(instance, &nvkmd->dev_info, conformant,
1348                              &properties);
1349 
1350    if (nvkmd->drm.render_dev) {
1351       properties.drmHasRender = true;
1352       properties.drmRenderMajor = major(nvkmd->drm.render_dev);
1353       properties.drmRenderMinor = minor(nvkmd->drm.render_dev);
1354    }
1355 
1356    if (nvkmd->drm.primary_dev) {
1357       properties.drmHasPrimary = true;
1358       properties.drmPrimaryMajor = major(nvkmd->drm.primary_dev);
1359       properties.drmPrimaryMinor = minor(nvkmd->drm.primary_dev);
1360    }
1361 
1362    result = vk_physical_device_init(&pdev->vk, &instance->vk,
1363                                     &supported_extensions,
1364                                     &supported_features,
1365                                     &properties,
1366                                     &dispatch_table);
1367    if (result != VK_SUCCESS)
1368       goto fail_alloc;
1369 
1370    pdev->nvkmd = nvkmd;
1371    pdev->info = nvkmd->dev_info;
1372    pdev->debug_flags = instance->debug_flags;
1373 
1374    pdev->nak = nak_compiler_create(&pdev->info);
1375    if (pdev->nak == NULL) {
1376       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1377       goto fail_init;
1378    }
1379 
1380    nvk_physical_device_init_pipeline_cache(pdev);
1381 
1382    uint64_t sysmem_size_B = nvk_get_sysmem_heap_size();
1383    if (sysmem_size_B == 0) {
1384       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1385                          "Failed to query total system memory");
1386       goto fail_disk_cache;
1387    }
1388 
1389    if (pdev->info.vram_size_B > 0) {
1390       uint32_t vram_heap_idx = pdev->mem_heap_count++;
1391       uint32_t bar_heap_idx = vram_heap_idx;
1392       pdev->mem_heaps[vram_heap_idx] = (struct nvk_memory_heap) {
1393          .size = pdev->info.vram_size_B,
1394          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1395       };
1396 
1397       if (pdev->info.bar_size_B > 0 &&
1398           pdev->info.bar_size_B < pdev->info.vram_size_B) {
1399          bar_heap_idx = pdev->mem_heap_count++;
1400          pdev->mem_heaps[bar_heap_idx] = (struct nvk_memory_heap) {
1401             .size = pdev->info.bar_size_B,
1402             .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1403          };
1404       }
1405 
1406       /* Only set available if we have the ioctl. */
1407       if (nvkmd->kmd_info.has_get_vram_used)
1408          pdev->mem_heaps[vram_heap_idx].available = nvk_get_vram_heap_available;
1409 
1410       pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1411          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1412          .heapIndex = vram_heap_idx,
1413       };
1414 
1415       if (pdev->info.cls_eng3d >= MAXWELL_A) {
1416          pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1417             .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1418                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1419                              VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1420             .heapIndex = bar_heap_idx,
1421          };
1422       }
1423    }
1424 
1425    uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1426    pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) {
1427       .size = sysmem_size_B,
1428       /* If we don't have any VRAM (iGPU), claim sysmem as DEVICE_LOCAL */
1429       .flags = pdev->info.vram_size_B == 0
1430                ? VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
1431                : 0,
1432       .available = nvk_get_sysmem_heap_available,
1433    };
1434 
1435    pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1436       /* TODO: What's the right thing to do here on Tegra? */
1437       .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1438                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1439                        VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1440       .heapIndex = sysmem_heap_idx,
1441    };
1442 
1443    assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1444    assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1445 
1446    pdev->queue_families[pdev->queue_family_count++] = (struct nvk_queue_family) {
1447       .queue_flags = VK_QUEUE_GRAPHICS_BIT |
1448                      VK_QUEUE_COMPUTE_BIT |
1449                      VK_QUEUE_TRANSFER_BIT |
1450                      VK_QUEUE_SPARSE_BINDING_BIT,
1451       .queue_count = 1,
1452    };
1453    assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1454 
1455    pdev->vk.supported_sync_types = nvkmd->sync_types;
1456 
1457 #ifdef NVK_USE_WSI_PLATFORM
1458    result = nvk_init_wsi(pdev);
1459    if (result != VK_SUCCESS)
1460       goto fail_disk_cache;
1461 #endif
1462 
1463    *pdev_out = &pdev->vk;
1464 
1465    return VK_SUCCESS;
1466 
1467 fail_disk_cache:
1468    nvk_physical_device_free_disk_cache(pdev);
1469    nak_compiler_destroy(pdev->nak);
1470 fail_init:
1471    vk_physical_device_finish(&pdev->vk);
1472 fail_alloc:
1473    vk_free(&instance->vk.alloc, pdev);
1474 fail_nvkmd:
1475    nvkmd_pdev_destroy(nvkmd);
1476    return result;
1477 }
1478 
1479 void
nvk_physical_device_destroy(struct vk_physical_device * vk_pdev)1480 nvk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1481 {
1482    struct nvk_physical_device *pdev =
1483       container_of(vk_pdev, struct nvk_physical_device, vk);
1484 
1485 #ifdef NVK_USE_WSI_PLATFORM
1486    nvk_finish_wsi(pdev);
1487 #endif
1488    nvk_physical_device_free_disk_cache(pdev);
1489    nak_compiler_destroy(pdev->nak);
1490    nvkmd_pdev_destroy(pdev->nvkmd);
1491    vk_physical_device_finish(&pdev->vk);
1492    vk_free(&pdev->vk.instance->alloc, pdev);
1493 }
1494 
1495 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1496 nvk_GetPhysicalDeviceMemoryProperties2(
1497    VkPhysicalDevice physicalDevice,
1498    VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1499 {
1500    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1501 
1502    pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1503    for (int i = 0; i < pdev->mem_heap_count; i++) {
1504       pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap) {
1505          .size = pdev->mem_heaps[i].size,
1506          .flags = pdev->mem_heaps[i].flags,
1507       };
1508    }
1509 
1510    pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1511    for (int i = 0; i < pdev->mem_type_count; i++) {
1512       pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1513    }
1514 
1515    vk_foreach_struct(ext, pMemoryProperties->pNext)
1516    {
1517       switch (ext->sType) {
1518       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1519          VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1520 
1521          for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1522             const struct nvk_memory_heap *heap = &pdev->mem_heaps[i];
1523             uint64_t used = p_atomic_read(&heap->used);
1524 
1525             /* From the Vulkan 1.3.278 spec:
1526              *
1527              *    "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1528              *    values in which memory usages are returned, with one element
1529              *    for each memory heap. A heap’s usage is an estimate of how
1530              *    much memory the process is currently using in that heap."
1531              *
1532              * TODO: Include internal allocations?
1533              */
1534             p->heapUsage[i] = used;
1535 
1536             uint64_t available = heap->size;
1537             if (heap->available)
1538                available = heap->available(pdev);
1539 
1540             /* From the Vulkan 1.3.278 spec:
1541              *
1542              *    "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1543              *    values in which memory budgets are returned, with one
1544              *    element for each memory heap. A heap’s budget is a rough
1545              *    estimate of how much memory the process can allocate from
1546              *    that heap before allocations may fail or cause performance
1547              *    degradation. The budget includes any currently allocated
1548              *    device memory."
1549              *
1550              * and
1551              *
1552              *    "The heapBudget value must be less than or equal to
1553              *    VkMemoryHeap::size for each heap."
1554              *
1555              * available (queried above) is the total amount free memory
1556              * system-wide and does not include our allocations so we need
1557              * to add that in.
1558              */
1559             uint64_t budget = MIN2(available + used, heap->size);
1560 
1561             /* Set the budget at 90% of available to avoid thrashing */
1562             p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1563          }
1564 
1565          /* From the Vulkan 1.3.278 spec:
1566           *
1567           *    "The heapBudget and heapUsage values must be zero for array
1568           *    elements greater than or equal to
1569           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1570           *    heapBudget value must be non-zero for array elements less than
1571           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1572           */
1573          for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1574             p->heapBudget[i] = 0u;
1575             p->heapUsage[i] = 0u;
1576          }
1577          break;
1578       }
1579       default:
1580          vk_debug_ignored_stype(ext->sType);
1581          break;
1582       }
1583    }
1584 }
1585 
1586 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1587 nvk_GetPhysicalDeviceQueueFamilyProperties2(
1588    VkPhysicalDevice physicalDevice,
1589    uint32_t *pQueueFamilyPropertyCount,
1590    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1591 {
1592    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1593    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1594                           pQueueFamilyPropertyCount);
1595 
1596    for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1597       const struct nvk_queue_family *queue_family = &pdev->queue_families[i];
1598 
1599       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1600          p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1601          p->queueFamilyProperties.queueCount = queue_family->queue_count;
1602          p->queueFamilyProperties.timestampValidBits = 64;
1603          p->queueFamilyProperties.minImageTransferGranularity =
1604             (VkExtent3D){1, 1, 1};
1605 
1606          vk_foreach_struct(ext, p->pNext) {
1607             switch (ext->sType) {
1608             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES: {
1609                VkQueueFamilyGlobalPriorityProperties *p = (void *)ext;
1610                p->priorityCount = 1;
1611                p->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM;
1612                break;
1613             }
1614 
1615             default:
1616                vk_debug_ignored_stype(ext->sType);
1617                break;
1618             }
1619          }
1620       }
1621    }
1622 }
1623 
1624 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1625 nvk_GetPhysicalDeviceMultisamplePropertiesEXT(
1626    VkPhysicalDevice physicalDevice,
1627    VkSampleCountFlagBits samples,
1628    VkMultisamplePropertiesEXT *pMultisampleProperties)
1629 {
1630    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1631 
1632    if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1633       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1634    } else {
1635       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1636    }
1637 }
1638 
1639 VkExtent2D
nvk_max_shading_rate(const struct nvk_physical_device * pdev,VkSampleCountFlagBits samples)1640 nvk_max_shading_rate(const struct nvk_physical_device *pdev,
1641                      VkSampleCountFlagBits samples)
1642 {
1643    const struct nil_Extent4D_Samples px_extent_sa =
1644       nil_px_extent_sa(nil_choose_sample_layout(samples));
1645 
1646    assert(px_extent_sa.width <= 4);
1647    assert(px_extent_sa.height <= 4);
1648    assert(px_extent_sa.depth == 1);
1649    assert(px_extent_sa.array_len == 1);
1650 
1651    return (VkExtent2D) {
1652       .width = 4 / px_extent_sa.width,
1653       .height = 4 / px_extent_sa.height,
1654    };
1655 }
1656 
1657 VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice,uint32_t * pFragmentShadingRateCount,VkPhysicalDeviceFragmentShadingRateKHR * pFragmentShadingRates)1658 nvk_GetPhysicalDeviceFragmentShadingRatesKHR(
1659    VkPhysicalDevice physicalDevice,
1660    uint32_t *pFragmentShadingRateCount,
1661    VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
1662 {
1663    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1664    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
1665                           pFragmentShadingRates, pFragmentShadingRateCount);
1666 
1667 
1668    /* From the Vulkan 1.3.297 spec:
1669     *
1670     *    "The returned array of fragment shading rates must be ordered from
1671     *    largest fragmentSize.width value to smallest, and each set of
1672     *    fragment shading rates with the same fragmentSize.width value must be
1673     *    ordered from largest fragmentSize.height to smallest. Any two entries
1674     *    in the array must not have the same fragmentSize values."
1675     */
1676    VkExtent2D shading_rates[] = {
1677       { 4, 4 },
1678       { 4, 2 },
1679       { 2, 4 },
1680       { 2, 2 },
1681       { 2, 1 },
1682       { 1, 2 },
1683       { 1, 1 },
1684    };
1685 
1686    for (uint32_t i = 0; i < ARRAY_SIZE(shading_rates); i++) {
1687       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, p) {
1688          p->fragmentSize = shading_rates[i];
1689          if (shading_rates[i].width == 1 && shading_rates[i].height == 1) {
1690             /* The Vulkan spec requires us to set ~0 for 1x1. */
1691             p->sampleCounts = ~0;
1692          } else {
1693             for (uint32_t samples = 1; samples <= 16; samples <<= 1) {
1694                VkExtent2D max_rate = nvk_max_shading_rate(pdev, samples);
1695                if (shading_rates[i].width > max_rate.width ||
1696                    shading_rates[i].height > max_rate.height)
1697                   break;
1698 
1699                p->sampleCounts |= samples;
1700             }
1701          }
1702       }
1703    }
1704 
1705    return vk_outarray_status(&out);
1706 }
1707