• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nvk_physical_device.h"
6 
7 #include "nak.h"
8 #include "nvk_buffer.h"
9 #include "nvk_descriptor_types.h"
10 #include "nvk_entrypoints.h"
11 #include "nvk_format.h"
12 #include "nvk_image.h"
13 #include "nvk_image_view.h"
14 #include "nvk_instance.h"
15 #include "nvk_sampler.h"
16 #include "nvk_shader.h"
17 #include "nvk_wsi.h"
18 #include "nvkmd/nvkmd.h"
19 #include "nvkmd/nouveau/nvkmd_nouveau.h"
20 #include "git_sha1.h"
21 #include "util/detect_os.h"
22 #include "util/disk_cache.h"
23 #include "util/mesa-sha1.h"
24 
25 #if DETECT_OS_ANDROID
26 #include <vulkan/vk_android_native_buffer.h>
27 #include "util/u_gralloc/u_gralloc.h"
28 #endif
29 
30 #include "vk_android.h"
31 #include "vk_device.h"
32 #include "vk_drm_syncobj.h"
33 #include "vk_shader_module.h"
34 #include "vulkan/wsi/wsi_common.h"
35 
36 #include <sys/sysmacros.h>
37 
38 #include "nv_push.h"
39 #include "cl90c0.h"
40 #include "cl91c0.h"
41 #include "cla097.h"
42 #include "cla0c0.h"
43 #include "cla1c0.h"
44 #include "clb097.h"
45 #include "clb0c0.h"
46 #include "clb197.h"
47 #include "clb1c0.h"
48 #include "clc097.h"
49 #include "clc0c0.h"
50 #include "clc1c0.h"
51 #include "clc397.h"
52 #include "clc3c0.h"
53 #include "clc597.h"
54 #include "clc5c0.h"
55 #include "clc797.h"
56 #include "clc997.h"
57 
58 static bool
nvk_use_nak(const struct nv_device_info * info)59 nvk_use_nak(const struct nv_device_info *info)
60 {
61    const VkShaderStageFlags vk10_stages =
62       VK_SHADER_STAGE_VERTEX_BIT |
63       VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
64       VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
65       VK_SHADER_STAGE_GEOMETRY_BIT |
66       VK_SHADER_STAGE_FRAGMENT_BIT |
67       VK_SHADER_STAGE_COMPUTE_BIT;
68 
69    return !(vk10_stages & ~nvk_nak_stages(info));
70 }
71 
72 static uint32_t
nvk_get_vk_version(const struct nv_device_info * info)73 nvk_get_vk_version(const struct nv_device_info *info)
74 {
75    /* Version override takes priority */
76    const uint32_t version_override = vk_get_version_override();
77    if (version_override)
78       return version_override;
79 
80    /* If we're using codegen for anything, lock to version 1.0 */
81    if (!nvk_use_nak(info))
82       return VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
83 
84 #if defined(ANDROID_STRICT) && ANDROID_API_LEVEL <= 32
85    return VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION);
86 #endif
87 
88    /* Vulkan 1.4 requires hostImageCopy which is currently only supported on
89     * Turing+.
90     */
91    if (info->cls_eng3d < TURING_A)
92       return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
93 
94    return VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION);
95 }
96 
97 static void
nvk_get_device_extensions(const struct nvk_instance * instance,const struct nv_device_info * info,bool has_tiled_bos,struct vk_device_extension_table * ext)98 nvk_get_device_extensions(const struct nvk_instance *instance,
99                           const struct nv_device_info *info,
100                           bool has_tiled_bos,
101                           struct vk_device_extension_table *ext)
102 {
103    *ext = (struct vk_device_extension_table) {
104       .KHR_8bit_storage = true,
105       .KHR_16bit_storage = true,
106       .KHR_bind_memory2 = true,
107       .KHR_buffer_device_address = true,
108       .KHR_calibrated_timestamps = true,
109       .KHR_compute_shader_derivatives = nvk_use_nak(info) &&
110                                         info->cls_eng3d >= TURING_A,
111       .KHR_copy_commands2 = true,
112       .KHR_create_renderpass2 = true,
113       .KHR_dedicated_allocation = true,
114       .KHR_depth_stencil_resolve = true,
115       .KHR_descriptor_update_template = true,
116       .KHR_device_group = true,
117       .KHR_draw_indirect_count = info->cls_eng3d >= TURING_A,
118       .KHR_driver_properties = true,
119       .KHR_dynamic_rendering = true,
120       .KHR_dynamic_rendering_local_read = true,
121       .KHR_external_fence = true,
122       .KHR_external_fence_fd = true,
123       .KHR_external_memory = true,
124       .KHR_external_memory_fd = true,
125       .KHR_external_semaphore = true,
126       .KHR_external_semaphore_fd = true,
127       .KHR_format_feature_flags2 = true,
128       .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
129          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
130       .KHR_fragment_shading_rate = info->cls_eng3d >= TURING_A,
131       .KHR_get_memory_requirements2 = true,
132       .KHR_global_priority = true,
133       .KHR_image_format_list = true,
134       .KHR_imageless_framebuffer = true,
135 #ifdef NVK_USE_WSI_PLATFORM
136       .KHR_incremental_present = true,
137 #endif
138       .KHR_index_type_uint8 = true,
139       .KHR_line_rasterization = true,
140       .KHR_load_store_op_none = true,
141       .KHR_maintenance1 = true,
142       .KHR_maintenance2 = true,
143       .KHR_maintenance3 = true,
144       .KHR_maintenance4 = true,
145       .KHR_maintenance5 = true,
146       .KHR_maintenance6 = true,
147       .KHR_maintenance7 = true,
148       .KHR_map_memory2 = true,
149       .KHR_multiview = true,
150       .KHR_pipeline_executable_properties = true,
151       .KHR_pipeline_library = true,
152 #ifdef NVK_USE_WSI_PLATFORM
153       /* Hide these behind dri configs for now since we cannot implement it
154        * reliably on all surfaces yet. There is no surface capability query
155        * for present wait/id, but the feature is useful enough to hide behind
156        * an opt-in mechanism for now.  If the instance only enables surface
157        * extensions that unconditionally support present wait, we can also
158        * expose the extension that way.
159        */
160       .KHR_present_id = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
161                         wsi_common_vk_instance_supports_present_wait(&instance->vk),
162       .KHR_present_wait = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
163                           wsi_common_vk_instance_supports_present_wait(&instance->vk),
164 #endif
165       .KHR_push_descriptor = true,
166       .KHR_relaxed_block_layout = true,
167       .KHR_sampler_mirror_clamp_to_edge = true,
168       .KHR_sampler_ycbcr_conversion = true,
169       .KHR_separate_depth_stencil_layouts = true,
170       .KHR_shader_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
171                                  nvk_use_nak(info),
172       .KHR_shader_clock = true,
173       .KHR_shader_draw_parameters = true,
174       .KHR_shader_expect_assume = true,
175       .KHR_shader_float_controls = true,
176       .KHR_shader_float_controls2 = true,
177       .KHR_shader_float16_int8 = true,
178       .KHR_shader_integer_dot_product = true,
179       .KHR_shader_maximal_reconvergence = true,
180       .KHR_shader_non_semantic_info = true,
181       .KHR_shader_quad_control = true,
182       .KHR_shader_relaxed_extended_instruction = true,
183       .KHR_shader_subgroup_extended_types = true,
184       .KHR_shader_subgroup_rotate = nvk_use_nak(info),
185       .KHR_shader_subgroup_uniform_control_flow = nvk_use_nak(info),
186       .KHR_shader_terminate_invocation =
187          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
188       .KHR_spirv_1_4 = true,
189       .KHR_storage_buffer_storage_class = true,
190       .KHR_timeline_semaphore = true,
191 #ifdef NVK_USE_WSI_PLATFORM
192       .KHR_swapchain = true,
193       .KHR_swapchain_mutable_format = true,
194 #endif
195       .KHR_synchronization2 = true,
196       .KHR_uniform_buffer_standard_layout = true,
197       .KHR_variable_pointers = true,
198       .KHR_vertex_attribute_divisor = true,
199       .KHR_vulkan_memory_model = nvk_use_nak(info),
200       .KHR_workgroup_memory_explicit_layout = true,
201       .KHR_zero_initialize_workgroup_memory = true,
202       .EXT_4444_formats = true,
203       .EXT_attachment_feedback_loop_layout = true,
204       .EXT_border_color_swizzle = true,
205       .EXT_buffer_device_address = true,
206       .EXT_calibrated_timestamps = true,
207       .EXT_conditional_rendering = true,
208       .EXT_conservative_rasterization = info->cls_eng3d >= MAXWELL_B,
209       .EXT_color_write_enable = true,
210       .EXT_custom_border_color = true,
211       .EXT_depth_bias_control = true,
212       .EXT_depth_clamp_control = true,
213       .EXT_depth_clamp_zero_one = true,
214       .EXT_depth_clip_control = true,
215       .EXT_depth_clip_enable = true,
216       .EXT_depth_range_unrestricted = info->cls_eng3d >= VOLTA_A,
217       .EXT_descriptor_buffer = true,
218       .EXT_descriptor_indexing = true,
219       .EXT_device_generated_commands = true,
220 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
221       .EXT_display_control = true,
222 #endif
223       .EXT_image_drm_format_modifier = has_tiled_bos,
224       .EXT_dynamic_rendering_unused_attachments = true,
225       .EXT_extended_dynamic_state = true,
226       .EXT_extended_dynamic_state2 = true,
227       .EXT_extended_dynamic_state3 = true,
228       .EXT_external_memory_dma_buf = true,
229       .EXT_global_priority = true,
230       .EXT_global_priority_query = true,
231       .EXT_graphics_pipeline_library = true,
232       .EXT_host_query_reset = true,
233       .EXT_host_image_copy = info->cls_eng3d >= TURING_A,
234       .EXT_image_2d_view_of_3d = true,
235       .EXT_image_robustness = true,
236       .EXT_image_sliced_view_of_3d = true,
237       .EXT_image_view_min_lod = true,
238       .EXT_index_type_uint8 = true,
239       .EXT_inline_uniform_block = true,
240       .EXT_legacy_vertex_attributes = true,
241       .EXT_line_rasterization = true,
242       .EXT_load_store_op_none = true,
243       .EXT_map_memory_placed = true,
244       .EXT_memory_budget = true,
245       .EXT_multi_draw = true,
246       .EXT_mutable_descriptor_type = true,
247       .EXT_nested_command_buffer = true,
248       .EXT_non_seamless_cube_map = true,
249       .EXT_pci_bus_info = info->type == NV_DEVICE_TYPE_DIS,
250       .EXT_pipeline_creation_cache_control = true,
251       .EXT_pipeline_creation_feedback = true,
252       .EXT_pipeline_robustness = true,
253       .EXT_physical_device_drm = true,
254       .EXT_post_depth_coverage = true,
255       .EXT_primitive_topology_list_restart = true,
256       .EXT_private_data = true,
257       .EXT_primitives_generated_query = true,
258       .EXT_provoking_vertex = true,
259       .EXT_queue_family_foreign = true,
260       .EXT_robustness2 = true,
261       .EXT_sample_locations = info->cls_eng3d >= MAXWELL_B,
262       .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B,
263       .EXT_scalar_block_layout = nvk_use_nak(info),
264       .EXT_separate_stencil_usage = true,
265       .EXT_shader_image_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
266                                        nvk_use_nak(info),
267       .EXT_shader_demote_to_helper_invocation = true,
268       .EXT_shader_module_identifier = true,
269       .EXT_shader_object = true,
270       .EXT_shader_replicated_composites = true,
271       .EXT_shader_subgroup_ballot = true,
272       .EXT_shader_subgroup_vote = true,
273       .EXT_shader_viewport_index_layer = info->cls_eng3d >= MAXWELL_B,
274       .EXT_subgroup_size_control = true,
275 #ifdef NVK_USE_WSI_PLATFORM
276       .EXT_swapchain_maintenance1 = true,
277 #endif
278       .EXT_texel_buffer_alignment = true,
279       .EXT_tooling_info = true,
280       .EXT_transform_feedback = true,
281       .EXT_vertex_attribute_divisor = true,
282       .EXT_vertex_input_dynamic_state = true,
283       .EXT_ycbcr_2plane_444_formats = true,
284       .EXT_ycbcr_image_arrays = true,
285 #if DETECT_OS_ANDROID
286       .ANDROID_native_buffer = vk_android_get_ugralloc() != NULL,
287 #endif
288       .GOOGLE_decorate_string = true,
289       .GOOGLE_hlsl_functionality1 = true,
290       .GOOGLE_user_type = true,
291       .NV_compute_shader_derivatives = nvk_use_nak(info) &&
292                                        info->cls_eng3d >= TURING_A,
293       .NV_shader_sm_builtins = true,
294       .VALVE_mutable_descriptor_type = true,
295    };
296 }
297 
298 static void
nvk_get_device_features(const struct nv_device_info * info,const struct vk_device_extension_table * supported_extensions,struct vk_features * features)299 nvk_get_device_features(const struct nv_device_info *info,
300                         const struct vk_device_extension_table *supported_extensions,
301                         struct vk_features *features)
302 {
303    *features = (struct vk_features) {
304       /* Vulkan 1.0 */
305       .robustBufferAccess = true,
306       .fullDrawIndexUint32 = true,
307       .imageCubeArray = true,
308       .independentBlend = true,
309       .geometryShader = true,
310       .tessellationShader = true,
311       .sampleRateShading = true,
312       .dualSrcBlend = true,
313       .logicOp = true,
314       .multiDrawIndirect = true,
315       .drawIndirectFirstInstance = true,
316       .depthClamp = true,
317       .depthBiasClamp = true,
318       .fillModeNonSolid = true,
319       .depthBounds = true,
320       .wideLines = true,
321       .largePoints = true,
322       .alphaToOne = true,
323       .multiViewport = true,
324       .samplerAnisotropy = true,
325       .textureCompressionETC2 = false,
326       .textureCompressionBC = true,
327       .textureCompressionASTC_LDR = false,
328       .occlusionQueryPrecise = true,
329       .pipelineStatisticsQuery = true,
330       .vertexPipelineStoresAndAtomics = true,
331       .fragmentStoresAndAtomics = true,
332       .shaderTessellationAndGeometryPointSize = true,
333       .shaderImageGatherExtended = true,
334       .shaderStorageImageExtendedFormats = true,
335       .shaderStorageImageMultisample = true,
336       .shaderStorageImageReadWithoutFormat = info->cls_eng3d >= MAXWELL_A,
337       .shaderStorageImageWriteWithoutFormat = true,
338       .shaderUniformBufferArrayDynamicIndexing = true,
339       .shaderSampledImageArrayDynamicIndexing = true,
340       .shaderStorageBufferArrayDynamicIndexing = true,
341       .shaderStorageImageArrayDynamicIndexing = true,
342       .shaderClipDistance = true,
343       .shaderCullDistance = true,
344       .shaderFloat64 = true,
345       .shaderInt64 = true,
346       .shaderInt16 = true,
347       .shaderResourceResidency = info->cls_eng3d >= VOLTA_A,
348       .shaderResourceMinLod = info->cls_eng3d >= VOLTA_A,
349       .sparseBinding = true,
350       .sparseResidency2Samples = info->cls_eng3d >= MAXWELL_B,
351       .sparseResidency4Samples = info->cls_eng3d >= MAXWELL_B,
352       .sparseResidency8Samples = info->cls_eng3d >= MAXWELL_B,
353       .sparseResidencyAliased = info->cls_eng3d >= MAXWELL_B,
354       .sparseResidencyBuffer = info->cls_eng3d >= MAXWELL_B,
355       .sparseResidencyImage2D = info->cls_eng3d >= MAXWELL_B,
356       .sparseResidencyImage3D = info->cls_eng3d >= MAXWELL_B,
357       .variableMultisampleRate = true,
358       .inheritedQueries = true,
359 
360       /* Vulkan 1.1 */
361       .storageBuffer16BitAccess = true,
362       .uniformAndStorageBuffer16BitAccess = true,
363       .storagePushConstant16 = true,
364       .multiview = true,
365       .multiviewGeometryShader = true,
366       .multiviewTessellationShader = true,
367       .variablePointersStorageBuffer = true,
368       .variablePointers = true,
369       .shaderDrawParameters = true,
370       .samplerYcbcrConversion = true,
371 
372       /* Vulkan 1.2 */
373       .samplerMirrorClampToEdge = true,
374       .drawIndirectCount = info->cls_eng3d >= TURING_A,
375       .storageBuffer8BitAccess = true,
376       .uniformAndStorageBuffer8BitAccess = true,
377       .storagePushConstant8 = true,
378       .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
379                                   nvk_use_nak(info),
380       .shaderSharedInt64Atomics = false, /* TODO */
381       /* TODO: Fp16 is currently busted on Turing and Volta due to instruction
382        * scheduling issues.  Re-enable it once those are sorted.
383        */
384       .shaderFloat16 = info->sm >= 80 && nvk_use_nak(info),
385       .shaderInt8 = true,
386       .descriptorIndexing = true,
387       .shaderInputAttachmentArrayDynamicIndexing = true,
388       .shaderUniformTexelBufferArrayDynamicIndexing = true,
389       .shaderStorageTexelBufferArrayDynamicIndexing = true,
390       .shaderUniformBufferArrayNonUniformIndexing = true,
391       .shaderSampledImageArrayNonUniformIndexing = true,
392       .shaderStorageBufferArrayNonUniformIndexing = true,
393       .shaderStorageImageArrayNonUniformIndexing = true,
394       .shaderInputAttachmentArrayNonUniformIndexing = true,
395       .shaderUniformTexelBufferArrayNonUniformIndexing = true,
396       .shaderStorageTexelBufferArrayNonUniformIndexing = true,
397       .descriptorBindingUniformBufferUpdateAfterBind = true,
398       .descriptorBindingSampledImageUpdateAfterBind = true,
399       .descriptorBindingStorageImageUpdateAfterBind = true,
400       .descriptorBindingStorageBufferUpdateAfterBind = true,
401       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
402       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
403       .descriptorBindingUpdateUnusedWhilePending = true,
404       .descriptorBindingPartiallyBound = true,
405       .descriptorBindingVariableDescriptorCount = true,
406       .runtimeDescriptorArray = true,
407       .samplerFilterMinmax = info->cls_eng3d >= MAXWELL_B,
408       .scalarBlockLayout = nvk_use_nak(info),
409       .imagelessFramebuffer = true,
410       .uniformBufferStandardLayout = true,
411       .shaderSubgroupExtendedTypes = true,
412       .separateDepthStencilLayouts = true,
413       .hostQueryReset = true,
414       .timelineSemaphore = true,
415       .bufferDeviceAddress = true,
416       .bufferDeviceAddressCaptureReplay = true,
417       .bufferDeviceAddressMultiDevice = false,
418       .vulkanMemoryModel = nvk_use_nak(info),
419       .vulkanMemoryModelDeviceScope = nvk_use_nak(info),
420       .vulkanMemoryModelAvailabilityVisibilityChains = nvk_use_nak(info),
421       .shaderOutputViewportIndex = info->cls_eng3d >= MAXWELL_B,
422       .shaderOutputLayer = info->cls_eng3d >= MAXWELL_B,
423       .subgroupBroadcastDynamicId = nvk_use_nak(info),
424 
425       /* Vulkan 1.3 */
426       .robustImageAccess = true,
427       .inlineUniformBlock = true,
428       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
429       .pipelineCreationCacheControl = true,
430       .privateData = true,
431       .shaderDemoteToHelperInvocation = true,
432       .shaderTerminateInvocation = true,
433       .subgroupSizeControl = true,
434       .computeFullSubgroups = true,
435       .synchronization2 = true,
436       .shaderZeroInitializeWorkgroupMemory = true,
437       .dynamicRendering = true,
438       .shaderIntegerDotProduct = true,
439       .maintenance4 = true,
440 
441       /* Vulkan 1.4 */
442       .globalPriorityQuery = true,
443       .shaderSubgroupRotate = nvk_use_nak(info),
444       .shaderSubgroupRotateClustered = nvk_use_nak(info),
445       .shaderFloatControls2 = true,
446       .shaderExpectAssume = true,
447       .rectangularLines = true,
448       .bresenhamLines = true,
449       .smoothLines = true,
450       .stippledRectangularLines = true,
451       .stippledBresenhamLines = true,
452       .stippledSmoothLines = true,
453       .vertexAttributeInstanceRateDivisor = true,
454       .vertexAttributeInstanceRateZeroDivisor = true,
455       .indexTypeUint8 = true,
456       .dynamicRenderingLocalRead = true,
457       .maintenance5 = true,
458       .maintenance6 = true,
459       .pipelineRobustness = true,
460       .hostImageCopy = info->cls_eng3d >= TURING_A,
461       .pushDescriptor = true,
462 
463       /* VK_KHR_compute_shader_derivatives */
464       .computeDerivativeGroupQuads = info->cls_eng3d >= TURING_A,
465       .computeDerivativeGroupLinear = info->cls_eng3d >= TURING_A,
466 
467       /* VK_KHR_fragment_shader_barycentric */
468       .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
469          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
470 
471       /* VK_KHR_fragment_shading_rate */
472       .pipelineFragmentShadingRate = info->cls_eng3d >= TURING_A,
473       .primitiveFragmentShadingRate = info->cls_eng3d >= TURING_A,
474       .attachmentFragmentShadingRate = info->cls_eng3d >= TURING_A,
475 
476       /* VK_KHR_maintenance7 */
477       .maintenance7 = true,
478 
479       /* VK_KHR_pipeline_executable_properties */
480       .pipelineExecutableInfo = true,
481 
482       /* VK_KHR_present_id */
483       .presentId = supported_extensions->KHR_present_id,
484 
485       /* VK_KHR_present_wait */
486       .presentWait = supported_extensions->KHR_present_wait,
487 
488       /* VK_KHR_shader_quad_control */
489       .shaderQuadControl = nvk_use_nak(info),
490 
491       /* VK_KHR_shader_relaxed_extended_instruction */
492       .shaderRelaxedExtendedInstruction = true,
493 
494       /* VK_KHR_shader_clock */
495       .shaderSubgroupClock = true,
496       .shaderDeviceClock = true,
497 
498       /* VK_KHR_shader_maximal_reconvergence */
499       .shaderMaximalReconvergence = true,
500 
501       /* VK_KHR_shader_subgroup_uniform_control_flow */
502       .shaderSubgroupUniformControlFlow = nvk_use_nak(info),
503 
504       /* VK_KHR_workgroup_memory_explicit_layout */
505       .workgroupMemoryExplicitLayout = true,
506       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
507       .workgroupMemoryExplicitLayout8BitAccess = nvk_use_nak(info),
508       .workgroupMemoryExplicitLayout16BitAccess = nvk_use_nak(info),
509 
510       /* VK_EXT_4444_formats */
511       .formatA4R4G4B4 = true,
512       .formatA4B4G4R4 = true,
513 
514       /* VK_EXT_attachment_feedback_loop_layout */
515       .attachmentFeedbackLoopLayout = true,
516 
517       /* VK_EXT_border_color_swizzle */
518       .borderColorSwizzle = true,
519       .borderColorSwizzleFromImage = false,
520 
521       /* VK_EXT_buffer_device_address */
522       .bufferDeviceAddressCaptureReplayEXT = true,
523 
524       /* VK_EXT_color_write_enable */
525       .colorWriteEnable = true,
526 
527       /* VK_EXT_conditional_rendering */
528       .conditionalRendering = true,
529       .inheritedConditionalRendering = true,
530 
531       /* VK_EXT_custom_border_color */
532       .customBorderColors = true,
533       .customBorderColorWithoutFormat = true,
534 
535       /* VK_EXT_depth_bias_control */
536       .depthBiasControl = true,
537       .leastRepresentableValueForceUnormRepresentation = true,
538       .floatRepresentation = false,
539       .depthBiasExact = true,
540 
541       /* VK_EXT_depth_clamp_control */
542       .depthClampControl = true,
543 
544       /* VK_EXT_depth_clamp_zero_one */
545       .depthClampZeroOne = true,
546 
547       /* VK_EXT_depth_clip_control */
548       .depthClipControl = true,
549 
550       /* VK_EXT_depth_clip_enable */
551       .depthClipEnable = true,
552 
553       /* VK_EXT_descriptor_buffer */
554       .descriptorBuffer = true,
555       .descriptorBufferCaptureReplay = true,
556       .descriptorBufferImageLayoutIgnored = true,
557       .descriptorBufferPushDescriptors = true,
558 
559       /* VK_EXT_device_generated_commands */
560       .deviceGeneratedCommands = true,
561       .dynamicGeneratedPipelineLayout = true,
562 
563       /* VK_EXT_dynamic_rendering_unused_attachments */
564       .dynamicRenderingUnusedAttachments = true,
565 
566       /* VK_EXT_extended_dynamic_state */
567       .extendedDynamicState = true,
568 
569       /* VK_EXT_extended_dynamic_state2 */
570       .extendedDynamicState2 = true,
571       .extendedDynamicState2LogicOp = true,
572       .extendedDynamicState2PatchControlPoints = true,
573 
574       /* VK_EXT_extended_dynamic_state3 */
575       .extendedDynamicState3TessellationDomainOrigin = true,
576       .extendedDynamicState3DepthClampEnable = true,
577       .extendedDynamicState3PolygonMode = true,
578       .extendedDynamicState3RasterizationSamples = true,
579       .extendedDynamicState3SampleMask = true,
580       .extendedDynamicState3AlphaToCoverageEnable = true,
581       .extendedDynamicState3AlphaToOneEnable = true,
582       .extendedDynamicState3LogicOpEnable = true,
583       .extendedDynamicState3ColorBlendEnable = true,
584       .extendedDynamicState3ColorBlendEquation = true,
585       .extendedDynamicState3ColorWriteMask = true,
586       .extendedDynamicState3RasterizationStream = true,
587       .extendedDynamicState3ConservativeRasterizationMode = false,
588       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
589       .extendedDynamicState3DepthClipEnable = true,
590       .extendedDynamicState3SampleLocationsEnable = info->cls_eng3d >= MAXWELL_B,
591       .extendedDynamicState3ColorBlendAdvanced = false,
592       .extendedDynamicState3ProvokingVertexMode = true,
593       .extendedDynamicState3LineRasterizationMode = true,
594       .extendedDynamicState3LineStippleEnable = true,
595       .extendedDynamicState3DepthClipNegativeOneToOne = true,
596       .extendedDynamicState3ViewportWScalingEnable = false,
597       .extendedDynamicState3ViewportSwizzle = false,
598       .extendedDynamicState3CoverageToColorEnable = false,
599       .extendedDynamicState3CoverageToColorLocation = false,
600       .extendedDynamicState3CoverageModulationMode = false,
601       .extendedDynamicState3CoverageModulationTableEnable = false,
602       .extendedDynamicState3CoverageModulationTable = false,
603       .extendedDynamicState3CoverageReductionMode = false,
604       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
605       .extendedDynamicState3ShadingRateImageEnable = false,
606 
607       /* VK_EXT_graphics_pipeline_library */
608       .graphicsPipelineLibrary = true,
609 
610       /* VK_EXT_image_2d_view_of_3d */
611       .image2DViewOf3D = true,
612       .sampler2DViewOf3D = true,
613 
614       /* VK_EXT_image_sliced_view_of_3d */
615       .imageSlicedViewOf3D = true,
616 
617 #ifdef NVK_USE_WSI_PLATFORM
618       /* VK_EXT_swapchain_maintenance1 */
619       .swapchainMaintenance1 = true,
620 #endif
621 
622       /* VK_EXT_image_view_min_lod */
623       .minLod = true,
624 
625       /* VK_EXT_legacy_vertex_attributes */
626       .legacyVertexAttributes = true,
627 
628       /* VK_EXT_map_memory_placed */
629       .memoryMapPlaced = true,
630       .memoryMapRangePlaced = false,
631       .memoryUnmapReserve = true,
632 
633       /* VK_EXT_multi_draw */
634       .multiDraw = true,
635 
636       /* VK_EXT_mutable_descriptor_type */
637       .mutableDescriptorType = true,
638 
639       /* VK_EXT_nested_command_buffer */
640       .nestedCommandBuffer = true,
641       .nestedCommandBufferRendering = true,
642       .nestedCommandBufferSimultaneousUse = true,
643 
644       /* VK_EXT_non_seamless_cube_map */
645       .nonSeamlessCubeMap = true,
646 
647       /* VK_EXT_primitive_topology_list_restart */
648       .primitiveTopologyListRestart = true,
649       .primitiveTopologyPatchListRestart = true,
650 
651       /* VK_EXT_primitives_generated_query */
652       .primitivesGeneratedQuery = true,
653       .primitivesGeneratedQueryWithNonZeroStreams = true,
654       .primitivesGeneratedQueryWithRasterizerDiscard = true,
655 
656       /* VK_EXT_provoking_vertex */
657       .provokingVertexLast = true,
658       .transformFeedbackPreservesProvokingVertex = true,
659 
660       /* VK_EXT_robustness2 */
661       .robustBufferAccess2 = true,
662       .robustImageAccess2 = true,
663       .nullDescriptor = true,
664 
665       /* VK_EXT_shader_image_atomic_int64 */
666       .shaderImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
667                                  nvk_use_nak(info),
668       .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
669                                  nvk_use_nak(info),
670 
671       /* VK_EXT_shader_module_identifier */
672       .shaderModuleIdentifier = true,
673 
674       /* VK_EXT_shader_object */
675       .shaderObject = true,
676 
677       /* VK_EXT_shader_replicated_composites */
678       .shaderReplicatedComposites = true,
679 
680       /* VK_EXT_texel_buffer_alignment */
681       .texelBufferAlignment = true,
682 
683       /* VK_EXT_transform_feedback */
684       .transformFeedback = true,
685       .geometryStreams = true,
686 
687       /* VK_EXT_vertex_input_dynamic_state */
688       .vertexInputDynamicState = true,
689 
690       /* VK_EXT_ycbcr_2plane_444_formats */
691       .ycbcr2plane444Formats = true,
692 
693       /* VK_EXT_ycbcr_image_arrays */
694       .ycbcrImageArrays = true,
695 
696       /* VK_NV_shader_sm_builtins */
697       .shaderSMBuiltins = true,
698    };
699 }
700 
701 static void
nvk_get_device_properties(const struct nvk_instance * instance,const struct nv_device_info * info,bool conformant,struct vk_properties * properties)702 nvk_get_device_properties(const struct nvk_instance *instance,
703                           const struct nv_device_info *info,
704                           bool conformant,
705                           struct vk_properties *properties)
706 {
707    const VkSampleCountFlagBits sample_counts = VK_SAMPLE_COUNT_1_BIT |
708                                                VK_SAMPLE_COUNT_2_BIT |
709                                                VK_SAMPLE_COUNT_4_BIT |
710                                                VK_SAMPLE_COUNT_8_BIT;
711 
712    assert(sample_counts <= (NVK_MAX_SAMPLES << 1) - 1);
713 
714    uint64_t os_page_size = 4096;
715    os_get_page_size(&os_page_size);
716 
717    *properties = (struct vk_properties) {
718       .apiVersion = nvk_get_vk_version(info),
719       .driverVersion = vk_get_driver_version(),
720       .vendorID = instance->force_vk_vendor != 0 ?
721                   instance->force_vk_vendor : NVIDIA_VENDOR_ID,
722       .deviceID = info->device_id,
723       .deviceType = info->type == NV_DEVICE_TYPE_DIS ?
724                     VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
725                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
726 
727       /* Vulkan 1.0 limits */
728       .maxImageDimension1D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_1D),
729       .maxImageDimension2D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_2D),
730       .maxImageDimension3D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_3D),
731       .maxImageDimensionCube = 0x8000,
732       .maxImageArrayLayers = 2048,
733       .maxTexelBufferElements = 128 * 1024 * 1024,
734       .maxUniformBufferRange = 65536,
735       .maxStorageBufferRange = UINT32_MAX,
736       .maxPushConstantsSize = NVK_MAX_PUSH_SIZE,
737       .maxMemoryAllocationCount = 4096,
738       .maxSamplerAllocationCount = 4000,
739       .bufferImageGranularity = info->cls_eng3d >= MAXWELL_B ? 0x400 : 0x10000,
740       .sparseAddressSpaceSize = NVK_SPARSE_ADDR_SPACE_SIZE,
741       .maxBoundDescriptorSets = NVK_MAX_SETS,
742       .maxPerStageDescriptorSamplers = NVK_MAX_DESCRIPTORS,
743       .maxPerStageDescriptorUniformBuffers = NVK_MAX_DESCRIPTORS,
744       .maxPerStageDescriptorStorageBuffers = NVK_MAX_DESCRIPTORS,
745       .maxPerStageDescriptorSampledImages = NVK_MAX_DESCRIPTORS,
746       .maxPerStageDescriptorStorageImages = NVK_MAX_DESCRIPTORS,
747       .maxPerStageDescriptorInputAttachments = NVK_MAX_DESCRIPTORS,
748       .maxPerStageResources = UINT32_MAX,
749       .maxDescriptorSetSamplers = NVK_MAX_DESCRIPTORS,
750       .maxDescriptorSetUniformBuffers = NVK_MAX_DESCRIPTORS,
751       .maxDescriptorSetUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
752       .maxDescriptorSetStorageBuffers = NVK_MAX_DESCRIPTORS,
753       .maxDescriptorSetStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
754       .maxDescriptorSetSampledImages = NVK_MAX_DESCRIPTORS,
755       .maxDescriptorSetStorageImages = NVK_MAX_DESCRIPTORS,
756       .maxDescriptorSetInputAttachments = NVK_MAX_DESCRIPTORS,
757       .maxVertexInputAttributes = 32,
758       .maxVertexInputBindings = 32,
759       .maxVertexInputAttributeOffset = 2047,
760       .maxVertexInputBindingStride = 2048,
761       .maxVertexOutputComponents = 128,
762       .maxTessellationGenerationLevel = 64,
763       .maxTessellationPatchSize = 32,
764       .maxTessellationControlPerVertexInputComponents = 128,
765       .maxTessellationControlPerVertexOutputComponents = 128,
766       .maxTessellationControlPerPatchOutputComponents = 120,
767       .maxTessellationControlTotalOutputComponents = 4216,
768       .maxTessellationEvaluationInputComponents = 128,
769       .maxTessellationEvaluationOutputComponents = 128,
770       .maxGeometryShaderInvocations = 32,
771       .maxGeometryInputComponents = 128,
772       .maxGeometryOutputComponents = 128,
773       .maxGeometryOutputVertices = 1024,
774       .maxGeometryTotalOutputComponents = 1024,
775       .maxFragmentInputComponents = 128,
776       .maxFragmentOutputAttachments = NVK_MAX_RTS,
777       .maxFragmentDualSrcAttachments = 1,
778       .maxFragmentCombinedOutputResources = 16,
779       .maxComputeSharedMemorySize = NVK_MAX_SHARED_SIZE,
780       .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
781       .maxComputeWorkGroupInvocations = 1024,
782       .maxComputeWorkGroupSize = {1024, 1024, 64},
783       .subPixelPrecisionBits = 8,
784       .subTexelPrecisionBits = 8,
785       .mipmapPrecisionBits = 8,
786       .maxDrawIndexedIndexValue = UINT32_MAX,
787       .maxDrawIndirectCount = UINT32_MAX,
788       .maxSamplerLodBias = 15,
789       .maxSamplerAnisotropy = 16,
790       .maxViewports = NVK_MAX_VIEWPORTS,
791       .maxViewportDimensions = { 32768, 32768 },
792       .viewportBoundsRange = { -65536, 65536 },
793       .viewportSubPixelBits = 8,
794       .minMemoryMapAlignment = os_page_size,
795       .minTexelBufferOffsetAlignment = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
796       .minUniformBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
797       .minStorageBufferOffsetAlignment = NVK_MIN_SSBO_ALIGNMENT,
798       .minTexelOffset = -8,
799       .maxTexelOffset = 7,
800       .minTexelGatherOffset = -32,
801       .maxTexelGatherOffset = 31,
802       .minInterpolationOffset = -0.5,
803       .maxInterpolationOffset = 0.4375,
804       .subPixelInterpolationOffsetBits = 4,
805       .maxFramebufferHeight = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
806       .maxFramebufferWidth = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
807       .maxFramebufferLayers = 2048,
808       .framebufferColorSampleCounts = sample_counts,
809       .framebufferDepthSampleCounts = sample_counts,
810       .framebufferNoAttachmentsSampleCounts = sample_counts,
811       .framebufferStencilSampleCounts = sample_counts,
812       .maxColorAttachments = NVK_MAX_RTS,
813       .sampledImageColorSampleCounts = sample_counts,
814       .sampledImageIntegerSampleCounts = sample_counts,
815       .sampledImageDepthSampleCounts = sample_counts,
816       .sampledImageStencilSampleCounts = sample_counts,
817       .storageImageSampleCounts = sample_counts,
818       .maxSampleMaskWords = 1,
819       .timestampComputeAndGraphics = true,
820       /* FIXME: Is timestamp period actually 1? */
821       .timestampPeriod = 1.0f,
822       .maxClipDistances = 8,
823       .maxCullDistances = 8,
824       .maxCombinedClipAndCullDistances = 8,
825       .discreteQueuePriorities = 2,
826       .pointSizeRange = { 1.0, 2047.94 },
827       .lineWidthRange = { 1, 64 },
828       .pointSizeGranularity = 0.0625,
829       .lineWidthGranularity = 0.0625,
830       .strictLines = true,
831       .standardSampleLocations = true,
832       .optimalBufferCopyOffsetAlignment = 1,
833       .optimalBufferCopyRowPitchAlignment = 1,
834       .nonCoherentAtomSize = 64,
835 
836       /* Vulkan 1.0 sparse properties */
837       .sparseResidencyNonResidentStrict = true,
838       .sparseResidencyAlignedMipSize = info->cls_eng3d < MAXWELL_B, /* DXVK/vkd3d-proton requires this to be advertised as VK_FALSE for FL12 */
839       .sparseResidencyStandard2DBlockShape = true,
840       .sparseResidencyStandard2DMultisampleBlockShape = true,
841       .sparseResidencyStandard3DBlockShape = true,
842 
843       /* Vulkan 1.1 properties */
844       .subgroupSize = 32,
845       .subgroupSupportedStages = nvk_nak_stages(info),
846       .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
847                                      VK_SUBGROUP_FEATURE_BALLOT_BIT |
848                                      VK_SUBGROUP_FEATURE_BASIC_BIT |
849                                      VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
850                                      VK_SUBGROUP_FEATURE_QUAD_BIT |
851                                      VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
852                                      VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
853                                      VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
854                                      VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
855                                      VK_SUBGROUP_FEATURE_VOTE_BIT,
856       .subgroupQuadOperationsInAllStages = false,
857       .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
858       .maxMultiviewViewCount = NVK_MAX_MULTIVIEW_VIEW_COUNT,
859       .maxMultiviewInstanceIndex = UINT32_MAX,
860       .maxPerSetDescriptors = UINT32_MAX,
861       .maxMemoryAllocationSize = (1u << 31),
862 
863       /* Vulkan 1.2 properties */
864       .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
865                                     VK_RESOLVE_MODE_AVERAGE_BIT |
866                                     VK_RESOLVE_MODE_MIN_BIT |
867                                     VK_RESOLVE_MODE_MAX_BIT,
868       .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
869                                       VK_RESOLVE_MODE_MIN_BIT |
870                                       VK_RESOLVE_MODE_MAX_BIT,
871       .independentResolveNone = true,
872       .independentResolve = true,
873       .driverID = VK_DRIVER_ID_MESA_NVK,
874       .conformanceVersion =
875          conformant ? (VkConformanceVersion) { 1, 4, 0, 0 }
876                     : (VkConformanceVersion) { 0, 0, 0, 0 },
877       .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
878       .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
879       .shaderSignedZeroInfNanPreserveFloat16 = true,
880       .shaderSignedZeroInfNanPreserveFloat32 = true,
881       .shaderSignedZeroInfNanPreserveFloat64 = true,
882       .shaderDenormPreserveFloat16 = true,
883       .shaderDenormPreserveFloat32 = true,
884       .shaderDenormPreserveFloat64 = true,
885       .shaderDenormFlushToZeroFloat16 = false,
886       .shaderDenormFlushToZeroFloat32 = true,
887       .shaderDenormFlushToZeroFloat64 = false,
888       .shaderRoundingModeRTEFloat16 = true,
889       .shaderRoundingModeRTEFloat32 = true,
890       .shaderRoundingModeRTEFloat64 = true,
891       .shaderRoundingModeRTZFloat16 = false,
892       .shaderRoundingModeRTZFloat32 = true,
893       .shaderRoundingModeRTZFloat64 = true,
894       .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
895       .shaderUniformBufferArrayNonUniformIndexingNative = false,
896       .shaderSampledImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
897       .shaderStorageBufferArrayNonUniformIndexingNative = true,
898       .shaderStorageImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
899       .shaderInputAttachmentArrayNonUniformIndexingNative = false,
900       .robustBufferAccessUpdateAfterBind = true,
901       .quadDivergentImplicitLod = info->cls_eng3d >= TURING_A,
902       .maxPerStageDescriptorUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
903       .maxPerStageDescriptorUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
904       .maxPerStageDescriptorUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
905       .maxPerStageDescriptorUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
906       .maxPerStageDescriptorUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
907       .maxPerStageDescriptorUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
908       .maxPerStageUpdateAfterBindResources = UINT32_MAX,
909       .maxDescriptorSetUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
910       .maxDescriptorSetUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
911       .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
912       .maxDescriptorSetUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
913       .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
914       .maxDescriptorSetUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
915       .maxDescriptorSetUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
916       .maxDescriptorSetUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
917       .filterMinmaxSingleComponentFormats = true,
918       .filterMinmaxImageComponentMapping = true,
919       .maxTimelineSemaphoreValueDifference = UINT64_MAX,
920       .framebufferIntegerColorSampleCounts = sample_counts,
921 
922       /* Vulkan 1.3 properties */
923       .minSubgroupSize = 32,
924       .maxSubgroupSize = 32,
925       .maxComputeWorkgroupSubgroups = 1024 / 32,
926       .requiredSubgroupSizeStages = 0,
927       .maxInlineUniformBlockSize = 1 << 16,
928       .maxPerStageDescriptorInlineUniformBlocks = 32,
929       .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
930       .maxDescriptorSetInlineUniformBlocks = 6 * 32,
931       .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
932       .maxInlineUniformTotalSize = 1 << 16,
933       .integerDotProduct4x8BitPackedUnsignedAccelerated
934          = info->cls_eng3d >= VOLTA_A,
935       .integerDotProduct4x8BitPackedSignedAccelerated
936          = info->cls_eng3d >= VOLTA_A,
937       .integerDotProduct4x8BitPackedMixedSignednessAccelerated
938          = info->cls_eng3d >= VOLTA_A,
939       .storageTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
940       .storageTexelBufferOffsetSingleTexelAlignment = true,
941       .uniformTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
942       .uniformTexelBufferOffsetSingleTexelAlignment = true,
943       .maxBufferSize = NVK_MAX_BUFFER_SIZE,
944 
945       /* Vulkan 1.4 properties */
946       .lineSubPixelPrecisionBits = 8,
947       .maxVertexAttribDivisor = UINT32_MAX,
948       .supportsNonZeroFirstInstance = true,
949       .maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,
950       .dynamicRenderingLocalReadDepthStencilAttachments = true,
951       .dynamicRenderingLocalReadMultisampledAttachments = true,
952       .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
953       .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
954       .depthStencilSwizzleOneSupport = true,
955       .polygonModePointSize = true,
956       .nonStrictSinglePixelWideLinesUseParallelogram = false,
957       .nonStrictWideLinesUseParallelogram = false,
958       .blockTexelViewCompatibleMultipleLayers = true,
959       .maxCombinedImageSamplerDescriptorCount = 3,
960       .fragmentShadingRateClampCombinerInputs = false, /* TODO */
961       .defaultRobustnessStorageBuffers =
962          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
963       .defaultRobustnessUniformBuffers =
964          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
965       .defaultRobustnessVertexInputs =
966          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT,
967       .defaultRobustnessImages =
968          VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
969 
970       /* VK_KHR_compute_shader_derivatives */
971       .meshAndTaskShaderDerivatives = false,
972 
973       /* VK_EXT_conservative_rasterization */
974       .primitiveOverestimationSize = info->cls_eng3d >= VOLTA_A ? 1.0f / 512.0f : 0.0,
975       .maxExtraPrimitiveOverestimationSize = 0.75,
976       .extraPrimitiveOverestimationSizeGranularity = 0.25,
977       .primitiveUnderestimation = info->cls_eng3d >= VOLTA_A,
978       .conservativePointAndLineRasterization = true,
979       .degenerateLinesRasterized = info->cls_eng3d >= VOLTA_A,
980       .degenerateTrianglesRasterized = info->cls_eng3d >= PASCAL_A,
981       .fullyCoveredFragmentShaderInputVariable = false,
982       .conservativeRasterizationPostDepthCoverage = true,
983 
984       /* VK_EXT_custom_border_color */
985       .maxCustomBorderColorSamplers = 4000,
986 
987       /* VK_EXT_descriptor_buffer */
988       .combinedImageSamplerDescriptorSingleArray = true,
989       .bufferlessPushDescriptors = true,
990       .allowSamplerImageViewPostSubmitCreation = false,
991       .descriptorBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
992       .maxDescriptorBufferBindings = 32,
993       .maxResourceDescriptorBufferBindings = 32,
994       .maxSamplerDescriptorBufferBindings = 32,
995       .maxEmbeddedImmutableSamplerBindings = 32,
996       .maxEmbeddedImmutableSamplers = 4000,
997       .bufferCaptureReplayDescriptorDataSize = 0,
998       .imageCaptureReplayDescriptorDataSize = 0,
999       .imageViewCaptureReplayDescriptorDataSize =
1000          sizeof(struct nvk_image_view_capture),
1001       .samplerCaptureReplayDescriptorDataSize =
1002          sizeof(struct nvk_sampler_capture),
1003       .accelerationStructureCaptureReplayDescriptorDataSize = 0, // todo
1004       .samplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1005       .combinedImageSamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1006       .sampledImageDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1007       .storageImageDescriptorSize = sizeof(struct nvk_storage_image_descriptor),
1008       .uniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1009       .robustUniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1010       .storageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1011       .robustStorageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
1012       .uniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1013       .robustUniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1014       .storageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1015       .robustStorageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
1016       .inputAttachmentDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
1017       .accelerationStructureDescriptorSize = 0,
1018       .maxSamplerDescriptorBufferRange = UINT32_MAX,
1019       .maxResourceDescriptorBufferRange = UINT32_MAX,
1020       .samplerDescriptorBufferAddressSpaceSize = UINT32_MAX,
1021       .resourceDescriptorBufferAddressSpaceSize = UINT32_MAX,
1022       .descriptorBufferAddressSpaceSize = UINT32_MAX,
1023 
1024       /* VK_EXT_device_generated_commands */
1025       .maxIndirectPipelineCount = UINT32_MAX,
1026       .maxIndirectShaderObjectCount = UINT32_MAX,
1027       .maxIndirectSequenceCount = 1 << 20,
1028       .maxIndirectCommandsTokenCount = 16,
1029       .maxIndirectCommandsTokenOffset = 2047,
1030       .maxIndirectCommandsIndirectStride = 1 << 12,
1031       .supportedIndirectCommandsInputModes =
1032          VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT |
1033          VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT,
1034       .supportedIndirectCommandsShaderStages =
1035          NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1036       .supportedIndirectCommandsShaderStagesPipelineBinding =
1037          NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1038       .supportedIndirectCommandsShaderStagesShaderBinding =
1039          NVK_SHADER_STAGE_GRAPHICS_BITS | VK_SHADER_STAGE_COMPUTE_BIT,
1040       .deviceGeneratedCommandsTransformFeedback = true,
1041       .deviceGeneratedCommandsMultiDrawIndirectCount = info->cls_eng3d >= TURING_A,
1042 
1043       /* VK_EXT_extended_dynamic_state3 */
1044       .dynamicPrimitiveTopologyUnrestricted = true,
1045 
1046       /* VK_EXT_graphics_pipeline_library */
1047       .graphicsPipelineLibraryFastLinking = true,
1048       .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
1049 
1050       /* VK_KHR_maintenance7 */
1051       .robustFragmentShadingRateAttachmentAccess = false,
1052       .separateDepthStencilAttachmentAccess = false,
1053       .maxDescriptorSetTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1054       .maxDescriptorSetTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1055       .maxDescriptorSetTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
1056       .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1057       .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
1058       .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
1059 
1060       /* VK_EXT_legacy_vertex_attributes */
1061       .nativeUnalignedPerformance = true,
1062 
1063       /* VK_EXT_map_memory_placed */
1064       .minPlacedMemoryMapAlignment = os_page_size,
1065 
1066       /* VK_EXT_multi_draw */
1067       .maxMultiDrawCount = UINT32_MAX,
1068 
1069       /* VK_EXT_nested_command_buffer */
1070       .maxCommandBufferNestingLevel = UINT32_MAX,
1071 
1072       /* VK_EXT_pci_bus_info */
1073       .pciDomain   = info->pci.domain,
1074       .pciBus      = info->pci.bus,
1075       .pciDevice   = info->pci.dev,
1076       .pciFunction = info->pci.func,
1077 
1078       /* VK_EXT_physical_device_drm gets populated later */
1079 
1080       /* VK_EXT_provoking_vertex */
1081       .provokingVertexModePerPipeline = true,
1082       .transformFeedbackPreservesTriangleFanProvokingVertex = true,
1083 
1084       /* VK_EXT_robustness2 */
1085       .robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
1086       .robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),
1087 
1088       /* VK_EXT_sample_locations */
1089       .sampleLocationSampleCounts = sample_counts,
1090       .maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
1091       .sampleLocationCoordinateRange[0] = 0.0f,
1092       .sampleLocationCoordinateRange[1] = 0.9375f,
1093       .sampleLocationSubPixelBits = 4,
1094       .variableSampleLocations = true,
1095 
1096       /* VK_EXT_shader_object */
1097       .shaderBinaryVersion = 0,
1098 
1099       /* VK_EXT_transform_feedback */
1100       .maxTransformFeedbackStreams = 4,
1101       .maxTransformFeedbackBuffers = 4,
1102       .maxTransformFeedbackBufferSize = UINT32_MAX,
1103       .maxTransformFeedbackStreamDataSize = 2048,
1104       .maxTransformFeedbackBufferDataSize = 512,
1105       .maxTransformFeedbackBufferDataStride = 2048,
1106       .transformFeedbackQueries = true,
1107       .transformFeedbackStreamsLinesTriangles = false,
1108       .transformFeedbackRasterizationStreamSelect = true,
1109       .transformFeedbackDraw = true,
1110 
1111       /* VK_KHR_fragment_shader_barycentric */
1112       .triStripVertexOrderIndependentOfProvokingVertex = false,
1113 
1114       /* VK_KHR_fragment_shading_rate */
1115       .minFragmentShadingRateAttachmentTexelSize = { 16, 16 },
1116       .maxFragmentShadingRateAttachmentTexelSize = { 16, 16 },
1117       .maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1,
1118       .primitiveFragmentShadingRateWithMultipleViewports = info->cls_eng3d >= AMPERE_B,
1119       .layeredShadingRateAttachments = true,
1120       .fragmentShadingRateNonTrivialCombinerOps = true,
1121       .maxFragmentSize = { 4, 4 },
1122       .maxFragmentSizeAspectRatio = 2,
1123       .maxFragmentShadingRateCoverageSamples = 16,
1124       .maxFragmentShadingRateRasterizationSamples = 16,
1125       .fragmentShadingRateWithShaderDepthStencilWrites = true,
1126       .fragmentShadingRateWithSampleMask = true,
1127       .fragmentShadingRateWithShaderSampleMask = true,
1128       .fragmentShadingRateWithConservativeRasterization = true,
1129       //.fragmentShadingRateWithFragmentShaderInterlock = true,
1130       .fragmentShadingRateWithCustomSampleLocations = true,
1131       .fragmentShadingRateStrictMultiplyCombiner = true,
1132 
1133       /* VK_NV_shader_sm_builtins */
1134       .shaderSMCount = (uint32_t)info->tpc_count * info->mp_per_tpc,
1135       .shaderWarpsPerSM = info->max_warps_per_mp,
1136    };
1137 
1138    /* Add the driver to the device name (like other Mesa drivers do) */
1139    if (!strcmp(info->device_name, info->chipset_name)) {
1140       snprintf(properties->deviceName, sizeof(properties->deviceName),
1141                "NVK %s", info->device_name);
1142    } else {
1143       snprintf(properties->deviceName, sizeof(properties->deviceName),
1144                "%s (NVK %s)", info->device_name, info->chipset_name);
1145    }
1146 
1147    /* VK_EXT_host_image_copy */
1148 
1149    /* Not sure if there are layout specific things, so for now just reporting
1150     * all layouts from extensions.
1151     */
1152    static const VkImageLayout supported_layouts[] = {
1153       VK_IMAGE_LAYOUT_GENERAL, /* this one is required by spec */
1154       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1155       VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1156       VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1157       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1158       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1159       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1160       VK_IMAGE_LAYOUT_PREINITIALIZED,
1161       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1162       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1163       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1164       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1165       VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1166       VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1167       VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1168       VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1169       VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
1170       VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1171    };
1172 
1173    properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
1174    properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1175    properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
1176    properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1177 
1178    STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1179    memcpy(properties->optimalTilingLayoutUUID,
1180           instance->driver_build_sha, VK_UUID_SIZE);
1181 
1182    properties->identicalMemoryTypeRequirements = false;
1183 
1184    /* VK_EXT_shader_module_identifier */
1185    STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1186       sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1187    memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
1188             vk_shaderModuleIdentifierAlgorithmUUID,
1189             sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1190 
1191    nv_device_uuid(info, properties->deviceUUID, VK_UUID_SIZE, true);
1192    STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1193    memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
1194 
1195    snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK");
1196    snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1197             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
1198 }
1199 
1200 static void
nvk_physical_device_init_pipeline_cache(struct nvk_physical_device * pdev)1201 nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
1202 {
1203    struct nvk_instance *instance = nvk_physical_device_instance(pdev);
1204 
1205    struct mesa_sha1 sha_ctx;
1206    _mesa_sha1_init(&sha_ctx);
1207 
1208    _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
1209                      sizeof(instance->driver_build_sha));
1210 
1211    const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
1212    _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1213 
1214    unsigned char sha[SHA1_DIGEST_LENGTH];
1215    _mesa_sha1_final(&sha_ctx, sha);
1216 
1217    STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1218    memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1219    memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1220 
1221 #ifdef ENABLE_SHADER_CACHE
1222    char renderer[10];
1223    ASSERTED int len = snprintf(renderer, sizeof(renderer), "nvk_%04x",
1224                                pdev->info.chipset);
1225    assert(len == sizeof(renderer) - 2);
1226 
1227    char timestamp[41];
1228    _mesa_sha1_format(timestamp, instance->driver_build_sha);
1229 
1230    const uint64_t driver_flags = nvk_physical_device_compiler_flags(pdev);
1231    pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1232 #endif
1233 }
1234 
1235 static void
nvk_physical_device_free_disk_cache(struct nvk_physical_device * pdev)1236 nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev)
1237 {
1238 #ifdef ENABLE_SHADER_CACHE
1239    if (pdev->vk.disk_cache) {
1240       disk_cache_destroy(pdev->vk.disk_cache);
1241       pdev->vk.disk_cache = NULL;
1242    }
1243 #else
1244    assert(pdev->vk.disk_cache == NULL);
1245 #endif
1246 }
1247 
/*
 * Returns the size, in bytes, to report for the system-memory heap.
 *
 * The value is 3/4 of the total physical memory reported by
 * os_get_total_physical_memory(), passed through ROUND_DOWN_TO with a
 * granularity of 1 << 20 bytes.  Returns 0 when the query fails.
 */
static uint64_t
nvk_get_sysmem_heap_size(void)
{
   uint64_t total_B = 0;

   if (os_get_total_physical_memory(&total_B)) {
      /* Only claim 3/4 of the total so that we avoid swapping */
      return ROUND_DOWN_TO(total_B * 3 / 4, 1 << 20);
   }

   return 0;
}
1258 
/*
 * Returns the number of bytes currently considered available in the
 * system-memory heap.
 *
 * The value is 3/4 of what os_get_available_system_memory() reports, passed
 * through ROUND_DOWN_TO with a granularity of 1 << 20 bytes.  If the query
 * fails, an error is logged against `pdev` and 0 is returned.
 */
static uint64_t
nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev)
{
   uint64_t avail_B = 0;

   if (os_get_available_system_memory(&avail_B)) {
      /* Only claim 3/4 of what is available so that we avoid swapping */
      return ROUND_DOWN_TO(avail_B * 3 / 4, 1 << 20);
   }

   vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
   return 0;
}
1271 
1272 static uint64_t
nvk_get_vram_heap_available(struct nvk_physical_device * pdev)1273 nvk_get_vram_heap_available(struct nvk_physical_device *pdev)
1274 {
1275    const uint64_t used = nvkmd_pdev_get_vram_used(pdev->nvkmd);
1276    if (used > pdev->info.vram_size_B)
1277       return 0;
1278 
1279    return pdev->info.vram_size_B - used;
1280 }
1281 
1282 VkResult
nvk_create_drm_physical_device(struct vk_instance * _instance,struct _drmDevice * drm_device,struct vk_physical_device ** pdev_out)1283 nvk_create_drm_physical_device(struct vk_instance *_instance,
1284                                struct _drmDevice *drm_device,
1285                                struct vk_physical_device **pdev_out)
1286 {
1287    struct nvk_instance *instance = (struct nvk_instance *)_instance;
1288    VkResult result;
1289 
1290    struct nvkmd_pdev *nvkmd;
1291    result = nvkmd_try_create_pdev_for_drm(drm_device, &instance->vk.base,
1292                                           instance->debug_flags, &nvkmd);
1293    if (result != VK_SUCCESS)
1294       return result;
1295 
1296    /* We don't support anything pre-Kepler */
1297    if (nvkmd->dev_info.cls_eng3d < KEPLER_A) {
1298       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1299       goto fail_nvkmd;
1300    }
1301 
1302    bool conformant =
1303       nvkmd->dev_info.type == NV_DEVICE_TYPE_DIS &&
1304       nvkmd->dev_info.cls_eng3d >= TURING_A &&
1305       nvkmd->dev_info.cls_eng3d <= ADA_A;
1306 
1307    if (!conformant &&
1308        !debug_get_bool_option("NVK_I_WANT_A_BROKEN_VULKAN_DRIVER", false)) {
1309 #ifdef NDEBUG
1310       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1311 #else
1312       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1313                          "WARNING: NVK is not well-tested on %s, pass "
1314                          "NVK_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
1315                          "if you know what you're doing.",
1316                          nvkmd->dev_info.device_name);
1317 #endif
1318       goto fail_nvkmd;
1319    }
1320 
1321    if (!conformant)
1322       vk_warn_non_conformant_implementation("NVK");
1323 
1324    struct nvk_physical_device *pdev =
1325       vk_zalloc(&instance->vk.alloc, sizeof(*pdev),
1326                 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1327 
1328    if (pdev == NULL) {
1329       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1330       goto fail_nvkmd;
1331    }
1332 
1333    struct vk_physical_device_dispatch_table dispatch_table;
1334    vk_physical_device_dispatch_table_from_entrypoints(
1335       &dispatch_table, &nvk_physical_device_entrypoints, true);
1336    vk_physical_device_dispatch_table_from_entrypoints(
1337       &dispatch_table, &wsi_physical_device_entrypoints, false);
1338 
1339    struct vk_device_extension_table supported_extensions;
1340    nvk_get_device_extensions(instance, &nvkmd->dev_info,
1341                              nvkmd->kmd_info.has_alloc_tiled,
1342                              &supported_extensions);
1343 
1344    struct vk_features supported_features;
1345    nvk_get_device_features(&nvkmd->dev_info, &supported_extensions,
1346                            &supported_features);
1347 
1348    struct vk_properties properties;
1349    nvk_get_device_properties(instance, &nvkmd->dev_info, conformant,
1350                              &properties);
1351 
1352    if (nvkmd->drm.render_dev) {
1353       properties.drmHasRender = true;
1354       properties.drmRenderMajor = major(nvkmd->drm.render_dev);
1355       properties.drmRenderMinor = minor(nvkmd->drm.render_dev);
1356    }
1357 
1358    if (nvkmd->drm.primary_dev) {
1359       properties.drmHasPrimary = true;
1360       properties.drmPrimaryMajor = major(nvkmd->drm.primary_dev);
1361       properties.drmPrimaryMinor = minor(nvkmd->drm.primary_dev);
1362    }
1363 
1364    result = vk_physical_device_init(&pdev->vk, &instance->vk,
1365                                     &supported_extensions,
1366                                     &supported_features,
1367                                     &properties,
1368                                     &dispatch_table);
1369    if (result != VK_SUCCESS)
1370       goto fail_alloc;
1371 
1372    pdev->nvkmd = nvkmd;
1373    pdev->info = nvkmd->dev_info;
1374    pdev->debug_flags = instance->debug_flags;
1375 
1376    pdev->nak = nak_compiler_create(&pdev->info);
1377    if (pdev->nak == NULL) {
1378       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1379       goto fail_init;
1380    }
1381 
1382    nvk_physical_device_init_pipeline_cache(pdev);
1383 
1384    uint64_t sysmem_size_B = nvk_get_sysmem_heap_size();
1385    if (sysmem_size_B == 0) {
1386       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1387                          "Failed to query total system memory");
1388       goto fail_disk_cache;
1389    }
1390 
1391    if (pdev->info.vram_size_B > 0) {
1392       uint32_t vram_heap_idx = pdev->mem_heap_count++;
1393       uint32_t bar_heap_idx = vram_heap_idx;
1394       pdev->mem_heaps[vram_heap_idx] = (struct nvk_memory_heap) {
1395          .size = pdev->info.vram_size_B,
1396          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1397       };
1398 
1399       if (pdev->info.bar_size_B > 0 &&
1400           pdev->info.bar_size_B < pdev->info.vram_size_B) {
1401          bar_heap_idx = pdev->mem_heap_count++;
1402          pdev->mem_heaps[bar_heap_idx] = (struct nvk_memory_heap) {
1403             .size = pdev->info.bar_size_B,
1404             .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1405          };
1406       }
1407 
1408       /* Only set available if we have the ioctl. */
1409       if (nvkmd->kmd_info.has_get_vram_used)
1410          pdev->mem_heaps[vram_heap_idx].available = nvk_get_vram_heap_available;
1411 
1412       pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1413          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1414          .heapIndex = vram_heap_idx,
1415       };
1416 
1417       if (pdev->info.cls_eng3d >= MAXWELL_A) {
1418          pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1419             .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1420                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1421                              VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1422             .heapIndex = bar_heap_idx,
1423          };
1424       }
1425    }
1426 
1427    uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1428    pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) {
1429       .size = sysmem_size_B,
1430       /* If we don't have any VRAM (iGPU), claim sysmem as DEVICE_LOCAL */
1431       .flags = pdev->info.vram_size_B == 0
1432                ? VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
1433                : 0,
1434       .available = nvk_get_sysmem_heap_available,
1435    };
1436 
1437    pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1438       /* TODO: What's the right thing to do here on Tegra? */
1439       .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1440                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1441                        VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1442       .heapIndex = sysmem_heap_idx,
1443    };
1444 
1445    assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1446    assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1447 
1448    pdev->queue_families[pdev->queue_family_count++] = (struct nvk_queue_family) {
1449       .queue_flags = VK_QUEUE_GRAPHICS_BIT |
1450                      VK_QUEUE_COMPUTE_BIT |
1451                      VK_QUEUE_TRANSFER_BIT |
1452                      VK_QUEUE_SPARSE_BINDING_BIT,
1453       .queue_count = 1,
1454    };
1455    assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1456 
1457    pdev->vk.supported_sync_types = nvkmd->sync_types;
1458 
1459 #ifdef NVK_USE_WSI_PLATFORM
1460    result = nvk_init_wsi(pdev);
1461    if (result != VK_SUCCESS)
1462       goto fail_disk_cache;
1463 #endif
1464 
1465    *pdev_out = &pdev->vk;
1466 
1467    return VK_SUCCESS;
1468 
1469 fail_disk_cache:
1470    nvk_physical_device_free_disk_cache(pdev);
1471    nak_compiler_destroy(pdev->nak);
1472 fail_init:
1473    vk_physical_device_finish(&pdev->vk);
1474 fail_alloc:
1475    vk_free(&instance->vk.alloc, pdev);
1476 fail_nvkmd:
1477    nvkmd_pdev_destroy(nvkmd);
1478    return result;
1479 }
1480 
1481 void
nvk_physical_device_destroy(struct vk_physical_device * vk_pdev)1482 nvk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1483 {
1484    struct nvk_physical_device *pdev =
1485       container_of(vk_pdev, struct nvk_physical_device, vk);
1486 
1487 #ifdef NVK_USE_WSI_PLATFORM
1488    nvk_finish_wsi(pdev);
1489 #endif
1490    nvk_physical_device_free_disk_cache(pdev);
1491    nak_compiler_destroy(pdev->nak);
1492    nvkmd_pdev_destroy(pdev->nvkmd);
1493    vk_physical_device_finish(&pdev->vk);
1494    vk_free(&pdev->vk.instance->alloc, pdev);
1495 }
1496 
1497 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1498 nvk_GetPhysicalDeviceMemoryProperties2(
1499    VkPhysicalDevice physicalDevice,
1500    VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1501 {
1502    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1503 
1504    pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1505    for (int i = 0; i < pdev->mem_heap_count; i++) {
1506       pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap) {
1507          .size = pdev->mem_heaps[i].size,
1508          .flags = pdev->mem_heaps[i].flags,
1509       };
1510    }
1511 
1512    pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1513    for (int i = 0; i < pdev->mem_type_count; i++) {
1514       pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1515    }
1516 
1517    vk_foreach_struct(ext, pMemoryProperties->pNext)
1518    {
1519       switch (ext->sType) {
1520       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1521          VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1522 
1523          for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1524             const struct nvk_memory_heap *heap = &pdev->mem_heaps[i];
1525             uint64_t used = p_atomic_read(&heap->used);
1526 
1527             /* From the Vulkan 1.3.278 spec:
1528              *
1529              *    "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1530              *    values in which memory usages are returned, with one element
1531              *    for each memory heap. A heap’s usage is an estimate of how
1532              *    much memory the process is currently using in that heap."
1533              *
1534              * TODO: Include internal allocations?
1535              */
1536             p->heapUsage[i] = used;
1537 
1538             uint64_t available = heap->size;
1539             if (heap->available)
1540                available = heap->available(pdev);
1541 
1542             /* From the Vulkan 1.3.278 spec:
1543              *
1544              *    "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1545              *    values in which memory budgets are returned, with one
1546              *    element for each memory heap. A heap’s budget is a rough
1547              *    estimate of how much memory the process can allocate from
1548              *    that heap before allocations may fail or cause performance
1549              *    degradation. The budget includes any currently allocated
1550              *    device memory."
1551              *
1552              * and
1553              *
1554              *    "The heapBudget value must be less than or equal to
1555              *    VkMemoryHeap::size for each heap."
1556              *
1557              * available (queried above) is the total amount free memory
1558              * system-wide and does not include our allocations so we need
1559              * to add that in.
1560              */
1561             uint64_t budget = MIN2(available + used, heap->size);
1562 
1563             /* Set the budget at 90% of available to avoid thrashing */
1564             p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1565          }
1566 
1567          /* From the Vulkan 1.3.278 spec:
1568           *
1569           *    "The heapBudget and heapUsage values must be zero for array
1570           *    elements greater than or equal to
1571           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1572           *    heapBudget value must be non-zero for array elements less than
1573           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1574           */
1575          for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1576             p->heapBudget[i] = 0u;
1577             p->heapUsage[i] = 0u;
1578          }
1579          break;
1580       }
1581       default:
1582          vk_debug_ignored_stype(ext->sType);
1583          break;
1584       }
1585    }
1586 }
1587 
1588 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1589 nvk_GetPhysicalDeviceQueueFamilyProperties2(
1590    VkPhysicalDevice physicalDevice,
1591    uint32_t *pQueueFamilyPropertyCount,
1592    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1593 {
1594    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1595    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1596                           pQueueFamilyPropertyCount);
1597 
1598    for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1599       const struct nvk_queue_family *queue_family = &pdev->queue_families[i];
1600 
1601       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1602          p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1603          p->queueFamilyProperties.queueCount = queue_family->queue_count;
1604          p->queueFamilyProperties.timestampValidBits = 64;
1605          p->queueFamilyProperties.minImageTransferGranularity =
1606             (VkExtent3D){1, 1, 1};
1607 
1608          vk_foreach_struct(ext, p->pNext) {
1609             switch (ext->sType) {
1610             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES: {
1611                VkQueueFamilyGlobalPriorityProperties *p = (void *)ext;
1612                p->priorityCount = 1;
1613                p->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM;
1614                break;
1615             }
1616 
1617             default:
1618                vk_debug_ignored_stype(ext->sType);
1619                break;
1620             }
1621          }
1622       }
1623    }
1624 }
1625 
1626 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1627 nvk_GetPhysicalDeviceMultisamplePropertiesEXT(
1628    VkPhysicalDevice physicalDevice,
1629    VkSampleCountFlagBits samples,
1630    VkMultisamplePropertiesEXT *pMultisampleProperties)
1631 {
1632    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1633 
1634    if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1635       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1636    } else {
1637       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1638    }
1639 }
1640 
1641 VkExtent2D
nvk_max_shading_rate(const struct nvk_physical_device * pdev,VkSampleCountFlagBits samples)1642 nvk_max_shading_rate(const struct nvk_physical_device *pdev,
1643                      VkSampleCountFlagBits samples)
1644 {
1645    const struct nil_Extent4D_Samples px_extent_sa =
1646       nil_px_extent_sa(nil_choose_sample_layout(samples));
1647 
1648    assert(px_extent_sa.width <= 4);
1649    assert(px_extent_sa.height <= 4);
1650    assert(px_extent_sa.depth == 1);
1651    assert(px_extent_sa.array_len == 1);
1652 
1653    return (VkExtent2D) {
1654       .width = 4 / px_extent_sa.width,
1655       .height = 4 / px_extent_sa.height,
1656    };
1657 }
1658 
1659 VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice,uint32_t * pFragmentShadingRateCount,VkPhysicalDeviceFragmentShadingRateKHR * pFragmentShadingRates)1660 nvk_GetPhysicalDeviceFragmentShadingRatesKHR(
1661    VkPhysicalDevice physicalDevice,
1662    uint32_t *pFragmentShadingRateCount,
1663    VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
1664 {
1665    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1666    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
1667                           pFragmentShadingRates, pFragmentShadingRateCount);
1668 
1669 
1670    /* From the Vulkan 1.3.297 spec:
1671     *
1672     *    "The returned array of fragment shading rates must be ordered from
1673     *    largest fragmentSize.width value to smallest, and each set of
1674     *    fragment shading rates with the same fragmentSize.width value must be
1675     *    ordered from largest fragmentSize.height to smallest. Any two entries
1676     *    in the array must not have the same fragmentSize values."
1677     */
1678    VkExtent2D shading_rates[] = {
1679       { 4, 4 },
1680       { 4, 2 },
1681       { 2, 4 },
1682       { 2, 2 },
1683       { 2, 1 },
1684       { 1, 2 },
1685       { 1, 1 },
1686    };
1687 
1688    for (uint32_t i = 0; i < ARRAY_SIZE(shading_rates); i++) {
1689       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, p) {
1690          p->fragmentSize = shading_rates[i];
1691          if (shading_rates[i].width == 1 && shading_rates[i].height == 1) {
1692             /* The Vulkan spec requires us to set ~0 for 1x1. */
1693             p->sampleCounts = ~0;
1694          } else {
1695             for (uint32_t samples = 1; samples <= 16; samples <<= 1) {
1696                VkExtent2D max_rate = nvk_max_shading_rate(pdev, samples);
1697                if (shading_rates[i].width > max_rate.width ||
1698                    shading_rates[i].height > max_rate.height)
1699                   break;
1700 
1701                p->sampleCounts |= samples;
1702             }
1703          }
1704       }
1705    }
1706 
1707    return vk_outarray_status(&out);
1708 }
1709