• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nvk_physical_device.h"
6 
7 #include "nak.h"
8 #include "nvk_buffer.h"
9 #include "nvk_entrypoints.h"
10 #include "nvk_format.h"
11 #include "nvk_image.h"
12 #include "nvk_instance.h"
13 #include "nvk_shader.h"
14 #include "nvk_wsi.h"
15 #include "git_sha1.h"
16 #include "util/disk_cache.h"
17 #include "util/mesa-sha1.h"
18 
19 #include "vulkan/runtime/vk_device.h"
20 #include "vulkan/runtime/vk_drm_syncobj.h"
21 #include "vulkan/runtime/vk_shader_module.h"
22 #include "vulkan/wsi/wsi_common.h"
23 
24 #include <fcntl.h>
25 #include <sys/stat.h>
26 #include <sys/sysmacros.h>
27 #include <xf86drm.h>
28 
29 #include "cl90c0.h"
30 #include "cl91c0.h"
31 #include "cla097.h"
32 #include "cla0c0.h"
33 #include "cla1c0.h"
34 #include "clb097.h"
35 #include "clb0c0.h"
36 #include "clb197.h"
37 #include "clb1c0.h"
38 #include "clc0c0.h"
39 #include "clc1c0.h"
40 #include "clc397.h"
41 #include "clc3c0.h"
42 #include "clc597.h"
43 #include "clc5c0.h"
44 #include "clc997.h"
45 
46 static bool
nvk_use_nak(const struct nv_device_info * info)47 nvk_use_nak(const struct nv_device_info *info)
48 {
49    const VkShaderStageFlags vk10_stages =
50       VK_SHADER_STAGE_VERTEX_BIT |
51       VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
52       VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
53       VK_SHADER_STAGE_GEOMETRY_BIT |
54       VK_SHADER_STAGE_FRAGMENT_BIT |
55       VK_SHADER_STAGE_COMPUTE_BIT;
56 
57    return !(vk10_stages & ~nvk_nak_stages(info));
58 }
59 
60 static uint32_t
nvk_get_vk_version(const struct nv_device_info * info)61 nvk_get_vk_version(const struct nv_device_info *info)
62 {
63    /* Version override takes priority */
64    const uint32_t version_override = vk_get_version_override();
65    if (version_override)
66       return version_override;
67 
68    /* If we're using codegen for anything, lock to version 1.0 */
69    if (!nvk_use_nak(info))
70       return VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
71 
72    return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
73 }
74 
75 static void
nvk_get_device_extensions(const struct nvk_instance * instance,const struct nv_device_info * info,struct vk_device_extension_table * ext)76 nvk_get_device_extensions(const struct nvk_instance *instance,
77                           const struct nv_device_info *info,
78                           struct vk_device_extension_table *ext)
79 {
80    *ext = (struct vk_device_extension_table) {
81       .KHR_8bit_storage = true,
82       .KHR_16bit_storage = true,
83       .KHR_bind_memory2 = true,
84       .KHR_buffer_device_address = true,
85       .KHR_copy_commands2 = true,
86       .KHR_create_renderpass2 = true,
87       .KHR_dedicated_allocation = true,
88       .KHR_depth_stencil_resolve = true,
89       .KHR_descriptor_update_template = true,
90       .KHR_device_group = true,
91       .KHR_draw_indirect_count = info->cls_eng3d >= TURING_A,
92       .KHR_driver_properties = true,
93       .KHR_dynamic_rendering = true,
94       .KHR_external_fence = true,
95       .KHR_external_fence_fd = true,
96       .KHR_external_memory = true,
97       .KHR_external_memory_fd = true,
98       .KHR_external_semaphore = true,
99       .KHR_external_semaphore_fd = true,
100       .KHR_format_feature_flags2 = true,
101       .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
102          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
103       .KHR_get_memory_requirements2 = true,
104       .KHR_image_format_list = true,
105       .KHR_imageless_framebuffer = true,
106 #ifdef NVK_USE_WSI_PLATFORM
107       .KHR_incremental_present = true,
108 #endif
109       .KHR_index_type_uint8 = true,
110       .KHR_line_rasterization = true,
111       .KHR_load_store_op_none = true,
112       .KHR_maintenance1 = true,
113       .KHR_maintenance2 = true,
114       .KHR_maintenance3 = true,
115       .KHR_maintenance4 = true,
116       .KHR_maintenance5 = true,
117       .KHR_map_memory2 = true,
118       .KHR_multiview = true,
119       .KHR_pipeline_executable_properties = true,
120 
121 #ifdef NVK_USE_WSI_PLATFORM
122       /* Hide these behind dri configs for now since we cannot implement it
123        * reliably on all surfaces yet. There is no surface capability query
124        * for present wait/id, but the feature is useful enough to hide behind
125        * an opt-in mechanism for now.  If the instance only enables surface
126        * extensions that unconditionally support present wait, we can also
127        * expose the extension that way.
128        */
129       .KHR_present_id = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
130                         wsi_common_vk_instance_supports_present_wait(&instance->vk),
131       .KHR_present_wait = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
132                           wsi_common_vk_instance_supports_present_wait(&instance->vk),
133 #endif
134       .KHR_push_descriptor = true,
135       .KHR_relaxed_block_layout = true,
136       .KHR_sampler_mirror_clamp_to_edge = true,
137       .KHR_sampler_ycbcr_conversion = true,
138       .KHR_separate_depth_stencil_layouts = true,
139       .KHR_shader_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
140                                  nvk_use_nak(info),
141       .KHR_shader_clock = true,
142       .KHR_shader_draw_parameters = true,
143       .KHR_shader_expect_assume = true,
144       .KHR_shader_float_controls = true,
145       .KHR_shader_float16_int8 = true,
146       .KHR_shader_integer_dot_product = true,
147       .KHR_shader_non_semantic_info = true,
148       .KHR_shader_subgroup_extended_types = true,
149       .KHR_shader_terminate_invocation =
150          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
151       .KHR_spirv_1_4 = true,
152       .KHR_storage_buffer_storage_class = true,
153       .KHR_timeline_semaphore = true,
154 #ifdef NVK_USE_WSI_PLATFORM
155       .KHR_swapchain = true,
156       .KHR_swapchain_mutable_format = true,
157 #endif
158       .KHR_synchronization2 = true,
159       .KHR_uniform_buffer_standard_layout = true,
160       .KHR_variable_pointers = true,
161       .KHR_vulkan_memory_model = nvk_use_nak(info),
162       .KHR_workgroup_memory_explicit_layout = true,
163       .KHR_zero_initialize_workgroup_memory = true,
164       .EXT_4444_formats = true,
165       .EXT_attachment_feedback_loop_layout = true,
166       .EXT_border_color_swizzle = true,
167       .EXT_buffer_device_address = true,
168       .EXT_conditional_rendering = true,
169       .EXT_color_write_enable = true,
170       .EXT_custom_border_color = true,
171       .EXT_depth_bias_control = true,
172       .EXT_depth_clip_control = true,
173       .EXT_depth_clip_enable = true,
174       .EXT_descriptor_indexing = true,
175       .EXT_dynamic_rendering_unused_attachments = true,
176       .EXT_extended_dynamic_state = true,
177       .EXT_extended_dynamic_state2 = true,
178       .EXT_extended_dynamic_state3 = true,
179       .EXT_external_memory_dma_buf = true,
180       .EXT_graphics_pipeline_library = true,
181       .EXT_host_query_reset = true,
182       .EXT_image_2d_view_of_3d = true,
183       .EXT_image_robustness = true,
184       .EXT_image_sliced_view_of_3d = true,
185       .EXT_image_view_min_lod = true,
186       .EXT_index_type_uint8 = true,
187       .EXT_inline_uniform_block = true,
188       .EXT_line_rasterization = true,
189       .EXT_load_store_op_none = true,
190       .EXT_map_memory_placed = true,
191       .EXT_memory_budget = true,
192       .EXT_multi_draw = true,
193       .EXT_mutable_descriptor_type = true,
194       .EXT_non_seamless_cube_map = true,
195       .EXT_pci_bus_info = info->type == NV_DEVICE_TYPE_DIS,
196       .EXT_pipeline_creation_cache_control = true,
197       .EXT_pipeline_creation_feedback = true,
198       .EXT_physical_device_drm = true,
199       .EXT_primitive_topology_list_restart = true,
200       .EXT_private_data = true,
201       .EXT_primitives_generated_query = true,
202       .EXT_provoking_vertex = true,
203       .EXT_robustness2 = true,
204       .EXT_sample_locations = info->cls_eng3d >= MAXWELL_B,
205       .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B,
206       .EXT_scalar_block_layout = nvk_use_nak(info),
207       .EXT_separate_stencil_usage = true,
208       .EXT_shader_image_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
209                                        nvk_use_nak(info),
210       .EXT_shader_demote_to_helper_invocation = true,
211       .EXT_shader_module_identifier = true,
212       .EXT_shader_object = true,
213       .EXT_shader_subgroup_ballot = true,
214       .EXT_shader_subgroup_vote = true,
215       .EXT_shader_viewport_index_layer = info->cls_eng3d >= MAXWELL_B,
216       .EXT_subgroup_size_control = true,
217       .EXT_texel_buffer_alignment = true,
218       .EXT_tooling_info = true,
219       .EXT_transform_feedback = true,
220       .EXT_vertex_attribute_divisor = true,
221       .EXT_vertex_input_dynamic_state = true,
222       .EXT_ycbcr_2plane_444_formats = true,
223       .EXT_ycbcr_image_arrays = true,
224       .NV_shader_sm_builtins = true,
225    };
226 }
227 
228 static void
nvk_get_device_features(const struct nv_device_info * info,const struct vk_device_extension_table * supported_extensions,struct vk_features * features)229 nvk_get_device_features(const struct nv_device_info *info,
230                         const struct vk_device_extension_table *supported_extensions,
231                         struct vk_features *features)
232 {
233    *features = (struct vk_features) {
234       /* Vulkan 1.0 */
235       .robustBufferAccess = true,
236       .fullDrawIndexUint32 = true,
237       .imageCubeArray = true,
238       .independentBlend = true,
239       .geometryShader = true,
240       .tessellationShader = true,
241       .sampleRateShading = true,
242       .dualSrcBlend = true,
243       .logicOp = true,
244       .multiDrawIndirect = true,
245       .drawIndirectFirstInstance = true,
246       .depthClamp = true,
247       .depthBiasClamp = true,
248       .fillModeNonSolid = true,
249       .depthBounds = true,
250       .wideLines = true,
251       .largePoints = true,
252       .alphaToOne = true,
253       .multiViewport = true,
254       .samplerAnisotropy = true,
255       .textureCompressionETC2 = false,
256       .textureCompressionBC = true,
257       .textureCompressionASTC_LDR = false,
258       .occlusionQueryPrecise = true,
259       .pipelineStatisticsQuery = true,
260       .vertexPipelineStoresAndAtomics = true,
261       .fragmentStoresAndAtomics = true,
262       .shaderTessellationAndGeometryPointSize = true,
263       .shaderImageGatherExtended = true,
264       .shaderStorageImageExtendedFormats = true,
265       /* TODO: shaderStorageImageMultisample */
266       .shaderStorageImageReadWithoutFormat = info->cls_eng3d >= MAXWELL_A,
267       .shaderStorageImageWriteWithoutFormat = true,
268       .shaderUniformBufferArrayDynamicIndexing = true,
269       .shaderSampledImageArrayDynamicIndexing = true,
270       .shaderStorageBufferArrayDynamicIndexing = true,
271       .shaderStorageImageArrayDynamicIndexing = true,
272       .shaderClipDistance = true,
273       .shaderCullDistance = true,
274       .shaderFloat64 = true,
275       .shaderInt64 = true,
276       .shaderInt16 = true,
277       /* TODO: shaderResourceResidency */
278       .shaderResourceMinLod = info->cls_eng3d >= VOLTA_A,
279       .sparseBinding = true,
280       .sparseResidencyBuffer = info->cls_eng3d >= MAXWELL_A,
281       /* TODO: sparseResidency* */
282       .variableMultisampleRate = true,
283       .inheritedQueries = true,
284 
285       /* Vulkan 1.1 */
286       .storageBuffer16BitAccess = true,
287       .uniformAndStorageBuffer16BitAccess = true,
288       .storagePushConstant16 = true,
289       .multiview = true,
290       .multiviewGeometryShader = true,
291       .multiviewTessellationShader = true,
292       .variablePointersStorageBuffer = true,
293       .variablePointers = true,
294       .shaderDrawParameters = true,
295       .samplerYcbcrConversion = true,
296 
297       /* Vulkan 1.2 */
298       .samplerMirrorClampToEdge = true,
299       .drawIndirectCount = info->cls_eng3d >= TURING_A,
300       .storageBuffer8BitAccess = true,
301       .uniformAndStorageBuffer8BitAccess = true,
302       .storagePushConstant8 = true,
303       .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
304                                   nvk_use_nak(info),
305       .shaderSharedInt64Atomics = false, /* TODO */
306       .shaderInt8 = true,
307       .descriptorIndexing = true,
308       .shaderInputAttachmentArrayDynamicIndexing = true,
309       .shaderUniformTexelBufferArrayDynamicIndexing = true,
310       .shaderStorageTexelBufferArrayDynamicIndexing = true,
311       .shaderUniformBufferArrayNonUniformIndexing = true,
312       .shaderSampledImageArrayNonUniformIndexing = true,
313       .shaderStorageBufferArrayNonUniformIndexing = true,
314       .shaderStorageImageArrayNonUniformIndexing = true,
315       .shaderInputAttachmentArrayNonUniformIndexing = true,
316       .shaderUniformTexelBufferArrayNonUniformIndexing = true,
317       .shaderStorageTexelBufferArrayNonUniformIndexing = true,
318       .descriptorBindingUniformBufferUpdateAfterBind = true,
319       .descriptorBindingSampledImageUpdateAfterBind = true,
320       .descriptorBindingStorageImageUpdateAfterBind = true,
321       .descriptorBindingStorageBufferUpdateAfterBind = true,
322       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
323       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
324       .descriptorBindingUpdateUnusedWhilePending = true,
325       .descriptorBindingPartiallyBound = true,
326       .descriptorBindingVariableDescriptorCount = true,
327       .runtimeDescriptorArray = true,
328       .samplerFilterMinmax = info->cls_eng3d >= MAXWELL_B,
329       .scalarBlockLayout = nvk_use_nak(info),
330       .imagelessFramebuffer = true,
331       .uniformBufferStandardLayout = true,
332       .shaderSubgroupExtendedTypes = true,
333       .separateDepthStencilLayouts = true,
334       .hostQueryReset = true,
335       .timelineSemaphore = true,
336       .bufferDeviceAddress = true,
337       .bufferDeviceAddressCaptureReplay = true,
338       .bufferDeviceAddressMultiDevice = false,
339       .vulkanMemoryModel = nvk_use_nak(info),
340       .vulkanMemoryModelDeviceScope = nvk_use_nak(info),
341       .vulkanMemoryModelAvailabilityVisibilityChains = nvk_use_nak(info),
342       .shaderOutputViewportIndex = info->cls_eng3d >= MAXWELL_B,
343       .shaderOutputLayer = info->cls_eng3d >= MAXWELL_B,
344       .subgroupBroadcastDynamicId = nvk_use_nak(info),
345 
346       /* Vulkan 1.3 */
347       .robustImageAccess = true,
348       .inlineUniformBlock = true,
349       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
350       .pipelineCreationCacheControl = true,
351       .privateData = true,
352       .shaderDemoteToHelperInvocation = true,
353       .shaderTerminateInvocation = true,
354       .subgroupSizeControl = true,
355       .computeFullSubgroups = true,
356       .synchronization2 = true,
357       .shaderZeroInitializeWorkgroupMemory = true,
358       .dynamicRendering = true,
359       .shaderIntegerDotProduct = true,
360       .maintenance4 = true,
361 
362       /* VK_KHR_fragment_shader_barycentric */
363       .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
364          (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
365 
366       /* VK_KHR_maintenance5 */
367       .maintenance5 = true,
368 
369       /* VK_KHR_pipeline_executable_properties */
370       .pipelineExecutableInfo = true,
371 
372       /* VK_KHR_present_id */
373       .presentId = supported_extensions->KHR_present_id,
374 
375       /* VK_KHR_present_wait */
376       .presentWait = supported_extensions->KHR_present_wait,
377 
378       /* VK_KHR_shader_clock */
379       .shaderSubgroupClock = true,
380       .shaderDeviceClock = true,
381 
382       /* VK_KHR_workgroup_memory_explicit_layout */
383       .workgroupMemoryExplicitLayout = true,
384       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
385       .workgroupMemoryExplicitLayout8BitAccess = false,
386       .workgroupMemoryExplicitLayout16BitAccess = false,
387 
388       /* VK_EXT_4444_formats */
389       .formatA4R4G4B4 = true,
390       .formatA4B4G4R4 = true,
391 
392       /* VK_EXT_attachment_feedback_loop_layout */
393       .attachmentFeedbackLoopLayout = true,
394 
395       /* VK_EXT_border_color_swizzle */
396       .borderColorSwizzle = true,
397       .borderColorSwizzleFromImage = false,
398 
399       /* VK_EXT_buffer_device_address */
400       .bufferDeviceAddressCaptureReplayEXT = true,
401 
402       /* VK_EXT_color_write_enable */
403       .colorWriteEnable = true,
404 
405       /* VK_EXT_conditional_rendering */
406       .conditionalRendering = true,
407       .inheritedConditionalRendering = true,
408 
409       /* VK_EXT_custom_border_color */
410       .customBorderColors = true,
411       .customBorderColorWithoutFormat = true,
412 
413       /* VK_EXT_depth_bias_control */
414       .depthBiasControl = true,
415       .leastRepresentableValueForceUnormRepresentation = true,
416       .floatRepresentation = false,
417       .depthBiasExact = true,
418 
419       /* VK_EXT_depth_clip_control */
420       .depthClipControl = info->cls_eng3d >= VOLTA_A,
421 
422       /* VK_EXT_depth_clip_enable */
423       .depthClipEnable = true,
424 
425       /* VK_EXT_dynamic_rendering_unused_attachments */
426       .dynamicRenderingUnusedAttachments = true,
427 
428       /* VK_EXT_extended_dynamic_state */
429       .extendedDynamicState = true,
430 
431       /* VK_EXT_extended_dynamic_state2 */
432       .extendedDynamicState2 = true,
433       .extendedDynamicState2LogicOp = true,
434       .extendedDynamicState2PatchControlPoints = true,
435 
436       /* VK_EXT_extended_dynamic_state3 */
437       .extendedDynamicState3TessellationDomainOrigin = true,
438       .extendedDynamicState3DepthClampEnable = true,
439       .extendedDynamicState3PolygonMode = true,
440       .extendedDynamicState3RasterizationSamples = true,
441       .extendedDynamicState3SampleMask = true,
442       .extendedDynamicState3AlphaToCoverageEnable = true,
443       .extendedDynamicState3AlphaToOneEnable = true,
444       .extendedDynamicState3LogicOpEnable = true,
445       .extendedDynamicState3ColorBlendEnable = true,
446       .extendedDynamicState3ColorBlendEquation = true,
447       .extendedDynamicState3ColorWriteMask = true,
448       .extendedDynamicState3RasterizationStream = true,
449       .extendedDynamicState3ConservativeRasterizationMode = false,
450       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
451       .extendedDynamicState3DepthClipEnable = true,
452       .extendedDynamicState3SampleLocationsEnable = info->cls_eng3d >= MAXWELL_B,
453       .extendedDynamicState3ColorBlendAdvanced = false,
454       .extendedDynamicState3ProvokingVertexMode = true,
455       .extendedDynamicState3LineRasterizationMode = true,
456       .extendedDynamicState3LineStippleEnable = true,
457       .extendedDynamicState3DepthClipNegativeOneToOne = true,
458       .extendedDynamicState3ViewportWScalingEnable = false,
459       .extendedDynamicState3ViewportSwizzle = false,
460       .extendedDynamicState3CoverageToColorEnable = false,
461       .extendedDynamicState3CoverageToColorLocation = false,
462       .extendedDynamicState3CoverageModulationMode = false,
463       .extendedDynamicState3CoverageModulationTableEnable = false,
464       .extendedDynamicState3CoverageModulationTable = false,
465       .extendedDynamicState3CoverageReductionMode = false,
466       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
467       .extendedDynamicState3ShadingRateImageEnable = false,
468 
469       /* VK_EXT_graphics_pipeline_library */
470       .graphicsPipelineLibrary = true,
471 
472       /* VK_EXT_image_2d_view_of_3d */
473       .image2DViewOf3D = true,
474       .sampler2DViewOf3D = true,
475 
476       /* VK_EXT_image_sliced_view_of_3d */
477       .imageSlicedViewOf3D = true,
478 
479       /* VK_EXT_image_view_min_lod */
480       .minLod = true,
481 
482       /* VK_KHR_index_type_uint8 */
483       .indexTypeUint8 = true,
484 
485       /* VK_KHR_line_rasterization */
486       .rectangularLines = true,
487       .bresenhamLines = true,
488       .smoothLines = true,
489       .stippledRectangularLines = true,
490       .stippledBresenhamLines = true,
491       .stippledSmoothLines = true,
492 
493       /* VK_EXT_map_memory_placed */
494       .memoryMapPlaced = true,
495       .memoryMapRangePlaced = false,
496       .memoryUnmapReserve = true,
497 
498       /* VK_EXT_multi_draw */
499       .multiDraw = true,
500 
501       /* VK_EXT_non_seamless_cube_map */
502       .nonSeamlessCubeMap = true,
503 
504       /* VK_EXT_primitive_topology_list_restart */
505       .primitiveTopologyListRestart = true,
506       .primitiveTopologyPatchListRestart = true,
507 
508       /* VK_EXT_primitives_generated_query */
509       .primitivesGeneratedQuery = true,
510       .primitivesGeneratedQueryWithNonZeroStreams = true,
511       .primitivesGeneratedQueryWithRasterizerDiscard = true,
512 
513       /* VK_EXT_provoking_vertex */
514       .provokingVertexLast = true,
515       .transformFeedbackPreservesProvokingVertex = true,
516 
517       /* VK_EXT_robustness2 */
518       .robustBufferAccess2 = true,
519       .robustImageAccess2 = true,
520       .nullDescriptor = true,
521 
522       /* VK_EXT_shader_image_atomic_int64 */
523       .shaderImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
524                                  nvk_use_nak(info),
525       .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
526                                  nvk_use_nak(info),
527 
528       /* VK_EXT_shader_module_identifier */
529       .shaderModuleIdentifier = true,
530 
531       /* VK_EXT_shader_object */
532       .shaderObject = true,
533 
534       /* VK_EXT_texel_buffer_alignment */
535       .texelBufferAlignment = true,
536 
537       /* VK_EXT_transform_feedback */
538       .transformFeedback = true,
539       .geometryStreams = true,
540 
541       /* VK_EXT_vertex_attribute_divisor */
542       .vertexAttributeInstanceRateDivisor = true,
543       .vertexAttributeInstanceRateZeroDivisor = true,
544 
545       /* VK_EXT_vertex_input_dynamic_state */
546       .vertexInputDynamicState = true,
547 
548       /* VK_EXT_ycbcr_2plane_444_formats */
549       .ycbcr2plane444Formats = true,
550 
551       /* VK_EXT_ycbcr_image_arrays */
552       .ycbcrImageArrays = true,
553 
554       /* VK_NV_shader_sm_builtins */
555       .shaderSMBuiltins = true,
556 
557       /* VK_VALVE_mutable_descriptor_type */
558       .mutableDescriptorType = true,
559 
560       /* VK_KHR_shader_expect_assume */
561       .shaderExpectAssume = true,
562    };
563 }
564 
565 uint32_t
nvk_min_cbuf_alignment(const struct nv_device_info * info)566 nvk_min_cbuf_alignment(const struct nv_device_info *info)
567 {
568    return info->cls_eng3d >= TURING_A ? 64 : 256;
569 }
570 
571 static void
nvk_get_device_properties(const struct nvk_instance * instance,const struct nv_device_info * info,struct vk_properties * properties)572 nvk_get_device_properties(const struct nvk_instance *instance,
573                           const struct nv_device_info *info,
574                           struct vk_properties *properties)
575 {
576    const VkSampleCountFlagBits sample_counts = VK_SAMPLE_COUNT_1_BIT |
577                                                VK_SAMPLE_COUNT_2_BIT |
578                                                VK_SAMPLE_COUNT_4_BIT |
579                                                VK_SAMPLE_COUNT_8_BIT;
580 
581    uint64_t os_page_size = 4096;
582    os_get_page_size(&os_page_size);
583 
584    *properties = (struct vk_properties) {
585       .apiVersion = nvk_get_vk_version(info),
586       .driverVersion = vk_get_driver_version(),
587       .vendorID = NVIDIA_VENDOR_ID,
588       .deviceID = info->device_id,
589       .deviceType = info->type == NV_DEVICE_TYPE_DIS ?
590                     VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
591                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
592 
593       /* Vulkan 1.0 limits */
594       .maxImageDimension1D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_1D),
595       .maxImageDimension2D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_2D),
596       .maxImageDimension3D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_3D),
597       .maxImageDimensionCube = 0x8000,
598       .maxImageArrayLayers = 2048,
599       .maxTexelBufferElements = 128 * 1024 * 1024,
600       .maxUniformBufferRange = 65536,
601       .maxStorageBufferRange = UINT32_MAX,
602       .maxPushConstantsSize = NVK_MAX_PUSH_SIZE,
603       .maxMemoryAllocationCount = 4096,
604       .maxSamplerAllocationCount = 4000,
605       .bufferImageGranularity = info->chipset >= 0x120 ? 0x400 : 0x10000,
606       .sparseAddressSpaceSize = NVK_SPARSE_ADDR_SPACE_SIZE,
607       .maxBoundDescriptorSets = NVK_MAX_SETS,
608       .maxPerStageDescriptorSamplers = NVK_MAX_DESCRIPTORS,
609       .maxPerStageDescriptorUniformBuffers = NVK_MAX_DESCRIPTORS,
610       .maxPerStageDescriptorStorageBuffers = NVK_MAX_DESCRIPTORS,
611       .maxPerStageDescriptorSampledImages = NVK_MAX_DESCRIPTORS,
612       .maxPerStageDescriptorStorageImages = NVK_MAX_DESCRIPTORS,
613       .maxPerStageDescriptorInputAttachments = NVK_MAX_DESCRIPTORS,
614       .maxPerStageResources = UINT32_MAX,
615       .maxDescriptorSetSamplers = NVK_MAX_DESCRIPTORS,
616       .maxDescriptorSetUniformBuffers = NVK_MAX_DESCRIPTORS,
617       .maxDescriptorSetUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
618       .maxDescriptorSetStorageBuffers = NVK_MAX_DESCRIPTORS,
619       .maxDescriptorSetStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
620       .maxDescriptorSetSampledImages = NVK_MAX_DESCRIPTORS,
621       .maxDescriptorSetStorageImages = NVK_MAX_DESCRIPTORS,
622       .maxDescriptorSetInputAttachments = NVK_MAX_DESCRIPTORS,
623       .maxVertexInputAttributes = 32,
624       .maxVertexInputBindings = 32,
625       .maxVertexInputAttributeOffset = 2047,
626       .maxVertexInputBindingStride = 2048,
627       .maxVertexOutputComponents = 128,
628       .maxTessellationGenerationLevel = 64,
629       .maxTessellationPatchSize = 32,
630       .maxTessellationControlPerVertexInputComponents = 128,
631       .maxTessellationControlPerVertexOutputComponents = 128,
632       .maxTessellationControlPerPatchOutputComponents = 120,
633       .maxTessellationControlTotalOutputComponents = 4216,
634       .maxTessellationEvaluationInputComponents = 128,
635       .maxTessellationEvaluationOutputComponents = 128,
636       .maxGeometryShaderInvocations = 32,
637       .maxGeometryInputComponents = 128,
638       .maxGeometryOutputComponents = 128,
639       .maxGeometryOutputVertices = 1024,
640       .maxGeometryTotalOutputComponents = 1024,
641       .maxFragmentInputComponents = 128,
642       .maxFragmentOutputAttachments = NVK_MAX_RTS,
643       .maxFragmentDualSrcAttachments = 1,
644       .maxFragmentCombinedOutputResources = 16,
645       .maxComputeSharedMemorySize = NVK_MAX_SHARED_SIZE,
646       .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
647       .maxComputeWorkGroupInvocations = 1024,
648       .maxComputeWorkGroupSize = {1024, 1024, 64},
649       .subPixelPrecisionBits = 8,
650       .subTexelPrecisionBits = 8,
651       .mipmapPrecisionBits = 8,
652       .maxDrawIndexedIndexValue = UINT32_MAX,
653       .maxDrawIndirectCount = UINT32_MAX,
654       .maxSamplerLodBias = 15,
655       .maxSamplerAnisotropy = 16,
656       .maxViewports = NVK_MAX_VIEWPORTS,
657       .maxViewportDimensions = { 32768, 32768 },
658       .viewportBoundsRange = { -65536, 65536 },
659       .viewportSubPixelBits = 8,
660       .minMemoryMapAlignment = 64,
661       .minTexelBufferOffsetAlignment = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
662       .minUniformBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
663       .minStorageBufferOffsetAlignment = NVK_MIN_SSBO_ALIGNMENT,
664       .minTexelOffset = -8,
665       .maxTexelOffset = 7,
666       .minTexelGatherOffset = -32,
667       .maxTexelGatherOffset = 31,
668       .minInterpolationOffset = -0.5,
669       .maxInterpolationOffset = 0.4375,
670       .subPixelInterpolationOffsetBits = 4,
671       .maxFramebufferHeight = info->chipset >= 0x130 ? 0x8000 : 0x4000,
672       .maxFramebufferWidth = info->chipset >= 0x130 ? 0x8000 : 0x4000,
673       .maxFramebufferLayers = 2048,
674       .framebufferColorSampleCounts = sample_counts,
675       .framebufferDepthSampleCounts = sample_counts,
676       .framebufferNoAttachmentsSampleCounts = sample_counts,
677       .framebufferStencilSampleCounts = sample_counts,
678       .maxColorAttachments = NVK_MAX_RTS,
679       .sampledImageColorSampleCounts = sample_counts,
680       .sampledImageIntegerSampleCounts = sample_counts,
681       .sampledImageDepthSampleCounts = sample_counts,
682       .sampledImageStencilSampleCounts = sample_counts,
683       .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
684       .maxSampleMaskWords = 1,
685       .timestampComputeAndGraphics = true,
686       .timestampPeriod = 1,
687       .maxClipDistances = 8,
688       .maxCullDistances = 8,
689       .maxCombinedClipAndCullDistances = 8,
690       .discreteQueuePriorities = 2,
691       .pointSizeRange = { 1.0, 2047.94 },
692       .lineWidthRange = { 1, 64 },
693       .pointSizeGranularity = 0.0625,
694       .lineWidthGranularity = 0.0625,
695       .strictLines = true,
696       .standardSampleLocations = true,
697       .optimalBufferCopyOffsetAlignment = 1,
698       .optimalBufferCopyRowPitchAlignment = 1,
699       .nonCoherentAtomSize = 64,
700 
701       /* Vulkan 1.0 sparse properties */
702       .sparseResidencyNonResidentStrict = true,
703 
704       /* Vulkan 1.1 properties */
705       .subgroupSize = 32,
706       .subgroupSupportedStages = nvk_nak_stages(info),
707       .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
708                                      VK_SUBGROUP_FEATURE_BALLOT_BIT |
709                                      VK_SUBGROUP_FEATURE_BASIC_BIT |
710                                      VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
711                                      VK_SUBGROUP_FEATURE_QUAD_BIT |
712                                      VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
713                                      VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
714                                      VK_SUBGROUP_FEATURE_VOTE_BIT,
715       .subgroupQuadOperationsInAllStages = false,
716       .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
717       .maxMultiviewViewCount = NVK_MAX_MULTIVIEW_VIEW_COUNT,
718       .maxMultiviewInstanceIndex = UINT32_MAX,
719       .maxPerSetDescriptors = UINT32_MAX,
720       .maxMemoryAllocationSize = (1u << 31),
721 
722       /* Vulkan 1.2 properties */
723       .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
724                                     VK_RESOLVE_MODE_AVERAGE_BIT |
725                                     VK_RESOLVE_MODE_MIN_BIT |
726                                     VK_RESOLVE_MODE_MAX_BIT,
727       .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
728                                       VK_RESOLVE_MODE_MIN_BIT |
729                                       VK_RESOLVE_MODE_MAX_BIT,
730       .independentResolveNone = true,
731       .independentResolve = true,
732       .driverID = VK_DRIVER_ID_MESA_NVK,
733       .conformanceVersion = (VkConformanceVersion) { /* TODO: conf version */
734          .major = 0,
735          .minor = 0,
736          .subminor = 0,
737          .patch = 0,
738       },
739       .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
740       .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
741       .shaderSignedZeroInfNanPreserveFloat16 = true,
742       .shaderSignedZeroInfNanPreserveFloat32 = true,
743       .shaderSignedZeroInfNanPreserveFloat64 = true,
744       .shaderDenormPreserveFloat16 = true,
745       .shaderDenormPreserveFloat32 = true,
746       .shaderDenormPreserveFloat64 = true,
747       .shaderDenormFlushToZeroFloat16 = true,
748       .shaderDenormFlushToZeroFloat32 = true,
749       .shaderDenormFlushToZeroFloat64 = false,
750       .shaderRoundingModeRTEFloat16 = true,
751       .shaderRoundingModeRTEFloat32 = true,
752       .shaderRoundingModeRTEFloat64 = true,
753       .shaderRoundingModeRTZFloat16 = true,
754       .shaderRoundingModeRTZFloat32 = true,
755       .shaderRoundingModeRTZFloat64 = true,
756       .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
757       .shaderUniformBufferArrayNonUniformIndexingNative = false,
758       .shaderSampledImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
759       .shaderStorageBufferArrayNonUniformIndexingNative = true,
760       .shaderStorageImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
761       .shaderInputAttachmentArrayNonUniformIndexingNative = false,
762       .robustBufferAccessUpdateAfterBind = true,
763       .quadDivergentImplicitLod = info->cls_eng3d >= TURING_A,
764       .maxPerStageDescriptorUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
765       .maxPerStageDescriptorUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
766       .maxPerStageDescriptorUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
767       .maxPerStageDescriptorUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
768       .maxPerStageDescriptorUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
769       .maxPerStageDescriptorUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
770       .maxPerStageUpdateAfterBindResources = UINT32_MAX,
771       .maxDescriptorSetUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
772       .maxDescriptorSetUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
773       .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
774       .maxDescriptorSetUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
775       .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
776       .maxDescriptorSetUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
777       .maxDescriptorSetUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
778       .maxDescriptorSetUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
779       .filterMinmaxSingleComponentFormats = true,
780       .filterMinmaxImageComponentMapping = true,
781       .maxTimelineSemaphoreValueDifference = UINT64_MAX,
782       .framebufferIntegerColorSampleCounts = sample_counts,
783 
784       /* Vulkan 1.3 properties */
785       .minSubgroupSize = 32,
786       .maxSubgroupSize = 32,
787       .maxComputeWorkgroupSubgroups = 1024 / 32,
788       .requiredSubgroupSizeStages = 0,
789       .maxInlineUniformBlockSize = 1 << 16,
790       .maxPerStageDescriptorInlineUniformBlocks = 32,
791       .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
792       .maxDescriptorSetInlineUniformBlocks = 6 * 32,
793       .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
794       .maxInlineUniformTotalSize = 1 << 16,
795       .integerDotProduct4x8BitPackedUnsignedAccelerated
796          = info->cls_eng3d >= VOLTA_A,
797       .integerDotProduct4x8BitPackedSignedAccelerated
798          = info->cls_eng3d >= VOLTA_A,
799       .integerDotProduct4x8BitPackedMixedSignednessAccelerated
800          = info->cls_eng3d >= VOLTA_A,
801       .storageTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
802       .storageTexelBufferOffsetSingleTexelAlignment = true,
803       .uniformTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
804       .uniformTexelBufferOffsetSingleTexelAlignment = true,
805       .maxBufferSize = NVK_MAX_BUFFER_SIZE,
806 
807       /* VK_KHR_push_descriptor */
808       .maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,
809 
810       /* VK_EXT_custom_border_color */
811       .maxCustomBorderColorSamplers = 4000,
812 
813       /* VK_EXT_extended_dynamic_state3 */
814       .dynamicPrimitiveTopologyUnrestricted = true,
815 
816       /* VK_EXT_graphics_pipeline_library */
817       .graphicsPipelineLibraryFastLinking = true,
818       .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
819 
820       /* VK_KHR_line_rasterization */
821       .lineSubPixelPrecisionBits = 8,
822 
823       /* VK_KHR_maintenance5 */
824       .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
825       .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
826       .depthStencilSwizzleOneSupport = true,
827       .polygonModePointSize = true,
828       .nonStrictSinglePixelWideLinesUseParallelogram = false,
829       .nonStrictWideLinesUseParallelogram = false,
830 
831       /* VK_EXT_map_memory_placed */
832       .minPlacedMemoryMapAlignment = os_page_size,
833 
834       /* VK_EXT_multi_draw */
835       .maxMultiDrawCount = UINT32_MAX,
836 
837       /* VK_EXT_pci_bus_info */
838       .pciDomain   = info->pci.domain,
839       .pciBus      = info->pci.bus,
840       .pciDevice   = info->pci.dev,
841       .pciFunction = info->pci.func,
842 
843       /* VK_EXT_physical_device_drm gets populated later */
844 
845       /* VK_EXT_provoking_vertex */
846       .provokingVertexModePerPipeline = true,
847       .transformFeedbackPreservesTriangleFanProvokingVertex = true,
848 
849       /* VK_EXT_robustness2 */
850       .robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
851       .robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),
852 
853       /* VK_EXT_sample_locations */
854       .sampleLocationSampleCounts = sample_counts,
855       .maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
856       .sampleLocationCoordinateRange[0] = 0.0f,
857       .sampleLocationCoordinateRange[1] = 0.9375f,
858       .sampleLocationSubPixelBits = 4,
859       .variableSampleLocations = true,
860 
861       /* VK_EXT_shader_object */
862       .shaderBinaryVersion = 0,
863 
864       /* VK_EXT_transform_feedback */
865       .maxTransformFeedbackStreams = 4,
866       .maxTransformFeedbackBuffers = 4,
867       .maxTransformFeedbackBufferSize = UINT32_MAX,
868       .maxTransformFeedbackStreamDataSize = 2048,
869       .maxTransformFeedbackBufferDataSize = 512,
870       .maxTransformFeedbackBufferDataStride = 2048,
871       .transformFeedbackQueries = true,
872       .transformFeedbackStreamsLinesTriangles = false,
873       .transformFeedbackRasterizationStreamSelect = true,
874       .transformFeedbackDraw = true,
875 
876       /* VK_EXT_vertex_attribute_divisor */
877       .maxVertexAttribDivisor = UINT32_MAX,
878 
879       /* VK_KHR_fragment_shader_barycentric */
880       .triStripVertexOrderIndependentOfProvokingVertex = false,
881 
882       /* VK_NV_shader_sm_builtins */
883       .shaderSMCount = (uint32_t)info->tpc_count * info->mp_per_tpc,
884       .shaderWarpsPerSM = info->max_warps_per_mp,
885    };
886 
887    snprintf(properties->deviceName, sizeof(properties->deviceName),
888             "%s", info->device_name);
889 
890    /* VK_EXT_shader_module_identifier */
891    STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
892       sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
893    memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
894             vk_shaderModuleIdentifierAlgorithmUUID,
895             sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
896 
897    const struct {
898       uint16_t vendor_id;
899       uint16_t device_id;
900       uint8_t pad[12];
901    } dev_uuid = {
902       .vendor_id = NVIDIA_VENDOR_ID,
903       .device_id = info->device_id,
904    };
905    STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
906    memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
907    STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
908    memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
909 
910    snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK");
911    snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
912             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
913 }
914 
915 static void
nvk_physical_device_init_pipeline_cache(struct nvk_physical_device * pdev)916 nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
917 {
918    struct nvk_instance *instance = nvk_physical_device_instance(pdev);
919 
920    struct mesa_sha1 sha_ctx;
921    _mesa_sha1_init(&sha_ctx);
922 
923    _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
924                      sizeof(instance->driver_build_sha));
925 
926    const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
927    _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
928 
929    unsigned char sha[SHA1_DIGEST_LENGTH];
930    _mesa_sha1_final(&sha_ctx, sha);
931 
932    STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
933    memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
934    memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
935 
936 #ifdef ENABLE_SHADER_CACHE
937    char renderer[10];
938    ASSERTED int len = snprintf(renderer, sizeof(renderer), "nvk_%04x",
939                                pdev->info.chipset);
940    assert(len == sizeof(renderer) - 2);
941 
942    char timestamp[41];
943    _mesa_sha1_format(timestamp, instance->driver_build_sha);
944 
945    const uint64_t driver_flags = nvk_physical_device_compiler_flags(pdev);
946    pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
947 #endif
948 }
949 
950 static void
nvk_physical_device_free_disk_cache(struct nvk_physical_device * pdev)951 nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev)
952 {
953 #ifdef ENABLE_SHADER_CACHE
954    if (pdev->vk.disk_cache) {
955       disk_cache_destroy(pdev->vk.disk_cache);
956       pdev->vk.disk_cache = NULL;
957    }
958 #else
959    assert(pdev->vk.disk_cache == NULL);
960 #endif
961 }
962 
/*
 * Returns the size to advertise for the system-memory heap: 3/4 of the total
 * physical memory, rounded down to a multiple of 1 MiB.  Returns 0 when the
 * total cannot be queried.
 */
static uint64_t
nvk_get_sysmem_heap_size(void)
{
   uint64_t total_B = 0;

   if (os_get_total_physical_memory(&total_B)) {
      /* Only claim 3/4 of the total so that we avoid swapping */
      return ROUND_DOWN_TO(total_B * 3 / 4, 1 << 20);
   }

   return 0;
}
973 
/*
 * Returns 3/4 of the currently available system memory, rounded down to a
 * multiple of 1 MiB.  Logs an error against pdev and returns 0 when the
 * query fails.
 */
static uint64_t
nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev)
{
   uint64_t avail_B = 0;

   if (os_get_available_system_memory(&avail_B)) {
      /* Only report 3/4 of what is available so that we avoid swapping */
      return ROUND_DOWN_TO(avail_B * 3 / 4, 1 << 20);
   }

   vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
   return 0;
}
986 
987 static uint64_t
nvk_get_vram_heap_available(struct nvk_physical_device * pdev)988 nvk_get_vram_heap_available(struct nvk_physical_device *pdev)
989 {
990    const uint64_t used = nouveau_ws_device_vram_used(pdev->ws_dev);
991    if (used > pdev->info.vram_size_B)
992       return 0;
993 
994    return pdev->info.vram_size_B - used;
995 }
996 
997 VkResult
nvk_create_drm_physical_device(struct vk_instance * _instance,drmDevicePtr drm_device,struct vk_physical_device ** pdev_out)998 nvk_create_drm_physical_device(struct vk_instance *_instance,
999                                drmDevicePtr drm_device,
1000                                struct vk_physical_device **pdev_out)
1001 {
1002    struct nvk_instance *instance = (struct nvk_instance *)_instance;
1003    VkResult result;
1004    int master_fd = -1;
1005 
1006    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)))
1007       return VK_ERROR_INCOMPATIBLE_DRIVER;
1008 
1009    switch (drm_device->bustype) {
1010    case DRM_BUS_PCI:
1011       if (drm_device->deviceinfo.pci->vendor_id != NVIDIA_VENDOR_ID)
1012          return VK_ERROR_INCOMPATIBLE_DRIVER;
1013       break;
1014 
1015    case DRM_BUS_PLATFORM: {
1016       const char *compat_prefix = "nvidia,";
1017       bool found = false;
1018       for (int i = 0; drm_device->deviceinfo.platform->compatible[i] != NULL; i++) {
1019          if (strncmp(drm_device->deviceinfo.platform->compatible[0], compat_prefix, strlen(compat_prefix)) == 0) {
1020             found = true;
1021             break;
1022          }
1023       }
1024       if (!found)
1025          return VK_ERROR_INCOMPATIBLE_DRIVER;
1026       break;
1027    }
1028 
1029    default:
1030       return VK_ERROR_INCOMPATIBLE_DRIVER;
1031    }
1032 
1033    struct nouveau_ws_device *ws_dev = nouveau_ws_device_new(drm_device);
1034    if (!ws_dev)
1035       return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
1036 
1037    const struct nv_device_info info = ws_dev->info;
1038    const struct vk_sync_type syncobj_sync_type =
1039       vk_drm_syncobj_get_type(ws_dev->fd);
1040 
1041    /* We don't support anything pre-Kepler */
1042    if (info.cls_eng3d < KEPLER_A) {
1043       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1044       goto fail_ws_dev;
1045    }
1046 
1047    if ((info.type != NV_DEVICE_TYPE_DIS ||
1048         info.cls_eng3d < TURING_A || info.cls_eng3d > ADA_A) &&
1049        !debug_get_bool_option("NVK_I_WANT_A_BROKEN_VULKAN_DRIVER", false)) {
1050       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1051                          "WARNING: NVK is not well-tested on %s, pass "
1052                          "NVK_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
1053                          "if you know what you're doing.",
1054                          info.device_name);
1055       goto fail_ws_dev;
1056    }
1057 
1058    if (!ws_dev->has_vm_bind) {
1059       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1060                          "NVK Requires a Linux kernel version 6.6 or later");
1061       goto fail_ws_dev;
1062    }
1063 
1064    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER))) {
1065       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1066                          "NVK requires a render node");
1067       goto fail_ws_dev;
1068    }
1069 
1070    struct stat st;
1071    if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
1072       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1073                          "fstat() failed on %s: %m",
1074                          drm_device->nodes[DRM_NODE_RENDER]);
1075       goto fail_ws_dev;
1076    }
1077    const dev_t render_dev = st.st_rdev;
1078 
1079    vk_warn_non_conformant_implementation("NVK");
1080 
1081    struct nvk_physical_device *pdev =
1082       vk_zalloc(&instance->vk.alloc, sizeof(*pdev),
1083                 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1084 
1085    if (pdev == NULL) {
1086       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1087       goto fail_ws_dev;
1088    }
1089 
1090    struct vk_physical_device_dispatch_table dispatch_table;
1091    vk_physical_device_dispatch_table_from_entrypoints(
1092       &dispatch_table, &nvk_physical_device_entrypoints, true);
1093    vk_physical_device_dispatch_table_from_entrypoints(
1094       &dispatch_table, &wsi_physical_device_entrypoints, false);
1095 
1096    struct vk_device_extension_table supported_extensions;
1097    nvk_get_device_extensions(instance, &info, &supported_extensions);
1098 
1099    struct vk_features supported_features;
1100    nvk_get_device_features(&info, &supported_extensions, &supported_features);
1101 
1102    struct vk_properties properties;
1103    nvk_get_device_properties(instance, &info, &properties);
1104 
1105    properties.drmHasRender = true;
1106    properties.drmRenderMajor = major(render_dev);
1107    properties.drmRenderMinor = minor(render_dev);
1108 
1109    /* DRM primary is optional */
1110    if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
1111        !stat(drm_device->nodes[DRM_NODE_PRIMARY], &st)) {
1112       assert(st.st_rdev != 0);
1113       properties.drmHasPrimary = true;
1114       properties.drmPrimaryMajor = major(st.st_rdev);
1115       properties.drmPrimaryMinor = minor(st.st_rdev);
1116 
1117       /* TODO: Test if the FD is usable? */
1118       if (instance->vk.enabled_extensions.KHR_display)
1119          master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
1120    }
1121 
1122    result = vk_physical_device_init(&pdev->vk, &instance->vk,
1123                                     &supported_extensions,
1124                                     &supported_features,
1125                                     &properties,
1126                                     &dispatch_table);
1127    if (result != VK_SUCCESS)
1128       goto fail_master_fd;
1129 
1130    pdev->info = info;
1131    pdev->debug_flags = ws_dev->debug_flags;
1132    pdev->render_dev = render_dev;
1133    pdev->master_fd = master_fd;
1134    pdev->ws_dev = ws_dev;
1135 
1136    pdev->nak = nak_compiler_create(&pdev->info);
1137    if (pdev->nak == NULL) {
1138       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1139       goto fail_init;
1140    }
1141 
1142    nvk_physical_device_init_pipeline_cache(pdev);
1143 
1144    uint64_t sysmem_size_B = nvk_get_sysmem_heap_size();
1145    if (sysmem_size_B == 0) {
1146       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1147                          "Failed to query total system memory");
1148       goto fail_disk_cache;
1149    }
1150 
1151    if (pdev->info.vram_size_B > 0) {
1152       uint32_t vram_heap_idx = pdev->mem_heap_count++;
1153       pdev->mem_heaps[vram_heap_idx] = (struct nvk_memory_heap) {
1154          .size = pdev->info.vram_size_B,
1155          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1156       };
1157 
1158       /* Only set available if we have the ioctl. */
1159       if (nouveau_ws_device_vram_used(ws_dev) > 0)
1160          pdev->mem_heaps[vram_heap_idx].available = nvk_get_vram_heap_available;
1161 
1162       pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1163          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1164          .heapIndex = vram_heap_idx,
1165       };
1166 
1167       if (pdev->info.cls_eng3d >= MAXWELL_A &&
1168           pdev->info.bar_size_B >= pdev->info.vram_size_B) {
1169          pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1170             .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1171                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1172                              VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1173             .heapIndex = vram_heap_idx,
1174          };
1175       }
1176    }
1177 
1178    uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1179    pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) {
1180       .size = sysmem_size_B,
1181       /* If we don't have any VRAM (iGPU), claim sysmem as DEVICE_LOCAL */
1182       .flags = pdev->info.vram_size_B == 0
1183                ? VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
1184                : 0,
1185       .available = nvk_get_sysmem_heap_available,
1186    };
1187 
1188    pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1189       /* TODO: What's the right thing to do here on Tegra? */
1190       .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1191                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1192                        VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1193       .heapIndex = sysmem_heap_idx,
1194    };
1195 
1196    assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1197    assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1198 
1199    pdev->queue_families[pdev->queue_family_count++] = (struct nvk_queue_family) {
1200       .queue_flags = VK_QUEUE_GRAPHICS_BIT |
1201                      VK_QUEUE_COMPUTE_BIT |
1202                      VK_QUEUE_TRANSFER_BIT |
1203                      VK_QUEUE_SPARSE_BINDING_BIT,
1204       .queue_count = 1,
1205    };
1206    assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1207 
1208    unsigned st_idx = 0;
1209    pdev->syncobj_sync_type = syncobj_sync_type;
1210    pdev->sync_types[st_idx++] = &pdev->syncobj_sync_type;
1211    pdev->sync_types[st_idx++] = NULL;
1212    assert(st_idx <= ARRAY_SIZE(pdev->sync_types));
1213    pdev->vk.supported_sync_types = pdev->sync_types;
1214 
1215    result = nvk_init_wsi(pdev);
1216    if (result != VK_SUCCESS)
1217       goto fail_disk_cache;
1218 
1219    *pdev_out = &pdev->vk;
1220 
1221    return VK_SUCCESS;
1222 
1223 fail_disk_cache:
1224    nvk_physical_device_free_disk_cache(pdev);
1225    nak_compiler_destroy(pdev->nak);
1226 fail_init:
1227    vk_physical_device_finish(&pdev->vk);
1228 fail_master_fd:
1229    if (master_fd >= 0)
1230       close(master_fd);
1231    vk_free(&instance->vk.alloc, pdev);
1232 fail_ws_dev:
1233    nouveau_ws_device_destroy(ws_dev);
1234    return result;
1235 }
1236 
1237 void
nvk_physical_device_destroy(struct vk_physical_device * vk_pdev)1238 nvk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1239 {
1240    struct nvk_physical_device *pdev =
1241       container_of(vk_pdev, struct nvk_physical_device, vk);
1242 
1243    nvk_finish_wsi(pdev);
1244    nvk_physical_device_free_disk_cache(pdev);
1245    nak_compiler_destroy(pdev->nak);
1246    if (pdev->master_fd >= 0)
1247       close(pdev->master_fd);
1248    nouveau_ws_device_destroy(pdev->ws_dev);
1249    vk_physical_device_finish(&pdev->vk);
1250    vk_free(&pdev->vk.instance->alloc, pdev);
1251 }
1252 
1253 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1254 nvk_GetPhysicalDeviceMemoryProperties2(
1255    VkPhysicalDevice physicalDevice,
1256    VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1257 {
1258    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1259 
1260    pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1261    for (int i = 0; i < pdev->mem_heap_count; i++) {
1262       pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap) {
1263          .size = pdev->mem_heaps[i].size,
1264          .flags = pdev->mem_heaps[i].flags,
1265       };
1266    }
1267 
1268    pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1269    for (int i = 0; i < pdev->mem_type_count; i++) {
1270       pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1271    }
1272 
1273    vk_foreach_struct(ext, pMemoryProperties->pNext)
1274    {
1275       switch (ext->sType) {
1276       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1277          VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1278 
1279          for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1280             const struct nvk_memory_heap *heap = &pdev->mem_heaps[i];
1281             uint64_t used = p_atomic_read(&heap->used);
1282 
1283             /* From the Vulkan 1.3.278 spec:
1284              *
1285              *    "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1286              *    values in which memory usages are returned, with one element
1287              *    for each memory heap. A heap’s usage is an estimate of how
1288              *    much memory the process is currently using in that heap."
1289              *
1290              * TODO: Include internal allocations?
1291              */
1292             p->heapUsage[i] = used;
1293 
1294             uint64_t available = heap->size;
1295             if (heap->available)
1296                available = heap->available(pdev);
1297 
1298             /* From the Vulkan 1.3.278 spec:
1299              *
1300              *    "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1301              *    values in which memory budgets are returned, with one
1302              *    element for each memory heap. A heap’s budget is a rough
1303              *    estimate of how much memory the process can allocate from
1304              *    that heap before allocations may fail or cause performance
1305              *    degradation. The budget includes any currently allocated
1306              *    device memory."
1307              *
1308              * and
1309              *
1310              *    "The heapBudget value must be less than or equal to
1311              *    VkMemoryHeap::size for each heap."
1312              *
1313              * available (queried above) is the total amount free memory
1314              * system-wide and does not include our allocations so we need
1315              * to add that in.
1316              */
1317             uint64_t budget = MIN2(available + used, heap->size);
1318 
1319             /* Set the budget at 90% of available to avoid thrashing */
1320             p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1321          }
1322 
1323          /* From the Vulkan 1.3.278 spec:
1324           *
1325           *    "The heapBudget and heapUsage values must be zero for array
1326           *    elements greater than or equal to
1327           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1328           *    heapBudget value must be non-zero for array elements less than
1329           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1330           */
1331          for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1332             p->heapBudget[i] = 0u;
1333             p->heapUsage[i] = 0u;
1334          }
1335          break;
1336       }
1337       default:
1338          nvk_debug_ignored_stype(ext->sType);
1339          break;
1340       }
1341    }
1342 }
1343 
1344 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1345 nvk_GetPhysicalDeviceQueueFamilyProperties2(
1346    VkPhysicalDevice physicalDevice,
1347    uint32_t *pQueueFamilyPropertyCount,
1348    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1349 {
1350    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1351    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1352                           pQueueFamilyPropertyCount);
1353 
1354    for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1355       const struct nvk_queue_family *queue_family = &pdev->queue_families[i];
1356 
1357       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1358          p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1359          p->queueFamilyProperties.queueCount = queue_family->queue_count;
1360          p->queueFamilyProperties.timestampValidBits = 64;
1361          p->queueFamilyProperties.minImageTransferGranularity =
1362             (VkExtent3D){1, 1, 1};
1363       }
1364    }
1365 }
1366 
1367 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1368 nvk_GetPhysicalDeviceMultisamplePropertiesEXT(
1369    VkPhysicalDevice physicalDevice,
1370    VkSampleCountFlagBits samples,
1371    VkMultisamplePropertiesEXT *pMultisampleProperties)
1372 {
1373    VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1374 
1375    if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1376       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1377    } else {
1378       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1379    }
1380 }
1381