1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "nvk_physical_device.h"
6
7 #include "nak.h"
8 #include "nvk_buffer.h"
9 #include "nvk_entrypoints.h"
10 #include "nvk_format.h"
11 #include "nvk_image.h"
12 #include "nvk_instance.h"
13 #include "nvk_shader.h"
14 #include "nvk_wsi.h"
15 #include "git_sha1.h"
16 #include "util/disk_cache.h"
17 #include "util/mesa-sha1.h"
18
19 #include "vulkan/runtime/vk_device.h"
20 #include "vulkan/runtime/vk_drm_syncobj.h"
21 #include "vulkan/runtime/vk_shader_module.h"
22 #include "vulkan/wsi/wsi_common.h"
23
24 #include <fcntl.h>
25 #include <sys/stat.h>
26 #include <sys/sysmacros.h>
27 #include <xf86drm.h>
28
29 #include "cl90c0.h"
30 #include "cl91c0.h"
31 #include "cla097.h"
32 #include "cla0c0.h"
33 #include "cla1c0.h"
34 #include "clb097.h"
35 #include "clb0c0.h"
36 #include "clb197.h"
37 #include "clb1c0.h"
38 #include "clc0c0.h"
39 #include "clc1c0.h"
40 #include "clc397.h"
41 #include "clc3c0.h"
42 #include "clc597.h"
43 #include "clc5c0.h"
44 #include "clc997.h"
45
46 static bool
nvk_use_nak(const struct nv_device_info * info)47 nvk_use_nak(const struct nv_device_info *info)
48 {
49 const VkShaderStageFlags vk10_stages =
50 VK_SHADER_STAGE_VERTEX_BIT |
51 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
52 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
53 VK_SHADER_STAGE_GEOMETRY_BIT |
54 VK_SHADER_STAGE_FRAGMENT_BIT |
55 VK_SHADER_STAGE_COMPUTE_BIT;
56
57 return !(vk10_stages & ~nvk_nak_stages(info));
58 }
59
60 static uint32_t
nvk_get_vk_version(const struct nv_device_info * info)61 nvk_get_vk_version(const struct nv_device_info *info)
62 {
63 /* Version override takes priority */
64 const uint32_t version_override = vk_get_version_override();
65 if (version_override)
66 return version_override;
67
68 /* If we're using codegen for anything, lock to version 1.0 */
69 if (!nvk_use_nak(info))
70 return VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
71
72 return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
73 }
74
/* Fills *ext with the set of device extensions NVK advertises for the
 * given GPU.
 *
 * Most entries are unconditionally true.  Entries gated on hardware
 * generation compare the 3D engine class (info->cls_eng3d) against class
 * numbers such as MAXWELL_A/MAXWELL_B/TURING_A; entries gated on compiler
 * support consult nvk_use_nak()/nvk_nak_stages().  WSI-dependent
 * extensions are compiled in only when NVK_USE_WSI_PLATFORM is defined.
 */
static void
nvk_get_device_extensions(const struct nvk_instance *instance,
                          const struct nv_device_info *info,
                          struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      /* Indirect draw count requires the Turing 3D class. */
      .KHR_draw_indirect_count = info->cls_eng3d >= TURING_A,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      /* Needs Turing+ hardware and NAK-compiled fragment shaders. */
      .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
         (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef NVK_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,

#ifdef NVK_USE_WSI_PLATFORM
      /* Hide these behind dri configs for now since we cannot implement it
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way.
       */
      .KHR_present_id = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
                        wsi_common_vk_instance_supports_present_wait(&instance->vk),
      .KHR_present_wait = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
                          wsi_common_vk_instance_supports_present_wait(&instance->vk),
#endif
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      /* 64-bit buffer atomics need Maxwell+ hardware and NAK. */
      .KHR_shader_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
                                 nvk_use_nak(info),
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      /* Only when NAK handles fragment shaders. */
      .KHR_shader_terminate_invocation =
         (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_timeline_semaphore = true,
#ifdef NVK_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = nvk_use_nak(info),
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_buffer_device_address = true,
      .EXT_conditional_rendering = true,
      .EXT_color_write_enable = true,
      .EXT_custom_border_color = true,
      .EXT_depth_bias_control = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_indexing = true,
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_robustness = true,
      .EXT_image_sliced_view_of_3d = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_non_seamless_cube_map = true,
      /* PCI bus info only makes sense for discrete GPUs. */
      .EXT_pci_bus_info = info->type == NV_DEVICE_TYPE_DIS,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_physical_device_drm = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_primitives_generated_query = true,
      .EXT_provoking_vertex = true,
      .EXT_robustness2 = true,
      /* Programmable sample locations need Maxwell B+. */
      .EXT_sample_locations = info->cls_eng3d >= MAXWELL_B,
      .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B,
      .EXT_scalar_block_layout = nvk_use_nak(info),
      .EXT_separate_stencil_usage = true,
      /* 64-bit image atomics need Maxwell+ hardware and NAK. */
      .EXT_shader_image_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
                                       nvk_use_nak(info),
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_object = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      /* Viewport/layer output from non-geometry stages needs Maxwell B+. */
      .EXT_shader_viewport_index_layer = info->cls_eng3d >= MAXWELL_B,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,
      .EXT_ycbcr_2plane_444_formats = true,
      .EXT_ycbcr_image_arrays = true,
      .NV_shader_sm_builtins = true,
   };
}
227
/* Fills *features with the device features NVK supports.
 *
 * Hardware-dependent features gate on the 3D engine class
 * (info->cls_eng3d); compiler-dependent features gate on nvk_use_nak().
 * The present-id/wait features mirror whether the corresponding
 * extensions were enabled in supported_extensions, keeping feature and
 * extension reporting consistent.
 */
static void
nvk_get_device_features(const struct nv_device_info *info,
                        const struct vk_device_extension_table *supported_extensions,
                        struct vk_features *features)
{
   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      /* NVIDIA desktop hardware has no ETC2/ASTC support; BC only. */
      .textureCompressionETC2 = false,
      .textureCompressionBC = true,
      .textureCompressionASTC_LDR = false,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      /* TODO: shaderStorageImageMultisample */
      .shaderStorageImageReadWithoutFormat = info->cls_eng3d >= MAXWELL_A,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      /* TODO: shaderResourceResidency */
      .shaderResourceMinLod = info->cls_eng3d >= VOLTA_A,
      .sparseBinding = true,
      .sparseResidencyBuffer = info->cls_eng3d >= MAXWELL_A,
      /* TODO: sparseResidency* */
      .variableMultisampleRate = true,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = true,
      .uniformAndStorageBuffer16BitAccess = true,
      .storagePushConstant16 = true,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .shaderDrawParameters = true,
      .samplerYcbcrConversion = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = info->cls_eng3d >= TURING_A,
      .storageBuffer8BitAccess = true,
      .uniformAndStorageBuffer8BitAccess = true,
      .storagePushConstant8 = true,
      /* Matches the KHR_shader_atomic_int64 extension gate above. */
      .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
                                  nvk_use_nak(info),
      .shaderSharedInt64Atomics = false, /* TODO */
      .shaderInt8 = true,
      .descriptorIndexing = true,
      .shaderInputAttachmentArrayDynamicIndexing = true,
      .shaderUniformTexelBufferArrayDynamicIndexing = true,
      .shaderStorageTexelBufferArrayDynamicIndexing = true,
      .shaderUniformBufferArrayNonUniformIndexing = true,
      .shaderSampledImageArrayNonUniformIndexing = true,
      .shaderStorageBufferArrayNonUniformIndexing = true,
      .shaderStorageImageArrayNonUniformIndexing = true,
      .shaderInputAttachmentArrayNonUniformIndexing = true,
      .shaderUniformTexelBufferArrayNonUniformIndexing = true,
      .shaderStorageTexelBufferArrayNonUniformIndexing = true,
      .descriptorBindingUniformBufferUpdateAfterBind = true,
      .descriptorBindingSampledImageUpdateAfterBind = true,
      .descriptorBindingStorageImageUpdateAfterBind = true,
      .descriptorBindingStorageBufferUpdateAfterBind = true,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
      .descriptorBindingUpdateUnusedWhilePending = true,
      .descriptorBindingPartiallyBound = true,
      .descriptorBindingVariableDescriptorCount = true,
      .runtimeDescriptorArray = true,
      .samplerFilterMinmax = info->cls_eng3d >= MAXWELL_B,
      .scalarBlockLayout = nvk_use_nak(info),
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = true,
      .bufferDeviceAddressMultiDevice = false,
      /* The memory-model features all require NAK. */
      .vulkanMemoryModel = nvk_use_nak(info),
      .vulkanMemoryModelDeviceScope = nvk_use_nak(info),
      .vulkanMemoryModelAvailabilityVisibilityChains = nvk_use_nak(info),
      .shaderOutputViewportIndex = info->cls_eng3d >= MAXWELL_B,
      .shaderOutputLayer = info->cls_eng3d >= MAXWELL_B,
      .subgroupBroadcastDynamicId = nvk_use_nak(info),

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* VK_KHR_fragment_shader_barycentric */
      .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
         (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,

      /* VK_KHR_maintenance5 */
      .maintenance5 = true,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_KHR_present_id */
      .presentId = supported_extensions->KHR_present_id,

      /* VK_KHR_present_wait */
      .presentWait = supported_extensions->KHR_present_wait,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = false,
      .workgroupMemoryExplicitLayout16BitAccess = false,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = true,

      /* VK_EXT_attachment_feedback_loop_layout */
      .attachmentFeedbackLoopLayout = true,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = false,

      /* VK_EXT_buffer_device_address */
      .bufferDeviceAddressCaptureReplayEXT = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = true,
      .inheritedConditionalRendering = true,

      /* VK_EXT_custom_border_color */
      .customBorderColors = true,
      .customBorderColorWithoutFormat = true,

      /* VK_EXT_depth_bias_control */
      .depthBiasControl = true,
      .leastRepresentableValueForceUnormRepresentation = true,
      .floatRepresentation = false,
      .depthBiasExact = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = info->cls_eng3d >= VOLTA_A,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_EXT_dynamic_rendering_unused_attachments */
      .dynamicRenderingUnusedAttachments = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = true,

      /* VK_EXT_extended_dynamic_state3 */
      .extendedDynamicState3TessellationDomainOrigin = true,
      .extendedDynamicState3DepthClampEnable = true,
      .extendedDynamicState3PolygonMode = true,
      .extendedDynamicState3RasterizationSamples = true,
      .extendedDynamicState3SampleMask = true,
      .extendedDynamicState3AlphaToCoverageEnable = true,
      .extendedDynamicState3AlphaToOneEnable = true,
      .extendedDynamicState3LogicOpEnable = true,
      .extendedDynamicState3ColorBlendEnable = true,
      .extendedDynamicState3ColorBlendEquation = true,
      .extendedDynamicState3ColorWriteMask = true,
      .extendedDynamicState3RasterizationStream = true,
      .extendedDynamicState3ConservativeRasterizationMode = false,
      .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
      .extendedDynamicState3DepthClipEnable = true,
      .extendedDynamicState3SampleLocationsEnable = info->cls_eng3d >= MAXWELL_B,
      .extendedDynamicState3ColorBlendAdvanced = false,
      .extendedDynamicState3ProvokingVertexMode = true,
      .extendedDynamicState3LineRasterizationMode = true,
      .extendedDynamicState3LineStippleEnable = true,
      .extendedDynamicState3DepthClipNegativeOneToOne = true,
      .extendedDynamicState3ViewportWScalingEnable = false,
      .extendedDynamicState3ViewportSwizzle = false,
      .extendedDynamicState3CoverageToColorEnable = false,
      .extendedDynamicState3CoverageToColorLocation = false,
      .extendedDynamicState3CoverageModulationMode = false,
      .extendedDynamicState3CoverageModulationTableEnable = false,
      .extendedDynamicState3CoverageModulationTable = false,
      .extendedDynamicState3CoverageReductionMode = false,
      .extendedDynamicState3RepresentativeFragmentTestEnable = false,
      .extendedDynamicState3ShadingRateImageEnable = false,

      /* VK_EXT_graphics_pipeline_library */
      .graphicsPipelineLibrary = true,

      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = true,

      /* VK_EXT_image_sliced_view_of_3d */
      .imageSlicedViewOf3D = true,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_KHR_line_rasterization */
      .rectangularLines = true,
      .bresenhamLines = true,
      .smoothLines = true,
      .stippledRectangularLines = true,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = true,

      /* VK_EXT_map_memory_placed */
      .memoryMapPlaced = true,
      .memoryMapRangePlaced = false,
      .memoryUnmapReserve = true,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithNonZeroStreams = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = true,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_shader_image_atomic_int64 */
      .shaderImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
                                 nvk_use_nak(info),
      .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
                                 nvk_use_nak(info),

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_EXT_shader_object */
      .shaderObject = true,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_EXT_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_EXT_vertex_input_dynamic_state */
      .vertexInputDynamicState = true,

      /* VK_EXT_ycbcr_2plane_444_formats */
      .ycbcr2plane444Formats = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_NV_shader_sm_builtins */
      .shaderSMBuiltins = true,

      /* VK_VALVE_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
   };
}
564
565 uint32_t
nvk_min_cbuf_alignment(const struct nv_device_info * info)566 nvk_min_cbuf_alignment(const struct nv_device_info *info)
567 {
568 return info->cls_eng3d >= TURING_A ? 64 : 256;
569 }
570
571 static void
nvk_get_device_properties(const struct nvk_instance * instance,const struct nv_device_info * info,struct vk_properties * properties)572 nvk_get_device_properties(const struct nvk_instance *instance,
573 const struct nv_device_info *info,
574 struct vk_properties *properties)
575 {
576 const VkSampleCountFlagBits sample_counts = VK_SAMPLE_COUNT_1_BIT |
577 VK_SAMPLE_COUNT_2_BIT |
578 VK_SAMPLE_COUNT_4_BIT |
579 VK_SAMPLE_COUNT_8_BIT;
580
581 uint64_t os_page_size = 4096;
582 os_get_page_size(&os_page_size);
583
584 *properties = (struct vk_properties) {
585 .apiVersion = nvk_get_vk_version(info),
586 .driverVersion = vk_get_driver_version(),
587 .vendorID = NVIDIA_VENDOR_ID,
588 .deviceID = info->device_id,
589 .deviceType = info->type == NV_DEVICE_TYPE_DIS ?
590 VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
591 VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
592
593 /* Vulkan 1.0 limits */
594 .maxImageDimension1D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_1D),
595 .maxImageDimension2D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_2D),
596 .maxImageDimension3D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_3D),
597 .maxImageDimensionCube = 0x8000,
598 .maxImageArrayLayers = 2048,
599 .maxTexelBufferElements = 128 * 1024 * 1024,
600 .maxUniformBufferRange = 65536,
601 .maxStorageBufferRange = UINT32_MAX,
602 .maxPushConstantsSize = NVK_MAX_PUSH_SIZE,
603 .maxMemoryAllocationCount = 4096,
604 .maxSamplerAllocationCount = 4000,
605 .bufferImageGranularity = info->chipset >= 0x120 ? 0x400 : 0x10000,
606 .sparseAddressSpaceSize = NVK_SPARSE_ADDR_SPACE_SIZE,
607 .maxBoundDescriptorSets = NVK_MAX_SETS,
608 .maxPerStageDescriptorSamplers = NVK_MAX_DESCRIPTORS,
609 .maxPerStageDescriptorUniformBuffers = NVK_MAX_DESCRIPTORS,
610 .maxPerStageDescriptorStorageBuffers = NVK_MAX_DESCRIPTORS,
611 .maxPerStageDescriptorSampledImages = NVK_MAX_DESCRIPTORS,
612 .maxPerStageDescriptorStorageImages = NVK_MAX_DESCRIPTORS,
613 .maxPerStageDescriptorInputAttachments = NVK_MAX_DESCRIPTORS,
614 .maxPerStageResources = UINT32_MAX,
615 .maxDescriptorSetSamplers = NVK_MAX_DESCRIPTORS,
616 .maxDescriptorSetUniformBuffers = NVK_MAX_DESCRIPTORS,
617 .maxDescriptorSetUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
618 .maxDescriptorSetStorageBuffers = NVK_MAX_DESCRIPTORS,
619 .maxDescriptorSetStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
620 .maxDescriptorSetSampledImages = NVK_MAX_DESCRIPTORS,
621 .maxDescriptorSetStorageImages = NVK_MAX_DESCRIPTORS,
622 .maxDescriptorSetInputAttachments = NVK_MAX_DESCRIPTORS,
623 .maxVertexInputAttributes = 32,
624 .maxVertexInputBindings = 32,
625 .maxVertexInputAttributeOffset = 2047,
626 .maxVertexInputBindingStride = 2048,
627 .maxVertexOutputComponents = 128,
628 .maxTessellationGenerationLevel = 64,
629 .maxTessellationPatchSize = 32,
630 .maxTessellationControlPerVertexInputComponents = 128,
631 .maxTessellationControlPerVertexOutputComponents = 128,
632 .maxTessellationControlPerPatchOutputComponents = 120,
633 .maxTessellationControlTotalOutputComponents = 4216,
634 .maxTessellationEvaluationInputComponents = 128,
635 .maxTessellationEvaluationOutputComponents = 128,
636 .maxGeometryShaderInvocations = 32,
637 .maxGeometryInputComponents = 128,
638 .maxGeometryOutputComponents = 128,
639 .maxGeometryOutputVertices = 1024,
640 .maxGeometryTotalOutputComponents = 1024,
641 .maxFragmentInputComponents = 128,
642 .maxFragmentOutputAttachments = NVK_MAX_RTS,
643 .maxFragmentDualSrcAttachments = 1,
644 .maxFragmentCombinedOutputResources = 16,
645 .maxComputeSharedMemorySize = NVK_MAX_SHARED_SIZE,
646 .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
647 .maxComputeWorkGroupInvocations = 1024,
648 .maxComputeWorkGroupSize = {1024, 1024, 64},
649 .subPixelPrecisionBits = 8,
650 .subTexelPrecisionBits = 8,
651 .mipmapPrecisionBits = 8,
652 .maxDrawIndexedIndexValue = UINT32_MAX,
653 .maxDrawIndirectCount = UINT32_MAX,
654 .maxSamplerLodBias = 15,
655 .maxSamplerAnisotropy = 16,
656 .maxViewports = NVK_MAX_VIEWPORTS,
657 .maxViewportDimensions = { 32768, 32768 },
658 .viewportBoundsRange = { -65536, 65536 },
659 .viewportSubPixelBits = 8,
660 .minMemoryMapAlignment = 64,
661 .minTexelBufferOffsetAlignment = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
662 .minUniformBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
663 .minStorageBufferOffsetAlignment = NVK_MIN_SSBO_ALIGNMENT,
664 .minTexelOffset = -8,
665 .maxTexelOffset = 7,
666 .minTexelGatherOffset = -32,
667 .maxTexelGatherOffset = 31,
668 .minInterpolationOffset = -0.5,
669 .maxInterpolationOffset = 0.4375,
670 .subPixelInterpolationOffsetBits = 4,
671 .maxFramebufferHeight = info->chipset >= 0x130 ? 0x8000 : 0x4000,
672 .maxFramebufferWidth = info->chipset >= 0x130 ? 0x8000 : 0x4000,
673 .maxFramebufferLayers = 2048,
674 .framebufferColorSampleCounts = sample_counts,
675 .framebufferDepthSampleCounts = sample_counts,
676 .framebufferNoAttachmentsSampleCounts = sample_counts,
677 .framebufferStencilSampleCounts = sample_counts,
678 .maxColorAttachments = NVK_MAX_RTS,
679 .sampledImageColorSampleCounts = sample_counts,
680 .sampledImageIntegerSampleCounts = sample_counts,
681 .sampledImageDepthSampleCounts = sample_counts,
682 .sampledImageStencilSampleCounts = sample_counts,
683 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
684 .maxSampleMaskWords = 1,
685 .timestampComputeAndGraphics = true,
686 .timestampPeriod = 1,
687 .maxClipDistances = 8,
688 .maxCullDistances = 8,
689 .maxCombinedClipAndCullDistances = 8,
690 .discreteQueuePriorities = 2,
691 .pointSizeRange = { 1.0, 2047.94 },
692 .lineWidthRange = { 1, 64 },
693 .pointSizeGranularity = 0.0625,
694 .lineWidthGranularity = 0.0625,
695 .strictLines = true,
696 .standardSampleLocations = true,
697 .optimalBufferCopyOffsetAlignment = 1,
698 .optimalBufferCopyRowPitchAlignment = 1,
699 .nonCoherentAtomSize = 64,
700
701 /* Vulkan 1.0 sparse properties */
702 .sparseResidencyNonResidentStrict = true,
703
704 /* Vulkan 1.1 properties */
705 .subgroupSize = 32,
706 .subgroupSupportedStages = nvk_nak_stages(info),
707 .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
708 VK_SUBGROUP_FEATURE_BALLOT_BIT |
709 VK_SUBGROUP_FEATURE_BASIC_BIT |
710 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
711 VK_SUBGROUP_FEATURE_QUAD_BIT |
712 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
713 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
714 VK_SUBGROUP_FEATURE_VOTE_BIT,
715 .subgroupQuadOperationsInAllStages = false,
716 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
717 .maxMultiviewViewCount = NVK_MAX_MULTIVIEW_VIEW_COUNT,
718 .maxMultiviewInstanceIndex = UINT32_MAX,
719 .maxPerSetDescriptors = UINT32_MAX,
720 .maxMemoryAllocationSize = (1u << 31),
721
722 /* Vulkan 1.2 properties */
723 .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
724 VK_RESOLVE_MODE_AVERAGE_BIT |
725 VK_RESOLVE_MODE_MIN_BIT |
726 VK_RESOLVE_MODE_MAX_BIT,
727 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
728 VK_RESOLVE_MODE_MIN_BIT |
729 VK_RESOLVE_MODE_MAX_BIT,
730 .independentResolveNone = true,
731 .independentResolve = true,
732 .driverID = VK_DRIVER_ID_MESA_NVK,
733 .conformanceVersion = (VkConformanceVersion) { /* TODO: conf version */
734 .major = 0,
735 .minor = 0,
736 .subminor = 0,
737 .patch = 0,
738 },
739 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
740 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
741 .shaderSignedZeroInfNanPreserveFloat16 = true,
742 .shaderSignedZeroInfNanPreserveFloat32 = true,
743 .shaderSignedZeroInfNanPreserveFloat64 = true,
744 .shaderDenormPreserveFloat16 = true,
745 .shaderDenormPreserveFloat32 = true,
746 .shaderDenormPreserveFloat64 = true,
747 .shaderDenormFlushToZeroFloat16 = true,
748 .shaderDenormFlushToZeroFloat32 = true,
749 .shaderDenormFlushToZeroFloat64 = false,
750 .shaderRoundingModeRTEFloat16 = true,
751 .shaderRoundingModeRTEFloat32 = true,
752 .shaderRoundingModeRTEFloat64 = true,
753 .shaderRoundingModeRTZFloat16 = true,
754 .shaderRoundingModeRTZFloat32 = true,
755 .shaderRoundingModeRTZFloat64 = true,
756 .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
757 .shaderUniformBufferArrayNonUniformIndexingNative = false,
758 .shaderSampledImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
759 .shaderStorageBufferArrayNonUniformIndexingNative = true,
760 .shaderStorageImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
761 .shaderInputAttachmentArrayNonUniformIndexingNative = false,
762 .robustBufferAccessUpdateAfterBind = true,
763 .quadDivergentImplicitLod = info->cls_eng3d >= TURING_A,
764 .maxPerStageDescriptorUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
765 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
766 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
767 .maxPerStageDescriptorUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
768 .maxPerStageDescriptorUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
769 .maxPerStageDescriptorUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
770 .maxPerStageUpdateAfterBindResources = UINT32_MAX,
771 .maxDescriptorSetUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
772 .maxDescriptorSetUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
773 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
774 .maxDescriptorSetUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
775 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
776 .maxDescriptorSetUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
777 .maxDescriptorSetUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
778 .maxDescriptorSetUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
779 .filterMinmaxSingleComponentFormats = true,
780 .filterMinmaxImageComponentMapping = true,
781 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
782 .framebufferIntegerColorSampleCounts = sample_counts,
783
784 /* Vulkan 1.3 properties */
785 .minSubgroupSize = 32,
786 .maxSubgroupSize = 32,
787 .maxComputeWorkgroupSubgroups = 1024 / 32,
788 .requiredSubgroupSizeStages = 0,
789 .maxInlineUniformBlockSize = 1 << 16,
790 .maxPerStageDescriptorInlineUniformBlocks = 32,
791 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
792 .maxDescriptorSetInlineUniformBlocks = 6 * 32,
793 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
794 .maxInlineUniformTotalSize = 1 << 16,
795 .integerDotProduct4x8BitPackedUnsignedAccelerated
796 = info->cls_eng3d >= VOLTA_A,
797 .integerDotProduct4x8BitPackedSignedAccelerated
798 = info->cls_eng3d >= VOLTA_A,
799 .integerDotProduct4x8BitPackedMixedSignednessAccelerated
800 = info->cls_eng3d >= VOLTA_A,
801 .storageTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
802 .storageTexelBufferOffsetSingleTexelAlignment = true,
803 .uniformTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
804 .uniformTexelBufferOffsetSingleTexelAlignment = true,
805 .maxBufferSize = NVK_MAX_BUFFER_SIZE,
806
807 /* VK_KHR_push_descriptor */
808 .maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,
809
810 /* VK_EXT_custom_border_color */
811 .maxCustomBorderColorSamplers = 4000,
812
813 /* VK_EXT_extended_dynamic_state3 */
814 .dynamicPrimitiveTopologyUnrestricted = true,
815
816 /* VK_EXT_graphics_pipeline_library */
817 .graphicsPipelineLibraryFastLinking = true,
818 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
819
820 /* VK_KHR_line_rasterization */
821 .lineSubPixelPrecisionBits = 8,
822
823 /* VK_KHR_maintenance5 */
824 .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
825 .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
826 .depthStencilSwizzleOneSupport = true,
827 .polygonModePointSize = true,
828 .nonStrictSinglePixelWideLinesUseParallelogram = false,
829 .nonStrictWideLinesUseParallelogram = false,
830
831 /* VK_EXT_map_memory_placed */
832 .minPlacedMemoryMapAlignment = os_page_size,
833
834 /* VK_EXT_multi_draw */
835 .maxMultiDrawCount = UINT32_MAX,
836
837 /* VK_EXT_pci_bus_info */
838 .pciDomain = info->pci.domain,
839 .pciBus = info->pci.bus,
840 .pciDevice = info->pci.dev,
841 .pciFunction = info->pci.func,
842
843 /* VK_EXT_physical_device_drm gets populated later */
844
845 /* VK_EXT_provoking_vertex */
846 .provokingVertexModePerPipeline = true,
847 .transformFeedbackPreservesTriangleFanProvokingVertex = true,
848
849 /* VK_EXT_robustness2 */
850 .robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
851 .robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),
852
853 /* VK_EXT_sample_locations */
854 .sampleLocationSampleCounts = sample_counts,
855 .maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
856 .sampleLocationCoordinateRange[0] = 0.0f,
857 .sampleLocationCoordinateRange[1] = 0.9375f,
858 .sampleLocationSubPixelBits = 4,
859 .variableSampleLocations = true,
860
861 /* VK_EXT_shader_object */
862 .shaderBinaryVersion = 0,
863
864 /* VK_EXT_transform_feedback */
865 .maxTransformFeedbackStreams = 4,
866 .maxTransformFeedbackBuffers = 4,
867 .maxTransformFeedbackBufferSize = UINT32_MAX,
868 .maxTransformFeedbackStreamDataSize = 2048,
869 .maxTransformFeedbackBufferDataSize = 512,
870 .maxTransformFeedbackBufferDataStride = 2048,
871 .transformFeedbackQueries = true,
872 .transformFeedbackStreamsLinesTriangles = false,
873 .transformFeedbackRasterizationStreamSelect = true,
874 .transformFeedbackDraw = true,
875
876 /* VK_EXT_vertex_attribute_divisor */
877 .maxVertexAttribDivisor = UINT32_MAX,
878
879 /* VK_KHR_fragment_shader_barycentric */
880 .triStripVertexOrderIndependentOfProvokingVertex = false,
881
882 /* VK_NV_shader_sm_builtins */
883 .shaderSMCount = (uint32_t)info->tpc_count * info->mp_per_tpc,
884 .shaderWarpsPerSM = info->max_warps_per_mp,
885 };
886
887 snprintf(properties->deviceName, sizeof(properties->deviceName),
888 "%s", info->device_name);
889
890 /* VK_EXT_shader_module_identifier */
891 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
892 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
893 memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
894 vk_shaderModuleIdentifierAlgorithmUUID,
895 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
896
897 const struct {
898 uint16_t vendor_id;
899 uint16_t device_id;
900 uint8_t pad[12];
901 } dev_uuid = {
902 .vendor_id = NVIDIA_VENDOR_ID,
903 .device_id = info->device_id,
904 };
905 STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
906 memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
907 STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
908 memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
909
910 snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK");
911 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
912 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
913 }
914
915 static void
nvk_physical_device_init_pipeline_cache(struct nvk_physical_device * pdev)916 nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
917 {
918 struct nvk_instance *instance = nvk_physical_device_instance(pdev);
919
920 struct mesa_sha1 sha_ctx;
921 _mesa_sha1_init(&sha_ctx);
922
923 _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
924 sizeof(instance->driver_build_sha));
925
926 const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
927 _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
928
929 unsigned char sha[SHA1_DIGEST_LENGTH];
930 _mesa_sha1_final(&sha_ctx, sha);
931
932 STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
933 memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
934 memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
935
936 #ifdef ENABLE_SHADER_CACHE
937 char renderer[10];
938 ASSERTED int len = snprintf(renderer, sizeof(renderer), "nvk_%04x",
939 pdev->info.chipset);
940 assert(len == sizeof(renderer) - 2);
941
942 char timestamp[41];
943 _mesa_sha1_format(timestamp, instance->driver_build_sha);
944
945 const uint64_t driver_flags = nvk_physical_device_compiler_flags(pdev);
946 pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
947 #endif
948 }
949
950 static void
nvk_physical_device_free_disk_cache(struct nvk_physical_device * pdev)951 nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev)
952 {
953 #ifdef ENABLE_SHADER_CACHE
954 if (pdev->vk.disk_cache) {
955 disk_cache_destroy(pdev->vk.disk_cache);
956 pdev->vk.disk_cache = NULL;
957 }
958 #else
959 assert(pdev->vk.disk_cache == NULL);
960 #endif
961 }
962
static uint64_t
nvk_get_sysmem_heap_size(void)
{
   /* Returns the size of the system-memory heap in bytes, or 0 if the OS
    * query fails.  Only 3/4 of physical RAM is reported, rounded down to a
    * whole MiB, so the heap never claims memory that would force swapping.
    */
   uint64_t total_B = 0;
   if (!os_get_total_physical_memory(&total_B))
      return 0;

   return ROUND_DOWN_TO(total_B * 3 / 4, 1 << 20);
}
973
static uint64_t
nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev)
{
   /* Returns currently-available system memory in bytes (0 on query
    * failure).  As with the heap size, only 3/4 is reported, MiB-aligned,
    * to keep allocations from pushing the system into swap.
    */
   uint64_t avail_B = 0;
   if (!os_get_available_system_memory(&avail_B)) {
      vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
      return 0;
   }

   return ROUND_DOWN_TO(avail_B * 3 / 4, 1 << 20);
}
986
987 static uint64_t
nvk_get_vram_heap_available(struct nvk_physical_device * pdev)988 nvk_get_vram_heap_available(struct nvk_physical_device *pdev)
989 {
990 const uint64_t used = nouveau_ws_device_vram_used(pdev->ws_dev);
991 if (used > pdev->info.vram_size_B)
992 return 0;
993
994 return pdev->info.vram_size_B - used;
995 }
996
997 VkResult
nvk_create_drm_physical_device(struct vk_instance * _instance,drmDevicePtr drm_device,struct vk_physical_device ** pdev_out)998 nvk_create_drm_physical_device(struct vk_instance *_instance,
999 drmDevicePtr drm_device,
1000 struct vk_physical_device **pdev_out)
1001 {
1002 struct nvk_instance *instance = (struct nvk_instance *)_instance;
1003 VkResult result;
1004 int master_fd = -1;
1005
1006 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)))
1007 return VK_ERROR_INCOMPATIBLE_DRIVER;
1008
1009 switch (drm_device->bustype) {
1010 case DRM_BUS_PCI:
1011 if (drm_device->deviceinfo.pci->vendor_id != NVIDIA_VENDOR_ID)
1012 return VK_ERROR_INCOMPATIBLE_DRIVER;
1013 break;
1014
1015 case DRM_BUS_PLATFORM: {
1016 const char *compat_prefix = "nvidia,";
1017 bool found = false;
1018 for (int i = 0; drm_device->deviceinfo.platform->compatible[i] != NULL; i++) {
1019 if (strncmp(drm_device->deviceinfo.platform->compatible[0], compat_prefix, strlen(compat_prefix)) == 0) {
1020 found = true;
1021 break;
1022 }
1023 }
1024 if (!found)
1025 return VK_ERROR_INCOMPATIBLE_DRIVER;
1026 break;
1027 }
1028
1029 default:
1030 return VK_ERROR_INCOMPATIBLE_DRIVER;
1031 }
1032
1033 struct nouveau_ws_device *ws_dev = nouveau_ws_device_new(drm_device);
1034 if (!ws_dev)
1035 return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
1036
1037 const struct nv_device_info info = ws_dev->info;
1038 const struct vk_sync_type syncobj_sync_type =
1039 vk_drm_syncobj_get_type(ws_dev->fd);
1040
1041 /* We don't support anything pre-Kepler */
1042 if (info.cls_eng3d < KEPLER_A) {
1043 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1044 goto fail_ws_dev;
1045 }
1046
1047 if ((info.type != NV_DEVICE_TYPE_DIS ||
1048 info.cls_eng3d < TURING_A || info.cls_eng3d > ADA_A) &&
1049 !debug_get_bool_option("NVK_I_WANT_A_BROKEN_VULKAN_DRIVER", false)) {
1050 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1051 "WARNING: NVK is not well-tested on %s, pass "
1052 "NVK_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
1053 "if you know what you're doing.",
1054 info.device_name);
1055 goto fail_ws_dev;
1056 }
1057
1058 if (!ws_dev->has_vm_bind) {
1059 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1060 "NVK Requires a Linux kernel version 6.6 or later");
1061 goto fail_ws_dev;
1062 }
1063
1064 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER))) {
1065 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1066 "NVK requires a render node");
1067 goto fail_ws_dev;
1068 }
1069
1070 struct stat st;
1071 if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
1072 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1073 "fstat() failed on %s: %m",
1074 drm_device->nodes[DRM_NODE_RENDER]);
1075 goto fail_ws_dev;
1076 }
1077 const dev_t render_dev = st.st_rdev;
1078
1079 vk_warn_non_conformant_implementation("NVK");
1080
1081 struct nvk_physical_device *pdev =
1082 vk_zalloc(&instance->vk.alloc, sizeof(*pdev),
1083 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1084
1085 if (pdev == NULL) {
1086 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1087 goto fail_ws_dev;
1088 }
1089
1090 struct vk_physical_device_dispatch_table dispatch_table;
1091 vk_physical_device_dispatch_table_from_entrypoints(
1092 &dispatch_table, &nvk_physical_device_entrypoints, true);
1093 vk_physical_device_dispatch_table_from_entrypoints(
1094 &dispatch_table, &wsi_physical_device_entrypoints, false);
1095
1096 struct vk_device_extension_table supported_extensions;
1097 nvk_get_device_extensions(instance, &info, &supported_extensions);
1098
1099 struct vk_features supported_features;
1100 nvk_get_device_features(&info, &supported_extensions, &supported_features);
1101
1102 struct vk_properties properties;
1103 nvk_get_device_properties(instance, &info, &properties);
1104
1105 properties.drmHasRender = true;
1106 properties.drmRenderMajor = major(render_dev);
1107 properties.drmRenderMinor = minor(render_dev);
1108
1109 /* DRM primary is optional */
1110 if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
1111 !stat(drm_device->nodes[DRM_NODE_PRIMARY], &st)) {
1112 assert(st.st_rdev != 0);
1113 properties.drmHasPrimary = true;
1114 properties.drmPrimaryMajor = major(st.st_rdev);
1115 properties.drmPrimaryMinor = minor(st.st_rdev);
1116
1117 /* TODO: Test if the FD is usable? */
1118 if (instance->vk.enabled_extensions.KHR_display)
1119 master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
1120 }
1121
1122 result = vk_physical_device_init(&pdev->vk, &instance->vk,
1123 &supported_extensions,
1124 &supported_features,
1125 &properties,
1126 &dispatch_table);
1127 if (result != VK_SUCCESS)
1128 goto fail_master_fd;
1129
1130 pdev->info = info;
1131 pdev->debug_flags = ws_dev->debug_flags;
1132 pdev->render_dev = render_dev;
1133 pdev->master_fd = master_fd;
1134 pdev->ws_dev = ws_dev;
1135
1136 pdev->nak = nak_compiler_create(&pdev->info);
1137 if (pdev->nak == NULL) {
1138 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1139 goto fail_init;
1140 }
1141
1142 nvk_physical_device_init_pipeline_cache(pdev);
1143
1144 uint64_t sysmem_size_B = nvk_get_sysmem_heap_size();
1145 if (sysmem_size_B == 0) {
1146 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1147 "Failed to query total system memory");
1148 goto fail_disk_cache;
1149 }
1150
1151 if (pdev->info.vram_size_B > 0) {
1152 uint32_t vram_heap_idx = pdev->mem_heap_count++;
1153 pdev->mem_heaps[vram_heap_idx] = (struct nvk_memory_heap) {
1154 .size = pdev->info.vram_size_B,
1155 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1156 };
1157
1158 /* Only set available if we have the ioctl. */
1159 if (nouveau_ws_device_vram_used(ws_dev) > 0)
1160 pdev->mem_heaps[vram_heap_idx].available = nvk_get_vram_heap_available;
1161
1162 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1163 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1164 .heapIndex = vram_heap_idx,
1165 };
1166
1167 if (pdev->info.cls_eng3d >= MAXWELL_A &&
1168 pdev->info.bar_size_B >= pdev->info.vram_size_B) {
1169 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1170 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1171 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1172 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1173 .heapIndex = vram_heap_idx,
1174 };
1175 }
1176 }
1177
1178 uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1179 pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) {
1180 .size = sysmem_size_B,
1181 /* If we don't have any VRAM (iGPU), claim sysmem as DEVICE_LOCAL */
1182 .flags = pdev->info.vram_size_B == 0
1183 ? VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
1184 : 0,
1185 .available = nvk_get_sysmem_heap_available,
1186 };
1187
1188 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1189 /* TODO: What's the right thing to do here on Tegra? */
1190 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1191 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1192 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1193 .heapIndex = sysmem_heap_idx,
1194 };
1195
1196 assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1197 assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1198
1199 pdev->queue_families[pdev->queue_family_count++] = (struct nvk_queue_family) {
1200 .queue_flags = VK_QUEUE_GRAPHICS_BIT |
1201 VK_QUEUE_COMPUTE_BIT |
1202 VK_QUEUE_TRANSFER_BIT |
1203 VK_QUEUE_SPARSE_BINDING_BIT,
1204 .queue_count = 1,
1205 };
1206 assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1207
1208 unsigned st_idx = 0;
1209 pdev->syncobj_sync_type = syncobj_sync_type;
1210 pdev->sync_types[st_idx++] = &pdev->syncobj_sync_type;
1211 pdev->sync_types[st_idx++] = NULL;
1212 assert(st_idx <= ARRAY_SIZE(pdev->sync_types));
1213 pdev->vk.supported_sync_types = pdev->sync_types;
1214
1215 result = nvk_init_wsi(pdev);
1216 if (result != VK_SUCCESS)
1217 goto fail_disk_cache;
1218
1219 *pdev_out = &pdev->vk;
1220
1221 return VK_SUCCESS;
1222
1223 fail_disk_cache:
1224 nvk_physical_device_free_disk_cache(pdev);
1225 nak_compiler_destroy(pdev->nak);
1226 fail_init:
1227 vk_physical_device_finish(&pdev->vk);
1228 fail_master_fd:
1229 if (master_fd >= 0)
1230 close(master_fd);
1231 vk_free(&instance->vk.alloc, pdev);
1232 fail_ws_dev:
1233 nouveau_ws_device_destroy(ws_dev);
1234 return result;
1235 }
1236
void
nvk_physical_device_destroy(struct vk_physical_device *vk_pdev)
{
   struct nvk_physical_device *pdev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

   /* Tear down in reverse order of creation in
    * nvk_create_drm_physical_device() so each resource is still valid
    * while the ones created after it are released.
    */
   nvk_finish_wsi(pdev);
   nvk_physical_device_free_disk_cache(pdev);
   nak_compiler_destroy(pdev->nak);
   /* master_fd is only opened when KHR_display is enabled and a primary
    * node exists; -1 otherwise.
    */
   if (pdev->master_fd >= 0)
      close(pdev->master_fd);
   nouveau_ws_device_destroy(pdev->ws_dev);
   vk_physical_device_finish(&pdev->vk);
   vk_free(&pdev->vk.instance->alloc, pdev);
}
1252
1253 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1254 nvk_GetPhysicalDeviceMemoryProperties2(
1255 VkPhysicalDevice physicalDevice,
1256 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1257 {
1258 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1259
1260 pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1261 for (int i = 0; i < pdev->mem_heap_count; i++) {
1262 pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap) {
1263 .size = pdev->mem_heaps[i].size,
1264 .flags = pdev->mem_heaps[i].flags,
1265 };
1266 }
1267
1268 pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1269 for (int i = 0; i < pdev->mem_type_count; i++) {
1270 pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1271 }
1272
1273 vk_foreach_struct(ext, pMemoryProperties->pNext)
1274 {
1275 switch (ext->sType) {
1276 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1277 VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1278
1279 for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1280 const struct nvk_memory_heap *heap = &pdev->mem_heaps[i];
1281 uint64_t used = p_atomic_read(&heap->used);
1282
1283 /* From the Vulkan 1.3.278 spec:
1284 *
1285 * "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1286 * values in which memory usages are returned, with one element
1287 * for each memory heap. A heap’s usage is an estimate of how
1288 * much memory the process is currently using in that heap."
1289 *
1290 * TODO: Include internal allocations?
1291 */
1292 p->heapUsage[i] = used;
1293
1294 uint64_t available = heap->size;
1295 if (heap->available)
1296 available = heap->available(pdev);
1297
1298 /* From the Vulkan 1.3.278 spec:
1299 *
1300 * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1301 * values in which memory budgets are returned, with one
1302 * element for each memory heap. A heap’s budget is a rough
1303 * estimate of how much memory the process can allocate from
1304 * that heap before allocations may fail or cause performance
1305 * degradation. The budget includes any currently allocated
1306 * device memory."
1307 *
1308 * and
1309 *
1310 * "The heapBudget value must be less than or equal to
1311 * VkMemoryHeap::size for each heap."
1312 *
1313 * available (queried above) is the total amount free memory
1314 * system-wide and does not include our allocations so we need
1315 * to add that in.
1316 */
1317 uint64_t budget = MIN2(available + used, heap->size);
1318
1319 /* Set the budget at 90% of available to avoid thrashing */
1320 p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1321 }
1322
1323 /* From the Vulkan 1.3.278 spec:
1324 *
1325 * "The heapBudget and heapUsage values must be zero for array
1326 * elements greater than or equal to
1327 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1328 * heapBudget value must be non-zero for array elements less than
1329 * VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1330 */
1331 for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1332 p->heapBudget[i] = 0u;
1333 p->heapUsage[i] = 0u;
1334 }
1335 break;
1336 }
1337 default:
1338 nvk_debug_ignored_stype(ext->sType);
1339 break;
1340 }
1341 }
1342 }
1343
1344 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1345 nvk_GetPhysicalDeviceQueueFamilyProperties2(
1346 VkPhysicalDevice physicalDevice,
1347 uint32_t *pQueueFamilyPropertyCount,
1348 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1349 {
1350 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1351 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1352 pQueueFamilyPropertyCount);
1353
1354 for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1355 const struct nvk_queue_family *queue_family = &pdev->queue_families[i];
1356
1357 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1358 p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1359 p->queueFamilyProperties.queueCount = queue_family->queue_count;
1360 p->queueFamilyProperties.timestampValidBits = 64;
1361 p->queueFamilyProperties.minImageTransferGranularity =
1362 (VkExtent3D){1, 1, 1};
1363 }
1364 }
1365 }
1366
1367 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1368 nvk_GetPhysicalDeviceMultisamplePropertiesEXT(
1369 VkPhysicalDevice physicalDevice,
1370 VkSampleCountFlagBits samples,
1371 VkMultisamplePropertiesEXT *pMultisampleProperties)
1372 {
1373 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1374
1375 if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1376 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1377 } else {
1378 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1379 }
1380 }
1381