1 /*
2 * Copyright 2024 Valve Corporation
3 * Copyright 2024 Alyssa Rosenzweig
4 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5 * SPDX-License-Identifier: MIT
6 */
7 #include "hk_physical_device.h"
8
9 #include "asahi/compiler/agx_nir_texture.h"
10 #include "asahi/lib/agx_device.h"
11 #include "asahi/lib/agx_nir_lower_vbo.h"
12 #include "util/disk_cache.h"
13 #include "util/mesa-sha1.h"
14 #include "git_sha1.h"
15 #include "hk_buffer.h"
16 #include "hk_entrypoints.h"
17 #include "hk_image.h"
18 #include "hk_instance.h"
19 #include "hk_private.h"
20 #include "hk_shader.h"
21 #include "hk_wsi.h"
22
23 #include "util/simple_mtx.h"
24 #include "vulkan/vulkan_core.h"
25 #include "vulkan/wsi/wsi_common.h"
26 #include "unstable_asahi_drm.h"
27 #include "vk_drm_syncobj.h"
28 #include "vk_shader_module.h"
29
30 #include <fcntl.h>
31 #include <string.h>
32 #include <xf86drm.h>
33 #include <sys/stat.h>
34 #include <sys/sysmacros.h>
35
36 static uint32_t
hk_get_vk_version()37 hk_get_vk_version()
38 {
39 /* Version override takes priority */
40 const uint32_t version_override = vk_get_version_override();
41 if (version_override)
42 return version_override;
43
44 return VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION);
45 }
46
47 static void
hk_get_device_extensions(const struct hk_instance * instance,struct vk_device_extension_table * ext)48 hk_get_device_extensions(const struct hk_instance *instance,
49 struct vk_device_extension_table *ext)
50 {
51 *ext = (struct vk_device_extension_table){
52 .KHR_8bit_storage = true,
53 .KHR_16bit_storage = true,
54 .KHR_bind_memory2 = true,
55 .KHR_buffer_device_address = true,
56 .KHR_calibrated_timestamps = false,
57 .KHR_copy_commands2 = true,
58 .KHR_create_renderpass2 = true,
59 .KHR_dedicated_allocation = true,
60 .KHR_depth_stencil_resolve = true,
61 .KHR_descriptor_update_template = true,
62 .KHR_device_group = true,
63 .KHR_draw_indirect_count = true,
64 .KHR_driver_properties = true,
65 .KHR_dynamic_rendering = true,
66 .KHR_dynamic_rendering_local_read = true,
67 .KHR_external_fence = true,
68 .KHR_external_fence_fd = true,
69 .KHR_external_memory = true,
70 .KHR_external_memory_fd = true,
71 /* XXX: External timeline semaphores maybe broken in kernel, see
72 * dEQP-VK.synchronization.signal_order.shared_timeline_semaphore.write_copy_buffer_to_image_read_image_compute.image_128_r32_uint_opaque_fd
73 */
74 .KHR_external_semaphore = false,
75 .KHR_external_semaphore_fd = false,
76 .KHR_format_feature_flags2 = true,
77 .KHR_fragment_shader_barycentric = false,
78 .KHR_get_memory_requirements2 = true,
79 .KHR_global_priority = true,
80 .KHR_image_format_list = true,
81 .KHR_imageless_framebuffer = true,
82 #ifdef HK_USE_WSI_PLATFORM
83 .KHR_incremental_present = true,
84 #endif
85 .KHR_index_type_uint8 = true,
86 .KHR_line_rasterization = true,
87 .KHR_load_store_op_none = true,
88 .KHR_maintenance1 = true,
89 .KHR_maintenance2 = true,
90 .KHR_maintenance3 = true,
91 .KHR_maintenance4 = true,
92 .KHR_maintenance5 = true,
93 .KHR_maintenance6 = true,
94 .KHR_map_memory2 = true,
95 .KHR_multiview = true,
96 .KHR_pipeline_executable_properties = true,
97 .KHR_pipeline_library = true,
98 .KHR_push_descriptor = true,
99 .KHR_relaxed_block_layout = true,
100 .KHR_sampler_mirror_clamp_to_edge = true,
101 .KHR_sampler_ycbcr_conversion = true,
102 .KHR_separate_depth_stencil_layouts = true,
103 .KHR_shader_atomic_int64 = false,
104 .KHR_shader_clock = false,
105 .KHR_shader_draw_parameters = true,
106 .KHR_shader_expect_assume = true,
107 .KHR_shader_float_controls = true,
108 // TODO: wait for nvk
109 .KHR_shader_float_controls2 = true,
110 .KHR_shader_float16_int8 = true,
111 .KHR_shader_integer_dot_product = true,
112 .KHR_shader_maximal_reconvergence = true,
113 .KHR_shader_non_semantic_info = true,
114 .KHR_shader_relaxed_extended_instruction = true,
115 .KHR_shader_subgroup_extended_types = true,
116 .KHR_shader_subgroup_rotate = true,
117 .KHR_shader_subgroup_uniform_control_flow = true,
118 .KHR_shader_terminate_invocation = true,
119 .KHR_spirv_1_4 = true,
120 .KHR_storage_buffer_storage_class = true,
121 .KHR_timeline_semaphore = true,
122 #ifdef HK_USE_WSI_PLATFORM
123 .KHR_swapchain = true,
124 .KHR_swapchain_mutable_format = true,
125 #endif
126 .KHR_synchronization2 = true,
127 .KHR_uniform_buffer_standard_layout = true,
128 .KHR_variable_pointers = true,
129 .KHR_vertex_attribute_divisor = true,
130 .KHR_vulkan_memory_model = true,
131 .KHR_workgroup_memory_explicit_layout = true,
132 .KHR_zero_initialize_workgroup_memory = true,
133 .EXT_4444_formats = true,
134 .EXT_attachment_feedback_loop_layout = true,
135 .EXT_border_color_swizzle = true,
136 .EXT_buffer_device_address = true,
137 .EXT_calibrated_timestamps = false,
138 .EXT_conditional_rendering = false,
139 .EXT_color_write_enable = true,
140 .EXT_custom_border_color = true,
141 .EXT_depth_bias_control = true,
142 .EXT_depth_clip_control = false,
143 .EXT_depth_clip_enable = true,
144 .EXT_descriptor_indexing = true,
145 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
146 .EXT_display_control = false,
147 #endif
148 .EXT_dynamic_rendering_unused_attachments = true,
149 .EXT_extended_dynamic_state = true,
150 .EXT_extended_dynamic_state2 = true,
151 .EXT_extended_dynamic_state3 = true,
152 .EXT_external_memory_dma_buf = true,
153 .EXT_global_priority = true,
154 .EXT_global_priority_query = true,
155 .EXT_graphics_pipeline_library = true,
156 .EXT_host_query_reset = true,
157 .EXT_host_image_copy = true,
158 .EXT_image_2d_view_of_3d = true,
159 .EXT_image_drm_format_modifier = true,
160 .EXT_image_robustness = true,
161 .EXT_image_sliced_view_of_3d = false,
162 .EXT_image_view_min_lod = false,
163 .EXT_index_type_uint8 = true,
164 .EXT_inline_uniform_block = true,
165 .EXT_line_rasterization = true,
166 .EXT_load_store_op_none = true,
167 .EXT_map_memory_placed = false,
168 .EXT_memory_budget = false,
169 .EXT_multi_draw = true,
170 .EXT_mutable_descriptor_type = true,
171 .EXT_non_seamless_cube_map = true,
172 .EXT_pipeline_creation_cache_control = true,
173 .EXT_pipeline_creation_feedback = true,
174 .EXT_pipeline_protected_access = true,
175 .EXT_pipeline_robustness = true,
176 .EXT_physical_device_drm = true,
177 .EXT_primitive_topology_list_restart = true,
178 .EXT_private_data = true,
179 .EXT_primitives_generated_query = false,
180 .EXT_provoking_vertex = true,
181 .EXT_robustness2 = true,
182 .EXT_sample_locations = true,
183 .EXT_sampler_filter_minmax = false,
184 .EXT_scalar_block_layout = true,
185 .EXT_separate_stencil_usage = true,
186 .EXT_shader_image_atomic_int64 = false,
187 .EXT_shader_demote_to_helper_invocation = true,
188 .EXT_shader_module_identifier = true,
189 .EXT_shader_object = true,
190 .EXT_shader_replicated_composites = true,
191 .EXT_shader_stencil_export = true,
192 .EXT_shader_subgroup_ballot = true,
193 .EXT_shader_subgroup_vote = true,
194 .EXT_shader_viewport_index_layer = true,
195 .EXT_subgroup_size_control = true,
196 #ifdef HK_USE_WSI_PLATFORM
197 .EXT_swapchain_maintenance1 = true,
198 #endif
199 .EXT_texel_buffer_alignment = true,
200 .EXT_tooling_info = true,
201 .EXT_transform_feedback = true,
202 .EXT_vertex_attribute_divisor = true,
203 .EXT_vertex_input_dynamic_state = true,
204 .EXT_ycbcr_2plane_444_formats = false,
205 .EXT_ycbcr_image_arrays = false,
206 .GOOGLE_decorate_string = true,
207 .GOOGLE_hlsl_functionality1 = true,
208 .GOOGLE_user_type = true,
209 .VALVE_mutable_descriptor_type = true,
210 };
211 }
212
213 static void
hk_get_device_features(const struct vk_device_extension_table * supported_extensions,struct vk_features * features)214 hk_get_device_features(
215 const struct vk_device_extension_table *supported_extensions,
216 struct vk_features *features)
217 {
218 *features = (struct vk_features){
219 /* Vulkan 1.0 */
220 .robustBufferAccess = true,
221 .fullDrawIndexUint32 = true,
222 .imageCubeArray = true,
223 .independentBlend = true,
224 .geometryShader = true,
225 .tessellationShader = true,
226 .sampleRateShading = true,
227 .dualSrcBlend = true,
228 .logicOp = true,
229 .multiDrawIndirect = true,
230 .drawIndirectFirstInstance = true,
231 .depthClamp = true,
232 .depthBiasClamp = true,
233 .fillModeNonSolid = true,
234 .depthBounds = false,
235 .wideLines = true,
236 .largePoints = true,
237 .alphaToOne = true,
238 .multiViewport = true,
239 .samplerAnisotropy = true,
240 .textureCompressionETC2 = false,
241 .textureCompressionBC = true,
242 .textureCompressionASTC_LDR = false,
243 .occlusionQueryPrecise = true,
244 .pipelineStatisticsQuery = true,
245 .vertexPipelineStoresAndAtomics = true,
246 .fragmentStoresAndAtomics = true,
247 .shaderTessellationAndGeometryPointSize = true,
248 .shaderImageGatherExtended = true,
249 .shaderStorageImageExtendedFormats = true,
250 /* TODO: hitting the vertex shader timeout in CTS, but should work */
251 .shaderStorageImageMultisample = false,
252 .shaderStorageImageReadWithoutFormat = true,
253 .shaderStorageImageWriteWithoutFormat = true,
254 .shaderUniformBufferArrayDynamicIndexing = true,
255 .shaderSampledImageArrayDynamicIndexing = true,
256 .shaderStorageBufferArrayDynamicIndexing = true,
257 .shaderStorageImageArrayDynamicIndexing = true,
258 .shaderClipDistance = true,
259 .shaderCullDistance = true,
260 .shaderFloat64 = false,
261 .shaderInt64 = true,
262 .shaderInt16 = true,
263 .shaderResourceResidency = false,
264 .shaderResourceMinLod = true,
265 .sparseBinding = false,
266 .sparseResidency2Samples = false,
267 .sparseResidency4Samples = false,
268 .sparseResidency8Samples = false,
269 .sparseResidencyAliased = false,
270 .sparseResidencyBuffer = false,
271 .sparseResidencyImage2D = false,
272 .sparseResidencyImage3D = false,
273 .variableMultisampleRate = false,
274 .inheritedQueries = true,
275
276 /* Vulkan 1.1 */
277 .storageBuffer16BitAccess = true,
278 .uniformAndStorageBuffer16BitAccess = true,
279 .storagePushConstant16 = true,
280 .storageInputOutput16 = false,
281 .multiview = true,
282 .multiviewGeometryShader = false,
283 .multiviewTessellationShader = false,
284 .variablePointersStorageBuffer = true,
285 .variablePointers = true,
286 .shaderDrawParameters = true,
287 .samplerYcbcrConversion = true,
288
289 /* Vulkan 1.2 */
290 .samplerMirrorClampToEdge = true,
291 .drawIndirectCount = true,
292 .storageBuffer8BitAccess = true,
293 .uniformAndStorageBuffer8BitAccess = true,
294 .storagePushConstant8 = true,
295 .shaderBufferInt64Atomics = false,
296 .shaderSharedInt64Atomics = false,
297 .shaderFloat16 = true,
298 .shaderInt8 = true,
299 .descriptorIndexing = true,
300 .shaderInputAttachmentArrayDynamicIndexing = true,
301 .shaderUniformTexelBufferArrayDynamicIndexing = true,
302 .shaderStorageTexelBufferArrayDynamicIndexing = true,
303 .shaderUniformBufferArrayNonUniformIndexing = true,
304 .shaderSampledImageArrayNonUniformIndexing = true,
305 .shaderStorageBufferArrayNonUniformIndexing = true,
306 .shaderStorageImageArrayNonUniformIndexing = true,
307 .shaderInputAttachmentArrayNonUniformIndexing = true,
308 .shaderUniformTexelBufferArrayNonUniformIndexing = true,
309 .shaderStorageTexelBufferArrayNonUniformIndexing = true,
310 .descriptorBindingUniformBufferUpdateAfterBind = true,
311 .descriptorBindingSampledImageUpdateAfterBind = true,
312 .descriptorBindingStorageImageUpdateAfterBind = true,
313 .descriptorBindingStorageBufferUpdateAfterBind = true,
314 .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
315 .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
316 .descriptorBindingUpdateUnusedWhilePending = true,
317 .descriptorBindingPartiallyBound = true,
318 .descriptorBindingVariableDescriptorCount = true,
319 .runtimeDescriptorArray = true,
320 .samplerFilterMinmax = false,
321 .scalarBlockLayout = true,
322 .imagelessFramebuffer = true,
323 .uniformBufferStandardLayout = true,
324 .shaderSubgroupExtendedTypes = true,
325 .separateDepthStencilLayouts = true,
326 .hostQueryReset = true,
327 .timelineSemaphore = true,
328 .bufferDeviceAddress = true,
329 .bufferDeviceAddressCaptureReplay = false,
330 .bufferDeviceAddressMultiDevice = false,
331 .vulkanMemoryModel = true,
332 .vulkanMemoryModelDeviceScope = true,
333 .vulkanMemoryModelAvailabilityVisibilityChains = false,
334 .shaderOutputViewportIndex = true,
335 .shaderOutputLayer = true,
336 .subgroupBroadcastDynamicId = true,
337
338 /* Vulkan 1.3 */
339 .robustImageAccess = true,
340 .inlineUniformBlock = true,
341 .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
342 .pipelineCreationCacheControl = true,
343 .privateData = true,
344 .shaderDemoteToHelperInvocation = true,
345 .shaderTerminateInvocation = true,
346 .subgroupSizeControl = true,
347 .computeFullSubgroups = true,
348 .synchronization2 = true,
349 .shaderZeroInitializeWorkgroupMemory = true,
350 .dynamicRendering = true,
351 .shaderIntegerDotProduct = true,
352 .maintenance4 = true,
353
354 /* Vulkan 1.4 */
355 .pushDescriptor = true,
356
357 /* VK_KHR_dynamic_rendering_local_read */
358 .dynamicRenderingLocalRead = true,
359
360 /* VK_KHR_fragment_shader_barycentric */
361 .fragmentShaderBarycentric = false,
362
363 /* VK_KHR_global_priority */
364 .globalPriorityQuery = true,
365
366 /* VK_KHR_index_type_uint8 */
367 .indexTypeUint8 = true,
368
369 /* VK_KHR_line_rasterization */
370 .rectangularLines = false,
371 .bresenhamLines = true,
372 .smoothLines = false,
373 .stippledRectangularLines = false,
374 .stippledBresenhamLines = false,
375 .stippledSmoothLines = false,
376
377 /* VK_KHR_maintenance5 */
378 .maintenance5 = true,
379
380 /* VK_KHR_maintenance6 */
381 .maintenance6 = true,
382
383 /* VK_KHR_pipeline_executable_properties */
384 .pipelineExecutableInfo = true,
385
386 /* VK_KHR_present_id */
387 .presentId = false,
388
389 /* VK_KHR_present_wait */
390 .presentWait = false,
391
392 /* VK_KHR_shader_clock */
393 .shaderSubgroupClock = false,
394 .shaderDeviceClock = false,
395
396 /* VK_KHR_shader_expect_assume */
397 .shaderExpectAssume = true,
398
399 /* VK_KHR_shader_float_controls2 */
400 .shaderFloatControls2 = true,
401
402 /* VK_KHR_shader_maximal_reconvergence */
403 .shaderMaximalReconvergence = true,
404
405 /* VK_KHR_shader_subgroup_rotate */
406 .shaderSubgroupRotate = true,
407 .shaderSubgroupRotateClustered = true,
408
409 /* VK_KHR_vertex_attribute_divisor */
410 .vertexAttributeInstanceRateDivisor = true,
411 .vertexAttributeInstanceRateZeroDivisor = true,
412
413 /* VK_KHR_workgroup_memory_explicit_layout */
414 .workgroupMemoryExplicitLayout = true,
415 .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
416 .workgroupMemoryExplicitLayout8BitAccess = true,
417 .workgroupMemoryExplicitLayout16BitAccess = true,
418
419 /* VK_EXT_4444_formats */
420 .formatA4R4G4B4 = true,
421 .formatA4B4G4R4 = true,
422
423 /* VK_EXT_attachment_feedback_loop_layout */
424 .attachmentFeedbackLoopLayout = true,
425
426 /* VK_EXT_border_color_swizzle */
427 .borderColorSwizzle = true,
428 .borderColorSwizzleFromImage = false,
429
430 /* VK_EXT_buffer_device_address */
431 .bufferDeviceAddressCaptureReplayEXT = false,
432
433 /* VK_EXT_color_write_enable */
434 .colorWriteEnable = true,
435
436 /* VK_EXT_conditional_rendering */
437 .conditionalRendering = false,
438 .inheritedConditionalRendering = false,
439
440 /* VK_EXT_custom_border_color */
441 .customBorderColors = true,
442 .customBorderColorWithoutFormat = true,
443
444 /* VK_EXT_depth_bias_control */
445 .depthBiasControl = true,
446 .leastRepresentableValueForceUnormRepresentation = true,
447 .floatRepresentation = false,
448 .depthBiasExact = true,
449
450 /* VK_EXT_depth_clip_control */
451 .depthClipControl = false,
452
453 /* VK_EXT_depth_clip_enable */
454 .depthClipEnable = true,
455
456 /* VK_EXT_dynamic_rendering_unused_attachments */
457 .dynamicRenderingUnusedAttachments = true,
458
459 /* VK_EXT_extended_dynamic_state */
460 .extendedDynamicState = true,
461
462 /* VK_EXT_extended_dynamic_state2 */
463 .extendedDynamicState2 = true,
464 .extendedDynamicState2LogicOp = true,
465 .extendedDynamicState2PatchControlPoints = true,
466
467 /* VK_EXT_extended_dynamic_state3 */
468 .extendedDynamicState3TessellationDomainOrigin = true,
469 .extendedDynamicState3DepthClampEnable = true,
470 .extendedDynamicState3PolygonMode = true,
471 .extendedDynamicState3RasterizationSamples = true,
472 .extendedDynamicState3SampleMask = true,
473 .extendedDynamicState3AlphaToCoverageEnable = true,
474 .extendedDynamicState3AlphaToOneEnable = true,
475 .extendedDynamicState3LogicOpEnable = true,
476 .extendedDynamicState3ColorBlendEnable = true,
477 .extendedDynamicState3ColorBlendEquation = true,
478 .extendedDynamicState3ColorWriteMask = true,
479 .extendedDynamicState3RasterizationStream = false,
480 .extendedDynamicState3ConservativeRasterizationMode = false,
481 .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
482 .extendedDynamicState3DepthClipEnable = true,
483 .extendedDynamicState3SampleLocationsEnable = true,
484 .extendedDynamicState3ColorBlendAdvanced = false,
485 .extendedDynamicState3ProvokingVertexMode = true,
486 .extendedDynamicState3LineRasterizationMode = true,
487 .extendedDynamicState3LineStippleEnable = false,
488 .extendedDynamicState3DepthClipNegativeOneToOne = false,
489 .extendedDynamicState3ViewportWScalingEnable = false,
490 .extendedDynamicState3ViewportSwizzle = false,
491 .extendedDynamicState3CoverageToColorEnable = false,
492 .extendedDynamicState3CoverageToColorLocation = false,
493 .extendedDynamicState3CoverageModulationMode = false,
494 .extendedDynamicState3CoverageModulationTableEnable = false,
495 .extendedDynamicState3CoverageModulationTable = false,
496 .extendedDynamicState3CoverageReductionMode = false,
497 .extendedDynamicState3RepresentativeFragmentTestEnable = false,
498 .extendedDynamicState3ShadingRateImageEnable = false,
499
500 /* VK_EXT_graphics_pipeline_library */
501 .graphicsPipelineLibrary = true,
502
503 /* VK_EXT_host_image_copy */
504 .hostImageCopy = true,
505
506 /* VK_EXT_image_2d_view_of_3d */
507 .image2DViewOf3D = true,
508 .sampler2DViewOf3D = true,
509
510 /* VK_EXT_image_sliced_view_of_3d */
511 .imageSlicedViewOf3D = false,
512
513 #ifdef HK_USE_WSI_PLATFORM
514 /* VK_EXT_swapchain_maintenance1 */
515 .swapchainMaintenance1 = true,
516 #endif
517
518 /* VK_EXT_image_view_min_lod */
519 .minLod = false,
520
521 /* VK_EXT_map_memory_placed */
522 .memoryMapPlaced = false,
523 .memoryMapRangePlaced = false,
524 .memoryUnmapReserve = false,
525
526 /* VK_EXT_multi_draw */
527 .multiDraw = true,
528
529 /* VK_EXT_mutable_descriptor_type */
530 .mutableDescriptorType = true,
531
532 /* VK_EXT_non_seamless_cube_map */
533 .nonSeamlessCubeMap = true,
534
535 /* VK_EXT_pipeline_protected_access */
536 .pipelineProtectedAccess = true,
537
538 /* VK_EXT_pipeline_robustness */
539 .pipelineRobustness = true,
540
541 /* VK_EXT_primitive_topology_list_restart */
542 .primitiveTopologyListRestart = true,
543 .primitiveTopologyPatchListRestart = false,
544
545 /* VK_EXT_primitives_generated_query */
546 .primitivesGeneratedQuery = false,
547 .primitivesGeneratedQueryWithNonZeroStreams = false,
548 .primitivesGeneratedQueryWithRasterizerDiscard = false,
549
550 /* VK_EXT_provoking_vertex */
551 .provokingVertexLast = true,
552 .transformFeedbackPreservesProvokingVertex = true,
553
554 /* VK_EXT_robustness2 */
555 .robustBufferAccess2 = true,
556 .robustImageAccess2 = true,
557 .nullDescriptor = true,
558
559 /* VK_EXT_shader_image_atomic_int64 */
560 .shaderImageInt64Atomics = false,
561 .sparseImageInt64Atomics = false,
562
563 /* VK_EXT_shader_module_identifier */
564 .shaderModuleIdentifier = true,
565
566 /* VK_EXT_shader_object */
567 .shaderObject = true,
568
569 /* VK_EXT_shader_replicated_composites */
570 .shaderReplicatedComposites = true,
571
572 /* VK_KHR_shader_subgroup_uniform_control_flow */
573 .shaderSubgroupUniformControlFlow = true,
574
575 /* VK_EXT_texel_buffer_alignment */
576 .texelBufferAlignment = true,
577
578 /* VK_EXT_transform_feedback */
579 .transformFeedback = true,
580 .geometryStreams = true,
581
582 /* VK_EXT_vertex_input_dynamic_state */
583 .vertexInputDynamicState = true,
584
585 /* VK_EXT_ycbcr_2plane_444_formats */
586 .ycbcr2plane444Formats = false,
587
588 /* VK_EXT_ycbcr_image_arrays */
589 .ycbcrImageArrays = false,
590
591 /* VK_KHR_shader_relaxed_extended_instruction */
592 .shaderRelaxedExtendedInstruction = true,
593 };
594 }
595
596 static void
hk_get_device_properties(const struct agx_device * dev,const struct hk_instance * instance,struct vk_properties * properties)597 hk_get_device_properties(const struct agx_device *dev,
598 const struct hk_instance *instance,
599 struct vk_properties *properties)
600 {
601 const VkSampleCountFlagBits sample_counts =
602 VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
603
604 uint64_t os_page_size = 16384;
605 os_get_page_size(&os_page_size);
606
607 *properties = (struct vk_properties){
608 .apiVersion = hk_get_vk_version(),
609 .driverVersion = vk_get_driver_version(),
610 .vendorID = instance->force_vk_vendor ?: VK_VENDOR_ID_MESA,
611 .deviceID = 0,
612 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
613
614 /* Vulkan 1.0 limits */
615 .maxImageDimension1D = 16384,
616 .maxImageDimension2D = 16384,
617 .maxImageDimension3D = 16384,
618 .maxImageDimensionCube = 16384,
619 .maxImageArrayLayers = 2048,
620 .maxTexelBufferElements = AGX_TEXTURE_BUFFER_MAX_SIZE,
621 .maxUniformBufferRange = 65536,
622
623 /* From a hardware perspective, storage buffers are lowered to global
624 * address arithmetic so there is no hard limit. However, making efficient
625 * use of the hardware addressing modes depends on no signed wrapping in
626 * any `amul` operations, which are themselves bounded by
627 * maxStorageBufferRange. Therefore, limit storage buffers to INT32_MAX
628 * bytes instead of UINT32_MAX. This is believed to be acceptable for
629 * Direct3D.
630 */
631 .maxStorageBufferRange = INT32_MAX,
632 .maxPushConstantsSize = HK_MAX_PUSH_SIZE,
633 .maxMemoryAllocationCount = 4096,
634 .maxSamplerAllocationCount = 4000,
635 .bufferImageGranularity = 0x400,
636 .sparseAddressSpaceSize = HK_SPARSE_ADDR_SPACE_SIZE,
637 .maxBoundDescriptorSets = HK_MAX_SETS,
638 .maxPerStageDescriptorSamplers = HK_MAX_DESCRIPTORS,
639 .maxPerStageDescriptorUniformBuffers = HK_MAX_DESCRIPTORS,
640 .maxPerStageDescriptorStorageBuffers = HK_MAX_DESCRIPTORS,
641 .maxPerStageDescriptorSampledImages = HK_MAX_DESCRIPTORS,
642 .maxPerStageDescriptorStorageImages = HK_MAX_DESCRIPTORS,
643 .maxPerStageDescriptorInputAttachments = HK_MAX_DESCRIPTORS,
644 .maxPerStageResources = UINT32_MAX,
645 .maxDescriptorSetSamplers = HK_MAX_DESCRIPTORS,
646 .maxDescriptorSetUniformBuffers = HK_MAX_DESCRIPTORS,
647 .maxDescriptorSetUniformBuffersDynamic = HK_MAX_DYNAMIC_BUFFERS / 2,
648 .maxDescriptorSetStorageBuffers = HK_MAX_DESCRIPTORS,
649 .maxDescriptorSetStorageBuffersDynamic = HK_MAX_DYNAMIC_BUFFERS / 2,
650 .maxDescriptorSetSampledImages = HK_MAX_DESCRIPTORS,
651 .maxDescriptorSetStorageImages = HK_MAX_DESCRIPTORS,
652 .maxDescriptorSetInputAttachments = HK_MAX_DESCRIPTORS,
653 .maxVertexInputAttributes = AGX_MAX_VBUFS,
654 .maxVertexInputBindings = AGX_MAX_ATTRIBS,
655 .maxVertexInputAttributeOffset = 65535,
656 .maxVertexInputBindingStride = 2048,
657
658 /* Hardware limit is 128 but we need to reserve some for internal purposes
659 * (like cull distance emulation). Set 96 to be safe.
660 */
661 .maxVertexOutputComponents = 96,
662 .maxGeometryShaderInvocations = 32,
663 .maxGeometryInputComponents = 128,
664 .maxGeometryOutputComponents = 128,
665 .maxGeometryOutputVertices = 1024,
666 .maxGeometryTotalOutputComponents = 1024,
667 .maxTessellationGenerationLevel = 64,
668 .maxTessellationPatchSize = 32,
669 .maxTessellationControlPerVertexInputComponents = 128,
670 .maxTessellationControlPerVertexOutputComponents = 128,
671 .maxTessellationControlPerPatchOutputComponents = 120,
672 .maxTessellationControlTotalOutputComponents = 4216,
673 .maxTessellationEvaluationInputComponents = 128,
674 .maxTessellationEvaluationOutputComponents = 128,
675
676 /* Set to match maxVertexOutputComponents, hardware limit is higher. */
677 .maxFragmentInputComponents = 96,
678 .maxFragmentOutputAttachments = HK_MAX_RTS,
679 .maxFragmentDualSrcAttachments = 1,
680 .maxFragmentCombinedOutputResources = 16,
681 .maxComputeSharedMemorySize = HK_MAX_SHARED_SIZE,
682 .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
683 .maxComputeWorkGroupInvocations = 1024,
684 .maxComputeWorkGroupSize = {1024, 1024, 64},
685 .subPixelPrecisionBits = 8,
686 .subTexelPrecisionBits = 8,
687 .mipmapPrecisionBits = 8,
688 .maxDrawIndexedIndexValue = UINT32_MAX,
689 .maxDrawIndirectCount = UINT16_MAX,
690 .maxSamplerLodBias = 15,
691 .maxSamplerAnisotropy = 16,
692 .maxViewports = HK_MAX_VIEWPORTS,
693 .maxViewportDimensions = {32768, 32768},
694 .viewportBoundsRange = {-65536, 65536},
695 .viewportSubPixelBits = 8,
696 .minMemoryMapAlignment = os_page_size,
697 .minTexelBufferOffsetAlignment = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
698 .minUniformBufferOffsetAlignment = HK_MIN_UBO_ALIGNMENT,
699 .minStorageBufferOffsetAlignment = HK_MIN_SSBO_ALIGNMENT,
700 .minTexelOffset = -8,
701 .maxTexelOffset = 7,
702 .minTexelGatherOffset = -8,
703 .maxTexelGatherOffset = 7,
704 .minInterpolationOffset = -0.5,
705 .maxInterpolationOffset = 0.4375,
706 .subPixelInterpolationOffsetBits = 4,
707 .maxFramebufferHeight = 16384,
708 .maxFramebufferWidth = 16384,
709 .maxFramebufferLayers = 2048,
710 .framebufferColorSampleCounts = sample_counts,
711 .framebufferDepthSampleCounts = sample_counts,
712 .framebufferNoAttachmentsSampleCounts = sample_counts,
713 .framebufferStencilSampleCounts = sample_counts,
714 .maxColorAttachments = HK_MAX_RTS,
715 .sampledImageColorSampleCounts = sample_counts,
716 .sampledImageIntegerSampleCounts = sample_counts,
717 .sampledImageDepthSampleCounts = sample_counts,
718 .sampledImageStencilSampleCounts = sample_counts,
719 .storageImageSampleCounts = sample_counts,
720 .maxSampleMaskWords = 1,
721 .timestampComputeAndGraphics = agx_supports_timestamps(dev),
722 /* FIXME: Is timestamp period actually 1? */
723 .timestampPeriod = 1.0f,
724 .maxClipDistances = 8,
725 .maxCullDistances = 8,
726 .maxCombinedClipAndCullDistances = 8,
727 .discreteQueuePriorities = 2,
728 .pointSizeRange = {1.0, 512.f - 0.0625f},
729 .lineWidthRange = {1.0, 16.0f},
730 .pointSizeGranularity = 0.0625,
731 .lineWidthGranularity = 1.0f / 16.0f,
732 .strictLines = false,
733 .standardSampleLocations = true,
734 .optimalBufferCopyOffsetAlignment = 1,
735 .optimalBufferCopyRowPitchAlignment = 1,
736 .nonCoherentAtomSize = 64,
737
738 /* Vulkan 1.0 sparse properties */
739 .sparseResidencyNonResidentStrict = false,
740 .sparseResidencyAlignedMipSize = false,
741 .sparseResidencyStandard2DBlockShape = false,
742 .sparseResidencyStandard2DMultisampleBlockShape = false,
743 .sparseResidencyStandard3DBlockShape = false,
744
745 /* Vulkan 1.1 properties */
746 .subgroupSize = 32,
747 .subgroupSupportedStages =
748 VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_ALL_GRAPHICS,
749 .subgroupSupportedOperations =
750 VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
751 VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
752 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
753 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
754 VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
755 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
756 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
757 VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR,
758 .subgroupQuadOperationsInAllStages = true,
759 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
760 .maxMultiviewViewCount = HK_MAX_MULTIVIEW_VIEW_COUNT,
761 .maxMultiviewInstanceIndex = UINT32_MAX,
762 .maxPerSetDescriptors = UINT32_MAX,
763 .maxMemoryAllocationSize = (1ull << 37),
764
765 /* Vulkan 1.2 properties */
766 .supportedDepthResolveModes =
767 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
768 VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
769 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
770 VK_RESOLVE_MODE_MIN_BIT |
771 VK_RESOLVE_MODE_MAX_BIT,
772 .independentResolveNone = true,
773 .independentResolve = true,
774 .driverID = VK_DRIVER_ID_MESA_HONEYKRISP,
775 .conformanceVersion = (VkConformanceVersion){1, 4, 0, 0},
776 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
777 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
778 .shaderSignedZeroInfNanPreserveFloat16 = true,
779 .shaderSignedZeroInfNanPreserveFloat32 = true,
780 .shaderSignedZeroInfNanPreserveFloat64 = false,
781 .shaderDenormPreserveFloat16 = true,
782 .shaderDenormPreserveFloat32 = false,
783 .shaderDenormPreserveFloat64 = false,
784 .shaderDenormFlushToZeroFloat16 = false,
785 .shaderDenormFlushToZeroFloat32 = true,
786 .shaderDenormFlushToZeroFloat64 = false,
787 .shaderRoundingModeRTEFloat16 = true,
788 .shaderRoundingModeRTEFloat32 = true,
789 .shaderRoundingModeRTEFloat64 = false,
790 .shaderRoundingModeRTZFloat16 = false,
791 .shaderRoundingModeRTZFloat32 = false,
792 .shaderRoundingModeRTZFloat64 = false,
793 .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
794 .shaderUniformBufferArrayNonUniformIndexingNative = true,
795 .shaderSampledImageArrayNonUniformIndexingNative = true,
796 .shaderStorageBufferArrayNonUniformIndexingNative = true,
797 .shaderStorageImageArrayNonUniformIndexingNative = true,
798 .shaderInputAttachmentArrayNonUniformIndexingNative = true,
799 .robustBufferAccessUpdateAfterBind = true,
800 .quadDivergentImplicitLod = false,
801 .maxPerStageDescriptorUpdateAfterBindSamplers = HK_MAX_DESCRIPTORS,
802 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = HK_MAX_DESCRIPTORS,
803 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = HK_MAX_DESCRIPTORS,
804 .maxPerStageDescriptorUpdateAfterBindSampledImages = HK_MAX_DESCRIPTORS,
805 .maxPerStageDescriptorUpdateAfterBindStorageImages = HK_MAX_DESCRIPTORS,
806 .maxPerStageDescriptorUpdateAfterBindInputAttachments =
807 HK_MAX_DESCRIPTORS,
808 .maxPerStageUpdateAfterBindResources = UINT32_MAX,
809 .maxDescriptorSetUpdateAfterBindSamplers = HK_MAX_DESCRIPTORS,
810 .maxDescriptorSetUpdateAfterBindUniformBuffers = HK_MAX_DESCRIPTORS,
811 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic =
812 HK_MAX_DYNAMIC_BUFFERS / 2,
813 .maxDescriptorSetUpdateAfterBindStorageBuffers = HK_MAX_DESCRIPTORS,
814 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic =
815 HK_MAX_DYNAMIC_BUFFERS / 2,
816 .maxDescriptorSetUpdateAfterBindSampledImages = HK_MAX_DESCRIPTORS,
817 .maxDescriptorSetUpdateAfterBindStorageImages = HK_MAX_DESCRIPTORS,
818 .maxDescriptorSetUpdateAfterBindInputAttachments = HK_MAX_DESCRIPTORS,
819 .filterMinmaxSingleComponentFormats = false,
820 .filterMinmaxImageComponentMapping = false,
821 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
822 .framebufferIntegerColorSampleCounts = sample_counts,
823
824 /* Vulkan 1.3 properties */
825 .minSubgroupSize = 32,
826 .maxSubgroupSize = 32,
827 .maxComputeWorkgroupSubgroups = 1024 / 32,
828 .requiredSubgroupSizeStages = 0,
829 .maxInlineUniformBlockSize = 1 << 16,
830 .maxPerStageDescriptorInlineUniformBlocks = 32,
831 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
832 .maxDescriptorSetInlineUniformBlocks = 6 * 32,
833 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
834 .maxInlineUniformTotalSize = 1 << 16,
835 .integerDotProduct4x8BitPackedUnsignedAccelerated = false,
836 .integerDotProduct4x8BitPackedSignedAccelerated = false,
837 .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
838 .storageTexelBufferOffsetAlignmentBytes = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
839 .storageTexelBufferOffsetSingleTexelAlignment = true,
840 .uniformTexelBufferOffsetAlignmentBytes = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
841 .uniformTexelBufferOffsetSingleTexelAlignment = true,
842 .maxBufferSize = HK_MAX_BUFFER_SIZE,
843
844 /* Vulkan 1.4 properties */
845 .dynamicRenderingLocalReadDepthStencilAttachments = false,
846 .dynamicRenderingLocalReadMultisampledAttachments = true,
847
848 /* VK_KHR_push_descriptor */
849 .maxPushDescriptors = HK_MAX_PUSH_DESCRIPTORS,
850
851 /* VK_EXT_custom_border_color */
852 .maxCustomBorderColorSamplers = 4000,
853
854 /* VK_EXT_extended_dynamic_state3 */
855 .dynamicPrimitiveTopologyUnrestricted = true,
856
857 /* VK_EXT_graphics_pipeline_library */
858 .graphicsPipelineLibraryFastLinking = true,
859 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
860
861 /* VK_EXT_host_image_copy */
862
863 /* VK_KHR_line_rasterization */
864 .lineSubPixelPrecisionBits = 8,
865
866 /* VK_KHR_maintenance5 */
867 .earlyFragmentMultisampleCoverageAfterSampleCounting = false,
868 .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
869 .depthStencilSwizzleOneSupport = true,
870 .polygonModePointSize = false,
871 .nonStrictSinglePixelWideLinesUseParallelogram = false,
872 .nonStrictWideLinesUseParallelogram = false,
873
874 /* VK_KHR_maintenance6 */
875 .blockTexelViewCompatibleMultipleLayers = false,
876 .maxCombinedImageSamplerDescriptorCount = 3,
877 .fragmentShadingRateClampCombinerInputs = false,
878
879 /* VK_EXT_map_memory_placed */
880 .minPlacedMemoryMapAlignment = os_page_size,
881
882 /* VK_EXT_multi_draw */
883 .maxMultiDrawCount = UINT16_MAX,
884
885 /* VK_EXT_pipeline_robustness */
886 .defaultRobustnessStorageBuffers =
887 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
888 .defaultRobustnessUniformBuffers =
889 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
890 .defaultRobustnessVertexInputs =
891 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
892 .defaultRobustnessImages =
893 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
894
895 /* VK_EXT_physical_device_drm gets populated later */
896
897 /* VK_EXT_provoking_vertex */
898 .provokingVertexModePerPipeline = true,
899 .transformFeedbackPreservesTriangleFanProvokingVertex = true,
900
901 /* VK_EXT_robustness2 */
902 .robustStorageBufferAccessSizeAlignment = HK_SSBO_BOUNDS_CHECK_ALIGNMENT,
903 .robustUniformBufferAccessSizeAlignment = HK_MIN_UBO_ALIGNMENT,
904
905 /* VK_EXT_sample_locations */
906 .sampleLocationSampleCounts = sample_counts,
907 .maxSampleLocationGridSize = (VkExtent2D){1, 1},
908 .sampleLocationCoordinateRange[0] = 0.0f,
909 .sampleLocationCoordinateRange[1] = 0.9375f,
910 .sampleLocationSubPixelBits = 4,
911 .variableSampleLocations = false,
912
913 /* VK_EXT_shader_object */
914 .shaderBinaryVersion = 0,
915
916 /* VK_EXT_transform_feedback */
917 .maxTransformFeedbackStreams = 4,
918 .maxTransformFeedbackBuffers = 4,
919 .maxTransformFeedbackBufferSize = UINT32_MAX,
920 .maxTransformFeedbackStreamDataSize = 2048,
921 .maxTransformFeedbackBufferDataSize = 512,
922 .maxTransformFeedbackBufferDataStride = 2048,
923 .transformFeedbackQueries = true,
924 .transformFeedbackStreamsLinesTriangles = false,
925 .transformFeedbackRasterizationStreamSelect = false,
926 .transformFeedbackDraw = false,
927
928 /* VK_KHR_vertex_attribute_divisor */
929 .maxVertexAttribDivisor = UINT32_MAX,
930 .supportsNonZeroFirstInstance = true,
931
932 /* VK_KHR_fragment_shader_barycentric */
933 .triStripVertexOrderIndependentOfProvokingVertex = false,
934 };
935
936 strncpy(properties->deviceName, dev->name, sizeof(properties->deviceName));
937
938 /* VK_EXT_shader_module_identifier */
939 static_assert(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
940 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
941 memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
942 vk_shaderModuleIdentifierAlgorithmUUID,
943 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
944
945 uint8_t dev_uuid[VK_UUID_SIZE];
946 agx_get_device_uuid(dev, &dev_uuid);
947 static_assert(sizeof(dev_uuid) == VK_UUID_SIZE);
948 memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
949 static_assert(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
950 memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
951
952 strncpy(properties->driverName, "Honeykrisp", VK_MAX_DRIVER_NAME_SIZE);
953 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
954 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
955
956 /* We don't use the layouts ATM so just report all layouts from
957 * extensions that we support as compatible.
958 */
959 static const VkImageLayout supported_layouts[] = {
960 VK_IMAGE_LAYOUT_GENERAL, /* required by spec */
961 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
962 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
963 VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
964 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
965 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
966 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
967 VK_IMAGE_LAYOUT_PREINITIALIZED,
968 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
969 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
970 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
971 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
972 VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
973 VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
974 VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
975 VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
976 // VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
977 VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
978 };
979
980 properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
981 properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
982 properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
983 properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
984
985 /* We're a UMR so we can always map every kind of memory */
986 properties->identicalMemoryTypeRequirements = true;
987
988 {
989 struct mesa_sha1 sha1_ctx;
990 uint8_t sha1[20];
991
992 _mesa_sha1_init(&sha1_ctx);
993 /* Make sure we don't match with other vendors */
994 const char *driver = "honeykrisp-v1";
995 _mesa_sha1_update(&sha1_ctx, driver, strlen(driver));
996 _mesa_sha1_final(&sha1_ctx, sha1);
997
998 memcpy(properties->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
999 }
1000 }
1001
1002 static void
hk_physical_device_init_pipeline_cache(struct hk_physical_device * pdev)1003 hk_physical_device_init_pipeline_cache(struct hk_physical_device *pdev)
1004 {
1005 struct hk_instance *instance = hk_physical_device_instance(pdev);
1006
1007 struct mesa_sha1 sha_ctx;
1008 _mesa_sha1_init(&sha_ctx);
1009
1010 _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
1011 sizeof(instance->driver_build_sha));
1012
1013 const uint64_t compiler_flags = hk_physical_device_compiler_flags(pdev);
1014 _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1015
1016 unsigned char sha[SHA1_DIGEST_LENGTH];
1017 _mesa_sha1_final(&sha_ctx, sha);
1018
1019 static_assert(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1020 memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1021 memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1022
1023 #ifdef ENABLE_SHADER_CACHE
1024 char renderer[10];
1025 ASSERTED int len =
1026 snprintf(renderer, sizeof(renderer), "HK_G%u%c_",
1027 pdev->dev.params.gpu_generation, pdev->dev.params.gpu_variant);
1028
1029 assert(len == sizeof(renderer) - 2);
1030
1031 char timestamp[41];
1032 _mesa_sha1_format(timestamp, instance->driver_build_sha);
1033
1034 const uint64_t driver_flags = hk_physical_device_compiler_flags(pdev);
1035 pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1036 #endif
1037 }
1038
1039 static void
hk_physical_device_free_disk_cache(struct hk_physical_device * pdev)1040 hk_physical_device_free_disk_cache(struct hk_physical_device *pdev)
1041 {
1042 #ifdef ENABLE_SHADER_CACHE
1043 if (pdev->vk.disk_cache) {
1044 disk_cache_destroy(pdev->vk.disk_cache);
1045 pdev->vk.disk_cache = NULL;
1046 }
1047 #else
1048 assert(pdev->vk.disk_cache == NULL);
1049 #endif
1050 }
1051
/* Use 1/2 of total size to avoid swapping.
 *
 * The argument is parenthesized so that a compound expression such as
 * SYSMEM_HEAP_FRACTION(a + b) is halved as a whole.
 */
#define SYSMEM_HEAP_FRACTION(x) ((x) * 1 / 2)
1054
1055 static uint64_t
hk_get_sysmem_heap_size(struct hk_physical_device * pdev)1056 hk_get_sysmem_heap_size(struct hk_physical_device *pdev)
1057 {
1058 if (pdev->sysmem)
1059 return pdev->sysmem;
1060
1061 uint64_t sysmem_size_B = 0;
1062 if (!os_get_total_physical_memory(&sysmem_size_B))
1063 return 0;
1064
1065 return ROUND_DOWN_TO(SYSMEM_HEAP_FRACTION(sysmem_size_B), 1 << 20);
1066 }
1067
1068 static uint64_t
hk_get_sysmem_heap_available(struct hk_physical_device * pdev)1069 hk_get_sysmem_heap_available(struct hk_physical_device *pdev)
1070 {
1071 if (pdev->sysmem) {
1072 uint64_t total_used = 0;
1073 for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1074 const struct hk_memory_heap *heap = &pdev->mem_heaps[i];
1075 uint64_t used = p_atomic_read(&heap->used);
1076 total_used += used;
1077 }
1078 return pdev->sysmem - total_used;
1079 }
1080
1081 uint64_t sysmem_size_B = 0;
1082 if (!os_get_available_system_memory(&sysmem_size_B)) {
1083 vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
1084 return 0;
1085 }
1086
1087 return ROUND_DOWN_TO(SYSMEM_HEAP_FRACTION(sysmem_size_B), 1 << 20);
1088 }
1089
1090 VkResult
hk_create_drm_physical_device(struct vk_instance * _instance,drmDevicePtr drm_device,struct vk_physical_device ** pdev_out)1091 hk_create_drm_physical_device(struct vk_instance *_instance,
1092 drmDevicePtr drm_device,
1093 struct vk_physical_device **pdev_out)
1094 {
1095 struct hk_instance *instance = (struct hk_instance *)_instance;
1096 VkResult result;
1097
1098 /* Blanket refusal to probe due to unstable UAPI. */
1099 return VK_ERROR_INCOMPATIBLE_DRIVER;
1100
1101 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
1102 drm_device->bustype != DRM_BUS_PLATFORM)
1103 return VK_ERROR_INCOMPATIBLE_DRIVER;
1104
1105 const char *path = drm_device->nodes[DRM_NODE_RENDER];
1106 int fd = open(path, O_RDWR | O_CLOEXEC);
1107 if (fd < 0) {
1108 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1109 "failed to open device %s", path);
1110 }
1111
1112 drmVersionPtr version = drmGetVersion(fd);
1113 if (!version) {
1114 result =
1115 vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1116 "failed to query kernel driver version for device %s", path);
1117 goto fail_fd;
1118 }
1119
1120 bool is_asahi = (strcmp(version->name, "asahi") == 0);
1121 is_asahi |= strcmp(version->name, "virtio_gpu") == 0;
1122 drmFreeVersion(version);
1123
1124 if (!is_asahi) {
1125 /* Fail silently */
1126 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1127 goto fail_fd;
1128 }
1129
1130 struct stat st;
1131 if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
1132 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1133 "fstat() failed on %s: %m",
1134 drm_device->nodes[DRM_NODE_RENDER]);
1135 goto fail_fd;
1136 }
1137 const dev_t render_dev = st.st_rdev;
1138
1139 struct hk_physical_device *pdev =
1140 vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8,
1141 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1142
1143 if (pdev == NULL) {
1144 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1145 goto fail_fd;
1146 }
1147
1148 /* We're render-only */
1149 pdev->master_fd = -1;
1150 pdev->render_dev = render_dev;
1151 pdev->dev.fd = fd;
1152
1153 if (!agx_open_device(NULL, &pdev->dev)) {
1154 /* Fail silently, for virtgpu */
1155 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1156 goto fail_pdev_alloc;
1157 }
1158
1159 struct vk_physical_device_dispatch_table dispatch_table;
1160 vk_physical_device_dispatch_table_from_entrypoints(
1161 &dispatch_table, &hk_physical_device_entrypoints, true);
1162 vk_physical_device_dispatch_table_from_entrypoints(
1163 &dispatch_table, &wsi_physical_device_entrypoints, false);
1164
1165 struct vk_device_extension_table supported_extensions;
1166 hk_get_device_extensions(instance, &supported_extensions);
1167
1168 struct vk_features supported_features;
1169 hk_get_device_features(&supported_extensions, &supported_features);
1170
1171 struct vk_properties properties;
1172 hk_get_device_properties(&pdev->dev, instance, &properties);
1173
1174 properties.drmHasRender = true;
1175 properties.drmRenderMajor = major(render_dev);
1176 properties.drmRenderMinor = minor(render_dev);
1177
1178 result = vk_physical_device_init(&pdev->vk, &instance->vk,
1179 &supported_extensions, &supported_features,
1180 &properties, &dispatch_table);
1181 if (result != VK_SUCCESS)
1182 goto fail_agx_device;
1183
1184 hk_physical_device_init_pipeline_cache(pdev);
1185
1186 const char *hk_sysmem = getenv("HK_SYSMEM");
1187 if (hk_sysmem) {
1188 uint64_t sysmem = strtoll(hk_sysmem, NULL, 10);
1189 if (sysmem != LLONG_MIN && sysmem != LLONG_MAX) {
1190 pdev->sysmem = sysmem;
1191 }
1192 }
1193
1194 uint64_t sysmem_size_B = hk_get_sysmem_heap_size(pdev);
1195 if (sysmem_size_B == 0) {
1196 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1197 "Failed to query total system memory");
1198 goto fail_disk_cache;
1199 }
1200
1201 uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1202 pdev->mem_heaps[sysmem_heap_idx] = (struct hk_memory_heap){
1203 .size = sysmem_size_B,
1204 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1205 .available = hk_get_sysmem_heap_available,
1206 };
1207
1208 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType){
1209 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1210 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1211 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
1212 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1213 .heapIndex = sysmem_heap_idx,
1214 };
1215
1216 assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1217 assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1218
1219 /* TODO: VK_QUEUE_SPARSE_BINDING_BIT*/
1220 pdev->queue_families[pdev->queue_family_count++] = (struct hk_queue_family){
1221 .queue_flags =
1222 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1223
1224 .queue_count = 1,
1225 };
1226 assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1227
1228 unsigned st_idx = 0;
1229 pdev->syncobj_sync_type = vk_drm_syncobj_get_type(fd);
1230 pdev->sync_types[st_idx++] = &pdev->syncobj_sync_type;
1231 pdev->sync_types[st_idx++] = NULL;
1232 assert(st_idx <= ARRAY_SIZE(pdev->sync_types));
1233 pdev->vk.supported_sync_types = pdev->sync_types;
1234
1235 result = hk_init_wsi(pdev);
1236 if (result != VK_SUCCESS)
1237 goto fail_disk_cache;
1238
1239 simple_mtx_init(&pdev->debug_compile_lock, mtx_plain);
1240 *pdev_out = &pdev->vk;
1241
1242 return VK_SUCCESS;
1243
1244 fail_disk_cache:
1245 hk_physical_device_free_disk_cache(pdev);
1246 vk_physical_device_finish(&pdev->vk);
1247 fail_agx_device:
1248 agx_close_device(&pdev->dev);
1249 fail_pdev_alloc:
1250 if (pdev->master_fd)
1251 close(pdev->master_fd);
1252
1253 vk_free(&pdev->vk.instance->alloc, pdev);
1254 fail_fd:
1255 close(fd);
1256 return result;
1257 }
1258
1259 void
hk_physical_device_destroy(struct vk_physical_device * vk_pdev)1260 hk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1261 {
1262 struct hk_physical_device *pdev =
1263 container_of(vk_pdev, struct hk_physical_device, vk);
1264
1265 hk_finish_wsi(pdev);
1266
1267 if (pdev->master_fd >= 0)
1268 close(pdev->master_fd);
1269
1270 simple_mtx_destroy(&pdev->debug_compile_lock);
1271 hk_physical_device_free_disk_cache(pdev);
1272 agx_close_device(&pdev->dev);
1273 vk_physical_device_finish(&pdev->vk);
1274 vk_free(&pdev->vk.instance->alloc, pdev);
1275 }
1276
1277 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1278 hk_GetPhysicalDeviceMemoryProperties2(
1279 VkPhysicalDevice physicalDevice,
1280 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1281 {
1282 VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1283
1284 pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1285 for (int i = 0; i < pdev->mem_heap_count; i++) {
1286 pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap){
1287 .size = pdev->mem_heaps[i].size,
1288 .flags = pdev->mem_heaps[i].flags,
1289 };
1290 }
1291
1292 pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1293 for (int i = 0; i < pdev->mem_type_count; i++) {
1294 pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1295 }
1296
1297 vk_foreach_struct(ext, pMemoryProperties->pNext) {
1298 switch (ext->sType) {
1299 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1300 VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1301
1302 for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1303 const struct hk_memory_heap *heap = &pdev->mem_heaps[i];
1304 uint64_t used = p_atomic_read(&heap->used);
1305
1306 /* From the Vulkan 1.3.278 spec:
1307 *
1308 * "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1309 * values in which memory usages are returned, with one element
1310 * for each memory heap. A heap’s usage is an estimate of how
1311 * much memory the process is currently using in that heap."
1312 *
1313 * TODO: Include internal allocations?
1314 */
1315 p->heapUsage[i] = used;
1316
1317 uint64_t available = heap->size;
1318 if (heap->available)
1319 available = heap->available(pdev);
1320
1321 /* From the Vulkan 1.3.278 spec:
1322 *
1323 * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1324 * values in which memory budgets are returned, with one
1325 * element for each memory heap. A heap’s budget is a rough
1326 * estimate of how much memory the process can allocate from
1327 * that heap before allocations may fail or cause performance
1328 * degradation. The budget includes any currently allocated
1329 * device memory."
1330 *
1331 * and
1332 *
1333 * "The heapBudget value must be less than or equal to
1334 * VkMemoryHeap::size for each heap."
1335 *
1336 * available (queried above) is the total amount free memory
1337 * system-wide and does not include our allocations so we need
1338 * to add that in.
1339 */
1340 uint64_t budget = MIN2(available + used, heap->size);
1341
1342 /* Set the budget at 90% of available to avoid thrashing */
1343 p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1344 }
1345
1346 /* From the Vulkan 1.3.278 spec:
1347 *
1348 * "The heapBudget and heapUsage values must be zero for array
1349 * elements greater than or equal to
1350 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1351 * heapBudget value must be non-zero for array elements less than
1352 * VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1353 */
1354 for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1355 p->heapBudget[i] = 0u;
1356 p->heapUsage[i] = 0u;
1357 }
1358 break;
1359 }
1360 default:
1361 vk_debug_ignored_stype(ext->sType);
1362 break;
1363 }
1364 }
1365 }
1366
1367 static const VkQueueGlobalPriorityKHR hk_global_queue_priorities[] = {
1368 VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
1369 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
1370 VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
1371 VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
1372 };
1373
1374 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1375 hk_GetPhysicalDeviceQueueFamilyProperties2(
1376 VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
1377 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1378 {
1379 VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1380 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1381 pQueueFamilyPropertyCount);
1382
1383 for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1384 const struct hk_queue_family *queue_family = &pdev->queue_families[i];
1385
1386 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1387 {
1388 p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1389 p->queueFamilyProperties.queueCount = queue_family->queue_count;
1390 p->queueFamilyProperties.timestampValidBits =
1391 agx_supports_timestamps(&pdev->dev) ? 64 : 0;
1392 p->queueFamilyProperties.minImageTransferGranularity =
1393 (VkExtent3D){1, 1, 1};
1394
1395 VkQueueFamilyGlobalPriorityPropertiesKHR *prio = vk_find_struct(
1396 p->pNext, QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR);
1397 if (prio) {
1398 STATIC_ASSERT(ARRAY_SIZE(hk_global_queue_priorities) <=
1399 VK_MAX_GLOBAL_PRIORITY_SIZE_KHR);
1400 prio->priorityCount = ARRAY_SIZE(hk_global_queue_priorities);
1401 memcpy(&prio->priorities, hk_global_queue_priorities,
1402 sizeof(hk_global_queue_priorities));
1403 }
1404 }
1405 }
1406 }
1407
1408 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1409 hk_GetPhysicalDeviceMultisamplePropertiesEXT(
1410 VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
1411 VkMultisamplePropertiesEXT *pMultisampleProperties)
1412 {
1413 VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1414
1415 if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1416 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1417 } else {
1418 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1419 }
1420 }
1421