• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2024 Valve Corporation
3  * Copyright 2024 Alyssa Rosenzweig
4  * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5  * SPDX-License-Identifier: MIT
6  */
7 #include "hk_physical_device.h"
8 
9 #include "asahi/compiler/agx_nir_texture.h"
10 #include "asahi/lib/agx_device.h"
11 #include "asahi/lib/agx_nir_lower_vbo.h"
12 #include "util/disk_cache.h"
13 #include "util/mesa-sha1.h"
14 #include "git_sha1.h"
15 #include "hk_buffer.h"
16 #include "hk_entrypoints.h"
17 #include "hk_image.h"
18 #include "hk_instance.h"
19 #include "hk_private.h"
20 #include "hk_shader.h"
21 #include "hk_wsi.h"
22 
23 #include "util/simple_mtx.h"
24 #include "vulkan/vulkan_core.h"
25 #include "vulkan/wsi/wsi_common.h"
26 #include "unstable_asahi_drm.h"
27 #include "vk_drm_syncobj.h"
28 #include "vk_shader_module.h"
29 
30 #include <fcntl.h>
31 #include <string.h>
32 #include <xf86drm.h>
33 #include <sys/stat.h>
34 #include <sys/sysmacros.h>
35 
36 static uint32_t
hk_get_vk_version()37 hk_get_vk_version()
38 {
39    /* Version override takes priority */
40    const uint32_t version_override = vk_get_version_override();
41    if (version_override)
42       return version_override;
43 
44    return VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION);
45 }
46 
47 static void
hk_get_device_extensions(const struct hk_instance * instance,struct vk_device_extension_table * ext)48 hk_get_device_extensions(const struct hk_instance *instance,
49                          struct vk_device_extension_table *ext)
50 {
51    *ext = (struct vk_device_extension_table){
52       .KHR_8bit_storage = true,
53       .KHR_16bit_storage = true,
54       .KHR_bind_memory2 = true,
55       .KHR_buffer_device_address = true,
56       .KHR_calibrated_timestamps = false,
57       .KHR_copy_commands2 = true,
58       .KHR_create_renderpass2 = true,
59       .KHR_dedicated_allocation = true,
60       .KHR_depth_stencil_resolve = true,
61       .KHR_descriptor_update_template = true,
62       .KHR_device_group = true,
63       .KHR_draw_indirect_count = true,
64       .KHR_driver_properties = true,
65       .KHR_dynamic_rendering = true,
66       .KHR_dynamic_rendering_local_read = true,
67       .KHR_external_fence = true,
68       .KHR_external_fence_fd = true,
69       .KHR_external_memory = true,
70       .KHR_external_memory_fd = true,
71       /* XXX: External timeline semaphores maybe broken in kernel, see
72        * dEQP-VK.synchronization.signal_order.shared_timeline_semaphore.write_copy_buffer_to_image_read_image_compute.image_128_r32_uint_opaque_fd
73        */
74       .KHR_external_semaphore = false,
75       .KHR_external_semaphore_fd = false,
76       .KHR_format_feature_flags2 = true,
77       .KHR_fragment_shader_barycentric = false,
78       .KHR_get_memory_requirements2 = true,
79       .KHR_global_priority = true,
80       .KHR_image_format_list = true,
81       .KHR_imageless_framebuffer = true,
82 #ifdef HK_USE_WSI_PLATFORM
83       .KHR_incremental_present = true,
84 #endif
85       .KHR_index_type_uint8 = true,
86       .KHR_line_rasterization = true,
87       .KHR_load_store_op_none = true,
88       .KHR_maintenance1 = true,
89       .KHR_maintenance2 = true,
90       .KHR_maintenance3 = true,
91       .KHR_maintenance4 = true,
92       .KHR_maintenance5 = true,
93       .KHR_maintenance6 = true,
94       .KHR_map_memory2 = true,
95       .KHR_multiview = true,
96       .KHR_pipeline_executable_properties = true,
97       .KHR_pipeline_library = true,
98       .KHR_push_descriptor = true,
99       .KHR_relaxed_block_layout = true,
100       .KHR_sampler_mirror_clamp_to_edge = true,
101       .KHR_sampler_ycbcr_conversion = true,
102       .KHR_separate_depth_stencil_layouts = true,
103       .KHR_shader_atomic_int64 = false,
104       .KHR_shader_clock = false,
105       .KHR_shader_draw_parameters = true,
106       .KHR_shader_expect_assume = true,
107       .KHR_shader_float_controls = true,
108       // TODO: wait for nvk
109       .KHR_shader_float_controls2 = true,
110       .KHR_shader_float16_int8 = true,
111       .KHR_shader_integer_dot_product = true,
112       .KHR_shader_maximal_reconvergence = true,
113       .KHR_shader_non_semantic_info = true,
114       .KHR_shader_relaxed_extended_instruction = true,
115       .KHR_shader_subgroup_extended_types = true,
116       .KHR_shader_subgroup_rotate = true,
117       .KHR_shader_subgroup_uniform_control_flow = true,
118       .KHR_shader_terminate_invocation = true,
119       .KHR_spirv_1_4 = true,
120       .KHR_storage_buffer_storage_class = true,
121       .KHR_timeline_semaphore = true,
122 #ifdef HK_USE_WSI_PLATFORM
123       .KHR_swapchain = true,
124       .KHR_swapchain_mutable_format = true,
125 #endif
126       .KHR_synchronization2 = true,
127       .KHR_uniform_buffer_standard_layout = true,
128       .KHR_variable_pointers = true,
129       .KHR_vertex_attribute_divisor = true,
130       .KHR_vulkan_memory_model = true,
131       .KHR_workgroup_memory_explicit_layout = true,
132       .KHR_zero_initialize_workgroup_memory = true,
133       .EXT_4444_formats = true,
134       .EXT_attachment_feedback_loop_layout = true,
135       .EXT_border_color_swizzle = true,
136       .EXT_buffer_device_address = true,
137       .EXT_calibrated_timestamps = false,
138       .EXT_conditional_rendering = false,
139       .EXT_color_write_enable = true,
140       .EXT_custom_border_color = true,
141       .EXT_depth_bias_control = true,
142       .EXT_depth_clip_control = false,
143       .EXT_depth_clip_enable = true,
144       .EXT_descriptor_indexing = true,
145 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
146       .EXT_display_control = false,
147 #endif
148       .EXT_dynamic_rendering_unused_attachments = true,
149       .EXT_extended_dynamic_state = true,
150       .EXT_extended_dynamic_state2 = true,
151       .EXT_extended_dynamic_state3 = true,
152       .EXT_external_memory_dma_buf = true,
153       .EXT_global_priority = true,
154       .EXT_global_priority_query = true,
155       .EXT_graphics_pipeline_library = true,
156       .EXT_host_query_reset = true,
157       .EXT_host_image_copy = true,
158       .EXT_image_2d_view_of_3d = true,
159       .EXT_image_drm_format_modifier = true,
160       .EXT_image_robustness = true,
161       .EXT_image_sliced_view_of_3d = false,
162       .EXT_image_view_min_lod = false,
163       .EXT_index_type_uint8 = true,
164       .EXT_inline_uniform_block = true,
165       .EXT_line_rasterization = true,
166       .EXT_load_store_op_none = true,
167       .EXT_map_memory_placed = false,
168       .EXT_memory_budget = false,
169       .EXT_multi_draw = true,
170       .EXT_mutable_descriptor_type = true,
171       .EXT_non_seamless_cube_map = true,
172       .EXT_pipeline_creation_cache_control = true,
173       .EXT_pipeline_creation_feedback = true,
174       .EXT_pipeline_protected_access = true,
175       .EXT_pipeline_robustness = true,
176       .EXT_physical_device_drm = true,
177       .EXT_primitive_topology_list_restart = true,
178       .EXT_private_data = true,
179       .EXT_primitives_generated_query = false,
180       .EXT_provoking_vertex = true,
181       .EXT_robustness2 = true,
182       .EXT_sample_locations = true,
183       .EXT_sampler_filter_minmax = false,
184       .EXT_scalar_block_layout = true,
185       .EXT_separate_stencil_usage = true,
186       .EXT_shader_image_atomic_int64 = false,
187       .EXT_shader_demote_to_helper_invocation = true,
188       .EXT_shader_module_identifier = true,
189       .EXT_shader_object = true,
190       .EXT_shader_replicated_composites = true,
191       .EXT_shader_stencil_export = true,
192       .EXT_shader_subgroup_ballot = true,
193       .EXT_shader_subgroup_vote = true,
194       .EXT_shader_viewport_index_layer = true,
195       .EXT_subgroup_size_control = true,
196 #ifdef HK_USE_WSI_PLATFORM
197       .EXT_swapchain_maintenance1 = true,
198 #endif
199       .EXT_texel_buffer_alignment = true,
200       .EXT_tooling_info = true,
201       .EXT_transform_feedback = true,
202       .EXT_vertex_attribute_divisor = true,
203       .EXT_vertex_input_dynamic_state = true,
204       .EXT_ycbcr_2plane_444_formats = false,
205       .EXT_ycbcr_image_arrays = false,
206       .GOOGLE_decorate_string = true,
207       .GOOGLE_hlsl_functionality1 = true,
208       .GOOGLE_user_type = true,
209       .VALVE_mutable_descriptor_type = true,
210    };
211 }
212 
213 static void
hk_get_device_features(const struct vk_device_extension_table * supported_extensions,struct vk_features * features)214 hk_get_device_features(
215    const struct vk_device_extension_table *supported_extensions,
216    struct vk_features *features)
217 {
218    *features = (struct vk_features){
219       /* Vulkan 1.0 */
220       .robustBufferAccess = true,
221       .fullDrawIndexUint32 = true,
222       .imageCubeArray = true,
223       .independentBlend = true,
224       .geometryShader = true,
225       .tessellationShader = true,
226       .sampleRateShading = true,
227       .dualSrcBlend = true,
228       .logicOp = true,
229       .multiDrawIndirect = true,
230       .drawIndirectFirstInstance = true,
231       .depthClamp = true,
232       .depthBiasClamp = true,
233       .fillModeNonSolid = true,
234       .depthBounds = false,
235       .wideLines = true,
236       .largePoints = true,
237       .alphaToOne = true,
238       .multiViewport = true,
239       .samplerAnisotropy = true,
240       .textureCompressionETC2 = false,
241       .textureCompressionBC = true,
242       .textureCompressionASTC_LDR = false,
243       .occlusionQueryPrecise = true,
244       .pipelineStatisticsQuery = true,
245       .vertexPipelineStoresAndAtomics = true,
246       .fragmentStoresAndAtomics = true,
247       .shaderTessellationAndGeometryPointSize = true,
248       .shaderImageGatherExtended = true,
249       .shaderStorageImageExtendedFormats = true,
250       /* TODO: hitting the vertex shader timeout in CTS, but should work */
251       .shaderStorageImageMultisample = false,
252       .shaderStorageImageReadWithoutFormat = true,
253       .shaderStorageImageWriteWithoutFormat = true,
254       .shaderUniformBufferArrayDynamicIndexing = true,
255       .shaderSampledImageArrayDynamicIndexing = true,
256       .shaderStorageBufferArrayDynamicIndexing = true,
257       .shaderStorageImageArrayDynamicIndexing = true,
258       .shaderClipDistance = true,
259       .shaderCullDistance = true,
260       .shaderFloat64 = false,
261       .shaderInt64 = true,
262       .shaderInt16 = true,
263       .shaderResourceResidency = false,
264       .shaderResourceMinLod = true,
265       .sparseBinding = false,
266       .sparseResidency2Samples = false,
267       .sparseResidency4Samples = false,
268       .sparseResidency8Samples = false,
269       .sparseResidencyAliased = false,
270       .sparseResidencyBuffer = false,
271       .sparseResidencyImage2D = false,
272       .sparseResidencyImage3D = false,
273       .variableMultisampleRate = false,
274       .inheritedQueries = true,
275 
276       /* Vulkan 1.1 */
277       .storageBuffer16BitAccess = true,
278       .uniformAndStorageBuffer16BitAccess = true,
279       .storagePushConstant16 = true,
280       .storageInputOutput16 = false,
281       .multiview = true,
282       .multiviewGeometryShader = false,
283       .multiviewTessellationShader = false,
284       .variablePointersStorageBuffer = true,
285       .variablePointers = true,
286       .shaderDrawParameters = true,
287       .samplerYcbcrConversion = true,
288 
289       /* Vulkan 1.2 */
290       .samplerMirrorClampToEdge = true,
291       .drawIndirectCount = true,
292       .storageBuffer8BitAccess = true,
293       .uniformAndStorageBuffer8BitAccess = true,
294       .storagePushConstant8 = true,
295       .shaderBufferInt64Atomics = false,
296       .shaderSharedInt64Atomics = false,
297       .shaderFloat16 = true,
298       .shaderInt8 = true,
299       .descriptorIndexing = true,
300       .shaderInputAttachmentArrayDynamicIndexing = true,
301       .shaderUniformTexelBufferArrayDynamicIndexing = true,
302       .shaderStorageTexelBufferArrayDynamicIndexing = true,
303       .shaderUniformBufferArrayNonUniformIndexing = true,
304       .shaderSampledImageArrayNonUniformIndexing = true,
305       .shaderStorageBufferArrayNonUniformIndexing = true,
306       .shaderStorageImageArrayNonUniformIndexing = true,
307       .shaderInputAttachmentArrayNonUniformIndexing = true,
308       .shaderUniformTexelBufferArrayNonUniformIndexing = true,
309       .shaderStorageTexelBufferArrayNonUniformIndexing = true,
310       .descriptorBindingUniformBufferUpdateAfterBind = true,
311       .descriptorBindingSampledImageUpdateAfterBind = true,
312       .descriptorBindingStorageImageUpdateAfterBind = true,
313       .descriptorBindingStorageBufferUpdateAfterBind = true,
314       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
315       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
316       .descriptorBindingUpdateUnusedWhilePending = true,
317       .descriptorBindingPartiallyBound = true,
318       .descriptorBindingVariableDescriptorCount = true,
319       .runtimeDescriptorArray = true,
320       .samplerFilterMinmax = false,
321       .scalarBlockLayout = true,
322       .imagelessFramebuffer = true,
323       .uniformBufferStandardLayout = true,
324       .shaderSubgroupExtendedTypes = true,
325       .separateDepthStencilLayouts = true,
326       .hostQueryReset = true,
327       .timelineSemaphore = true,
328       .bufferDeviceAddress = true,
329       .bufferDeviceAddressCaptureReplay = false,
330       .bufferDeviceAddressMultiDevice = false,
331       .vulkanMemoryModel = true,
332       .vulkanMemoryModelDeviceScope = true,
333       .vulkanMemoryModelAvailabilityVisibilityChains = false,
334       .shaderOutputViewportIndex = true,
335       .shaderOutputLayer = true,
336       .subgroupBroadcastDynamicId = true,
337 
338       /* Vulkan 1.3 */
339       .robustImageAccess = true,
340       .inlineUniformBlock = true,
341       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
342       .pipelineCreationCacheControl = true,
343       .privateData = true,
344       .shaderDemoteToHelperInvocation = true,
345       .shaderTerminateInvocation = true,
346       .subgroupSizeControl = true,
347       .computeFullSubgroups = true,
348       .synchronization2 = true,
349       .shaderZeroInitializeWorkgroupMemory = true,
350       .dynamicRendering = true,
351       .shaderIntegerDotProduct = true,
352       .maintenance4 = true,
353 
354       /* Vulkan 1.4 */
355       .pushDescriptor = true,
356 
357       /* VK_KHR_dynamic_rendering_local_read */
358       .dynamicRenderingLocalRead = true,
359 
360       /* VK_KHR_fragment_shader_barycentric */
361       .fragmentShaderBarycentric = false,
362 
363       /* VK_KHR_global_priority */
364       .globalPriorityQuery = true,
365 
366       /* VK_KHR_index_type_uint8 */
367       .indexTypeUint8 = true,
368 
369       /* VK_KHR_line_rasterization */
370       .rectangularLines = false,
371       .bresenhamLines = true,
372       .smoothLines = false,
373       .stippledRectangularLines = false,
374       .stippledBresenhamLines = false,
375       .stippledSmoothLines = false,
376 
377       /* VK_KHR_maintenance5 */
378       .maintenance5 = true,
379 
380       /* VK_KHR_maintenance6 */
381       .maintenance6 = true,
382 
383       /* VK_KHR_pipeline_executable_properties */
384       .pipelineExecutableInfo = true,
385 
386       /* VK_KHR_present_id */
387       .presentId = false,
388 
389       /* VK_KHR_present_wait */
390       .presentWait = false,
391 
392       /* VK_KHR_shader_clock */
393       .shaderSubgroupClock = false,
394       .shaderDeviceClock = false,
395 
396       /* VK_KHR_shader_expect_assume */
397       .shaderExpectAssume = true,
398 
399       /* VK_KHR_shader_float_controls2 */
400       .shaderFloatControls2 = true,
401 
402       /* VK_KHR_shader_maximal_reconvergence */
403       .shaderMaximalReconvergence = true,
404 
405       /* VK_KHR_shader_subgroup_rotate */
406       .shaderSubgroupRotate = true,
407       .shaderSubgroupRotateClustered = true,
408 
409       /* VK_KHR_vertex_attribute_divisor */
410       .vertexAttributeInstanceRateDivisor = true,
411       .vertexAttributeInstanceRateZeroDivisor = true,
412 
413       /* VK_KHR_workgroup_memory_explicit_layout */
414       .workgroupMemoryExplicitLayout = true,
415       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
416       .workgroupMemoryExplicitLayout8BitAccess = true,
417       .workgroupMemoryExplicitLayout16BitAccess = true,
418 
419       /* VK_EXT_4444_formats */
420       .formatA4R4G4B4 = true,
421       .formatA4B4G4R4 = true,
422 
423       /* VK_EXT_attachment_feedback_loop_layout */
424       .attachmentFeedbackLoopLayout = true,
425 
426       /* VK_EXT_border_color_swizzle */
427       .borderColorSwizzle = true,
428       .borderColorSwizzleFromImage = false,
429 
430       /* VK_EXT_buffer_device_address */
431       .bufferDeviceAddressCaptureReplayEXT = false,
432 
433       /* VK_EXT_color_write_enable */
434       .colorWriteEnable = true,
435 
436       /* VK_EXT_conditional_rendering */
437       .conditionalRendering = false,
438       .inheritedConditionalRendering = false,
439 
440       /* VK_EXT_custom_border_color */
441       .customBorderColors = true,
442       .customBorderColorWithoutFormat = true,
443 
444       /* VK_EXT_depth_bias_control */
445       .depthBiasControl = true,
446       .leastRepresentableValueForceUnormRepresentation = true,
447       .floatRepresentation = false,
448       .depthBiasExact = true,
449 
450       /* VK_EXT_depth_clip_control */
451       .depthClipControl = false,
452 
453       /* VK_EXT_depth_clip_enable */
454       .depthClipEnable = true,
455 
456       /* VK_EXT_dynamic_rendering_unused_attachments */
457       .dynamicRenderingUnusedAttachments = true,
458 
459       /* VK_EXT_extended_dynamic_state */
460       .extendedDynamicState = true,
461 
462       /* VK_EXT_extended_dynamic_state2 */
463       .extendedDynamicState2 = true,
464       .extendedDynamicState2LogicOp = true,
465       .extendedDynamicState2PatchControlPoints = true,
466 
467       /* VK_EXT_extended_dynamic_state3 */
468       .extendedDynamicState3TessellationDomainOrigin = true,
469       .extendedDynamicState3DepthClampEnable = true,
470       .extendedDynamicState3PolygonMode = true,
471       .extendedDynamicState3RasterizationSamples = true,
472       .extendedDynamicState3SampleMask = true,
473       .extendedDynamicState3AlphaToCoverageEnable = true,
474       .extendedDynamicState3AlphaToOneEnable = true,
475       .extendedDynamicState3LogicOpEnable = true,
476       .extendedDynamicState3ColorBlendEnable = true,
477       .extendedDynamicState3ColorBlendEquation = true,
478       .extendedDynamicState3ColorWriteMask = true,
479       .extendedDynamicState3RasterizationStream = false,
480       .extendedDynamicState3ConservativeRasterizationMode = false,
481       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
482       .extendedDynamicState3DepthClipEnable = true,
483       .extendedDynamicState3SampleLocationsEnable = true,
484       .extendedDynamicState3ColorBlendAdvanced = false,
485       .extendedDynamicState3ProvokingVertexMode = true,
486       .extendedDynamicState3LineRasterizationMode = true,
487       .extendedDynamicState3LineStippleEnable = false,
488       .extendedDynamicState3DepthClipNegativeOneToOne = false,
489       .extendedDynamicState3ViewportWScalingEnable = false,
490       .extendedDynamicState3ViewportSwizzle = false,
491       .extendedDynamicState3CoverageToColorEnable = false,
492       .extendedDynamicState3CoverageToColorLocation = false,
493       .extendedDynamicState3CoverageModulationMode = false,
494       .extendedDynamicState3CoverageModulationTableEnable = false,
495       .extendedDynamicState3CoverageModulationTable = false,
496       .extendedDynamicState3CoverageReductionMode = false,
497       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
498       .extendedDynamicState3ShadingRateImageEnable = false,
499 
500       /* VK_EXT_graphics_pipeline_library */
501       .graphicsPipelineLibrary = true,
502 
503       /* VK_EXT_host_image_copy */
504       .hostImageCopy = true,
505 
506       /* VK_EXT_image_2d_view_of_3d */
507       .image2DViewOf3D = true,
508       .sampler2DViewOf3D = true,
509 
510       /* VK_EXT_image_sliced_view_of_3d */
511       .imageSlicedViewOf3D = false,
512 
513 #ifdef HK_USE_WSI_PLATFORM
514       /* VK_EXT_swapchain_maintenance1 */
515       .swapchainMaintenance1 = true,
516 #endif
517 
518       /* VK_EXT_image_view_min_lod */
519       .minLod = false,
520 
521       /* VK_EXT_map_memory_placed */
522       .memoryMapPlaced = false,
523       .memoryMapRangePlaced = false,
524       .memoryUnmapReserve = false,
525 
526       /* VK_EXT_multi_draw */
527       .multiDraw = true,
528 
529       /* VK_EXT_mutable_descriptor_type */
530       .mutableDescriptorType = true,
531 
532       /* VK_EXT_non_seamless_cube_map */
533       .nonSeamlessCubeMap = true,
534 
535       /* VK_EXT_pipeline_protected_access */
536       .pipelineProtectedAccess = true,
537 
538       /* VK_EXT_pipeline_robustness */
539       .pipelineRobustness = true,
540 
541       /* VK_EXT_primitive_topology_list_restart */
542       .primitiveTopologyListRestart = true,
543       .primitiveTopologyPatchListRestart = false,
544 
545       /* VK_EXT_primitives_generated_query */
546       .primitivesGeneratedQuery = false,
547       .primitivesGeneratedQueryWithNonZeroStreams = false,
548       .primitivesGeneratedQueryWithRasterizerDiscard = false,
549 
550       /* VK_EXT_provoking_vertex */
551       .provokingVertexLast = true,
552       .transformFeedbackPreservesProvokingVertex = true,
553 
554       /* VK_EXT_robustness2 */
555       .robustBufferAccess2 = true,
556       .robustImageAccess2 = true,
557       .nullDescriptor = true,
558 
559       /* VK_EXT_shader_image_atomic_int64 */
560       .shaderImageInt64Atomics = false,
561       .sparseImageInt64Atomics = false,
562 
563       /* VK_EXT_shader_module_identifier */
564       .shaderModuleIdentifier = true,
565 
566       /* VK_EXT_shader_object */
567       .shaderObject = true,
568 
569       /* VK_EXT_shader_replicated_composites */
570       .shaderReplicatedComposites = true,
571 
572       /* VK_KHR_shader_subgroup_uniform_control_flow */
573       .shaderSubgroupUniformControlFlow = true,
574 
575       /* VK_EXT_texel_buffer_alignment */
576       .texelBufferAlignment = true,
577 
578       /* VK_EXT_transform_feedback */
579       .transformFeedback = true,
580       .geometryStreams = true,
581 
582       /* VK_EXT_vertex_input_dynamic_state */
583       .vertexInputDynamicState = true,
584 
585       /* VK_EXT_ycbcr_2plane_444_formats */
586       .ycbcr2plane444Formats = false,
587 
588       /* VK_EXT_ycbcr_image_arrays */
589       .ycbcrImageArrays = false,
590 
591       /* VK_KHR_shader_relaxed_extended_instruction */
592       .shaderRelaxedExtendedInstruction = true,
593    };
594 }
595 
596 static void
hk_get_device_properties(const struct agx_device * dev,const struct hk_instance * instance,struct vk_properties * properties)597 hk_get_device_properties(const struct agx_device *dev,
598                          const struct hk_instance *instance,
599                          struct vk_properties *properties)
600 {
601    const VkSampleCountFlagBits sample_counts =
602       VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
603 
604    uint64_t os_page_size = 16384;
605    os_get_page_size(&os_page_size);
606 
607    *properties = (struct vk_properties){
608       .apiVersion = hk_get_vk_version(),
609       .driverVersion = vk_get_driver_version(),
610       .vendorID = instance->force_vk_vendor ?: VK_VENDOR_ID_MESA,
611       .deviceID = 0,
612       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
613 
614       /* Vulkan 1.0 limits */
615       .maxImageDimension1D = 16384,
616       .maxImageDimension2D = 16384,
617       .maxImageDimension3D = 16384,
618       .maxImageDimensionCube = 16384,
619       .maxImageArrayLayers = 2048,
620       .maxTexelBufferElements = AGX_TEXTURE_BUFFER_MAX_SIZE,
621       .maxUniformBufferRange = 65536,
622 
623       /* From a hardware perspective, storage buffers are lowered to global
624        * address arithmetic so there is no hard limit. However, making efficient
625        * use of the hardware addressing modes depends on no signed wrapping in
626        * any `amul` operations, which are themselves bounded by
627        * maxStorageBufferRange. Therefore, limit storage buffers to INT32_MAX
628        * bytes instead of UINT32_MAX. This is believed to be acceptable for
629        * Direct3D.
630        */
631       .maxStorageBufferRange = INT32_MAX,
632       .maxPushConstantsSize = HK_MAX_PUSH_SIZE,
633       .maxMemoryAllocationCount = 4096,
634       .maxSamplerAllocationCount = 4000,
635       .bufferImageGranularity = 0x400,
636       .sparseAddressSpaceSize = HK_SPARSE_ADDR_SPACE_SIZE,
637       .maxBoundDescriptorSets = HK_MAX_SETS,
638       .maxPerStageDescriptorSamplers = HK_MAX_DESCRIPTORS,
639       .maxPerStageDescriptorUniformBuffers = HK_MAX_DESCRIPTORS,
640       .maxPerStageDescriptorStorageBuffers = HK_MAX_DESCRIPTORS,
641       .maxPerStageDescriptorSampledImages = HK_MAX_DESCRIPTORS,
642       .maxPerStageDescriptorStorageImages = HK_MAX_DESCRIPTORS,
643       .maxPerStageDescriptorInputAttachments = HK_MAX_DESCRIPTORS,
644       .maxPerStageResources = UINT32_MAX,
645       .maxDescriptorSetSamplers = HK_MAX_DESCRIPTORS,
646       .maxDescriptorSetUniformBuffers = HK_MAX_DESCRIPTORS,
647       .maxDescriptorSetUniformBuffersDynamic = HK_MAX_DYNAMIC_BUFFERS / 2,
648       .maxDescriptorSetStorageBuffers = HK_MAX_DESCRIPTORS,
649       .maxDescriptorSetStorageBuffersDynamic = HK_MAX_DYNAMIC_BUFFERS / 2,
650       .maxDescriptorSetSampledImages = HK_MAX_DESCRIPTORS,
651       .maxDescriptorSetStorageImages = HK_MAX_DESCRIPTORS,
652       .maxDescriptorSetInputAttachments = HK_MAX_DESCRIPTORS,
653       .maxVertexInputAttributes = AGX_MAX_VBUFS,
654       .maxVertexInputBindings = AGX_MAX_ATTRIBS,
655       .maxVertexInputAttributeOffset = 65535,
656       .maxVertexInputBindingStride = 2048,
657 
658       /* Hardware limit is 128 but we need to reserve some for internal purposes
659        * (like cull distance emulation). Set 96 to be safe.
660        */
661       .maxVertexOutputComponents = 96,
662       .maxGeometryShaderInvocations = 32,
663       .maxGeometryInputComponents = 128,
664       .maxGeometryOutputComponents = 128,
665       .maxGeometryOutputVertices = 1024,
666       .maxGeometryTotalOutputComponents = 1024,
667       .maxTessellationGenerationLevel = 64,
668       .maxTessellationPatchSize = 32,
669       .maxTessellationControlPerVertexInputComponents = 128,
670       .maxTessellationControlPerVertexOutputComponents = 128,
671       .maxTessellationControlPerPatchOutputComponents = 120,
672       .maxTessellationControlTotalOutputComponents = 4216,
673       .maxTessellationEvaluationInputComponents = 128,
674       .maxTessellationEvaluationOutputComponents = 128,
675 
676       /* Set to match maxVertexOutputComponents, hardware limit is higher. */
677       .maxFragmentInputComponents = 96,
678       .maxFragmentOutputAttachments = HK_MAX_RTS,
679       .maxFragmentDualSrcAttachments = 1,
680       .maxFragmentCombinedOutputResources = 16,
681       .maxComputeSharedMemorySize = HK_MAX_SHARED_SIZE,
682       .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
683       .maxComputeWorkGroupInvocations = 1024,
684       .maxComputeWorkGroupSize = {1024, 1024, 64},
685       .subPixelPrecisionBits = 8,
686       .subTexelPrecisionBits = 8,
687       .mipmapPrecisionBits = 8,
688       .maxDrawIndexedIndexValue = UINT32_MAX,
689       .maxDrawIndirectCount = UINT16_MAX,
690       .maxSamplerLodBias = 15,
691       .maxSamplerAnisotropy = 16,
692       .maxViewports = HK_MAX_VIEWPORTS,
693       .maxViewportDimensions = {32768, 32768},
694       .viewportBoundsRange = {-65536, 65536},
695       .viewportSubPixelBits = 8,
696       .minMemoryMapAlignment = os_page_size,
697       .minTexelBufferOffsetAlignment = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
698       .minUniformBufferOffsetAlignment = HK_MIN_UBO_ALIGNMENT,
699       .minStorageBufferOffsetAlignment = HK_MIN_SSBO_ALIGNMENT,
700       .minTexelOffset = -8,
701       .maxTexelOffset = 7,
702       .minTexelGatherOffset = -8,
703       .maxTexelGatherOffset = 7,
704       .minInterpolationOffset = -0.5,
705       .maxInterpolationOffset = 0.4375,
706       .subPixelInterpolationOffsetBits = 4,
707       .maxFramebufferHeight = 16384,
708       .maxFramebufferWidth = 16384,
709       .maxFramebufferLayers = 2048,
710       .framebufferColorSampleCounts = sample_counts,
711       .framebufferDepthSampleCounts = sample_counts,
712       .framebufferNoAttachmentsSampleCounts = sample_counts,
713       .framebufferStencilSampleCounts = sample_counts,
714       .maxColorAttachments = HK_MAX_RTS,
715       .sampledImageColorSampleCounts = sample_counts,
716       .sampledImageIntegerSampleCounts = sample_counts,
717       .sampledImageDepthSampleCounts = sample_counts,
718       .sampledImageStencilSampleCounts = sample_counts,
719       .storageImageSampleCounts = sample_counts,
720       .maxSampleMaskWords = 1,
721       .timestampComputeAndGraphics = agx_supports_timestamps(dev),
722       /* FIXME: Is timestamp period actually 1? */
723       .timestampPeriod = 1.0f,
724       .maxClipDistances = 8,
725       .maxCullDistances = 8,
726       .maxCombinedClipAndCullDistances = 8,
727       .discreteQueuePriorities = 2,
728       .pointSizeRange = {1.0, 512.f - 0.0625f},
729       .lineWidthRange = {1.0, 16.0f},
730       .pointSizeGranularity = 0.0625,
731       .lineWidthGranularity = 1.0f / 16.0f,
732       .strictLines = false,
733       .standardSampleLocations = true,
734       .optimalBufferCopyOffsetAlignment = 1,
735       .optimalBufferCopyRowPitchAlignment = 1,
736       .nonCoherentAtomSize = 64,
737 
738       /* Vulkan 1.0 sparse properties */
739       .sparseResidencyNonResidentStrict = false,
740       .sparseResidencyAlignedMipSize = false,
741       .sparseResidencyStandard2DBlockShape = false,
742       .sparseResidencyStandard2DMultisampleBlockShape = false,
743       .sparseResidencyStandard3DBlockShape = false,
744 
745       /* Vulkan 1.1 properties */
746       .subgroupSize = 32,
747       .subgroupSupportedStages =
748          VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_ALL_GRAPHICS,
749       .subgroupSupportedOperations =
750          VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
751          VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
752          VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
753          VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
754          VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
755          VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
756          VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
757          VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR,
758       .subgroupQuadOperationsInAllStages = true,
759       .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
760       .maxMultiviewViewCount = HK_MAX_MULTIVIEW_VIEW_COUNT,
761       .maxMultiviewInstanceIndex = UINT32_MAX,
762       .maxPerSetDescriptors = UINT32_MAX,
763       .maxMemoryAllocationSize = (1ull << 37),
764 
765       /* Vulkan 1.2 properties */
766       .supportedDepthResolveModes =
767          VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
768          VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
769       .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
770                                       VK_RESOLVE_MODE_MIN_BIT |
771                                       VK_RESOLVE_MODE_MAX_BIT,
772       .independentResolveNone = true,
773       .independentResolve = true,
774       .driverID = VK_DRIVER_ID_MESA_HONEYKRISP,
775       .conformanceVersion = (VkConformanceVersion){1, 4, 0, 0},
776       .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
777       .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
778       .shaderSignedZeroInfNanPreserveFloat16 = true,
779       .shaderSignedZeroInfNanPreserveFloat32 = true,
780       .shaderSignedZeroInfNanPreserveFloat64 = false,
781       .shaderDenormPreserveFloat16 = true,
782       .shaderDenormPreserveFloat32 = false,
783       .shaderDenormPreserveFloat64 = false,
784       .shaderDenormFlushToZeroFloat16 = false,
785       .shaderDenormFlushToZeroFloat32 = true,
786       .shaderDenormFlushToZeroFloat64 = false,
787       .shaderRoundingModeRTEFloat16 = true,
788       .shaderRoundingModeRTEFloat32 = true,
789       .shaderRoundingModeRTEFloat64 = false,
790       .shaderRoundingModeRTZFloat16 = false,
791       .shaderRoundingModeRTZFloat32 = false,
792       .shaderRoundingModeRTZFloat64 = false,
793       .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
794       .shaderUniformBufferArrayNonUniformIndexingNative = true,
795       .shaderSampledImageArrayNonUniformIndexingNative = true,
796       .shaderStorageBufferArrayNonUniformIndexingNative = true,
797       .shaderStorageImageArrayNonUniformIndexingNative = true,
798       .shaderInputAttachmentArrayNonUniformIndexingNative = true,
799       .robustBufferAccessUpdateAfterBind = true,
800       .quadDivergentImplicitLod = false,
801       .maxPerStageDescriptorUpdateAfterBindSamplers = HK_MAX_DESCRIPTORS,
802       .maxPerStageDescriptorUpdateAfterBindUniformBuffers = HK_MAX_DESCRIPTORS,
803       .maxPerStageDescriptorUpdateAfterBindStorageBuffers = HK_MAX_DESCRIPTORS,
804       .maxPerStageDescriptorUpdateAfterBindSampledImages = HK_MAX_DESCRIPTORS,
805       .maxPerStageDescriptorUpdateAfterBindStorageImages = HK_MAX_DESCRIPTORS,
806       .maxPerStageDescriptorUpdateAfterBindInputAttachments =
807          HK_MAX_DESCRIPTORS,
808       .maxPerStageUpdateAfterBindResources = UINT32_MAX,
809       .maxDescriptorSetUpdateAfterBindSamplers = HK_MAX_DESCRIPTORS,
810       .maxDescriptorSetUpdateAfterBindUniformBuffers = HK_MAX_DESCRIPTORS,
811       .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic =
812          HK_MAX_DYNAMIC_BUFFERS / 2,
813       .maxDescriptorSetUpdateAfterBindStorageBuffers = HK_MAX_DESCRIPTORS,
814       .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic =
815          HK_MAX_DYNAMIC_BUFFERS / 2,
816       .maxDescriptorSetUpdateAfterBindSampledImages = HK_MAX_DESCRIPTORS,
817       .maxDescriptorSetUpdateAfterBindStorageImages = HK_MAX_DESCRIPTORS,
818       .maxDescriptorSetUpdateAfterBindInputAttachments = HK_MAX_DESCRIPTORS,
819       .filterMinmaxSingleComponentFormats = false,
820       .filterMinmaxImageComponentMapping = false,
821       .maxTimelineSemaphoreValueDifference = UINT64_MAX,
822       .framebufferIntegerColorSampleCounts = sample_counts,
823 
824       /* Vulkan 1.3 properties */
825       .minSubgroupSize = 32,
826       .maxSubgroupSize = 32,
827       .maxComputeWorkgroupSubgroups = 1024 / 32,
828       .requiredSubgroupSizeStages = 0,
829       .maxInlineUniformBlockSize = 1 << 16,
830       .maxPerStageDescriptorInlineUniformBlocks = 32,
831       .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
832       .maxDescriptorSetInlineUniformBlocks = 6 * 32,
833       .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
834       .maxInlineUniformTotalSize = 1 << 16,
835       .integerDotProduct4x8BitPackedUnsignedAccelerated = false,
836       .integerDotProduct4x8BitPackedSignedAccelerated = false,
837       .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
838       .storageTexelBufferOffsetAlignmentBytes = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
839       .storageTexelBufferOffsetSingleTexelAlignment = true,
840       .uniformTexelBufferOffsetAlignmentBytes = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
841       .uniformTexelBufferOffsetSingleTexelAlignment = true,
842       .maxBufferSize = HK_MAX_BUFFER_SIZE,
843 
844       /* Vulkan 1.4 properties */
845       .dynamicRenderingLocalReadDepthStencilAttachments = false,
846       .dynamicRenderingLocalReadMultisampledAttachments = true,
847 
848       /* VK_KHR_push_descriptor */
849       .maxPushDescriptors = HK_MAX_PUSH_DESCRIPTORS,
850 
851       /* VK_EXT_custom_border_color */
852       .maxCustomBorderColorSamplers = 4000,
853 
854       /* VK_EXT_extended_dynamic_state3 */
855       .dynamicPrimitiveTopologyUnrestricted = true,
856 
857       /* VK_EXT_graphics_pipeline_library */
858       .graphicsPipelineLibraryFastLinking = true,
859       .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
860 
861       /* VK_EXT_host_image_copy */
862 
863       /* VK_KHR_line_rasterization */
864       .lineSubPixelPrecisionBits = 8,
865 
866       /* VK_KHR_maintenance5 */
867       .earlyFragmentMultisampleCoverageAfterSampleCounting = false,
868       .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
869       .depthStencilSwizzleOneSupport = true,
870       .polygonModePointSize = false,
871       .nonStrictSinglePixelWideLinesUseParallelogram = false,
872       .nonStrictWideLinesUseParallelogram = false,
873 
874       /* VK_KHR_maintenance6 */
875       .blockTexelViewCompatibleMultipleLayers = false,
876       .maxCombinedImageSamplerDescriptorCount = 3,
877       .fragmentShadingRateClampCombinerInputs = false,
878 
879       /* VK_EXT_map_memory_placed */
880       .minPlacedMemoryMapAlignment = os_page_size,
881 
882       /* VK_EXT_multi_draw */
883       .maxMultiDrawCount = UINT16_MAX,
884 
885       /* VK_EXT_pipeline_robustness */
886       .defaultRobustnessStorageBuffers =
887          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
888       .defaultRobustnessUniformBuffers =
889          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
890       .defaultRobustnessVertexInputs =
891          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
892       .defaultRobustnessImages =
893          VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
894 
895       /* VK_EXT_physical_device_drm gets populated later */
896 
897       /* VK_EXT_provoking_vertex */
898       .provokingVertexModePerPipeline = true,
899       .transformFeedbackPreservesTriangleFanProvokingVertex = true,
900 
901       /* VK_EXT_robustness2 */
902       .robustStorageBufferAccessSizeAlignment = HK_SSBO_BOUNDS_CHECK_ALIGNMENT,
903       .robustUniformBufferAccessSizeAlignment = HK_MIN_UBO_ALIGNMENT,
904 
905       /* VK_EXT_sample_locations */
906       .sampleLocationSampleCounts = sample_counts,
907       .maxSampleLocationGridSize = (VkExtent2D){1, 1},
908       .sampleLocationCoordinateRange[0] = 0.0f,
909       .sampleLocationCoordinateRange[1] = 0.9375f,
910       .sampleLocationSubPixelBits = 4,
911       .variableSampleLocations = false,
912 
913       /* VK_EXT_shader_object */
914       .shaderBinaryVersion = 0,
915 
916       /* VK_EXT_transform_feedback */
917       .maxTransformFeedbackStreams = 4,
918       .maxTransformFeedbackBuffers = 4,
919       .maxTransformFeedbackBufferSize = UINT32_MAX,
920       .maxTransformFeedbackStreamDataSize = 2048,
921       .maxTransformFeedbackBufferDataSize = 512,
922       .maxTransformFeedbackBufferDataStride = 2048,
923       .transformFeedbackQueries = true,
924       .transformFeedbackStreamsLinesTriangles = false,
925       .transformFeedbackRasterizationStreamSelect = false,
926       .transformFeedbackDraw = false,
927 
928       /* VK_KHR_vertex_attribute_divisor */
929       .maxVertexAttribDivisor = UINT32_MAX,
930       .supportsNonZeroFirstInstance = true,
931 
932       /* VK_KHR_fragment_shader_barycentric */
933       .triStripVertexOrderIndependentOfProvokingVertex = false,
934    };
935 
936    strncpy(properties->deviceName, dev->name, sizeof(properties->deviceName));
937 
938    /* VK_EXT_shader_module_identifier */
939    static_assert(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
940                  sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
941    memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
942           vk_shaderModuleIdentifierAlgorithmUUID,
943           sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
944 
945    uint8_t dev_uuid[VK_UUID_SIZE];
946    agx_get_device_uuid(dev, &dev_uuid);
947    static_assert(sizeof(dev_uuid) == VK_UUID_SIZE);
948    memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
949    static_assert(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
950    memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
951 
952    strncpy(properties->driverName, "Honeykrisp", VK_MAX_DRIVER_NAME_SIZE);
953    snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
954             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
955 
956    /* We don't use the layouts ATM so just report all layouts from
957     * extensions that we support as compatible.
958     */
959    static const VkImageLayout supported_layouts[] = {
960       VK_IMAGE_LAYOUT_GENERAL, /* required by spec */
961       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
962       VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
963       VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
964       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
965       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
966       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
967       VK_IMAGE_LAYOUT_PREINITIALIZED,
968       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
969       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
970       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
971       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
972       VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
973       VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
974       VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
975       VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
976       // VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
977       VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
978    };
979 
980    properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
981    properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
982    properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
983    properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
984 
985    /* We're a UMR so we can always map every kind of memory */
986    properties->identicalMemoryTypeRequirements = true;
987 
988    {
989       struct mesa_sha1 sha1_ctx;
990       uint8_t sha1[20];
991 
992       _mesa_sha1_init(&sha1_ctx);
993       /* Make sure we don't match with other vendors */
994       const char *driver = "honeykrisp-v1";
995       _mesa_sha1_update(&sha1_ctx, driver, strlen(driver));
996       _mesa_sha1_final(&sha1_ctx, sha1);
997 
998       memcpy(properties->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
999    }
1000 }
1001 
1002 static void
hk_physical_device_init_pipeline_cache(struct hk_physical_device * pdev)1003 hk_physical_device_init_pipeline_cache(struct hk_physical_device *pdev)
1004 {
1005    struct hk_instance *instance = hk_physical_device_instance(pdev);
1006 
1007    struct mesa_sha1 sha_ctx;
1008    _mesa_sha1_init(&sha_ctx);
1009 
1010    _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
1011                      sizeof(instance->driver_build_sha));
1012 
1013    const uint64_t compiler_flags = hk_physical_device_compiler_flags(pdev);
1014    _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1015 
1016    unsigned char sha[SHA1_DIGEST_LENGTH];
1017    _mesa_sha1_final(&sha_ctx, sha);
1018 
1019    static_assert(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1020    memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1021    memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1022 
1023 #ifdef ENABLE_SHADER_CACHE
1024    char renderer[10];
1025    ASSERTED int len =
1026       snprintf(renderer, sizeof(renderer), "HK_G%u%c_",
1027                pdev->dev.params.gpu_generation, pdev->dev.params.gpu_variant);
1028 
1029    assert(len == sizeof(renderer) - 2);
1030 
1031    char timestamp[41];
1032    _mesa_sha1_format(timestamp, instance->driver_build_sha);
1033 
1034    const uint64_t driver_flags = hk_physical_device_compiler_flags(pdev);
1035    pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1036 #endif
1037 }
1038 
1039 static void
hk_physical_device_free_disk_cache(struct hk_physical_device * pdev)1040 hk_physical_device_free_disk_cache(struct hk_physical_device *pdev)
1041 {
1042 #ifdef ENABLE_SHADER_CACHE
1043    if (pdev->vk.disk_cache) {
1044       disk_cache_destroy(pdev->vk.disk_cache);
1045       pdev->vk.disk_cache = NULL;
1046    }
1047 #else
1048    assert(pdev->vk.disk_cache == NULL);
1049 #endif
1050 }
1051 
/* Use 1/2 of total size to avoid swapping.
 *
 * The argument is parenthesized so that a compound expression such as
 * SYSMEM_HEAP_FRACTION(a + b) is scaled as a whole instead of only its last
 * operand.
 */
#define SYSMEM_HEAP_FRACTION(x) ((x) * 1 / 2)
1054 
1055 static uint64_t
hk_get_sysmem_heap_size(struct hk_physical_device * pdev)1056 hk_get_sysmem_heap_size(struct hk_physical_device *pdev)
1057 {
1058    if (pdev->sysmem)
1059       return pdev->sysmem;
1060 
1061    uint64_t sysmem_size_B = 0;
1062    if (!os_get_total_physical_memory(&sysmem_size_B))
1063       return 0;
1064 
1065    return ROUND_DOWN_TO(SYSMEM_HEAP_FRACTION(sysmem_size_B), 1 << 20);
1066 }
1067 
1068 static uint64_t
hk_get_sysmem_heap_available(struct hk_physical_device * pdev)1069 hk_get_sysmem_heap_available(struct hk_physical_device *pdev)
1070 {
1071    if (pdev->sysmem) {
1072       uint64_t total_used = 0;
1073       for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1074          const struct hk_memory_heap *heap = &pdev->mem_heaps[i];
1075          uint64_t used = p_atomic_read(&heap->used);
1076          total_used += used;
1077       }
1078       return pdev->sysmem - total_used;
1079    }
1080 
1081    uint64_t sysmem_size_B = 0;
1082    if (!os_get_available_system_memory(&sysmem_size_B)) {
1083       vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
1084       return 0;
1085    }
1086 
1087    return ROUND_DOWN_TO(SYSMEM_HEAP_FRACTION(sysmem_size_B), 1 << 20);
1088 }
1089 
1090 VkResult
hk_create_drm_physical_device(struct vk_instance * _instance,drmDevicePtr drm_device,struct vk_physical_device ** pdev_out)1091 hk_create_drm_physical_device(struct vk_instance *_instance,
1092                               drmDevicePtr drm_device,
1093                               struct vk_physical_device **pdev_out)
1094 {
1095    struct hk_instance *instance = (struct hk_instance *)_instance;
1096    VkResult result;
1097 
1098    /* Blanket refusal to probe due to unstable UAPI. */
1099    return VK_ERROR_INCOMPATIBLE_DRIVER;
1100 
1101    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
1102        drm_device->bustype != DRM_BUS_PLATFORM)
1103       return VK_ERROR_INCOMPATIBLE_DRIVER;
1104 
1105    const char *path = drm_device->nodes[DRM_NODE_RENDER];
1106    int fd = open(path, O_RDWR | O_CLOEXEC);
1107    if (fd < 0) {
1108       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1109                        "failed to open device %s", path);
1110    }
1111 
1112    drmVersionPtr version = drmGetVersion(fd);
1113    if (!version) {
1114       result =
1115          vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1116                    "failed to query kernel driver version for device %s", path);
1117       goto fail_fd;
1118    }
1119 
1120    bool is_asahi = (strcmp(version->name, "asahi") == 0);
1121    is_asahi |= strcmp(version->name, "virtio_gpu") == 0;
1122    drmFreeVersion(version);
1123 
1124    if (!is_asahi) {
1125       /* Fail silently */
1126       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1127       goto fail_fd;
1128    }
1129 
1130    struct stat st;
1131    if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
1132       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1133                          "fstat() failed on %s: %m",
1134                          drm_device->nodes[DRM_NODE_RENDER]);
1135       goto fail_fd;
1136    }
1137    const dev_t render_dev = st.st_rdev;
1138 
1139    struct hk_physical_device *pdev =
1140       vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8,
1141                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1142 
1143    if (pdev == NULL) {
1144       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1145       goto fail_fd;
1146    }
1147 
1148    /* We're render-only */
1149    pdev->master_fd = -1;
1150    pdev->render_dev = render_dev;
1151    pdev->dev.fd = fd;
1152 
1153    if (!agx_open_device(NULL, &pdev->dev)) {
1154       /* Fail silently, for virtgpu */
1155       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1156       goto fail_pdev_alloc;
1157    }
1158 
1159    struct vk_physical_device_dispatch_table dispatch_table;
1160    vk_physical_device_dispatch_table_from_entrypoints(
1161       &dispatch_table, &hk_physical_device_entrypoints, true);
1162    vk_physical_device_dispatch_table_from_entrypoints(
1163       &dispatch_table, &wsi_physical_device_entrypoints, false);
1164 
1165    struct vk_device_extension_table supported_extensions;
1166    hk_get_device_extensions(instance, &supported_extensions);
1167 
1168    struct vk_features supported_features;
1169    hk_get_device_features(&supported_extensions, &supported_features);
1170 
1171    struct vk_properties properties;
1172    hk_get_device_properties(&pdev->dev, instance, &properties);
1173 
1174    properties.drmHasRender = true;
1175    properties.drmRenderMajor = major(render_dev);
1176    properties.drmRenderMinor = minor(render_dev);
1177 
1178    result = vk_physical_device_init(&pdev->vk, &instance->vk,
1179                                     &supported_extensions, &supported_features,
1180                                     &properties, &dispatch_table);
1181    if (result != VK_SUCCESS)
1182       goto fail_agx_device;
1183 
1184    hk_physical_device_init_pipeline_cache(pdev);
1185 
1186    const char *hk_sysmem = getenv("HK_SYSMEM");
1187    if (hk_sysmem) {
1188       uint64_t sysmem = strtoll(hk_sysmem, NULL, 10);
1189       if (sysmem != LLONG_MIN && sysmem != LLONG_MAX) {
1190          pdev->sysmem = sysmem;
1191       }
1192    }
1193 
1194    uint64_t sysmem_size_B = hk_get_sysmem_heap_size(pdev);
1195    if (sysmem_size_B == 0) {
1196       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1197                          "Failed to query total system memory");
1198       goto fail_disk_cache;
1199    }
1200 
1201    uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1202    pdev->mem_heaps[sysmem_heap_idx] = (struct hk_memory_heap){
1203       .size = sysmem_size_B,
1204       .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1205       .available = hk_get_sysmem_heap_available,
1206    };
1207 
1208    pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType){
1209       .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1210                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1211                        VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
1212                        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1213       .heapIndex = sysmem_heap_idx,
1214    };
1215 
1216    assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1217    assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1218 
1219    /* TODO: VK_QUEUE_SPARSE_BINDING_BIT*/
1220    pdev->queue_families[pdev->queue_family_count++] = (struct hk_queue_family){
1221       .queue_flags =
1222          VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1223 
1224       .queue_count = 1,
1225    };
1226    assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1227 
1228    unsigned st_idx = 0;
1229    pdev->syncobj_sync_type = vk_drm_syncobj_get_type(fd);
1230    pdev->sync_types[st_idx++] = &pdev->syncobj_sync_type;
1231    pdev->sync_types[st_idx++] = NULL;
1232    assert(st_idx <= ARRAY_SIZE(pdev->sync_types));
1233    pdev->vk.supported_sync_types = pdev->sync_types;
1234 
1235    result = hk_init_wsi(pdev);
1236    if (result != VK_SUCCESS)
1237       goto fail_disk_cache;
1238 
1239    simple_mtx_init(&pdev->debug_compile_lock, mtx_plain);
1240    *pdev_out = &pdev->vk;
1241 
1242    return VK_SUCCESS;
1243 
1244 fail_disk_cache:
1245    hk_physical_device_free_disk_cache(pdev);
1246    vk_physical_device_finish(&pdev->vk);
1247 fail_agx_device:
1248    agx_close_device(&pdev->dev);
1249 fail_pdev_alloc:
1250    if (pdev->master_fd)
1251       close(pdev->master_fd);
1252 
1253    vk_free(&pdev->vk.instance->alloc, pdev);
1254 fail_fd:
1255    close(fd);
1256    return result;
1257 }
1258 
1259 void
hk_physical_device_destroy(struct vk_physical_device * vk_pdev)1260 hk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1261 {
1262    struct hk_physical_device *pdev =
1263       container_of(vk_pdev, struct hk_physical_device, vk);
1264 
1265    hk_finish_wsi(pdev);
1266 
1267    if (pdev->master_fd >= 0)
1268       close(pdev->master_fd);
1269 
1270    simple_mtx_destroy(&pdev->debug_compile_lock);
1271    hk_physical_device_free_disk_cache(pdev);
1272    agx_close_device(&pdev->dev);
1273    vk_physical_device_finish(&pdev->vk);
1274    vk_free(&pdev->vk.instance->alloc, pdev);
1275 }
1276 
1277 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1278 hk_GetPhysicalDeviceMemoryProperties2(
1279    VkPhysicalDevice physicalDevice,
1280    VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1281 {
1282    VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1283 
1284    pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1285    for (int i = 0; i < pdev->mem_heap_count; i++) {
1286       pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap){
1287          .size = pdev->mem_heaps[i].size,
1288          .flags = pdev->mem_heaps[i].flags,
1289       };
1290    }
1291 
1292    pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1293    for (int i = 0; i < pdev->mem_type_count; i++) {
1294       pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1295    }
1296 
1297    vk_foreach_struct(ext, pMemoryProperties->pNext) {
1298       switch (ext->sType) {
1299       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1300          VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1301 
1302          for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1303             const struct hk_memory_heap *heap = &pdev->mem_heaps[i];
1304             uint64_t used = p_atomic_read(&heap->used);
1305 
1306             /* From the Vulkan 1.3.278 spec:
1307              *
1308              *    "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1309              *    values in which memory usages are returned, with one element
1310              *    for each memory heap. A heap’s usage is an estimate of how
1311              *    much memory the process is currently using in that heap."
1312              *
1313              * TODO: Include internal allocations?
1314              */
1315             p->heapUsage[i] = used;
1316 
1317             uint64_t available = heap->size;
1318             if (heap->available)
1319                available = heap->available(pdev);
1320 
1321             /* From the Vulkan 1.3.278 spec:
1322              *
1323              *    "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1324              *    values in which memory budgets are returned, with one
1325              *    element for each memory heap. A heap’s budget is a rough
1326              *    estimate of how much memory the process can allocate from
1327              *    that heap before allocations may fail or cause performance
1328              *    degradation. The budget includes any currently allocated
1329              *    device memory."
1330              *
1331              * and
1332              *
1333              *    "The heapBudget value must be less than or equal to
1334              *    VkMemoryHeap::size for each heap."
1335              *
1336              * available (queried above) is the total amount free memory
1337              * system-wide and does not include our allocations so we need
1338              * to add that in.
1339              */
1340             uint64_t budget = MIN2(available + used, heap->size);
1341 
1342             /* Set the budget at 90% of available to avoid thrashing */
1343             p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1344          }
1345 
1346          /* From the Vulkan 1.3.278 spec:
1347           *
1348           *    "The heapBudget and heapUsage values must be zero for array
1349           *    elements greater than or equal to
1350           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1351           *    heapBudget value must be non-zero for array elements less than
1352           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1353           */
1354          for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1355             p->heapBudget[i] = 0u;
1356             p->heapUsage[i] = 0u;
1357          }
1358          break;
1359       }
1360       default:
1361          vk_debug_ignored_stype(ext->sType);
1362          break;
1363       }
1364    }
1365 }
1366 
1367 static const VkQueueGlobalPriorityKHR hk_global_queue_priorities[] = {
1368    VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
1369    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
1370    VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
1371    VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
1372 };
1373 
1374 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1375 hk_GetPhysicalDeviceQueueFamilyProperties2(
1376    VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
1377    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1378 {
1379    VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1380    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1381                           pQueueFamilyPropertyCount);
1382 
1383    for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1384       const struct hk_queue_family *queue_family = &pdev->queue_families[i];
1385 
1386       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1387       {
1388          p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1389          p->queueFamilyProperties.queueCount = queue_family->queue_count;
1390          p->queueFamilyProperties.timestampValidBits =
1391             agx_supports_timestamps(&pdev->dev) ? 64 : 0;
1392          p->queueFamilyProperties.minImageTransferGranularity =
1393             (VkExtent3D){1, 1, 1};
1394 
1395          VkQueueFamilyGlobalPriorityPropertiesKHR *prio = vk_find_struct(
1396             p->pNext, QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR);
1397          if (prio) {
1398             STATIC_ASSERT(ARRAY_SIZE(hk_global_queue_priorities) <=
1399                           VK_MAX_GLOBAL_PRIORITY_SIZE_KHR);
1400             prio->priorityCount = ARRAY_SIZE(hk_global_queue_priorities);
1401             memcpy(&prio->priorities, hk_global_queue_priorities,
1402                    sizeof(hk_global_queue_priorities));
1403          }
1404       }
1405    }
1406 }
1407 
1408 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1409 hk_GetPhysicalDeviceMultisamplePropertiesEXT(
1410    VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
1411    VkMultisamplePropertiesEXT *pMultisampleProperties)
1412 {
1413    VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1414 
1415    if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1416       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1417    } else {
1418       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1419    }
1420 }
1421