1 /*
2 * Copyright © 2021 Collabora Ltd.
3 *
4 * Derived from tu_device.c which is:
5 * Copyright © 2016 Red Hat.
6 * Copyright © 2016 Bas Nieuwenhuizen
7 * Copyright © 2015 Intel Corporation
8 *
9 * SPDX-License-Identifier: MIT
10 */
11
12 #include <sys/stat.h>
13 #include <sys/sysinfo.h>
14 #include <sys/sysmacros.h>
15
16 #include "util/disk_cache.h"
17 #include "git_sha1.h"
18
19 #include "vk_device.h"
20 #include "vk_drm_syncobj.h"
21 #include "vk_format.h"
22 #include "vk_limits.h"
23 #include "vk_log.h"
24 #include "vk_shader_module.h"
25 #include "vk_util.h"
26
27 #include "panvk_device.h"
28 #include "panvk_entrypoints.h"
29 #include "panvk_instance.h"
30 #include "panvk_physical_device.h"
31 #include "panvk_wsi.h"
32
33 #include "pan_format.h"
34 #include "pan_props.h"
35
36 #include "genxml/gen_macros.h"
37
38 #define ARM_VENDOR_ID 0x13b5
39 #define MAX_PUSH_DESCRIPTORS 32
40 /* We reserve one UBO for push constants, one for sysvals and one per set
41  * for the descriptor metadata. */
42 #define RESERVED_UBO_COUNT 6
43 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS (32 - RESERVED_UBO_COUNT)
44 #define MAX_INLINE_UNIFORM_BLOCK_SIZE (1 << 16)
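/* The reserved count presumably breaks down as one UBO for push constants,
 * one for sysvals and one per descriptor set (up to four sets on Bifrost),
 * i.e. 1 + 1 + 4 = 6.
 */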
45
46 static VkResult
47 create_kmod_dev(struct panvk_physical_device *device,
48 const struct panvk_instance *instance, drmDevicePtr drm_device)
49 {
50 const char *path = drm_device->nodes[DRM_NODE_RENDER];
51 drmVersionPtr version;
52 int fd;
53
54 fd = open(path, O_RDWR | O_CLOEXEC);
55 if (fd < 0) {
56 return panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
57 "failed to open device %s", path);
58 }
59
60 version = drmGetVersion(fd);
61 if (!version) {
62 close(fd);
63 return panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
64 "failed to query kernel driver version for device %s",
65 path);
66 }
67
68 if (strcmp(version->name, "panfrost") && strcmp(version->name, "panthor")) {
69 drmFreeVersion(version);
70 close(fd);
71 return VK_ERROR_INCOMPATIBLE_DRIVER;
72 }
73
74 drmFreeVersion(version);
75
76 if (instance->debug_flags & PANVK_DEBUG_STARTUP)
77 vk_logi(VK_LOG_NO_OBJS(instance), "Found compatible device '%s'.", path);
78
79 device->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD,
80 &instance->kmod.allocator);
81
82 if (!device->kmod.dev) {
83 close(fd);
84 return panvk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
85 "cannot create device");
86 }
87
88 return VK_SUCCESS;
89 }
90
91 static VkResult
92 get_drm_device_ids(struct panvk_physical_device *device,
93 const struct panvk_instance *instance,
94 drmDevicePtr drm_device)
95 {
96 struct stat st;
97
98 if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
99 return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
100 "failed to query render node stat");
101 }
102
103 device->drm.render_rdev = st.st_rdev;
104
105 if (drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) {
106 if (stat(drm_device->nodes[DRM_NODE_PRIMARY], &st)) {
107 return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
108 "failed to query primary node stat");
109 }
110
111 device->drm.primary_rdev = st.st_rdev;
112 }
113
114 return VK_SUCCESS;
115 }
116
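/* The pipeline cache UUID is built from the Mesa build timestamp (bytes 0-3),
 * the GPU family truncated to 16 bits (bytes 4-5) and the literal string
 * "pan" starting at byte 6; the remaining bytes stay zeroed.
 */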
117 static int
118 get_cache_uuid(uint16_t family, void *uuid)
119 {
120 uint32_t mesa_timestamp;
121 uint16_t f = family;
122
123 if (!disk_cache_get_function_timestamp(get_cache_uuid, &mesa_timestamp))
124 return -1;
125
126 memset(uuid, 0, VK_UUID_SIZE);
127 memcpy(uuid, &mesa_timestamp, 4);
128 memcpy((char *)uuid + 4, &f, 2);
129 snprintf((char *)uuid + 6, VK_UUID_SIZE - 10, "pan");
130 return 0;
131 }
132
133 static VkResult
134 get_device_sync_types(struct panvk_physical_device *device,
135 const struct panvk_instance *instance)
136 {
137 const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
138 uint32_t sync_type_count = 0;
139
140 device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd);
141 if (!device->drm_syncobj_type.features) {
142 return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
143 "failed to query syncobj features");
144 }
145
146 device->sync_types[sync_type_count++] = &device->drm_syncobj_type;
147
148 if (arch >= 10) {
149 assert(device->drm_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
150 } else {
151 /* We don't support timelines in the uAPI yet and we don't want it getting
152 * suddenly turned on by vk_drm_syncobj_get_type() without us adding panvk
153 * code for it first.
154 */
155 device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
156
157 /* vk_sync_timeline requires VK_SYNC_FEATURE_GPU_MULTI_WAIT. Panfrost
158 * waits on the underlying dma-fences and supports the feature.
159 */
160 device->drm_syncobj_type.features |= VK_SYNC_FEATURE_GPU_MULTI_WAIT;
161
162 device->sync_timeline_type =
163 vk_sync_timeline_get_type(&device->drm_syncobj_type);
164 device->sync_types[sync_type_count++] = &device->sync_timeline_type.sync;
165 }
166
167 assert(sync_type_count < ARRAY_SIZE(device->sync_types));
168 device->sync_types[sync_type_count] = NULL;
169
170 return VK_SUCCESS;
171 }
172
173 static void
174 get_device_extensions(const struct panvk_physical_device *device,
175 struct vk_device_extension_table *ext)
176 {
177 const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
178
179 *ext = (struct vk_device_extension_table){
180 .KHR_8bit_storage = true,
181 .KHR_16bit_storage = true,
182 .KHR_bind_memory2 = true,
183 .KHR_buffer_device_address = true,
184 .KHR_copy_commands2 = true,
185 .KHR_create_renderpass2 = true,
186 .KHR_dedicated_allocation = true,
187 .KHR_descriptor_update_template = true,
188 .KHR_device_group = true,
189 .KHR_driver_properties = true,
190 .KHR_dynamic_rendering = true,
191 .KHR_external_fence = true,
192 .KHR_external_fence_fd = true,
193 .KHR_external_memory = true,
194 .KHR_external_memory_fd = true,
195 .KHR_external_semaphore = true,
196 .KHR_external_semaphore_fd = true,
197 .KHR_get_memory_requirements2 = true,
198 .KHR_global_priority = true,
199 .KHR_image_format_list = true,
200 .KHR_index_type_uint8 = true,
201 .KHR_maintenance1 = true,
202 .KHR_maintenance2 = true,
203 .KHR_maintenance3 = true,
204 .KHR_map_memory2 = true,
205 .KHR_multiview = arch >= 10,
206 .KHR_pipeline_executable_properties = true,
207 .KHR_pipeline_library = true,
208 .KHR_push_descriptor = true,
209 .KHR_relaxed_block_layout = true,
210 .KHR_sampler_mirror_clamp_to_edge = true,
211 .KHR_shader_draw_parameters = true,
212 .KHR_shader_expect_assume = true,
213 .KHR_shader_float16_int8 = true,
214 .KHR_shader_non_semantic_info = true,
215 .KHR_shader_relaxed_extended_instruction = true,
216 .KHR_shader_subgroup_rotate = true,
217 .KHR_storage_buffer_storage_class = true,
218 #ifdef PANVK_USE_WSI_PLATFORM
219 .KHR_swapchain = true,
220 #endif
221 .KHR_synchronization2 = true,
222 .KHR_timeline_semaphore = true,
223 .KHR_variable_pointers = true,
224 .KHR_vertex_attribute_divisor = true,
225 .KHR_zero_initialize_workgroup_memory = true,
226 .EXT_4444_formats = true,
227 .EXT_buffer_device_address = true,
228 .EXT_custom_border_color = true,
229 .EXT_depth_clip_enable = true,
230 .EXT_external_memory_dma_buf = true,
231 .EXT_global_priority = true,
232 .EXT_global_priority_query = true,
233 .EXT_graphics_pipeline_library = true,
234 .EXT_host_query_reset = true,
235 .EXT_image_drm_format_modifier = true,
236 .EXT_image_robustness = true,
237 .EXT_index_type_uint8 = true,
238 .EXT_physical_device_drm = true,
239 .EXT_pipeline_creation_cache_control = true,
240 .EXT_pipeline_creation_feedback = true,
241 .EXT_pipeline_robustness = true,
242 .EXT_private_data = true,
243 .EXT_provoking_vertex = true,
244 .EXT_queue_family_foreign = true,
245 .EXT_sampler_filter_minmax = arch >= 10,
246 .EXT_scalar_block_layout = true,
247 .EXT_shader_module_identifier = true,
248 .EXT_subgroup_size_control = arch >= 10, /* requires vk1.1 */
249 .EXT_tooling_info = true,
250 .GOOGLE_decorate_string = true,
251 .GOOGLE_hlsl_functionality1 = true,
252 .GOOGLE_user_type = true,
253 };
254 }
255
256 static void
257 get_features(const struct panvk_physical_device *device,
258 struct vk_features *features)
259 {
260 unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
261
262 *features = (struct vk_features){
263 /* Vulkan 1.0 */
264 .depthClamp = true,
265 .depthBiasClamp = true,
266 .robustBufferAccess = true,
267 .fullDrawIndexUint32 = true,
268 .imageCubeArray = true,
269 .independentBlend = true,
270 .sampleRateShading = true,
271 .logicOp = true,
272 .wideLines = true,
273 .largePoints = true,
274 .occlusionQueryPrecise = true,
275 .samplerAnisotropy = true,
276 .textureCompressionETC2 = true,
277 .textureCompressionASTC_LDR = true,
278 .fragmentStoresAndAtomics = arch >= 10,
279 .shaderUniformBufferArrayDynamicIndexing = true,
280 .shaderSampledImageArrayDynamicIndexing = true,
281 .shaderStorageBufferArrayDynamicIndexing = true,
282 .shaderStorageImageArrayDynamicIndexing = true,
283 .shaderInt16 = true,
284 .shaderInt64 = true,
285 .drawIndirectFirstInstance = true,
286
287 /* Vulkan 1.1 */
288 .storageBuffer16BitAccess = true,
289 .uniformAndStorageBuffer16BitAccess = true,
290 .storagePushConstant16 = false,
291 .storageInputOutput16 = false,
292 .multiview = arch >= 10,
293 .multiviewGeometryShader = false,
294 .multiviewTessellationShader = false,
295 .variablePointersStorageBuffer = true,
296 .variablePointers = true,
297 .protectedMemory = false,
298 .samplerYcbcrConversion = false,
299 .shaderDrawParameters = true,
300
301 /* Vulkan 1.2 */
302 .samplerMirrorClampToEdge = true,
303 .drawIndirectCount = false,
304 .storageBuffer8BitAccess = true,
305 .uniformAndStorageBuffer8BitAccess = false,
306 .storagePushConstant8 = false,
307 .shaderBufferInt64Atomics = false,
308 .shaderSharedInt64Atomics = false,
309 .shaderFloat16 = false,
310 .shaderInt8 = true,
311
312 .descriptorIndexing = false,
313 .shaderInputAttachmentArrayDynamicIndexing = false,
314 .shaderUniformTexelBufferArrayDynamicIndexing = false,
315 .shaderStorageTexelBufferArrayDynamicIndexing = false,
316 .shaderUniformBufferArrayNonUniformIndexing = false,
317 .shaderSampledImageArrayNonUniformIndexing = false,
318 .shaderStorageBufferArrayNonUniformIndexing = false,
319 .shaderStorageImageArrayNonUniformIndexing = false,
320 .shaderInputAttachmentArrayNonUniformIndexing = false,
321 .shaderUniformTexelBufferArrayNonUniformIndexing = false,
322 .shaderStorageTexelBufferArrayNonUniformIndexing = false,
323 .descriptorBindingUniformBufferUpdateAfterBind = false,
324 .descriptorBindingSampledImageUpdateAfterBind = false,
325 .descriptorBindingStorageImageUpdateAfterBind = false,
326 .descriptorBindingStorageBufferUpdateAfterBind = false,
327 .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
328 .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
329 .descriptorBindingUpdateUnusedWhilePending = false,
330 .descriptorBindingPartiallyBound = false,
331 .descriptorBindingVariableDescriptorCount = false,
332 .runtimeDescriptorArray = false,
333
334 .samplerFilterMinmax = arch >= 10,
335 .scalarBlockLayout = true,
336 .imagelessFramebuffer = false,
337 .uniformBufferStandardLayout = false,
338 .shaderSubgroupExtendedTypes = false,
339 .separateDepthStencilLayouts = false,
340 .hostQueryReset = true,
341 .timelineSemaphore = true,
342 .bufferDeviceAddress = true,
343 .bufferDeviceAddressCaptureReplay = false,
344 .bufferDeviceAddressMultiDevice = false,
345 .vulkanMemoryModel = false,
346 .vulkanMemoryModelDeviceScope = false,
347 .vulkanMemoryModelAvailabilityVisibilityChains = false,
348 .shaderOutputViewportIndex = false,
349 .shaderOutputLayer = false,
350 .subgroupBroadcastDynamicId = true,
351
352 /* Vulkan 1.3 */
353 .robustImageAccess = true,
354 .inlineUniformBlock = false,
355 .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
356 .pipelineCreationCacheControl = true,
357 .privateData = true,
358 .shaderDemoteToHelperInvocation = false,
359 .shaderTerminateInvocation = false,
360 .subgroupSizeControl = true,
361 .computeFullSubgroups = true,
362 .synchronization2 = true,
363 .textureCompressionASTC_HDR = false,
364 .shaderZeroInitializeWorkgroupMemory = true,
365 .dynamicRendering = true,
366 .shaderIntegerDotProduct = false,
367 .maintenance4 = false,
368
369 /* Vulkan 1.4 */
370 .shaderSubgroupRotate = true,
371 .shaderSubgroupRotateClustered = true,
372
373 /* VK_EXT_graphics_pipeline_library */
374 .graphicsPipelineLibrary = true,
375
376 /* VK_KHR_global_priority */
377 .globalPriorityQuery = true,
378
379 /* VK_KHR_index_type_uint8 */
380 .indexTypeUint8 = true,
381
382 /* VK_KHR_vertex_attribute_divisor */
383 .vertexAttributeInstanceRateDivisor = true,
384 .vertexAttributeInstanceRateZeroDivisor = true,
385
386 /* VK_EXT_depth_clip_enable */
387 .depthClipEnable = true,
388
389 /* VK_EXT_4444_formats */
390 .formatA4R4G4B4 = true,
391 .formatA4B4G4R4 = true,
392
393 /* VK_EXT_custom_border_color */
394 .customBorderColors = true,
395
396 /* VK_EXT_provoking_vertex */
397 .provokingVertexLast = true,
398 .transformFeedbackPreservesProvokingVertex = false,
399
400 /* v7 doesn't support AFBC(BGR). We need to tweak the texture swizzle to
401 * make it work, which forces us to apply the same swizzle on the border
402 * color, meaning we need to know the format when preparing the border
403 * color.
404 */
405 .customBorderColorWithoutFormat = arch != 7,
406
407 /* VK_KHR_pipeline_executable_properties */
408 .pipelineExecutableInfo = true,
409
410 /* VK_EXT_pipeline_robustness */
411 .pipelineRobustness = true,
412
413 /* VK_KHR_shader_relaxed_extended_instruction */
414 .shaderRelaxedExtendedInstruction = true,
415
416 /* VK_KHR_shader_expect_assume */
417 .shaderExpectAssume = true,
418
419 /* VK_EXT_shader_module_identifier */
420 .shaderModuleIdentifier = true,
421 };
422 }
423
424 static uint32_t
425 get_vk_version(unsigned arch)
426 {
427 const uint32_t version_override = vk_get_version_override();
428 if (version_override)
429 return version_override;
430
431 if (arch >= 10)
432 return VK_MAKE_API_VERSION(0, 1, 1, VK_HEADER_VERSION);
433
434 return VK_MAKE_API_VERSION(0, 1, 0, VK_HEADER_VERSION);
435 }
436
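/* Only v10 reports a non-zero conformance version; every other architecture
 * returns 0.0.0.0, i.e. no conformance is claimed for it.
 */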
437 static VkConformanceVersion
438 get_conformance_version(unsigned arch)
439 {
440 if (arch == 10)
441 return (VkConformanceVersion){1, 4, 1, 2};
442
443 return (VkConformanceVersion){0, 0, 0, 0};
444 }
445
446 static void
447 get_device_properties(const struct panvk_instance *instance,
448 const struct panvk_physical_device *device,
449 struct vk_properties *properties)
450 {
451 /* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
452 VkSampleCountFlags sample_counts =
453 VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
454
455 uint64_t os_page_size = 4096;
456 os_get_page_size(&os_page_size);
457
458 unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
459
460    /* Ensure that the max thread count per workgroup is valid for Bifrost */
461 assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024);
462
463 *properties = (struct vk_properties){
464 .apiVersion = get_vk_version(arch),
465 .driverVersion = vk_get_driver_version(),
466 .vendorID = ARM_VENDOR_ID,
467
468 /* Collect arch_major, arch_minor, arch_rev and product_major,
469 * as done by the Arm driver.
470 */
471 .deviceID = device->kmod.props.gpu_prod_id << 16,
472 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
473
474 /* Vulkan 1.0 limits */
475 /* Maximum texture dimension is 2^16. */
476 .maxImageDimension1D = (1 << 16),
477 .maxImageDimension2D = (1 << 16),
478 .maxImageDimension3D = (1 << 16),
479 .maxImageDimensionCube = (1 << 16),
480 .maxImageArrayLayers = (1 << 16),
481 /* Currently limited by the 1D texture size, which is 2^16.
482 * TODO: If we expose buffer views as 2D textures, we can increase the
483 * limit.
484 */
485 .maxTexelBufferElements = (1 << 16),
486       /* Each uniform entry is 16 bytes and the number of entries is encoded in a
487 * 12-bit field, with the minus(1) modifier, which gives 2^20.
488 */
489 .maxUniformBufferRange = 1 << 20,
490 /* Storage buffer access is lowered to globals, so there's no limit here,
491 * except for the SW-descriptor we use to encode storage buffer
492 * descriptors, where the size is a 32-bit field.
493 */
494 .maxStorageBufferRange = UINT32_MAX,
495 /* 128 bytes of push constants, so we're aligned with the minimum Vulkan
496 * requirements.
497 */
498 .maxPushConstantsSize = 128,
499 /* On our kernel drivers we're limited by the available memory rather
500 * than available allocations. This is better expressed through memory
501 * properties and budget queries, and by returning
502 * VK_ERROR_OUT_OF_DEVICE_MEMORY when applicable, rather than
503 * this limit.
504 */
505 .maxMemoryAllocationCount = UINT32_MAX,
506 /* On Mali, VkSampler objects do not use any resources other than host
507 * memory and host address space, availability of which can change
508 * significantly over time.
509 */
510 .maxSamplerAllocationCount = UINT32_MAX,
511 /* A cache line. */
512 .bufferImageGranularity = 64,
513 /* Sparse binding not supported yet. */
514 .sparseAddressSpaceSize = 0,
515 /* On Bifrost, this is a software limit. We pick the minimum required by
516 * Vulkan, because Bifrost GPUs don't have unified descriptor tables,
517        * which forces us to aggregate all descriptors from all sets and dispatch
518        * them to per-type descriptor tables emitted at draw/dispatch time. The
519        * more sets we support, the more copies we are likely to have to do at
520 * draw time.
521 *
522 * Valhall has native support for descriptor sets, and allows a maximum
523 * of 16 sets, but we reserve one for our internal use, so we have 15
524 * left.
525 */
526 .maxBoundDescriptorSets = arch <= 7 ? 4 : 15,
527 /* MALI_RENDERER_STATE::sampler_count is 16-bit. */
528 .maxDescriptorSetSamplers = UINT16_MAX,
529 /* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
530 * for our internal UBOs.
531 */
532 .maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32,
533 .maxDescriptorSetUniformBuffers = UINT8_MAX - 32,
534 /* SSBOs are limited by the size of a uniform buffer which contains our
535 * panvk_ssbo_desc objects.
536        * panvk_ssbo_desc is 16 bytes, and each uniform entry in the Mali UBO is
537        * 16 bytes too. The number of entries is encoded in a 12-bit field, with
538 * a minus(1) modifier, which gives a maximum of 2^12 SSBO
539 * descriptors.
540 */
541 .maxDescriptorSetStorageBuffers = 1 << 12,
542 /* MALI_RENDERER_STATE::sampler_count is 16-bit. */
543 .maxDescriptorSetSampledImages = UINT16_MAX,
544 /* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
545 * MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
546 */
547 .maxDescriptorSetStorageImages = 1 << 8,
548 /* A maximum of 8 color render targets, and one depth-stencil render
549 * target.
550 */
551 .maxDescriptorSetInputAttachments = 9,
552
553 /* We could theoretically use the maxDescriptor values here (except for
554 * UBOs where we're really limited to 256 on the shader side), but on
555 * Bifrost we have to copy some tables around, which comes at an extra
556 * memory/processing cost, so let's pick something smaller.
557 */
558 .maxPerStageDescriptorInputAttachments = 9,
559 .maxPerStageDescriptorSampledImages = 256,
560 .maxPerStageDescriptorSamplers = 128,
561 .maxPerStageDescriptorStorageBuffers = 64,
562 .maxPerStageDescriptorStorageImages = 32,
563 .maxPerStageDescriptorUniformBuffers = 64,
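      /* Sum of the per-stage limits listed above. */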
564 .maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64,
565
566 /* Software limits to keep VkCommandBuffer tracking sane. */
567 .maxDescriptorSetUniformBuffersDynamic = 16,
568 .maxDescriptorSetStorageBuffersDynamic = 8,
569 /* Software limit to keep VkCommandBuffer tracking sane. The HW supports
570 * up to 2^9 vertex attributes.
571 */
572 .maxVertexInputAttributes = 16,
573 .maxVertexInputBindings = 16,
574 /* MALI_ATTRIBUTE::offset is 32-bit. */
575 .maxVertexInputAttributeOffset = UINT32_MAX,
576 /* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */
577 .maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE,
578 /* 32 vec4 varyings. */
579 .maxVertexOutputComponents = 128,
580       /* Tessellation shaders not supported. */
581 .maxTessellationGenerationLevel = 0,
582 .maxTessellationPatchSize = 0,
583 .maxTessellationControlPerVertexInputComponents = 0,
584 .maxTessellationControlPerVertexOutputComponents = 0,
585 .maxTessellationControlPerPatchOutputComponents = 0,
586 .maxTessellationControlTotalOutputComponents = 0,
587 .maxTessellationEvaluationInputComponents = 0,
588 .maxTessellationEvaluationOutputComponents = 0,
589 /* Geometry shaders not supported. */
590 .maxGeometryShaderInvocations = 0,
591 .maxGeometryInputComponents = 0,
592 .maxGeometryOutputComponents = 0,
593 .maxGeometryOutputVertices = 0,
594 .maxGeometryTotalOutputComponents = 0,
595 /* 32 vec4 varyings. */
596 .maxFragmentInputComponents = 128,
597 /* 8 render targets. */
598 .maxFragmentOutputAttachments = 8,
599 /* We don't support dual source blending yet. */
600 .maxFragmentDualSrcAttachments = 0,
601 /* 8 render targets, 2^12 storage buffers and 2^8 storage images (see
602 * above).
603 */
604 .maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8),
605 /* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to
606 * (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't
607 * really make sense to expose this amount of memory, especially since
608 * it's backed by global memory anyway.
609 */
610 .maxComputeSharedMemorySize = 32768,
611 /* Software limit to meet Vulkan 1.0 requirements. We split the
612        * dispatch into several jobs if it's too big.
613 */
614 .maxComputeWorkGroupCount = {65535, 65535, 65535},
615
616       /* We could also split into several jobs, but this has many limitations.
617 * As such we limit to the max threads per workgroup supported by the GPU.
618 */
619 .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg,
620 .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg,
621 device->kmod.props.max_threads_per_wg,
622 device->kmod.props.max_threads_per_wg},
623 /* 8-bit subpixel precision. */
624 .subPixelPrecisionBits = 8,
625 .subTexelPrecisionBits = 8,
626 .mipmapPrecisionBits = 8,
627 /* Software limit. */
628 .maxDrawIndexedIndexValue = UINT32_MAX,
629 /* Make it one for now. */
630 .maxDrawIndirectCount = 1,
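      /* The sampler LOD bias is presumably encoded as a signed 8.8 fixed-point
       * value, hence the INT16_MAX / 256 limit.
       */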
631 .maxSamplerLodBias = (float)INT16_MAX / 256.0f,
632 .maxSamplerAnisotropy = 16,
633 .maxViewports = 1,
634 /* Same as the framebuffer limit. */
635 .maxViewportDimensions = {(1 << 14), (1 << 14)},
636 /* Encoded in a 16-bit signed integer. */
637 .viewportBoundsRange = {INT16_MIN, INT16_MAX},
638 .viewportSubPixelBits = 0,
639 /* Align on a page. */
640 .minMemoryMapAlignment = os_page_size,
641 /* Some compressed texture formats require 128-byte alignment. */
642 .minTexelBufferOffsetAlignment = 64,
643 /* Always aligned on a uniform slot (vec4). */
644 .minUniformBufferOffsetAlignment = 16,
645 /* Lowered to global accesses, which happen at the 32-bit granularity. */
646 .minStorageBufferOffsetAlignment = 4,
647 /* Signed 4-bit value. */
648 .minTexelOffset = -8,
649 .maxTexelOffset = 7,
650 .minTexelGatherOffset = -8,
651 .maxTexelGatherOffset = 7,
652 .minInterpolationOffset = -0.5,
653 .maxInterpolationOffset = 0.5,
654 .subPixelInterpolationOffsetBits = 8,
655 .maxFramebufferWidth = (1 << 14),
656 .maxFramebufferHeight = (1 << 14),
657 .maxFramebufferLayers = 256,
658 .framebufferColorSampleCounts = sample_counts,
659 .framebufferDepthSampleCounts = sample_counts,
660 .framebufferStencilSampleCounts = sample_counts,
661 .framebufferNoAttachmentsSampleCounts = sample_counts,
662 .maxColorAttachments = 8,
663 .sampledImageColorSampleCounts = sample_counts,
664 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
665 .sampledImageDepthSampleCounts = sample_counts,
666 .sampledImageStencilSampleCounts = sample_counts,
667 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
668 .maxSampleMaskWords = 1,
669 .timestampComputeAndGraphics = false,
670 .timestampPeriod = 0,
671 .maxClipDistances = 0,
672 .maxCullDistances = 0,
673 .maxCombinedClipAndCullDistances = 0,
674 .discreteQueuePriorities = 2,
675 .pointSizeRange = {0.125, 4095.9375},
676 .lineWidthRange = {0.0, 7.9921875},
677 .pointSizeGranularity = (1.0 / 16.0),
678 .lineWidthGranularity = (1.0 / 128.0),
679 .strictLines = false,
680 .standardSampleLocations = true,
681 .optimalBufferCopyOffsetAlignment = 64,
682 .optimalBufferCopyRowPitchAlignment = 64,
683 .nonCoherentAtomSize = 64,
684
685 /* Vulkan 1.0 sparse properties */
686 .sparseResidencyNonResidentStrict = false,
687 .sparseResidencyAlignedMipSize = false,
688 .sparseResidencyStandard2DBlockShape = false,
689 .sparseResidencyStandard2DMultisampleBlockShape = false,
690 .sparseResidencyStandard3DBlockShape = false,
691
692 /* Vulkan 1.1 properties */
693 /* XXX: 1.1 support */
694 .subgroupSize = pan_subgroup_size(arch),
695 /* We only support VS, FS, and CS.
696 *
697        * The HW may spawn VS invocations for non-existent indices, which could
698        * be observed through subgroup ops (though the user can observe them
699        * through infinite loops anyway), so subgroup ops can't be supported in
700 * VS.
701 *
702 * In FS, voting and potentially other subgroup ops are currently broken,
703 * so we don't report support for this stage either.
704 */
705 .subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
706 .subgroupSupportedOperations =
707 VK_SUBGROUP_FEATURE_BASIC_BIT |
708 VK_SUBGROUP_FEATURE_VOTE_BIT |
709 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
710 VK_SUBGROUP_FEATURE_BALLOT_BIT |
711 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
712 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
713 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
714 VK_SUBGROUP_FEATURE_QUAD_BIT |
715 VK_SUBGROUP_FEATURE_ROTATE_BIT |
716 VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT,
717 .subgroupQuadOperationsInAllStages = false,
718 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
719 .maxMultiviewViewCount = arch >= 10 ? 8 : 0,
720 .maxMultiviewInstanceIndex = arch >= 10 ? UINT32_MAX : 0,
721 .protectedNoFault = false,
722 .maxPerSetDescriptors = UINT16_MAX,
723 /* Our buffer size fields allow only this much */
724 .maxMemoryAllocationSize = UINT32_MAX,
725
726 /* Vulkan 1.2 properties */
727 /* XXX: 1.2 support */
728 /* XXX: VK_KHR_depth_stencil_resolve */
729 .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
730 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
731 .independentResolveNone = true,
732 .independentResolve = true,
733 /* VK_KHR_driver_properties */
734 .driverID = VK_DRIVER_ID_MESA_PANVK,
735 .conformanceVersion = get_conformance_version(arch),
736 /* XXX: VK_KHR_shader_float_controls */
737 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
738 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
739 .shaderSignedZeroInfNanPreserveFloat16 = true,
740 .shaderSignedZeroInfNanPreserveFloat32 = true,
741 .shaderSignedZeroInfNanPreserveFloat64 = false,
742 .shaderDenormPreserveFloat16 = true,
743 .shaderDenormPreserveFloat32 = true,
744 .shaderDenormPreserveFloat64 = false,
745 .shaderDenormFlushToZeroFloat16 = true,
746 .shaderDenormFlushToZeroFloat32 = true,
747 .shaderDenormFlushToZeroFloat64 = false,
748 .shaderRoundingModeRTEFloat16 = true,
749 .shaderRoundingModeRTEFloat32 = true,
750 .shaderRoundingModeRTEFloat64 = false,
751 .shaderRoundingModeRTZFloat16 = true,
752 .shaderRoundingModeRTZFloat32 = true,
753 .shaderRoundingModeRTZFloat64 = false,
754 /* XXX: VK_EXT_descriptor_indexing */
755 .maxUpdateAfterBindDescriptorsInAllPools = 0,
756 .shaderUniformBufferArrayNonUniformIndexingNative = false,
757 .shaderSampledImageArrayNonUniformIndexingNative = false,
758 .shaderStorageBufferArrayNonUniformIndexingNative = false,
759 .shaderStorageImageArrayNonUniformIndexingNative = false,
760 .shaderInputAttachmentArrayNonUniformIndexingNative = false,
761 .robustBufferAccessUpdateAfterBind = false,
762 .quadDivergentImplicitLod = false,
763 .maxPerStageDescriptorUpdateAfterBindSamplers = 0,
764 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0,
765 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0,
766 .maxPerStageDescriptorUpdateAfterBindSampledImages = 0,
767 .maxPerStageDescriptorUpdateAfterBindStorageImages = 0,
768 .maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
769 .maxPerStageUpdateAfterBindResources = 0,
770 .maxDescriptorSetUpdateAfterBindSamplers = 0,
771 .maxDescriptorSetUpdateAfterBindUniformBuffers = 0,
772 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0,
773 .maxDescriptorSetUpdateAfterBindStorageBuffers = 0,
774 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0,
775 .maxDescriptorSetUpdateAfterBindSampledImages = 0,
776 .maxDescriptorSetUpdateAfterBindStorageImages = 0,
777 .maxDescriptorSetUpdateAfterBindInputAttachments = 0,
778 .filterMinmaxSingleComponentFormats = arch >= 10,
779 .filterMinmaxImageComponentMapping = arch >= 10,
780 .maxTimelineSemaphoreValueDifference = INT64_MAX,
781 .framebufferIntegerColorSampleCounts = sample_counts,
782
783 /* Vulkan 1.3 properties */
784 /* XXX: 1.3 support */
785 /* XXX: VK_EXT_subgroup_size_control */
786 .minSubgroupSize = pan_subgroup_size(arch),
787 .maxSubgroupSize = pan_subgroup_size(arch),
788 .maxComputeWorkgroupSubgroups =
789 device->kmod.props.max_threads_per_wg / pan_subgroup_size(arch),
790 .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT,
791 /* XXX: VK_EXT_inline_uniform_block */
792 .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
793 .maxPerStageDescriptorInlineUniformBlocks =
794 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
795 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
796 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
797 .maxDescriptorSetInlineUniformBlocks =
798 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
799 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
800 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
801 .maxInlineUniformTotalSize =
802 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
803 /* XXX: VK_KHR_shader_integer_dot_product */
804 .integerDotProduct8BitUnsignedAccelerated = true,
805 .integerDotProduct8BitSignedAccelerated = true,
806 .integerDotProduct4x8BitPackedUnsignedAccelerated = true,
807 .integerDotProduct4x8BitPackedSignedAccelerated = true,
808 /* XXX: VK_EXT_texel_buffer_alignment */
809 .storageTexelBufferOffsetAlignmentBytes = 64,
810 .storageTexelBufferOffsetSingleTexelAlignment = false,
811 .uniformTexelBufferOffsetAlignmentBytes = 4,
812 .uniformTexelBufferOffsetSingleTexelAlignment = true,
813 /* XXX: VK_KHR_maintenance4 */
814 .maxBufferSize = 1 << 30,
815
816 /* VK_EXT_custom_border_color */
817 .maxCustomBorderColorSamplers = 32768,
818
819 /* VK_EXT_graphics_pipeline_library */
820 .graphicsPipelineLibraryFastLinking = true,
821 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
822
823 /* VK_EXT_pipeline_robustness */
824 .defaultRobustnessStorageBuffers =
825 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT,
826 .defaultRobustnessUniformBuffers =
827 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT,
828 .defaultRobustnessVertexInputs =
829 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT,
830 .defaultRobustnessImages =
831 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT,
832
833 /* VK_EXT_provoking_vertex */
834 .provokingVertexModePerPipeline = false,
835 .transformFeedbackPreservesTriangleFanProvokingVertex = false,
836
837 /* VK_KHR_vertex_attribute_divisor */
838 /* We will have to restrict this a bit for multiview */
839 .maxVertexAttribDivisor = UINT32_MAX,
840 .supportsNonZeroFirstInstance = false,
841
842 /* VK_KHR_push_descriptor */
843 .maxPushDescriptors = MAX_PUSH_DESCRIPTORS,
844 };
845
846 snprintf(properties->deviceName, sizeof(properties->deviceName), "%s",
847 device->name);
848
849 memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE);
850
851 const struct {
852 uint16_t vendor_id;
853 uint32_t device_id;
854 uint8_t pad[8];
855 } dev_uuid = {
856 .vendor_id = ARM_VENDOR_ID,
857 .device_id = device->model->gpu_id,
858 };
859
860 STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
861 memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
862 STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
863 memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
864
865 snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk");
866 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
867 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
868
869 /* VK_EXT_physical_device_drm */
870 if (device->drm.primary_rdev) {
871 properties->drmHasPrimary = true;
872 properties->drmPrimaryMajor = major(device->drm.primary_rdev);
873 properties->drmPrimaryMinor = minor(device->drm.primary_rdev);
874 }
875 if (device->drm.render_rdev) {
876 properties->drmHasRender = true;
877 properties->drmRenderMajor = major(device->drm.render_rdev);
878 properties->drmRenderMinor = minor(device->drm.render_rdev);
879 }
880
881 /* VK_EXT_shader_module_identifier */
882 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
883 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
884 memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
885 vk_shaderModuleIdentifierAlgorithmUUID,
886 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
887 }
888
889 void
890 panvk_physical_device_finish(struct panvk_physical_device *device)
891 {
892 panvk_wsi_finish(device);
893
894 pan_kmod_dev_destroy(device->kmod.dev);
895
896 vk_physical_device_finish(&device->vk);
897 }
898
899 VkResult
900 panvk_physical_device_init(struct panvk_physical_device *device,
901 struct panvk_instance *instance,
902 drmDevicePtr drm_device)
903 {
904 VkResult result;
905
906 result = create_kmod_dev(device, instance, drm_device);
907 if (result != VK_SUCCESS)
908 return result;
909
910 pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);
911
912 device->model = panfrost_get_model(device->kmod.props.gpu_prod_id,
913 device->kmod.props.gpu_variant);
914
915 unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
916
917 if (!device->model) {
918 result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
919 "Unknown gpu_id (%#x) or variant (%#x)",
920 device->kmod.props.gpu_prod_id,
921 device->kmod.props.gpu_variant);
922 goto fail;
923 }
924
925 switch (arch) {
926 case 6:
927 case 7:
928 if (!getenv("PAN_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
929 result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
930 "WARNING: panvk is not well-tested on v%d, "
931 "pass PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
932 "if you know what you're doing.", arch);
933 goto fail;
934 }
935 break;
936
937 case 10:
938 break;
939
940 default:
941 result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
942 "%s not supported", device->model->name);
943 goto fail;
944 }
945
946 result = get_drm_device_ids(device, instance, drm_device);
947 if (result != VK_SUCCESS)
948 goto fail;
949
950 device->formats.all = panfrost_format_table(arch);
951 device->formats.blendable = panfrost_blendable_format_table(arch);
952
953 memset(device->name, 0, sizeof(device->name));
954 sprintf(device->name, "%s", device->model->name);
955
956 if (get_cache_uuid(device->kmod.props.gpu_prod_id, device->cache_uuid)) {
957 result = panvk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
958 "cannot generate UUID");
959 goto fail;
960 }
961
962 result = get_device_sync_types(device, instance);
963 if (result != VK_SUCCESS)
964 goto fail;
965
966 vk_warn_non_conformant_implementation("panvk");
967
968 struct vk_device_extension_table supported_extensions;
969 get_device_extensions(device, &supported_extensions);
970
971 struct vk_features supported_features;
972 get_features(device, &supported_features);
973
974 struct vk_properties properties;
975 get_device_properties(instance, device, &properties);
976
977 struct vk_physical_device_dispatch_table dispatch_table;
978 vk_physical_device_dispatch_table_from_entrypoints(
979 &dispatch_table, &panvk_physical_device_entrypoints, true);
980 vk_physical_device_dispatch_table_from_entrypoints(
981 &dispatch_table, &wsi_physical_device_entrypoints, false);
982
983 result = vk_physical_device_init(&device->vk, &instance->vk,
984 &supported_extensions, &supported_features,
985 &properties, &dispatch_table);
986
987 if (result != VK_SUCCESS)
988 goto fail;
989
990 device->vk.supported_sync_types = device->sync_types;
991
992 result = panvk_wsi_init(device);
993 if (result != VK_SUCCESS)
994 goto fail;
995
996 return VK_SUCCESS;
997
998 fail:
999 if (device->vk.instance)
1000 vk_physical_device_finish(&device->vk);
1001
1002 pan_kmod_dev_destroy(device->kmod.dev);
1003
1004 return result;
1005 }
1006
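/* A single queue family exposing one graphics+compute+transfer queue.
 * timestampValidBits is zero because timestamp queries aren't supported.
 */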
1007 static const VkQueueFamilyProperties panvk_queue_family_properties = {
1008 .queueFlags =
1009 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1010 .queueCount = 1,
1011 .timestampValidBits = 0,
1012 .minImageTransferGranularity = {1, 1, 1},
1013 };
1014
1015 static void
1016 panvk_fill_global_priority(const struct panvk_physical_device *physical_device,
1017 VkQueueFamilyGlobalPriorityPropertiesKHR *prio)
1018 {
1019 enum pan_kmod_group_allow_priority_flags prio_mask =
1020 physical_device->kmod.props.allowed_group_priorities_mask;
1021 uint32_t prio_idx = 0;
1022
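   /* Fill the priorities from low to realtime, so the list ends up in the
    * ascending order the spec expects.
    */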
1023 if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW)
1024 prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
1025
1026 if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM)
1027 prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1028
1029 if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH)
1030 prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1031
1032 if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME)
1033 prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR;
1034
1035 prio->priorityCount = prio_idx;
1036 }
1037
1038 VKAPI_ATTR void VKAPI_CALL
1039 panvk_GetPhysicalDeviceQueueFamilyProperties2(
1040 VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
1041 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1042 {
1043 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1044 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1045 pQueueFamilyPropertyCount);
1046
1047 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1048 {
1049 p->queueFamilyProperties = panvk_queue_family_properties;
1050
1051 VkQueueFamilyGlobalPriorityPropertiesKHR *prio =
1052 vk_find_struct(p->pNext, QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR);
1053 if (prio)
1054 panvk_fill_global_priority(physical_device, prio);
1055 }
1056 }
1057
1058 static uint64_t
1059 get_system_heap_size()
1060 {
1061 struct sysinfo info;
1062 sysinfo(&info);
1063
1064 uint64_t total_ram = (uint64_t)info.totalram * info.mem_unit;
1065
1066    /* We don't want to burn too much RAM with the GPU. If the user has 4GiB
1067 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
1068 */
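   /* For example, 8GiB of system RAM yields a 6GiB heap, while 4GiB yields
    * a 2GiB heap.
    */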
1069 uint64_t available_ram;
1070 if (total_ram <= 4ull * 1024 * 1024 * 1024)
1071 available_ram = total_ram / 2;
1072 else
1073 available_ram = total_ram * 3 / 4;
1074
1075 return available_ram;
1076 }
1077
1078 VKAPI_ATTR void VKAPI_CALL
1079 panvk_GetPhysicalDeviceMemoryProperties2(
1080 VkPhysicalDevice physicalDevice,
1081 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1082 {
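   /* Mali GPUs use system memory, so we expose a single heap sized from the
    * available RAM, with one memory type that is device-local, host-visible
    * and host-coherent.
    */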
1083 pMemoryProperties->memoryProperties = (VkPhysicalDeviceMemoryProperties){
1084 .memoryHeapCount = 1,
1085 .memoryHeaps[0].size = get_system_heap_size(),
1086 .memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1087 .memoryTypeCount = 1,
1088 .memoryTypes[0].propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1089 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1090 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1091 .memoryTypes[0].heapIndex = 0,
1092 };
1093 }
1094
1095 #define DEVICE_PER_ARCH_FUNCS(_ver) \
1096 VkResult panvk_v##_ver##_create_device( \
1097 struct panvk_physical_device *physical_device, \
1098 const VkDeviceCreateInfo *pCreateInfo, \
1099 const VkAllocationCallbacks *pAllocator, VkDevice *pDevice); \
1100 \
1101 void panvk_v##_ver##_destroy_device( \
1102 struct panvk_device *device, const VkAllocationCallbacks *pAllocator)
1103
1104 DEVICE_PER_ARCH_FUNCS(6);
1105 DEVICE_PER_ARCH_FUNCS(7);
1106 DEVICE_PER_ARCH_FUNCS(10);
1107
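/* vkCreateDevice/vkDestroyDevice dispatch to the per-architecture
 * implementations declared above (panvk_v6/v7/v10_*) based on the GPU
 * architecture reported by the kernel.
 */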
1108 VKAPI_ATTR VkResult VKAPI_CALL
1109 panvk_CreateDevice(VkPhysicalDevice physicalDevice,
1110 const VkDeviceCreateInfo *pCreateInfo,
1111 const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
1112 {
1113 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1114 unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
1115 VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1116
1117 panvk_arch_dispatch_ret(arch, create_device, result, physical_device,
1118 pCreateInfo, pAllocator, pDevice);
1119
1120 return result;
1121 }
1122
1123 VKAPI_ATTR void VKAPI_CALL
1124 panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
1125 {
1126 VK_FROM_HANDLE(panvk_device, device, _device);
1127 struct panvk_physical_device *physical_device =
1128 to_panvk_physical_device(device->vk.physical);
1129 unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
1130
1131 panvk_arch_dispatch(arch, destroy_device, device, pAllocator);
1132 }
1133
1134 static bool
1135 format_is_supported(struct panvk_physical_device *physical_device,
1136 const struct panfrost_format fmt,
1137 enum pipe_format pfmt)
1138 {
1139 /* If the format ID is zero, it's not supported. */
1140 if (!fmt.hw)
1141 return false;
1142
1143 /* Compressed formats (ID < 32) are optional. We need to check against
1144 * the supported formats reported by the GPU. */
1145 if (util_format_is_compressed(pfmt)) {
1146 uint32_t supported_compr_fmts =
1147 panfrost_query_compressed_formats(&physical_device->kmod.props);
1148
1149 if (!(BITFIELD_BIT(fmt.texfeat_bit) & supported_compr_fmts))
1150 return false;
1151 }
1152
1153 return true;
1154 }
1155
1156 static void
1157 get_format_properties(struct panvk_physical_device *physical_device,
1158 VkFormat format, VkFormatProperties *out_properties)
1159 {
1160 VkFormatFeatureFlags tex = 0, buffer = 0;
1161 enum pipe_format pfmt = vk_format_to_pipe_format(format);
1162 unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
1163
1164 if (pfmt == PIPE_FORMAT_NONE)
1165 goto end;
1166
1167 const struct panfrost_format fmt = physical_device->formats.all[pfmt];
1168
1169 if (!format_is_supported(physical_device, fmt, pfmt))
1170 goto end;
1171
1172    /* 3-byte formats are not supported by the buffer <-> image copy helpers. */
1173 if (util_format_get_blocksize(pfmt) == 3)
1174 goto end;
1175
1176 /* Reject sRGB formats (see
1177 * https://github.com/KhronosGroup/Vulkan-Docs/issues/2214).
1178 */
1179 if ((fmt.bind & PAN_BIND_VERTEX_BUFFER) && !util_format_is_srgb(pfmt))
1180 buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
1181
1182 if (fmt.bind & PAN_BIND_SAMPLER_VIEW) {
1183 tex |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
1184 VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
1185 VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
1186 VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
1187 VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
1188
1189 if (arch >= 10)
1190 tex |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
1191
1192 /* Integer formats only support nearest filtering */
1193 if (!util_format_is_scaled(pfmt) && !util_format_is_pure_integer(pfmt))
1194 tex |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
1195
1196 if (!util_format_is_depth_or_stencil(pfmt))
1197 buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
1198
1199 tex |= VK_FORMAT_FEATURE_BLIT_SRC_BIT;
1200 }
1201
1202 if (fmt.bind & PAN_BIND_RENDER_TARGET) {
1203 tex |= VK_FORMAT_FEATURE_BLIT_DST_BIT;
1204 tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
1205
1206 /* SNORM rendering isn't working yet (nir_lower_blend bugs), disable for
1207 * now.
1208 *
1209 * XXX: Enable once fixed.
1210 */
1211 if (!util_format_is_snorm(pfmt)) {
1212 tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
1213 tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
1214 }
1215
1216 if (!util_format_is_depth_and_stencil(pfmt))
1217 buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
1218 }
1219
1220 if (pfmt == PIPE_FORMAT_R32_UINT || pfmt == PIPE_FORMAT_R32_SINT) {
1221 buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
1222 tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
1223 }
1224
1225 if (fmt.bind & PAN_BIND_DEPTH_STENCIL)
1226 tex |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
1227
1228 end:
1229 out_properties->linearTilingFeatures = tex;
1230 out_properties->optimalTilingFeatures = tex;
1231 out_properties->bufferFeatures = buffer;
1232 }
1233
1234 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,VkFormat format,VkFormatProperties2 * pFormatProperties)1235 panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
1236 VkFormat format,
1237 VkFormatProperties2 *pFormatProperties)
1238 {
1239 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1240
1241 get_format_properties(physical_device, format,
1242 &pFormatProperties->formatProperties);
1243
1244 VkDrmFormatModifierPropertiesListEXT *list = vk_find_struct(
1245 pFormatProperties->pNext, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT);
1246 if (list) {
1247 VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out,
1248 list->pDrmFormatModifierProperties,
1249 &list->drmFormatModifierCount);
1250
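      /* Only the linear modifier is advertised here. */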
1251 if (pFormatProperties->formatProperties.linearTilingFeatures) {
1252 vk_outarray_append_typed(VkDrmFormatModifierPropertiesEXT, &out,
1253 mod_props)
1254 {
1255 mod_props->drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
1256 mod_props->drmFormatModifierPlaneCount = 1;
1257 mod_props->drmFormatModifierTilingFeatures =
1258 pFormatProperties->formatProperties.linearTilingFeatures;
1259 }
1260 }
1261 }
1262 }
1263
1264 static VkResult
1265 get_image_format_properties(struct panvk_physical_device *physical_device,
1266 const VkPhysicalDeviceImageFormatInfo2 *info,
1267 VkImageFormatProperties *pImageFormatProperties,
1268 VkFormatFeatureFlags *p_feature_flags)
1269 {
1270 VkFormatProperties format_props;
1271 VkFormatFeatureFlags format_feature_flags;
1272 VkExtent3D maxExtent;
1273 uint32_t maxMipLevels;
1274 uint32_t maxArraySize;
1275 VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
1276 enum pipe_format format = vk_format_to_pipe_format(info->format);
1277
1278 get_format_properties(physical_device, info->format, &format_props);
1279
1280 switch (info->tiling) {
1281 case VK_IMAGE_TILING_LINEAR:
1282 format_feature_flags = format_props.linearTilingFeatures;
1283 break;
1284 case VK_IMAGE_TILING_OPTIMAL:
1285 format_feature_flags = format_props.optimalTilingFeatures;
1286 break;
1287 case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT: {
1288 const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info =
1289 vk_find_struct_const(
1290 info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
1291 if (mod_info->drmFormatModifier != DRM_FORMAT_MOD_LINEAR)
1292 goto unsupported;
1293
1294 /* The only difference between optimal and linear is currently whether
1295 * depth/stencil attachments are allowed on depth/stencil formats.
1296 * There's no reason to allow importing depth/stencil textures, so just
1297 * disallow it and then this annoying edge case goes away.
1298 */
1299 if (util_format_is_depth_or_stencil(format))
1300 goto unsupported;
1301
1302 assert(format_props.optimalTilingFeatures ==
1303 format_props.linearTilingFeatures);
1304
1305 format_feature_flags = format_props.linearTilingFeatures;
1306 break;
1307 }
1308 default:
1309 unreachable("bad VkPhysicalDeviceImageFormatInfo2");
1310 }
1311
1312 if (format_feature_flags == 0)
1313 goto unsupported;
1314
1315 switch (info->type) {
1316 default:
1317 unreachable("bad vkimage type");
1318 case VK_IMAGE_TYPE_1D:
1319 maxExtent.width = 1 << 16;
1320 maxExtent.height = 1;
1321 maxExtent.depth = 1;
1322 maxMipLevels = 17; /* log2(maxWidth) + 1 */
1323 maxArraySize = 1 << 16;
1324 break;
1325 case VK_IMAGE_TYPE_2D:
1326 maxExtent.width = 1 << 16;
1327 maxExtent.height = 1 << 16;
1328 maxExtent.depth = 1;
1329 maxMipLevels = 17; /* log2(maxWidth) + 1 */
1330 maxArraySize = 1 << 16;
1331 break;
1332 case VK_IMAGE_TYPE_3D:
1333 maxExtent.width = 1 << 16;
1334 maxExtent.height = 1 << 16;
1335 maxExtent.depth = 1 << 16;
1336 maxMipLevels = 17; /* log2(maxWidth) + 1 */
1337 maxArraySize = 1;
1338 break;
1339 }
1340
1341 if (info->tiling == VK_IMAGE_TILING_OPTIMAL &&
1342 info->type == VK_IMAGE_TYPE_2D &&
1343 (format_feature_flags &
1344 (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
1345 VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
1346 !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
1347 !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1348 sampleCounts |= VK_SAMPLE_COUNT_4_BIT;
1349 }
1350
1351 /* From the Vulkan 1.2.199 spec:
1352 *
1353 * "VK_IMAGE_CREATE_EXTENDED_USAGE_BIT specifies that the image can be
1354 * created with usage flags that are not supported for the format the
1355 * image is created with but are supported for at least one format a
1356 * VkImageView created from the image can have."
1357 *
1358 * If VK_IMAGE_CREATE_EXTENDED_USAGE_BIT is set, views can be created with
1359 * different usage than the image so we can't always filter on usage.
1360 * There is one exception to this below for storage.
1361 */
1362 if (!(info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) {
1363 if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
1364 if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
1365 goto unsupported;
1366 }
1367 }
1368
1369 if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
1370 if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
1371 goto unsupported;
1372 }
1373 }
1374
1375 if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT ||
1376 ((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) &&
1377 !vk_format_is_depth_or_stencil(info->format))) {
1378 if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
1379 goto unsupported;
1380 }
1381 }
1382
1383 if ((info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ||
1384 ((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) &&
1385 vk_format_is_depth_or_stencil(info->format))) {
1386 if (!(format_feature_flags &
1387 VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
1388 goto unsupported;
1389 }
1390 }
1391 }
1392
1393 *pImageFormatProperties = (VkImageFormatProperties){
1394 .maxExtent = maxExtent,
1395 .maxMipLevels = maxMipLevels,
1396 .maxArrayLayers = maxArraySize,
1397 .sampleCounts = sampleCounts,
1398
1399 /* We need to limit images to 32-bit range, because the maximum
1400 * slice-stride is 32-bit wide, meaning that if we allocate an image
1401 * with the maximum width and height, we end up overflowing it.
1402 *
1403 * We get around this by simply limiting the maximum resource size.
1404 */
1405 .maxResourceSize = UINT32_MAX,
1406 };
1407
1408 if (p_feature_flags)
1409 *p_feature_flags = format_feature_flags;
1410
1411 return VK_SUCCESS;
1412 unsupported:
1413 *pImageFormatProperties = (VkImageFormatProperties){
1414 .maxExtent = {0, 0, 0},
1415 .maxMipLevels = 0,
1416 .maxArrayLayers = 0,
1417 .sampleCounts = 0,
1418 .maxResourceSize = 0,
1419 };
1420
1421 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1422 }
1423
1424 static VkResult
1425 panvk_get_external_image_format_properties(
1426 const struct panvk_physical_device *physical_device,
1427 const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
1428 VkExternalMemoryHandleTypeFlagBits handleType,
1429 VkExternalMemoryProperties *external_properties)
1430 {
1431 const VkExternalMemoryHandleTypeFlags supported_handle_types =
1432 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
1433 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
1434
1435 if (!(handleType & supported_handle_types)) {
1436 return panvk_errorf(physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
1437 "VkExternalMemoryTypeFlagBits(0x%x) unsupported",
1438 handleType);
1439 }
1440
1441 /* pan_image_layout_init requires 2D for explicit layout */
1442 if (pImageFormatInfo->type != VK_IMAGE_TYPE_2D) {
1443 return panvk_errorf(
1444 physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
1445 "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageType(%d)",
1446 handleType, pImageFormatInfo->type);
1447 }
1448
1449 /* There is no restriction on opaque fds. But for dma-bufs, we want to
1450 * make sure vkGetImageSubresourceLayout can be used to query the image
1451 * layout of an exported dma-buf. We also want to make sure
1452 * VkImageDrmFormatModifierExplicitCreateInfoEXT can be used to specify the
1453 * image layout of an imported dma-buf. These add restrictions on the
1454 * image tilings.
1455 */
1456 VkExternalMemoryFeatureFlags features = 0;
1457 if (handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1458 pImageFormatInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
1459 features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
1460 VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
1461 } else if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR) {
1462 features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT;
1463 }
1464
1465 if (!features) {
1466 return panvk_errorf(
1467 physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
1468 "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageTiling(%d)",
1469 handleType, pImageFormatInfo->tiling);
1470 }
1471
1472 *external_properties = (VkExternalMemoryProperties){
1473 .externalMemoryFeatures = features,
1474 .exportFromImportedHandleTypes = supported_handle_types,
1475 .compatibleHandleTypes = supported_handle_types,
1476 };
1477
1478 return VK_SUCCESS;
1479 }
1480
1481 VKAPI_ATTR VkResult VKAPI_CALL
1482 panvk_GetPhysicalDeviceImageFormatProperties2(
1483 VkPhysicalDevice physicalDevice,
1484 const VkPhysicalDeviceImageFormatInfo2 *base_info,
1485 VkImageFormatProperties2 *base_props)
1486 {
1487 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1488 const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
1489 const VkPhysicalDeviceImageViewImageFormatInfoEXT *image_view_info = NULL;
1490 VkExternalImageFormatProperties *external_props = NULL;
1491 VkFilterCubicImageViewImageFormatPropertiesEXT *cubic_props = NULL;
1492 VkFormatFeatureFlags format_feature_flags;
1493 VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
1494 VkResult result;
1495
1496 result = get_image_format_properties(physical_device, base_info,
1497 &base_props->imageFormatProperties,
1498 &format_feature_flags);
1499 if (result != VK_SUCCESS)
1500 return result;
1501
1502 /* Extract input structs */
1503 vk_foreach_struct_const(s, base_info->pNext) {
1504 switch (s->sType) {
1505 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
1506 external_info = (const void *)s;
1507 break;
1508 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT:
1509 image_view_info = (const void *)s;
1510 break;
1511 default:
1512 break;
1513 }
1514 }
1515
1516 /* Extract output structs */
1517 vk_foreach_struct(s, base_props->pNext) {
1518 switch (s->sType) {
1519 case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
1520 external_props = (void *)s;
1521 break;
1522 case VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT:
1523 cubic_props = (void *)s;
1524 break;
1525 case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
1526 ycbcr_props = (void *)s;
1527 break;
1528 default:
1529 break;
1530 }
1531 }
1532
1533 /* From the Vulkan 1.0.42 spec:
1534 *
1535 * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
1536 * behave as if VkPhysicalDeviceExternalImageFormatInfo was not
1537 * present and VkExternalImageFormatProperties will be ignored.
1538 */
1539 if (external_info && external_info->handleType != 0) {
1540 VkExternalImageFormatProperties fallback_external_props;
1541
1542 if (!external_props) {
1543 memset(&fallback_external_props, 0, sizeof(fallback_external_props));
1544 external_props = &fallback_external_props;
1545 }
1546
1547 result = panvk_get_external_image_format_properties(
1548 physical_device, base_info, external_info->handleType,
1549 &external_props->externalMemoryProperties);
1550 if (result != VK_SUCCESS)
1551 goto fail;
1552
1553 /* pan_image_layout_init requirements for explicit layout */
1554 base_props->imageFormatProperties.maxMipLevels = 1;
1555 base_props->imageFormatProperties.maxArrayLayers = 1;
1556 base_props->imageFormatProperties.sampleCounts = 1;
1557 }
1558
1559 if (cubic_props) {
1560       /* Note: the blob only allows cubic filtering for 2D and 2D array views.
1561        * It's likely we can enable it for 1D and CUBE, but that needs testing.
1562 */
1563 if ((image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D ||
1564 image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) &&
1565 (format_feature_flags &
1566 VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT)) {
1567 cubic_props->filterCubic = true;
1568 cubic_props->filterCubicMinmax = true;
1569 } else {
1570 cubic_props->filterCubic = false;
1571 cubic_props->filterCubicMinmax = false;
1572 }
1573 }
1574
1575 if (ycbcr_props)
1576 ycbcr_props->combinedImageSamplerDescriptorCount = 1;
1577
1578 return VK_SUCCESS;
1579
1580 fail:
1581 if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
1582 /* From the Vulkan 1.0.42 spec:
1583 *
1584 * If the combination of parameters to
1585 * vkGetPhysicalDeviceImageFormatProperties2 is not supported by
1586 * the implementation for use in vkCreateImage, then all members of
1587 * imageFormatProperties will be filled with zero.
1588 */
1589 base_props->imageFormatProperties = (VkImageFormatProperties){};
1590 }
1591
1592 return result;
1593 }
1594
1595 VKAPI_ATTR void VKAPI_CALL
1596 panvk_GetPhysicalDeviceSparseImageFormatProperties(
1597 VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type,
1598 VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling,
1599 uint32_t *pNumProperties, VkSparseImageFormatProperties *pProperties)
1600 {
1601 /* Sparse images are not yet supported. */
1602 *pNumProperties = 0;
1603 }
1604
1605 VKAPI_ATTR void VKAPI_CALL
1606 panvk_GetPhysicalDeviceSparseImageFormatProperties2(
1607 VkPhysicalDevice physicalDevice,
1608 const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
1609 uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties)
1610 {
1611 /* Sparse images are not yet supported. */
1612 *pPropertyCount = 0;
1613 }
1614
1615 VKAPI_ATTR void VKAPI_CALL
1616 panvk_GetPhysicalDeviceExternalBufferProperties(
1617 VkPhysicalDevice physicalDevice,
1618 const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
1619 VkExternalBufferProperties *pExternalBufferProperties)
1620 {
1621 const VkExternalMemoryHandleTypeFlags supported_handle_types =
1622 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
1623 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
1624
1625 /* From the Vulkan 1.3.298 spec:
1626 *
1627 * compatibleHandleTypes must include at least handleType.
1628 */
1629 VkExternalMemoryHandleTypeFlags handle_types =
1630 pExternalBufferInfo->handleType;
1631 VkExternalMemoryFeatureFlags features = 0;
1632 if (pExternalBufferInfo->handleType & supported_handle_types) {
1633 handle_types |= supported_handle_types;
1634 features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
1635 VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
1636 }
1637
1638 pExternalBufferProperties->externalMemoryProperties =
1639 (VkExternalMemoryProperties){
1640 .externalMemoryFeatures = features,
1641 .exportFromImportedHandleTypes = handle_types,
1642 .compatibleHandleTypes = handle_types,
1643 };
1644 }
1645