/* * Copyright © 2021 Collabora Ltd. * * Derived from tu_device.c which is: * Copyright © 2016 Red Hat. * Copyright © 2016 Bas Nieuwenhuizen * Copyright © 2015 Intel Corporation * * SPDX-License-Identifier: MIT */ #include #include #include #include "util/disk_cache.h" #include "git_sha1.h" #include "vk_device.h" #include "vk_drm_syncobj.h" #include "vk_format.h" #include "vk_limits.h" #include "vk_log.h" #include "vk_shader_module.h" #include "vk_util.h" #include "panvk_device.h" #include "panvk_entrypoints.h" #include "panvk_instance.h" #include "panvk_physical_device.h" #include "panvk_wsi.h" #include "pan_format.h" #include "pan_props.h" #include "genxml/gen_macros.h" #define ARM_VENDOR_ID 0x13b5 #define MAX_PUSH_DESCRIPTORS 32 /* We reserve one ubo for push constant, one for sysvals and one per-set for the * descriptor metadata */ #define RESERVED_UBO_COUNT 6 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 - RESERVED_UBO_COUNT #define MAX_INLINE_UNIFORM_BLOCK_SIZE (1 << 16) static VkResult create_kmod_dev(struct panvk_physical_device *device, const struct panvk_instance *instance, drmDevicePtr drm_device) { const char *path = drm_device->nodes[DRM_NODE_RENDER]; drmVersionPtr version; int fd; fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) { return panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "failed to open device %s", path); } version = drmGetVersion(fd); if (!version) { close(fd); return panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "failed to query kernel driver version for device %s", path); } if (strcmp(version->name, "panfrost") && strcmp(version->name, "panthor")) { drmFreeVersion(version); close(fd); return VK_ERROR_INCOMPATIBLE_DRIVER; } drmFreeVersion(version); if (instance->debug_flags & PANVK_DEBUG_STARTUP) vk_logi(VK_LOG_NO_OBJS(instance), "Found compatible device '%s'.", path); device->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD, &instance->kmod.allocator); if (!device->kmod.dev) { close(fd); return 
panvk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY, "cannot create device"); } return VK_SUCCESS; } static VkResult get_drm_device_ids(struct panvk_physical_device *device, const struct panvk_instance *instance, drmDevicePtr drm_device) { struct stat st; if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) { return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to query render node stat"); } device->drm.render_rdev = st.st_rdev; if (drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) { if (stat(drm_device->nodes[DRM_NODE_PRIMARY], &st)) { return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to query primary node stat"); } device->drm.primary_rdev = st.st_rdev; } return VK_SUCCESS; } static int get_cache_uuid(uint16_t family, void *uuid) { uint32_t mesa_timestamp; uint16_t f = family; if (!disk_cache_get_function_timestamp(get_cache_uuid, &mesa_timestamp)) return -1; memset(uuid, 0, VK_UUID_SIZE); memcpy(uuid, &mesa_timestamp, 4); memcpy((char *)uuid + 4, &f, 2); snprintf((char *)uuid + 6, VK_UUID_SIZE - 10, "pan"); return 0; } static VkResult get_device_sync_types(struct panvk_physical_device *device, const struct panvk_instance *instance) { const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id); uint32_t sync_type_count = 0; device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd); if (!device->drm_syncobj_type.features) { return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to query syncobj features"); } device->sync_types[sync_type_count++] = &device->drm_syncobj_type; if (arch >= 10) { assert(device->drm_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE); } else { /* We don't support timelines in the uAPI yet and we don't want it getting * suddenly turned on by vk_drm_syncobj_get_type() without us adding panvk * code for it first. */ device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE; /* vk_sync_timeline requires VK_SYNC_FEATURE_GPU_MULTI_WAIT. 
Panfrost
       * waits on the underlying dma-fences and supports the feature. */
      device->drm_syncobj_type.features |= VK_SYNC_FEATURE_GPU_MULTI_WAIT;

      device->sync_timeline_type =
         vk_sync_timeline_get_type(&device->drm_syncobj_type);
      device->sync_types[sync_type_count++] = &device->sync_timeline_type.sync;
   }

   assert(sync_type_count < ARRAY_SIZE(device->sync_types));
   device->sync_types[sync_type_count] = NULL;
   return VK_SUCCESS;
}

/* Fills @ext with the device-level extensions exposed for this physical
 * device. Most entries are static; a few depend on the GPU architecture. */
static void
get_device_extensions(const struct panvk_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);

   *ext = (struct vk_device_extension_table){
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_index_type_uint8 = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = arch >= 10,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef PANVK_USE_WSI_PLATFORM
      .KHR_swapchain = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_buffer_device_address = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clip_enable = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_query_reset = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_index_type_uint8 = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_robustness = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_sampler_filter_minmax = arch >= 10,
      .EXT_scalar_block_layout = true,
      .EXT_shader_module_identifier = true,
      .EXT_tooling_info = true,
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
   };
}

/* Fills @features with the supported feature bits, grouped by the core
 * version or extension that introduced them. */
static void
get_features(const struct panvk_physical_device *device,
             struct vk_features *features)
{
   unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);

   *features = (struct vk_features){
      /* Vulkan 1.0 */
      .depthClamp = true,
      .depthBiasClamp = true,
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .sampleRateShading = true,
      .logicOp = true,
      .wideLines = true,
      .largePoints = true,
      .occlusionQueryPrecise = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = true,
      .fragmentStoresAndAtomics = arch >= 10,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderInt16 = true,
      .shaderInt64 = true,
      .drawIndirectFirstInstance = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = true,
      .uniformAndStorageBuffer16BitAccess = true,
      .storagePushConstant16 = false,
      .storageInputOutput16 = false,
      .multiview = arch >= 10,
      .multiviewGeometryShader = false,
      .multiviewTessellationShader = false,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .protectedMemory = false,
      .samplerYcbcrConversion = false,
      .shaderDrawParameters = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = false,
      .storageBuffer8BitAccess = true,
      .uniformAndStorageBuffer8BitAccess = false,
      .storagePushConstant8 = false,
      .shaderBufferInt64Atomics = false,
      .shaderSharedInt64Atomics = false,
      .shaderFloat16 = false,
      .shaderInt8 = true,
      .descriptorIndexing = false,
      .shaderInputAttachmentArrayDynamicIndexing = false,
      .shaderUniformTexelBufferArrayDynamicIndexing = false,
      .shaderStorageTexelBufferArrayDynamicIndexing = false,
      .shaderUniformBufferArrayNonUniformIndexing = false,
      .shaderSampledImageArrayNonUniformIndexing = false,
      .shaderStorageBufferArrayNonUniformIndexing = false,
      .shaderStorageImageArrayNonUniformIndexing = false,
      .shaderInputAttachmentArrayNonUniformIndexing = false,
      .shaderUniformTexelBufferArrayNonUniformIndexing = false,
      .shaderStorageTexelBufferArrayNonUniformIndexing = false,
      .descriptorBindingUniformBufferUpdateAfterBind = false,
      .descriptorBindingSampledImageUpdateAfterBind = false,
      .descriptorBindingStorageImageUpdateAfterBind = false,
      .descriptorBindingStorageBufferUpdateAfterBind = false,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
      .descriptorBindingUpdateUnusedWhilePending = false,
      .descriptorBindingPartiallyBound = false,
      .descriptorBindingVariableDescriptorCount = false,
      .runtimeDescriptorArray = false,
      .samplerFilterMinmax = arch >= 10,
      .scalarBlockLayout = true,
      .imagelessFramebuffer = false,
      .uniformBufferStandardLayout = false,
      .shaderSubgroupExtendedTypes = false,
      .separateDepthStencilLayouts = false,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = false,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = false,
      .vulkanMemoryModelDeviceScope = false,
      .vulkanMemoryModelAvailabilityVisibilityChains = false,
      .shaderOutputViewportIndex = false,
      .shaderOutputLayer = false,
      .subgroupBroadcastDynamicId = false,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = false,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = false,
      .shaderTerminateInvocation = false,
      .subgroupSizeControl = false,
      .computeFullSubgroups = false,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = false,
      .maintenance4 = false,

      /* VK_EXT_graphics_pipeline_library */
      .graphicsPipelineLibrary = true,

      /* VK_KHR_global_priority */
      .globalPriorityQuery = true,

      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_KHR_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = true,

      /* VK_EXT_custom_border_color */
      .customBorderColors = true,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = false,

      /* v7 doesn't support AFBC(BGR). We need to tweak the texture swizzle to
       * make it work, which forces us to apply the same swizzle on the border
       * color, meaning we need to know the format when preparing the border
       * color.
*/ .customBorderColorWithoutFormat = arch != 7, /* VK_KHR_pipeline_executable_properties */ .pipelineExecutableInfo = true, /* VK_EXT_pipeline_robustness */ .pipelineRobustness = true, /* VK_KHR_shader_relaxed_extended_instruction */ .shaderRelaxedExtendedInstruction = true, /* VK_KHR_shader_expect_assume */ .shaderExpectAssume = true, /* VK_EXT_shader_module_identifier */ .shaderModuleIdentifier = true, }; } static uint32_t get_vk_version() { const uint32_t version_override = vk_get_version_override(); if (version_override) return version_override; return VK_MAKE_API_VERSION(0, 1, 0, VK_HEADER_VERSION); } static void get_device_properties(const struct panvk_instance *instance, const struct panvk_physical_device *device, struct vk_properties *properties) { /* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */ VkSampleCountFlags sample_counts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; uint64_t os_page_size = 4096; os_get_page_size(&os_page_size); unsigned arch = pan_arch(device->kmod.props.gpu_prod_id); /* Ensure that the max threads count per workgroup is valid for Bifrost */ assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024); *properties = (struct vk_properties){ .apiVersion = get_vk_version(), .driverVersion = vk_get_driver_version(), .vendorID = ARM_VENDOR_ID, /* Collect arch_major, arch_minor, arch_rev and product_major, * as done by the Arm driver. */ .deviceID = device->kmod.props.gpu_prod_id << 16, .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, /* Vulkan 1.0 limits */ /* Maximum texture dimension is 2^16. */ .maxImageDimension1D = (1 << 16), .maxImageDimension2D = (1 << 16), .maxImageDimension3D = (1 << 16), .maxImageDimensionCube = (1 << 16), .maxImageArrayLayers = (1 << 16), /* Currently limited by the 1D texture size, which is 2^16. * TODO: If we expose buffer views as 2D textures, we can increase the * limit. 
*/ .maxTexelBufferElements = (1 << 16), /* Each uniform entry is 16-byte and the number of entries is encoded in a * 12-bit field, with the minus(1) modifier, which gives 2^20. */ .maxUniformBufferRange = 1 << 20, /* Storage buffer access is lowered to globals, so there's no limit here, * except for the SW-descriptor we use to encode storage buffer * descriptors, where the size is a 32-bit field. */ .maxStorageBufferRange = UINT32_MAX, /* 128 bytes of push constants, so we're aligned with the minimum Vulkan * requirements. */ .maxPushConstantsSize = 128, /* On our kernel drivers we're limited by the available memory rather * than available allocations. This is better expressed through memory * properties and budget queries, and by returning * VK_ERROR_OUT_OF_DEVICE_MEMORY when applicable, rather than * this limit. */ .maxMemoryAllocationCount = UINT32_MAX, /* On Mali, VkSampler objects do not use any resources other than host * memory and host address space, availability of which can change * significantly over time. */ .maxSamplerAllocationCount = UINT32_MAX, /* A cache line. */ .bufferImageGranularity = 64, /* Sparse binding not supported yet. */ .sparseAddressSpaceSize = 0, /* On Bifrost, this is a software limit. We pick the minimum required by * Vulkan, because Bifrost GPUs don't have unified descriptor tables, * which forces us to agregatte all descriptors from all sets and dispatch * them to per-type descriptor tables emitted at draw/dispatch time. The * more sets we support the more copies we are likely to have to do at * draw time. * * Valhall has native support for descriptor sets, and allows a maximum * of 16 sets, but we reserve one for our internal use, so we have 15 * left. */ .maxBoundDescriptorSets = arch <= 7 ? 4 : 15, /* MALI_RENDERER_STATE::sampler_count is 16-bit. */ .maxDescriptorSetSamplers = UINT16_MAX, /* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots * for our internal UBOs. 
*/ .maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32, .maxDescriptorSetUniformBuffers = UINT8_MAX - 32, /* SSBOs are limited by the size of a uniform buffer which contains our * panvk_ssbo_desc objects. * panvk_ssbo_desc is 16-byte, and each uniform entry in the Mali UBO is * 16-byte too. The number of entries is encoded in a 12-bit field, with * a minus(1) modifier, which gives a maximum of 2^12 SSBO * descriptors. */ .maxDescriptorSetStorageBuffers = 1 << 12, /* MALI_RENDERER_STATE::sampler_count is 16-bit. */ .maxDescriptorSetSampledImages = UINT16_MAX, /* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two * MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images. */ .maxDescriptorSetStorageImages = 1 << 8, /* A maximum of 8 color render targets, and one depth-stencil render * target. */ .maxDescriptorSetInputAttachments = 9, /* We could theoretically use the maxDescriptor values here (except for * UBOs where we're really limited to 256 on the shader side), but on * Bifrost we have to copy some tables around, which comes at an extra * memory/processing cost, so let's pick something smaller. */ .maxPerStageDescriptorInputAttachments = 9, .maxPerStageDescriptorSampledImages = 256, .maxPerStageDescriptorSamplers = 128, .maxPerStageDescriptorStorageBuffers = 64, .maxPerStageDescriptorStorageImages = 32, .maxPerStageDescriptorUniformBuffers = 64, .maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64, /* Software limits to keep VkCommandBuffer tracking sane. */ .maxDescriptorSetUniformBuffersDynamic = 16, .maxDescriptorSetStorageBuffersDynamic = 8, /* Software limit to keep VkCommandBuffer tracking sane. The HW supports * up to 2^9 vertex attributes. */ .maxVertexInputAttributes = 16, .maxVertexInputBindings = 16, /* MALI_ATTRIBUTE::offset is 32-bit. */ .maxVertexInputAttributeOffset = UINT32_MAX, /* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */ .maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE, /* 32 vec4 varyings. 
*/ .maxVertexOutputComponents = 128, /* Tesselation shaders not supported. */ .maxTessellationGenerationLevel = 0, .maxTessellationPatchSize = 0, .maxTessellationControlPerVertexInputComponents = 0, .maxTessellationControlPerVertexOutputComponents = 0, .maxTessellationControlPerPatchOutputComponents = 0, .maxTessellationControlTotalOutputComponents = 0, .maxTessellationEvaluationInputComponents = 0, .maxTessellationEvaluationOutputComponents = 0, /* Geometry shaders not supported. */ .maxGeometryShaderInvocations = 0, .maxGeometryInputComponents = 0, .maxGeometryOutputComponents = 0, .maxGeometryOutputVertices = 0, .maxGeometryTotalOutputComponents = 0, /* 32 vec4 varyings. */ .maxFragmentInputComponents = 128, /* 8 render targets. */ .maxFragmentOutputAttachments = 8, /* We don't support dual source blending yet. */ .maxFragmentDualSrcAttachments = 0, /* 8 render targets, 2^12 storage buffers and 2^8 storage images (see * above). */ .maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8), /* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to * (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't * really make sense to expose this amount of memory, especially since * it's backed by global memory anyway. */ .maxComputeSharedMemorySize = 32768, /* Software limit to meet Vulkan 1.0 requirements. We split the * dispatch in several jobs if it's too big. */ .maxComputeWorkGroupCount = {65535, 65535, 65535}, /* We could also split into serveral jobs but this has many limitations. * As such we limit to the max threads per workgroup supported by the GPU. */ .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg, .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg, device->kmod.props.max_threads_per_wg, device->kmod.props.max_threads_per_wg}, /* 8-bit subpixel precision. */ .subPixelPrecisionBits = 8, .subTexelPrecisionBits = 8, .mipmapPrecisionBits = 8, /* Software limit. 
*/ .maxDrawIndexedIndexValue = UINT32_MAX, /* Make it one for now. */ .maxDrawIndirectCount = 1, .maxSamplerLodBias = (float)INT16_MAX / 256.0f, .maxSamplerAnisotropy = 16, .maxViewports = 1, /* Same as the framebuffer limit. */ .maxViewportDimensions = {(1 << 14), (1 << 14)}, /* Encoded in a 16-bit signed integer. */ .viewportBoundsRange = {INT16_MIN, INT16_MAX}, .viewportSubPixelBits = 0, /* Align on a page. */ .minMemoryMapAlignment = os_page_size, /* Some compressed texture formats require 128-byte alignment. */ .minTexelBufferOffsetAlignment = 64, /* Always aligned on a uniform slot (vec4). */ .minUniformBufferOffsetAlignment = 16, /* Lowered to global accesses, which happen at the 32-bit granularity. */ .minStorageBufferOffsetAlignment = 4, /* Signed 4-bit value. */ .minTexelOffset = -8, .maxTexelOffset = 7, .minTexelGatherOffset = -8, .maxTexelGatherOffset = 7, .minInterpolationOffset = -0.5, .maxInterpolationOffset = 0.5, .subPixelInterpolationOffsetBits = 8, .maxFramebufferWidth = (1 << 14), .maxFramebufferHeight = (1 << 14), .maxFramebufferLayers = 256, .framebufferColorSampleCounts = sample_counts, .framebufferDepthSampleCounts = sample_counts, .framebufferStencilSampleCounts = sample_counts, .framebufferNoAttachmentsSampleCounts = sample_counts, .maxColorAttachments = 8, .sampledImageColorSampleCounts = sample_counts, .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, .sampledImageDepthSampleCounts = sample_counts, .sampledImageStencilSampleCounts = sample_counts, .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, .timestampComputeAndGraphics = false, .timestampPeriod = 0, .maxClipDistances = 0, .maxCullDistances = 0, .maxCombinedClipAndCullDistances = 0, .discreteQueuePriorities = 2, .pointSizeRange = {0.125, 4095.9375}, .lineWidthRange = {0.0, 7.9921875}, .pointSizeGranularity = (1.0 / 16.0), .lineWidthGranularity = (1.0 / 128.0), .strictLines = false, .standardSampleLocations = true, .optimalBufferCopyOffsetAlignment = 
64, .optimalBufferCopyRowPitchAlignment = 64, .nonCoherentAtomSize = 64, /* Vulkan 1.0 sparse properties */ .sparseResidencyNonResidentStrict = false, .sparseResidencyAlignedMipSize = false, .sparseResidencyStandard2DBlockShape = false, .sparseResidencyStandard2DMultisampleBlockShape = false, .sparseResidencyStandard3DBlockShape = false, /* Vulkan 1.1 properties */ /* XXX: 1.1 support */ .subgroupSize = 8, .subgroupSupportedStages = 0, .subgroupSupportedOperations = 0, .subgroupQuadOperationsInAllStages = false, .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES, .maxMultiviewViewCount = arch >= 10 ? 8 : 0, .maxMultiviewInstanceIndex = arch >= 10 ? UINT32_MAX : 0, .protectedNoFault = false, .maxPerSetDescriptors = UINT16_MAX, /* Our buffer size fields allow only this much */ .maxMemoryAllocationSize = UINT32_MAX, /* Vulkan 1.2 properties */ /* XXX: 1.2 support */ /* XXX: VK_KHR_depth_stencil_resolve */ .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, .independentResolveNone = true, .independentResolve = true, /* VK_KHR_driver_properties */ .driverID = VK_DRIVER_ID_MESA_PANVK, .conformanceVersion = (VkConformanceVersion){0, 0, 0, 0}, /* XXX: VK_KHR_shader_float_controls */ .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, .shaderSignedZeroInfNanPreserveFloat16 = true, .shaderSignedZeroInfNanPreserveFloat32 = true, .shaderSignedZeroInfNanPreserveFloat64 = false, .shaderDenormPreserveFloat16 = true, .shaderDenormPreserveFloat32 = true, .shaderDenormPreserveFloat64 = false, .shaderDenormFlushToZeroFloat16 = true, .shaderDenormFlushToZeroFloat32 = true, .shaderDenormFlushToZeroFloat64 = false, .shaderRoundingModeRTEFloat16 = true, .shaderRoundingModeRTEFloat32 = true, .shaderRoundingModeRTEFloat64 = false, .shaderRoundingModeRTZFloat16 = true, .shaderRoundingModeRTZFloat32 = true, 
.shaderRoundingModeRTZFloat64 = false, /* XXX: VK_EXT_descriptor_indexing */ .maxUpdateAfterBindDescriptorsInAllPools = 0, .shaderUniformBufferArrayNonUniformIndexingNative = false, .shaderSampledImageArrayNonUniformIndexingNative = false, .shaderStorageBufferArrayNonUniformIndexingNative = false, .shaderStorageImageArrayNonUniformIndexingNative = false, .shaderInputAttachmentArrayNonUniformIndexingNative = false, .robustBufferAccessUpdateAfterBind = false, .quadDivergentImplicitLod = false, .maxPerStageDescriptorUpdateAfterBindSamplers = 0, .maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0, .maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0, .maxPerStageDescriptorUpdateAfterBindSampledImages = 0, .maxPerStageDescriptorUpdateAfterBindStorageImages = 0, .maxPerStageDescriptorUpdateAfterBindInputAttachments = 0, .maxPerStageUpdateAfterBindResources = 0, .maxDescriptorSetUpdateAfterBindSamplers = 0, .maxDescriptorSetUpdateAfterBindUniformBuffers = 0, .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0, .maxDescriptorSetUpdateAfterBindStorageBuffers = 0, .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0, .maxDescriptorSetUpdateAfterBindSampledImages = 0, .maxDescriptorSetUpdateAfterBindStorageImages = 0, .maxDescriptorSetUpdateAfterBindInputAttachments = 0, .filterMinmaxSingleComponentFormats = arch >= 10, .filterMinmaxImageComponentMapping = arch >= 10, .maxTimelineSemaphoreValueDifference = INT64_MAX, .framebufferIntegerColorSampleCounts = sample_counts, /* Vulkan 1.3 properties */ /* XXX: 1.3 support */ /* XXX: VK_EXT_subgroup_size_control */ .minSubgroupSize = 8, .maxSubgroupSize = 8, .maxComputeWorkgroupSubgroups = 48, .requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL, /* XXX: VK_EXT_inline_uniform_block */ .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE, .maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, .maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS, .maxInlineUniformTotalSize = MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE, /* XXX: VK_KHR_shader_integer_dot_product */ .integerDotProduct8BitUnsignedAccelerated = true, .integerDotProduct8BitSignedAccelerated = true, .integerDotProduct4x8BitPackedUnsignedAccelerated = true, .integerDotProduct4x8BitPackedSignedAccelerated = true, /* XXX: VK_EXT_texel_buffer_alignment */ .storageTexelBufferOffsetAlignmentBytes = 64, .storageTexelBufferOffsetSingleTexelAlignment = false, .uniformTexelBufferOffsetAlignmentBytes = 4, .uniformTexelBufferOffsetSingleTexelAlignment = true, /* XXX: VK_KHR_maintenance4 */ .maxBufferSize = 1 << 30, /* VK_EXT_custom_border_color */ .maxCustomBorderColorSamplers = 32768, /* VK_EXT_graphics_pipeline_library */ .graphicsPipelineLibraryFastLinking = true, .graphicsPipelineLibraryIndependentInterpolationDecoration = true, /* VK_EXT_pipeline_robustness */ .defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, .defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, .defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT, .defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT, /* VK_EXT_provoking_vertex */ .provokingVertexModePerPipeline = false, .transformFeedbackPreservesTriangleFanProvokingVertex = false, /* VK_KHR_vertex_attribute_divisor */ /* We will have to restrict this a bit for multiview */ .maxVertexAttribDivisor = UINT32_MAX, .supportsNonZeroFirstInstance = false, /* VK_KHR_push_descriptor */ .maxPushDescriptors = MAX_PUSH_DESCRIPTORS, }; snprintf(properties->deviceName, sizeof(properties->deviceName), "%s", device->name); 
memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE);

   /* deviceUUID: vendor id + device id, zero-padded up to VK_UUID_SIZE. */
   const struct {
      uint16_t vendor_id;
      uint32_t device_id;
      uint8_t pad[8];
   } dev_uuid = {
      .vendor_id = ARM_VENDOR_ID,
      .device_id = device->model->gpu_id,
   };

   STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
   memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
   STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
   memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
   snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk");
   snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   /* VK_EXT_physical_device_drm */
   if (device->drm.primary_rdev) {
      properties->drmHasPrimary = true;
      properties->drmPrimaryMajor = major(device->drm.primary_rdev);
      properties->drmPrimaryMinor = minor(device->drm.primary_rdev);
   }
   if (device->drm.render_rdev) {
      properties->drmHasRender = true;
      properties->drmRenderMajor = major(device->drm.render_rdev);
      properties->drmRenderMinor = minor(device->drm.render_rdev);
   }

   /* VK_EXT_shader_module_identifier */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
   memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
}

/* Tears down a physical device: WSI first, then the kmod device (which
 * closes the DRM fd it owns), and finally the vk base object. */
void
panvk_physical_device_finish(struct panvk_physical_device *device)
{
   panvk_wsi_finish(device);
   pan_kmod_dev_destroy(device->kmod.dev);
   vk_physical_device_finish(&device->vk);
}

/* Initializes @device from @drm_device: opens the kmod device, validates the
 * GPU architecture (v6/v7 only behind an opt-in env var, v10 supported),
 * then fills format tables, UUIDs, sync types, extension/feature/property
 * tables and finally WSI. On failure everything acquired so far is torn
 * down via the fail path. */
VkResult
panvk_physical_device_init(struct panvk_physical_device *device,
                           struct panvk_instance *instance,
                           drmDevicePtr drm_device)
{
   VkResult result;

   result = create_kmod_dev(device, instance, drm_device);
   if (result != VK_SUCCESS)
      return result;

   pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);

   device->model = panfrost_get_model(device->kmod.props.gpu_prod_id,
                                      device->kmod.props.gpu_variant);

   unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);

   switch (arch) {
   case 6:
   case 7:
      /* v6/v7 support is known-broken; require an explicit opt-in. */
      if (!getenv("PAN_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
         result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "WARNING: panvk is not well-tested on v%d, "
                               "pass PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
                               "if you know what you're doing.",
                               arch);
         goto fail;
      }
      break;
   case 10:
      break;
   default:
      result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                            "%s not supported", device->model->name);
      goto fail;
   }

   result = get_drm_device_ids(device, instance, drm_device);
   if (result != VK_SUCCESS)
      goto fail;

   device->formats.all = panfrost_format_table(arch);
   device->formats.blendable = panfrost_blendable_format_table(arch);

   memset(device->name, 0, sizeof(device->name));
   sprintf(device->name, "%s", device->model->name);

   if (get_cache_uuid(device->kmod.props.gpu_prod_id, device->cache_uuid)) {
      result = panvk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "cannot generate UUID");
      goto fail;
   }

   result = get_device_sync_types(device, instance);
   if (result != VK_SUCCESS)
      goto fail;

   vk_warn_non_conformant_implementation("panvk");

   struct vk_device_extension_table supported_extensions;
   get_device_extensions(device, &supported_extensions);

   struct vk_features supported_features;
   get_features(device, &supported_features);

   struct vk_properties properties;
   get_device_properties(instance, device, &properties);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &panvk_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    &supported_extensions, &supported_features,
                                    &properties, &dispatch_table);
   if (result != VK_SUCCESS)
      goto fail;

   device->vk.supported_sync_types = device->sync_types;

   result = panvk_wsi_init(device);
   if (result != VK_SUCCESS)
      goto fail;

   return VK_SUCCESS;

fail:
   /* vk_physical_device_finish() is only valid once the vk base object has
    * been initialized (instance back-pointer set). */
   if
(device->vk.instance) vk_physical_device_finish(&device->vk); pan_kmod_dev_destroy(device->kmod.dev); return result; } static const VkQueueFamilyProperties panvk_queue_family_properties = { .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, .queueCount = 1, .timestampValidBits = 0, .minImageTransferGranularity = {1, 1, 1}, }; static void panvk_fill_global_priority(const struct panvk_physical_device *physical_device, VkQueueFamilyGlobalPriorityPropertiesKHR *prio) { enum pan_kmod_group_allow_priority_flags prio_mask = physical_device->kmod.props.allowed_group_priorities_mask; uint32_t prio_idx = 0; if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW) prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR; if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM) prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR; if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH) prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR; if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME) prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR; prio->priorityCount = prio_idx; } VKAPI_ATTR void VKAPI_CALL panvk_GetPhysicalDeviceQueueFamilyProperties2( VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, VkQueueFamilyProperties2 *pQueueFamilyProperties) { VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties, pQueueFamilyPropertyCount); vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) { p->queueFamilyProperties = panvk_queue_family_properties; VkQueueFamilyGlobalPriorityPropertiesKHR *prio = vk_find_struct(p->pNext, QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR); if (prio) panvk_fill_global_priority(physical_device, prio); } } static uint64_t get_system_heap_size() { struct sysinfo info; sysinfo(&info); uint64_t total_ram = (uint64_t)info.totalram * info.mem_unit; /* We don't want to burn 
too much ram with the GPU. If the user has 4GiB * or less, we use at most half. If they have more than 4GiB, we use 3/4. */ uint64_t available_ram; if (total_ram <= 4ull * 1024 * 1024 * 1024) available_ram = total_ram / 2; else available_ram = total_ram * 3 / 4; return available_ram; } VKAPI_ATTR void VKAPI_CALL panvk_GetPhysicalDeviceMemoryProperties2( VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) { pMemoryProperties->memoryProperties = (VkPhysicalDeviceMemoryProperties){ .memoryHeapCount = 1, .memoryHeaps[0].size = get_system_heap_size(), .memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, .memoryTypeCount = 1, .memoryTypes[0].propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, .memoryTypes[0].heapIndex = 0, }; } #define DEVICE_PER_ARCH_FUNCS(_ver) \ VkResult panvk_v##_ver##_create_device( \ struct panvk_physical_device *physical_device, \ const VkDeviceCreateInfo *pCreateInfo, \ const VkAllocationCallbacks *pAllocator, VkDevice *pDevice); \ \ void panvk_v##_ver##_destroy_device( \ struct panvk_device *device, const VkAllocationCallbacks *pAllocator) DEVICE_PER_ARCH_FUNCS(6); DEVICE_PER_ARCH_FUNCS(7); DEVICE_PER_ARCH_FUNCS(10); VKAPI_ATTR VkResult VKAPI_CALL panvk_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDevice *pDevice) { VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id); VkResult result = VK_ERROR_INITIALIZATION_FAILED; panvk_arch_dispatch_ret(arch, create_device, result, physical_device, pCreateInfo, pAllocator, pDevice); return result; } VKAPI_ATTR void VKAPI_CALL panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) { VK_FROM_HANDLE(panvk_device, device, _device); struct panvk_physical_device *physical_device = 
to_panvk_physical_device(device->vk.physical); unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id); panvk_arch_dispatch(arch, destroy_device, device, pAllocator); } static bool format_is_supported(struct panvk_physical_device *physical_device, const struct panfrost_format fmt, enum pipe_format pfmt) { /* If the format ID is zero, it's not supported. */ if (!fmt.hw) return false; /* Compressed formats (ID < 32) are optional. We need to check against * the supported formats reported by the GPU. */ if (util_format_is_compressed(pfmt)) { uint32_t supported_compr_fmts = panfrost_query_compressed_formats(&physical_device->kmod.props); if (!(BITFIELD_BIT(fmt.texfeat_bit) & supported_compr_fmts)) return false; } return true; } static void get_format_properties(struct panvk_physical_device *physical_device, VkFormat format, VkFormatProperties *out_properties) { VkFormatFeatureFlags tex = 0, buffer = 0; enum pipe_format pfmt = vk_format_to_pipe_format(format); unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id); if (pfmt == PIPE_FORMAT_NONE) goto end; const struct panfrost_format fmt = physical_device->formats.all[pfmt]; if (!format_is_supported(physical_device, fmt, pfmt)) goto end; /* 3byte formats are not supported by the buffer <-> image copy helpers. */ if (util_format_get_blocksize(pfmt) == 3) goto end; /* Reject sRGB formats (see * https://github.com/KhronosGroup/Vulkan-Docs/issues/2214). 
*/ if ((fmt.bind & PAN_BIND_VERTEX_BUFFER) && !util_format_is_srgb(pfmt)) buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; if (fmt.bind & PAN_BIND_SAMPLER_VIEW) { tex |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT; if (arch >= 10) tex |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT; /* Integer formats only support nearest filtering */ if (!util_format_is_scaled(pfmt) && !util_format_is_pure_integer(pfmt)) tex |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; if (!util_format_is_depth_or_stencil(pfmt)) buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; tex |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; } if (fmt.bind & PAN_BIND_RENDER_TARGET) { tex |= VK_FORMAT_FEATURE_BLIT_DST_BIT; tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; /* SNORM rendering isn't working yet (nir_lower_blend bugs), disable for * now. * * XXX: Enable once fixed. 
*/ if (!util_format_is_snorm(pfmt)) { tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; } if (!util_format_is_depth_and_stencil(pfmt)) buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; } if (pfmt == PIPE_FORMAT_R32_UINT || pfmt == PIPE_FORMAT_R32_SINT) { buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; } if (fmt.bind & PAN_BIND_DEPTH_STENCIL) tex |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; end: out_properties->linearTilingFeatures = tex; out_properties->optimalTilingFeatures = tex; out_properties->bufferFeatures = buffer; } VKAPI_ATTR void VKAPI_CALL panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2 *pFormatProperties) { VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); get_format_properties(physical_device, format, &pFormatProperties->formatProperties); VkDrmFormatModifierPropertiesListEXT *list = vk_find_struct( pFormatProperties->pNext, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT); if (list && pFormatProperties->formatProperties.linearTilingFeatures) { VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out, list->pDrmFormatModifierProperties, &list->drmFormatModifierCount); vk_outarray_append_typed(VkDrmFormatModifierPropertiesEXT, &out, mod_props) { mod_props->drmFormatModifier = DRM_FORMAT_MOD_LINEAR; mod_props->drmFormatModifierPlaneCount = 1; mod_props->drmFormatModifierTilingFeatures = pFormatProperties->formatProperties.linearTilingFeatures; } } } static VkResult get_image_format_properties(struct panvk_physical_device *physical_device, const VkPhysicalDeviceImageFormatInfo2 *info, VkImageFormatProperties *pImageFormatProperties, VkFormatFeatureFlags *p_feature_flags) { VkFormatProperties format_props; VkFormatFeatureFlags format_feature_flags; VkExtent3D maxExtent; uint32_t maxMipLevels; uint32_t maxArraySize; VkSampleCountFlags sampleCounts = 
VK_SAMPLE_COUNT_1_BIT; enum pipe_format format = vk_format_to_pipe_format(info->format); get_format_properties(physical_device, info->format, &format_props); switch (info->tiling) { case VK_IMAGE_TILING_LINEAR: format_feature_flags = format_props.linearTilingFeatures; break; case VK_IMAGE_TILING_OPTIMAL: format_feature_flags = format_props.optimalTilingFeatures; break; case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT: { const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info = vk_find_struct_const( info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT); if (mod_info->drmFormatModifier != DRM_FORMAT_MOD_LINEAR) goto unsupported; /* The only difference between optimal and linear is currently whether * depth/stencil attachments are allowed on depth/stencil formats. * There's no reason to allow importing depth/stencil textures, so just * disallow it and then this annoying edge case goes away. */ if (util_format_is_depth_or_stencil(format)) goto unsupported; assert(format_props.optimalTilingFeatures == format_props.linearTilingFeatures); format_feature_flags = format_props.linearTilingFeatures; break; } default: unreachable("bad VkPhysicalDeviceImageFormatInfo2"); } if (format_feature_flags == 0) goto unsupported; switch (info->type) { default: unreachable("bad vkimage type"); case VK_IMAGE_TYPE_1D: maxExtent.width = 1 << 16; maxExtent.height = 1; maxExtent.depth = 1; maxMipLevels = 17; /* log2(maxWidth) + 1 */ maxArraySize = 1 << 16; break; case VK_IMAGE_TYPE_2D: maxExtent.width = 1 << 16; maxExtent.height = 1 << 16; maxExtent.depth = 1; maxMipLevels = 17; /* log2(maxWidth) + 1 */ maxArraySize = 1 << 16; break; case VK_IMAGE_TYPE_3D: maxExtent.width = 1 << 16; maxExtent.height = 1 << 16; maxExtent.depth = 1 << 16; maxMipLevels = 17; /* log2(maxWidth) + 1 */ maxArraySize = 1; break; } if (info->tiling == VK_IMAGE_TILING_OPTIMAL && info->type == VK_IMAGE_TYPE_2D && (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | 
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { sampleCounts |= VK_SAMPLE_COUNT_4_BIT; } /* From the Vulkan 1.2.199 spec: * * "VK_IMAGE_CREATE_EXTENDED_USAGE_BIT specifies that the image can be * created with usage flags that are not supported for the format the * image is created with but are supported for at least one format a * VkImageView created from the image can have." * * If VK_IMAGE_CREATE_EXTENDED_USAGE_BIT is set, views can be created with * different usage than the image so we can't always filter on usage. * There is one exception to this below for storage. */ if (!(info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) { if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { goto unsupported; } } if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) { if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { goto unsupported; } } if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT || ((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && !vk_format_is_depth_or_stencil(info->format))) { if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { goto unsupported; } } if ((info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) || ((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && vk_format_is_depth_or_stencil(info->format))) { if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { goto unsupported; } } } *pImageFormatProperties = (VkImageFormatProperties){ .maxExtent = maxExtent, .maxMipLevels = maxMipLevels, .maxArrayLayers = maxArraySize, .sampleCounts = sampleCounts, /* We need to limit images to 32-bit range, because the maximum * slice-stride is 32-bit wide, meaning that if we allocate an image * with the maximum width and height, we end up overflowing it. * * We get around this by simply limiting the maximum resource size. 
*/ .maxResourceSize = UINT32_MAX, }; if (p_feature_flags) *p_feature_flags = format_feature_flags; return VK_SUCCESS; unsupported: *pImageFormatProperties = (VkImageFormatProperties){ .maxExtent = {0, 0, 0}, .maxMipLevels = 0, .maxArrayLayers = 0, .sampleCounts = 0, .maxResourceSize = 0, }; return VK_ERROR_FORMAT_NOT_SUPPORTED; } static VkResult panvk_get_external_image_format_properties( const struct panvk_physical_device *physical_device, const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo, VkExternalMemoryHandleTypeFlagBits handleType, VkExternalMemoryProperties *external_properties) { const VkExternalMemoryHandleTypeFlags supported_handle_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; if (!(handleType & supported_handle_types)) { return panvk_errorf(physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED, "VkExternalMemoryTypeFlagBits(0x%x) unsupported", handleType); } /* pan_image_layout_init requires 2D for explicit layout */ if (pImageFormatInfo->type != VK_IMAGE_TYPE_2D) { return panvk_errorf( physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED, "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageType(%d)", handleType, pImageFormatInfo->type); } /* There is no restriction on opaque fds. But for dma-bufs, we want to * make sure vkGetImageSubresourceLayout can be used to query the image * layout of an exported dma-buf. We also want to make sure * VkImageDrmFormatModifierExplicitCreateInfoEXT can be used to specify the * image layout of an imported dma-buf. These add restrictions on the * image tilings. 
*/ VkExternalMemoryFeatureFlags features = 0; if (handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || pImageFormatInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; } else if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR) { features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT; } if (!features) { return panvk_errorf( physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED, "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageTiling(%d)", handleType, pImageFormatInfo->tiling); } *external_properties = (VkExternalMemoryProperties){ .externalMemoryFeatures = features, .exportFromImportedHandleTypes = supported_handle_types, .compatibleHandleTypes = supported_handle_types, }; return VK_SUCCESS; } VKAPI_ATTR VkResult VKAPI_CALL panvk_GetPhysicalDeviceImageFormatProperties2( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2 *base_info, VkImageFormatProperties2 *base_props) { VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; const VkPhysicalDeviceImageViewImageFormatInfoEXT *image_view_info = NULL; VkExternalImageFormatProperties *external_props = NULL; VkFilterCubicImageViewImageFormatPropertiesEXT *cubic_props = NULL; VkFormatFeatureFlags format_feature_flags; VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL; VkResult result; result = get_image_format_properties(physical_device, base_info, &base_props->imageFormatProperties, &format_feature_flags); if (result != VK_SUCCESS) return result; /* Extract input structs */ vk_foreach_struct_const(s, base_info->pNext) { switch (s->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: external_info = (const void *)s; break; case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT: image_view_info = (const void *)s; break; default: break; } } /* 
Extract output structs */ vk_foreach_struct(s, base_props->pNext) { switch (s->sType) { case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: external_props = (void *)s; break; case VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT: cubic_props = (void *)s; break; case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES: ycbcr_props = (void *)s; break; default: break; } } /* From the Vulkan 1.0.42 spec: * * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will * behave as if VkPhysicalDeviceExternalImageFormatInfo was not * present and VkExternalImageFormatProperties will be ignored. */ if (external_info && external_info->handleType != 0) { VkExternalImageFormatProperties fallback_external_props; if (!external_props) { memset(&fallback_external_props, 0, sizeof(fallback_external_props)); external_props = &fallback_external_props; } result = panvk_get_external_image_format_properties( physical_device, base_info, external_info->handleType, &external_props->externalMemoryProperties); if (result != VK_SUCCESS) goto fail; /* pan_image_layout_init requirements for explicit layout */ base_props->imageFormatProperties.maxMipLevels = 1; base_props->imageFormatProperties.maxArrayLayers = 1; base_props->imageFormatProperties.sampleCounts = 1; } if (cubic_props) { /* note: blob only allows cubic filtering for 2D and 2D array views * its likely we can enable it for 1D and CUBE, needs testing however */ if ((image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D || image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) && (format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT)) { cubic_props->filterCubic = true; cubic_props->filterCubicMinmax = true; } else { cubic_props->filterCubic = false; cubic_props->filterCubicMinmax = false; } } if (ycbcr_props) ycbcr_props->combinedImageSamplerDescriptorCount = 1; return VK_SUCCESS; fail: if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) { /* From the Vulkan 
1.0.42 spec: * * If the combination of parameters to * vkGetPhysicalDeviceImageFormatProperties2 is not supported by * the implementation for use in vkCreateImage, then all members of * imageFormatProperties will be filled with zero. */ base_props->imageFormatProperties = (VkImageFormatProperties){}; } return result; } VKAPI_ATTR void VKAPI_CALL panvk_GetPhysicalDeviceSparseImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t *pNumProperties, VkSparseImageFormatProperties *pProperties) { /* Sparse images are not yet supported. */ *pNumProperties = 0; } VKAPI_ATTR void VKAPI_CALL panvk_GetPhysicalDeviceSparseImageFormatProperties2( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties) { /* Sparse images are not yet supported. */ *pPropertyCount = 0; } VKAPI_ATTR void VKAPI_CALL panvk_GetPhysicalDeviceExternalBufferProperties( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, VkExternalBufferProperties *pExternalBufferProperties) { const VkExternalMemoryHandleTypeFlags supported_handle_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; /* From the Vulkan 1.3.298 spec: * * compatibleHandleTypes must include at least handleType. 
*/ VkExternalMemoryHandleTypeFlags handle_types = pExternalBufferInfo->handleType; VkExternalMemoryFeatureFlags features = 0; if (pExternalBufferInfo->handleType & supported_handle_types) { handle_types |= supported_handle_types; features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; } pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties){ .externalMemoryFeatures = features, .exportFromImportedHandleTypes = handle_types, .compatibleHandleTypes = handle_types, }; }