1 /* 2 * Copyright 2015 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef GrVkCaps_DEFINED 9 #define GrVkCaps_DEFINED 10 11 #include "include/gpu/vk/GrVkTypes.h" 12 #include "include/private/base/SkTDArray.h" 13 #include "src/gpu/ganesh/GrCaps.h" 14 15 class GrVkRenderTarget; 16 17 namespace skgpu { 18 class VulkanExtensions; 19 struct VulkanInterface; 20 } 21 22 /** 23 * Stores some capabilities of a Vk backend. 24 */ 25 class GrVkCaps : public GrCaps { 26 public: 27 /** 28 * Creates a GrVkCaps that is set such that nothing is supported. The init function should 29 * be called to fill out the caps. 30 */ 31 GrVkCaps(const GrContextOptions& contextOptions, 32 const skgpu::VulkanInterface* vkInterface, 33 VkPhysicalDevice device, 34 const VkPhysicalDeviceFeatures2& features, 35 uint32_t instanceVersion, 36 uint32_t physicalDeviceVersion, 37 const skgpu::VulkanExtensions& extensions, 38 GrProtected isProtected = GrProtected::kNo); 39 40 bool isFormatSRGB(const GrBackendFormat&) const override; 41 42 bool isFormatTexturable(const GrBackendFormat&, GrTextureType) const override; 43 bool isVkFormatTexturable(VkFormat) const; 44 isFormatCopyable(const GrBackendFormat &)45 bool isFormatCopyable(const GrBackendFormat&) const override { return true; } 46 47 bool isFormatAsColorTypeRenderable(GrColorType ct, 48 const GrBackendFormat& format, 49 int sampleCount = 1) const override; 50 bool isFormatRenderable(const GrBackendFormat& format, int sampleCount) const override; 51 bool isFormatRenderable(VkFormat, int sampleCount) const; 52 53 int getRenderTargetSampleCount(int requestedCount, const GrBackendFormat&) const override; 54 int getRenderTargetSampleCount(int requestedCount, VkFormat) const; 55 56 int maxRenderTargetSampleCount(const GrBackendFormat&) const override; 57 int maxRenderTargetSampleCount(VkFormat format) const; 58 59 SupportedWrite supportedWritePixelsColorType(GrColorType surfaceColorType, 60 const GrBackendFormat& surfaceFormat, 61 GrColorType srcColorType) const override; 62 63 SurfaceReadPixelsSupport surfaceSupportsReadPixels(const GrSurface*) const override; 64 isVkFormatTexturableLinearly(VkFormat format)65 bool isVkFormatTexturableLinearly(VkFormat format) const { 66 return SkToBool(FormatInfo::kTexturable_Flag & this->getFormatInfo(format).fLinearFlags); 67 } 68 formatCanBeDstofBlit(VkFormat format,bool linearTiled)69 bool formatCanBeDstofBlit(VkFormat format, bool linearTiled) const { 70 const FormatInfo& info = this->getFormatInfo(format); 71 const uint16_t& flags = linearTiled ? info.fLinearFlags : info.fOptimalFlags; 72 return SkToBool(FormatInfo::kBlitDst_Flag & flags); 73 } 74 formatCanBeSrcofBlit(VkFormat format,bool linearTiled)75 bool formatCanBeSrcofBlit(VkFormat format, bool linearTiled) const { 76 const FormatInfo& info = this->getFormatInfo(format); 77 const uint16_t& flags = linearTiled ? info.fLinearFlags : info.fOptimalFlags; 78 return SkToBool(FormatInfo::kBlitSrc_Flag & flags); 79 } 80 81 // Gets the GrColorType that should be used to transfer data in/out of a transfer buffer to 82 // write/read data when using a VkFormat with a specified color type. 83 GrColorType transferColorType(VkFormat, GrColorType surfaceColorType) const; 84 85 // On some GPUs (Windows Nvidia and Imagination) calls to QueueWaitIdle return before actually 86 // signalling the fences on the command buffers even though they have completed. This causes 87 // issues when then deleting the command buffers. Therefore we additionally will call 88 // vkWaitForFences on each outstanding command buffer to make sure the driver signals the fence. mustSyncCommandBuffersWithQueue()89 bool mustSyncCommandBuffersWithQueue() const { return fMustSyncCommandBuffersWithQueue; } 90 91 // Returns true if we should always make dedicated allocations for VkImages. shouldAlwaysUseDedicatedImageMemory()92 bool shouldAlwaysUseDedicatedImageMemory() const { 93 return fShouldAlwaysUseDedicatedImageMemory; 94 } 95 96 // Always use a transfer buffer instead of vkCmdUpdateBuffer to upload data to a VkBuffer. avoidUpdateBuffers()97 bool avoidUpdateBuffers() const { return fAvoidUpdateBuffers; } 98 99 /** 100 * Returns both a supported and most preferred stencil format to use in draws. 101 */ preferredStencilFormat()102 VkFormat preferredStencilFormat() const { return fPreferredStencilFormat; } 103 104 // Returns total number of bits used by stencil + depth + padding GetStencilFormatTotalBitCount(VkFormat format)105 static int GetStencilFormatTotalBitCount(VkFormat format) { 106 switch (format) { 107 case VK_FORMAT_S8_UINT: 108 return 8; 109 case VK_FORMAT_D24_UNORM_S8_UINT: 110 return 32; 111 case VK_FORMAT_D32_SFLOAT_S8_UINT: 112 // can optionally have 24 unused bits at the end so we assume the total bits is 64. 113 return 64; 114 default: 115 SkASSERT(false); 116 return 0; 117 } 118 } 119 120 // Returns whether the device supports VK_KHR_Swapchain. Internally Skia never uses any of the 121 // swapchain functions, but we may need to transition to and from the 122 // VK_IMAGE_LAYOUT_PRESENT_SRC_KHR image layout, so we must know whether that layout is 123 // supported. supportsSwapchain()124 bool supportsSwapchain() const { return fSupportsSwapchain; } 125 126 // Returns whether the device supports the ability to extend VkPhysicalDeviceProperties struct. supportsPhysicalDeviceProperties2()127 bool supportsPhysicalDeviceProperties2() const { return fSupportsPhysicalDeviceProperties2; } 128 // Returns whether the device supports the ability to extend VkMemoryRequirements struct. supportsMemoryRequirements2()129 bool supportsMemoryRequirements2() const { return fSupportsMemoryRequirements2; } 130 131 // Returns whether the device supports the ability to extend the vkBindMemory call. supportsBindMemory2()132 bool supportsBindMemory2() const { return fSupportsBindMemory2; } 133 134 // Returns whether or not the device suports the various API maintenance fixes to Vulkan 1.0. In 135 // Vulkan 1.1 all these maintenance are part of the core spec. supportsMaintenance1()136 bool supportsMaintenance1() const { return fSupportsMaintenance1; } supportsMaintenance2()137 bool supportsMaintenance2() const { return fSupportsMaintenance2; } supportsMaintenance3()138 bool supportsMaintenance3() const { return fSupportsMaintenance3; } 139 140 // Returns true if the device supports passing in a flag to say we are using dedicated GPU when 141 // allocating memory. For some devices this allows them to return more optimized memory knowning 142 // they will never need to suballocate amonst multiple objects. supportsDedicatedAllocation()143 bool supportsDedicatedAllocation() const { return fSupportsDedicatedAllocation; } 144 145 // Returns true if the device supports importing of external memory into Vulkan memory. supportsExternalMemory()146 bool supportsExternalMemory() const { return fSupportsExternalMemory; } 147 // Returns true if the device supports importing Android hardware buffers into Vulkan memory. supportsAndroidHWBExternalMemory()148 bool supportsAndroidHWBExternalMemory() const { return fSupportsAndroidHWBExternalMemory; } 149 150 // Returns true if it supports ycbcr conversion for samplers supportsYcbcrConversion()151 bool supportsYcbcrConversion() const { return fSupportsYcbcrConversion; } 152 153 // Returns the number of descriptor slots used by immutable ycbcr VkImages. 154 // 155 // TODO: We should update this to return a count for a specific format or external format. We 156 // can use vkGetPhysicalDeviceImageFormatProperties2 with a 157 // VkSamplerYcbcrConversionImageFormatProperties to query this. However, right now that call 158 // does not support external android formats which is where the majority of ycbcr images are 159 // coming from. So for now we stay safe and always return 3 here which is the max value that the 160 // count could be for any format. ycbcrCombinedImageSamplerDescriptorCount()161 uint32_t ycbcrCombinedImageSamplerDescriptorCount() const { 162 return 3; 163 } 164 165 // Returns true if the device supports protected memory. supportsProtectedMemory()166 bool supportsProtectedMemory() const { return fSupportsProtectedMemory; } 167 168 // Returns true if the VK_EXT_image_drm_format_modifier is enabled. supportsDRMFormatModifiers()169 bool supportsDRMFormatModifiers() const { return fSupportsDRMFormatModifiers; } 170 171 // Returns whether we prefer to record draws directly into a primary command buffer. preferPrimaryOverSecondaryCommandBuffers()172 bool preferPrimaryOverSecondaryCommandBuffers() const { 173 return fPreferPrimaryOverSecondaryCommandBuffers; 174 } 175 maxPerPoolCachedSecondaryCommandBuffers()176 int maxPerPoolCachedSecondaryCommandBuffers() const { 177 return fMaxPerPoolCachedSecondaryCommandBuffers; 178 } 179 maxInputAttachmentDescriptors()180 uint32_t maxInputAttachmentDescriptors() const { return fMaxInputAttachmentDescriptors; } 181 maxSamplerAnisotropy()182 float maxSamplerAnisotropy() const { return fMaxSamplerAnisotropy; } 183 mustInvalidatePrimaryCmdBufferStateAfterClearAttachments()184 bool mustInvalidatePrimaryCmdBufferStateAfterClearAttachments() const { 185 return fMustInvalidatePrimaryCmdBufferStateAfterClearAttachments; 186 } 187 188 // For host visible allocations, this returns true if we require that they are coherent. This 189 // is used to work around bugs for devices that don't handle non-coherent memory correctly. mustUseCoherentHostVisibleMemory()190 bool mustUseCoherentHostVisibleMemory() const { return fMustUseCoherentHostVisibleMemory; } 191 192 // Returns whether a pure GPU accessible buffer is more performant to read than a buffer that is 193 // also host visible. If so then in some cases we may prefer the cost of doing a copy to the 194 // buffer. This typically would only be the case for buffers that are written once and read 195 // many times on the gpu. gpuOnlyBuffersMorePerformant()196 bool gpuOnlyBuffersMorePerformant() const { return fGpuOnlyBuffersMorePerformant; } 197 198 // For our CPU write and GPU read buffers (vertex, uniform, etc.), should we keep these buffers 199 // persistently mapped. In general the answer will be yes. The main case we don't do this is 200 // when using special memory that is DEVICE_LOCAL and HOST_VISIBLE on discrete GPUs. shouldPersistentlyMapCpuToGpuBuffers()201 bool shouldPersistentlyMapCpuToGpuBuffers() const { 202 return fShouldPersistentlyMapCpuToGpuBuffers; 203 } 204 205 // The max draw count that can be passed into indirect draw calls. maxDrawIndirectDrawCount()206 uint32_t maxDrawIndirectDrawCount() const { return fMaxDrawIndirectDrawCount; } 207 208 /** 209 * Helpers used by canCopySurface. In all cases if the SampleCnt parameter is zero that means 210 * the surface is not a render target, otherwise it is the number of samples in the render 211 * target. 212 */ 213 bool canCopyImage(VkFormat dstFormat, 214 int dstSampleCnt, 215 bool dstHasYcbcr, 216 VkFormat srcFormat, 217 int srcSamplecnt, 218 bool srcHasYcbcr) const; 219 220 bool canCopyAsBlit(VkFormat dstConfig, 221 int dstSampleCnt, 222 bool dstIsLinear, 223 bool dstHasYcbcr, 224 VkFormat srcConfig, 225 int srcSampleCnt, 226 bool srcIsLinear, 227 bool srcHasYcbcr) const; 228 229 bool canCopyAsResolve(VkFormat dstConfig, 230 int dstSampleCnt, 231 bool dstHasYcbcr, 232 VkFormat srcConfig, 233 int srcSamplecnt, 234 bool srcHasYcbcr) const; 235 236 GrBackendFormat getBackendFormatFromCompressionType(SkImage::CompressionType) const override; 237 getFormatFromColorType(GrColorType colorType)238 VkFormat getFormatFromColorType(GrColorType colorType) const { 239 int idx = static_cast<int>(colorType); 240 return fColorTypeToFormatTable[idx]; 241 } 242 243 skgpu::Swizzle getWriteSwizzle(const GrBackendFormat&, GrColorType) const override; 244 245 uint64_t computeFormatKey(const GrBackendFormat&) const override; 246 247 int getFragmentUniformBinding() const; 248 int getFragmentUniformSet() const; 249 250 void addExtraSamplerKey(skgpu::KeyBuilder*, 251 GrSamplerState, 252 const GrBackendFormat&) const override; 253 254 GrProgramDesc makeDesc(GrRenderTarget*, 255 const GrProgramInfo&, 256 ProgramDescOverrideFlags) const override; 257 258 GrInternalSurfaceFlags getExtraSurfaceFlagsForDeferredRT() const override; 259 260 VkShaderStageFlags getPushConstantStageFlags() const; 261 mustLoadFullImageWithDiscardableMSAA()262 bool mustLoadFullImageWithDiscardableMSAA() const { 263 return fMustLoadFullImageWithDiscardableMSAA; 264 } supportsDiscardableMSAAForDMSAA()265 bool supportsDiscardableMSAAForDMSAA() const { return fSupportsDiscardableMSAAForDMSAA; } 266 bool renderTargetSupportsDiscardableMSAA(const GrVkRenderTarget*) const; 267 bool programInfoWillUseDiscardableMSAA(const GrProgramInfo&) const; 268 dmsaaResolveCanBeUsedAsTextureInSameRenderPass()269 bool dmsaaResolveCanBeUsedAsTextureInSameRenderPass() const override { return false; } 270 supportsMemorylessAttachments()271 bool supportsMemorylessAttachments() const { return fSupportsMemorylessAttachments; } 272 273 #if GR_TEST_UTILS 274 std::vector<GrTest::TestFormatColorTypeCombination> getTestingCombinations() const override; 275 #endif 276 277 private: 278 enum VkVendor { 279 kAMD_VkVendor = 4098, 280 kARM_VkVendor = 5045, 281 kImagination_VkVendor = 4112, 282 kIntel_VkVendor = 32902, 283 kNvidia_VkVendor = 4318, 284 kQualcomm_VkVendor = 20803, 285 }; 286 287 enum class IntelGPUType { 288 // 9th gen 289 kSkyLake, 290 291 // 11th gen 292 kIceLake, 293 294 // 12th gen 295 kRocketLake, 296 kTigerLake, 297 kAlderLake, 298 299 kOther 300 }; 301 302 static IntelGPUType GetIntelGPUType(uint32_t deviceID); GetIntelGen(IntelGPUType type)303 static int GetIntelGen(IntelGPUType type) { 304 switch (type) { 305 case IntelGPUType::kSkyLake: 306 return 9; 307 case IntelGPUType::kIceLake: 308 return 11; 309 case IntelGPUType::kRocketLake: // fall through 310 case IntelGPUType::kTigerLake: // fall through 311 case IntelGPUType::kAlderLake: 312 return 12; 313 case IntelGPUType::kOther: 314 // For now all our workaround checks are in the form of "if gen > some_value". So 315 // we can return 0 for kOther which means we won't put in the new workaround for 316 // older gens which is fine. If we stay on top of adding support for new gen 317 // intel devices we shouldn't hit cases where we'd need to change this pattern. 318 return 0; 319 } 320 SkUNREACHABLE; 321 } 322 323 void init(const GrContextOptions& contextOptions, 324 const skgpu::VulkanInterface* vkInterface, 325 VkPhysicalDevice device, 326 const VkPhysicalDeviceFeatures2&, 327 uint32_t physicalDeviceVersion, 328 const skgpu::VulkanExtensions&, 329 GrProtected isProtected); 330 void initGrCaps(const skgpu::VulkanInterface* vkInterface, 331 VkPhysicalDevice physDev, 332 const VkPhysicalDeviceProperties&, 333 const VkPhysicalDeviceMemoryProperties&, 334 const VkPhysicalDeviceFeatures2&, 335 const skgpu::VulkanExtensions&); 336 void initShaderCaps(const VkPhysicalDeviceProperties&, const VkPhysicalDeviceFeatures2&); 337 338 void initFormatTable(const GrContextOptions&, 339 const skgpu::VulkanInterface*, 340 VkPhysicalDevice, 341 const VkPhysicalDeviceProperties&); 342 void initStencilFormat(const skgpu::VulkanInterface* iface, VkPhysicalDevice physDev); 343 344 void applyDriverCorrectnessWorkarounds(const VkPhysicalDeviceProperties&); 345 346 bool onSurfaceSupportsWritePixels(const GrSurface*) const override; 347 bool onCanCopySurface(const GrSurfaceProxy* dst, const SkIRect& dstRect, 348 const GrSurfaceProxy* src, const SkIRect& srcRect) const override; 349 GrBackendFormat onGetDefaultBackendFormat(GrColorType) const override; 350 351 bool onAreColorTypeAndFormatCompatible(GrColorType, const GrBackendFormat&) const override; 352 353 SupportedRead onSupportedReadPixelsColorType(GrColorType, const GrBackendFormat&, 354 GrColorType) const override; 355 356 skgpu::Swizzle onGetReadSwizzle(const GrBackendFormat&, GrColorType) const override; 357 358 GrDstSampleFlags onGetDstSampleFlagsForProxy(const GrRenderTargetProxy*) const override; 359 360 bool onSupportsDynamicMSAA(const GrRenderTargetProxy*) const override; 361 362 // ColorTypeInfo for a specific format 363 struct ColorTypeInfo { 364 GrColorType fColorType = GrColorType::kUnknown; 365 GrColorType fTransferColorType = GrColorType::kUnknown; 366 enum { 367 kUploadData_Flag = 0x1, 368 // Does Ganesh itself support rendering to this colorType & format pair. Renderability 369 // still additionally depends on if the format itself is renderable. 370 kRenderable_Flag = 0x2, 371 // Indicates that this colorType is supported only if we are wrapping a texture with 372 // the given format and colorType. We do not allow creation with this pair. 373 kWrappedOnly_Flag = 0x4, 374 }; 375 uint32_t fFlags = 0; 376 377 skgpu::Swizzle fReadSwizzle; 378 skgpu::Swizzle fWriteSwizzle; 379 }; 380 381 struct FormatInfo { colorTypeFlagsFormatInfo382 uint32_t colorTypeFlags(GrColorType colorType) const { 383 for (int i = 0; i < fColorTypeInfoCount; ++i) { 384 if (fColorTypeInfos[i].fColorType == colorType) { 385 return fColorTypeInfos[i].fFlags; 386 } 387 } 388 return 0; 389 } 390 391 void init(const GrContextOptions&, 392 const skgpu::VulkanInterface*, 393 VkPhysicalDevice, 394 const VkPhysicalDeviceProperties&, 395 VkFormat); 396 static void InitFormatFlags(VkFormatFeatureFlags, uint16_t* flags); 397 void initSampleCounts(const GrContextOptions&, 398 const skgpu::VulkanInterface*, 399 VkPhysicalDevice, 400 const VkPhysicalDeviceProperties&, 401 VkFormat); 402 403 enum { 404 kTexturable_Flag = 0x1, 405 kRenderable_Flag = 0x2, 406 kBlitSrc_Flag = 0x4, 407 kBlitDst_Flag = 0x8, 408 }; 409 410 uint16_t fOptimalFlags = 0; 411 uint16_t fLinearFlags = 0; 412 413 SkTDArray<int> fColorSampleCounts; 414 415 std::unique_ptr<ColorTypeInfo[]> fColorTypeInfos; 416 int fColorTypeInfoCount = 0; 417 }; 418 static const size_t kNumVkFormats = 22; 419 FormatInfo fFormatTable[kNumVkFormats]; 420 421 FormatInfo& getFormatInfo(VkFormat); 422 const FormatInfo& getFormatInfo(VkFormat) const; 423 424 VkFormat fColorTypeToFormatTable[kGrColorTypeCnt]; 425 void setColorType(GrColorType, std::initializer_list<VkFormat> formats); 426 427 VkFormat fPreferredStencilFormat; 428 429 SkSTArray<1, GrVkYcbcrConversionInfo> fYcbcrInfos; 430 431 bool fMustSyncCommandBuffersWithQueue = false; 432 bool fShouldAlwaysUseDedicatedImageMemory = false; 433 434 bool fAvoidUpdateBuffers = false; 435 436 bool fSupportsSwapchain = false; 437 438 bool fSupportsPhysicalDeviceProperties2 = false; 439 bool fSupportsMemoryRequirements2 = false; 440 bool fSupportsBindMemory2 = false; 441 bool fSupportsMaintenance1 = false; 442 bool fSupportsMaintenance2 = false; 443 bool fSupportsMaintenance3 = false; 444 445 bool fSupportsDedicatedAllocation = false; 446 bool fSupportsExternalMemory = false; 447 bool fSupportsAndroidHWBExternalMemory = false; 448 449 bool fSupportsYcbcrConversion = false; 450 451 bool fSupportsProtectedMemory = false; 452 453 bool fSupportsDRMFormatModifiers = false; 454 455 bool fPreferPrimaryOverSecondaryCommandBuffers = true; 456 bool fMustInvalidatePrimaryCmdBufferStateAfterClearAttachments = false; 457 458 bool fMustUseCoherentHostVisibleMemory = false; 459 bool fGpuOnlyBuffersMorePerformant = false; 460 bool fShouldPersistentlyMapCpuToGpuBuffers = true; 461 462 // We default this to 100 since we already cap the max render tasks at 100 before doing a 463 // submission in the GrDrawingManager, so we shouldn't be going over 100 secondary command 464 // buffers per primary anyways. 465 int fMaxPerPoolCachedSecondaryCommandBuffers = 100; 466 467 uint32_t fMaxInputAttachmentDescriptors = 0; 468 469 float fMaxSamplerAnisotropy = 1.f; 470 471 bool fMustLoadFullImageWithDiscardableMSAA = false; 472 bool fSupportsDiscardableMSAAForDMSAA = true; 473 bool fSupportsMemorylessAttachments = false; 474 475 uint32_t fMaxDrawIndirectDrawCount = 0; 476 477 using INHERITED = GrCaps; 478 }; 479 480 #endif 481