//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include <deque>

#include "common/Color.h"
#include "common/FixedVector.h"
#include "common/SimpleMutex.h"
#include "common/WorkerThread.h"
#include "libANGLE/Uniform.h"
#include "libANGLE/renderer/vulkan/ShaderInterfaceVariableInfoMap.h"
#include "libANGLE/renderer/vulkan/vk_resource.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace gl
{
class ProgramExecutable;
}  // namespace gl

namespace rx
{
class ShaderInterfaceVariableInfoMap;
class UpdateDescriptorSetsBuilder;

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains uniform blocks created to encompass default uniforms.  1 binding is used per
//   pipeline stage.  Additionally, transform feedback buffers are bound from binding 2 and up.
//   For internal shaders, set 0 is used for all the needed resources.
// - Set 1 contains all textures (including texture buffers).
// - Set 2 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers, images and image buffers.
// - Set 3 is reserved for OpenCL.

enum class DescriptorSetIndex : uint32_t
{
    Internal       = 0,         // Internal shaders
    UniformsAndXfb = Internal,  // Uniforms set index
    Texture        = 1,         // Textures set index
    ShaderResource = 2,         // Other shader resources set index

    // CL specific naming for set indices
    LiteralSampler  = 0,
    KernelArguments = 1,
    ModuleConstants = 2,
    Printf          = 3,

    InvalidEnum = 4,
    EnumCount   = InvalidEnum,
};

namespace vk
{
class Context;
class BufferHelper;
class DynamicDescriptorPool;
class SamplerHelper;
enum class ImageLayout;
class PipelineCacheAccess;
class RenderPassCommandBufferHelper;
class PackedClearValuesArray;
class AttachmentOpsArray;

using PipelineLayoutPtr      = AtomicSharedPtr<PipelineLayout>;
using DescriptorSetLayoutPtr = AtomicSharedPtr<DescriptorSetLayout>;

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

enum class ResourceAccess
{
    Unused    = 0x0,
    ReadOnly  = 0x1,
    WriteOnly = 0x2,
    ReadWrite = ReadOnly | WriteOnly,
};

inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
{
    *oldAccess = static_cast<ResourceAccess>(ToUnderlying(newAccess) | ToUnderlying(*oldAccess));
}
inline bool HasResourceWriteAccess(ResourceAccess access)
{
    return (ToUnderlying(access) & ToUnderlying(ResourceAccess::WriteOnly)) != 0;
}
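
// Illustrative usage sketch (not part of this header's API): ResourceAccess values
// are bit flags, so UpdateAccess() accumulates accesses over time and
// HasResourceWriteAccess() tests the write bit of the accumulated value:
//
//     ResourceAccess access = ResourceAccess::Unused;
//     UpdateAccess(&access, ResourceAccess::ReadOnly);
//     UpdateAccess(&access, ResourceAccess::WriteOnly);
//     // access == ResourceAccess::ReadWrite
//     ASSERT(HasResourceWriteAccess(access));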

enum class RenderPassLoadOp
{
    Load     = VK_ATTACHMENT_LOAD_OP_LOAD,
    Clear    = VK_ATTACHMENT_LOAD_OP_CLEAR,
    DontCare = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    None,
};
enum class RenderPassStoreOp
{
    Store    = VK_ATTACHMENT_STORE_OP_STORE,
    DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
    None,
};

enum class FramebufferFetchMode
{
    None,
    Color,
    DepthStencil,
    ColorAndDepthStencil,
};
FramebufferFetchMode GetProgramFramebufferFetchMode(const gl::ProgramExecutable *executable);
ANGLE_INLINE bool FramebufferFetchModeHasColor(FramebufferFetchMode framebufferFetchMode)
{
    static_assert(ToUnderlying(FramebufferFetchMode::Color) == 0x1);
    static_assert(ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil) == 0x3);
    return (ToUnderlying(framebufferFetchMode) & 0x1) != 0;
}
ANGLE_INLINE bool FramebufferFetchModeHasDepthStencil(FramebufferFetchMode framebufferFetchMode)
{
    static_assert(ToUnderlying(FramebufferFetchMode::DepthStencil) == 0x2);
    static_assert(ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil) == 0x3);
    return (ToUnderlying(framebufferFetchMode) & 0x2) != 0;
}
ANGLE_INLINE FramebufferFetchMode FramebufferFetchModeMerge(FramebufferFetchMode mode1,
                                                            FramebufferFetchMode mode2)
{
    constexpr uint32_t kNone         = ToUnderlying(FramebufferFetchMode::None);
    constexpr uint32_t kColor        = ToUnderlying(FramebufferFetchMode::Color);
    constexpr uint32_t kDepthStencil = ToUnderlying(FramebufferFetchMode::DepthStencil);
    constexpr uint32_t kColorAndDepthStencil =
        ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil);
    static_assert(kNone == 0);
    static_assert((kColor & kColorAndDepthStencil) == kColor);
    static_assert((kDepthStencil & kColorAndDepthStencil) == kDepthStencil);
    static_assert((kColor | kDepthStencil) == kColorAndDepthStencil);

    return static_cast<FramebufferFetchMode>(ToUnderlying(mode1) | ToUnderlying(mode2));
}
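
// Illustrative usage sketch (not part of this header's API): because the enum
// values are bit flags, merging two fetch modes is a bitwise OR:
//
//     FramebufferFetchMode merged = FramebufferFetchModeMerge(
//         FramebufferFetchMode::Color, FramebufferFetchMode::DepthStencil);
//     // merged == FramebufferFetchMode::ColorAndDepthStencil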

// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus -
// - one depth/stencil attachment
// - one depth/stencil resolve attachment
// - one fragment shading rate attachment
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 3;
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
using FramebufferAttachmentMask    = angle::BitSet<kMaxFramebufferAttachments>;

constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
using FramebufferNonResolveAttachmentMask  = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;

class PackedAttachmentIndex;

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a YUV texture is attached to the resolve attachment.
    void packYUVResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment.  Used when optimizing blit through resolve attachment to
    // temporarily pack a resolve attachment and then remove it.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthResolveAttachment();
    void packStencilResolveAttachment();
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthUnresolveAttachment();
    void packStencilUnresolveAttachment();
    void removeDepthStencilUnresolveAttachment();

    PackedAttachmentIndex getPackedColorAttachmentIndex(size_t colorIndexGL);

    void setWriteControlMode(gl::SrgbWriteControlMode mode);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasYUVResolveAttachment() const { return mIsYUVResolve; }
    bool hasDepthStencilAttachment() const;
    gl::DrawBufferMask getColorResolveAttachmentMask() const { return mColorResolveAttachmentMask; }
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }
    bool hasDepthStencilResolveAttachment() const { return mResolveDepth || mResolveStencil; }
    bool hasDepthResolveAttachment() const { return mResolveDepth; }
    bool hasStencilResolveAttachment() const { return mResolveStencil; }
    bool hasDepthStencilUnresolveAttachment() const { return mUnresolveDepth || mUnresolveStencil; }
    bool hasDepthUnresolveAttachment() const { return mUnresolveDepth; }
    bool hasStencilUnresolveAttachment() const { return mUnresolveStencil; }
    gl::SrgbWriteControlMode getSRGBWriteControlMode() const
    {
        return static_cast<gl::SrgbWriteControlMode>(mSrgbWriteControl);
    }

    bool isLegacyDitherEnabled() const { return mLegacyDitherEnabled; }

    void setLegacyDither(bool enabled);

    // Get the number of clearable attachments in the Vulkan render pass, i.e. after removing
    // disabled color attachments.
    size_t clearableAttachmentCount() const;
    // Get the total number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples) { mSamples = static_cast<uint8_t>(samples); }
    uint8_t samples() const { return mSamples; }

    void setViewCount(GLsizei viewCount) { mViewCount = static_cast<uint8_t>(viewCount); }
    uint8_t viewCount() const { return mViewCount; }

    void setFramebufferFetchMode(FramebufferFetchMode framebufferFetchMode)
    {
        SetBitField(mFramebufferFetchMode, framebufferFetchMode);
    }
    FramebufferFetchMode framebufferFetchMode() const
    {
        return static_cast<FramebufferFetchMode>(mFramebufferFetchMode);
    }
    bool hasColorFramebufferFetch() const
    {
        return FramebufferFetchModeHasColor(framebufferFetchMode());
    }
    bool hasDepthStencilFramebufferFetch() const
    {
        return FramebufferFetchModeHasDepthStencil(framebufferFetchMode());
    }

    void updateRenderToTexture(bool isRenderToTexture) { mIsRenderToTexture = isRenderToTexture; }
    bool isRenderToTexture() const { return mIsRenderToTexture; }

    void setFragmentShadingAttachment(bool value) { mHasFragmentShadingAttachment = value; }
    bool hasFragmentShadingAttachment() const { return mHasFragmentShadingAttachment; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

    // Start a render pass with a render pass object.
    void beginRenderPass(ErrorContext *context,
                         PrimaryCommandBuffer *primary,
                         const RenderPass &renderPass,
                         VkFramebuffer framebuffer,
                         const gl::Rectangle &renderArea,
                         VkSubpassContents subpassContents,
                         PackedClearValuesArray &clearValues,
                         const VkRenderPassAttachmentBeginInfo *attachmentBeginInfo) const;

    // Start a render pass with dynamic rendering.
    void beginRendering(ErrorContext *context,
                        PrimaryCommandBuffer *primary,
                        const gl::Rectangle &renderArea,
                        VkSubpassContents subpassContents,
                        const FramebufferAttachmentsVector<VkImageView> &attachmentViews,
                        const AttachmentOpsArray &ops,
                        PackedClearValuesArray &clearValues,
                        uint32_t layerCount) const;

    void populateRenderingInheritanceInfo(
        Renderer *renderer,
        VkCommandBufferInheritanceRenderingInfo *infoOut,
        gl::DrawBuffersArray<VkFormat> *colorFormatStorageOut) const;

    // Calculate perf counters for a dynamic rendering render pass instance.  For render pass
    // objects, the perf counters are updated when creating the render pass, where access to
    // ContextVk is available.
    void updatePerfCounters(ErrorContext *context,
                            const FramebufferAttachmentsVector<VkImageView> &attachmentViews,
                            const AttachmentOpsArray &ops,
                            angle::VulkanPerfCounters *countersOut);

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange;

    // Multiview
    uint8_t mViewCount;

    // sRGB
    uint8_t mSrgbWriteControl : 1;

    // Framebuffer fetch, one of FramebufferFetchMode values
    uint8_t mFramebufferFetchMode : 2;

    // Depth/stencil resolve
    uint8_t mResolveDepth : 1;
    uint8_t mResolveStencil : 1;

    // Multisampled render to texture
    uint8_t mIsRenderToTexture : 1;
    uint8_t mUnresolveDepth : 1;
    uint8_t mUnresolveStencil : 1;

    // Dithering state when using VK_EXT_legacy_dithering
    uint8_t mLegacyDitherEnabled : 1;

    // external_format_resolve
    uint8_t mIsYUVResolve : 1;

    // Foveated rendering
    uint8_t mHasFragmentShadingAttachment : 1;

    // Available space for expansion.
    uint8_t mPadding2 : 5;

    // Whether each color attachment has a corresponding resolve attachment.  Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass.  This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets.  This operation is referred to as "unresolve".
    //
    // Unused when VK_EXT_multisampled_render_to_single_sampled is available.
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments.  They use the same
    // formats, so they are not specified in this array.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 16, "Size check failed");
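
// Illustrative usage sketch (not part of this header's API; the formats are
// hypothetical): per the contract above, color attachments are packed first, with
// gaps for disabled GL indices, and the depth/stencil attachment is packed last:
//
//     RenderPassDesc desc;
//     desc.setSamples(4);
//     desc.packColorAttachment(0, angle::FormatID::R8G8B8A8_UNORM);
//     desc.packColorAttachmentGap(1);
//     desc.packColorAttachment(2, angle::FormatID::R8G8B8A8_UNORM);
//     desc.packDepthStencilAttachment(angle::FormatID::D24_UNORM_S8_UINT);
//     // desc.colorAttachmentRange() == 3; the depth/stencil attachment is at index 3.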

enum class GraphicsPipelineSubset
{
    Complete,  // Include all subsets
    Shaders,   // Include only the shader subsets, excluding vertex input and fragment output state.
};

enum class CacheLookUpFeedback
{
    None,
    Hit,
    Miss,
    LinkedDrawHit,
    LinkedDrawMiss,
    WarmUpHit,
    WarmUpMiss,
    UtilsHit,
    UtilsMiss,
};

struct PackedAttachmentOpsDesc final
{
    // RenderPassLoadOp is in range [0, 3], and RenderPassStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not.  This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether.  Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules (not an issue with dynamic rendering).
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    uint16_t padding1 : 6;

    // Layouts take values from ImageLayout, so they are small.  Layouts that are possible here are
    // placed at the beginning of that enum.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
    uint16_t finalResolveLayout : 5;
    uint16_t padding2 : 1;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const
    {
        return mOps[index.get()];
    }
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) { return mOps[index.get()]; }

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    void setOps(PackedAttachmentIndex index, RenderPassLoadOp loadOp, RenderPassStoreOp storeOp);
    void setStencilOps(PackedAttachmentIndex index,
                       RenderPassLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");

struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;
    uint16_t offset : kAttributeOffsetMaxBits;
    uint16_t compressed : 1;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 4, "Size mismatch");

struct PackedVertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];

    // Component type of the corresponding input in the program.  Used to adjust the format if
    // necessary.  Takes values from gl::ComponentType.
    uint32_t shaderAttribComponentType;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful. Note that stride limits are
    // introduced in ES 3.1.
    // Dynamic in VK_EXT_extended_dynamic_state
    uint16_t strides[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kPackedVertexInputAttributesSize = sizeof(PackedVertexInputAttributes);
static_assert(kPackedVertexInputAttributesSize == 100, "Size mismatch");

struct PackedInputAssemblyState final
{
    struct
    {
        uint32_t topology : 4;

        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t primitiveRestartEnable : 1;  // ds2

        // Whether dynamic state for vertex stride from VK_EXT_extended_dynamic_state can be
        // used.  Used by GraphicsPipelineDesc::hash() to exclude |vertexStrides| from the hash.
        uint32_t useVertexInputBindingStrideDynamicState : 1;

        // Whether dynamic state for vertex input state from VK_EXT_vertex_input_dynamic_state can
        // be used by GraphicsPipelineDesc::hash() to exclude |PackedVertexInputAttributes| from the
        // hash
        uint32_t useVertexInputDynamicState : 1;

        // Whether the pipeline is robust (vertex input copy)
        uint32_t isRobustContext : 1;
        // Whether the pipeline needs access to protected content (vertex input copy)
        uint32_t isProtectedContext : 1;

        // Which attributes are actually active in the program and should affect the pipeline.
        uint32_t programActiveAttributeLocations : gl::MAX_VERTEX_ATTRIBS;

        uint32_t padding : 23 - gl::MAX_VERTEX_ATTRIBS;
    } bits;
};

constexpr size_t kPackedInputAssemblyStateSize = sizeof(PackedInputAssemblyState);
static_assert(kPackedInputAssemblyStateSize == 4, "Size mismatch");

struct PackedStencilOpState final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 2, "Size check failed");

struct PackedPreRasterizationAndFragmentStates final
{
    struct
    {
        // Affecting VkPipelineViewportStateCreateInfo
        uint32_t viewportNegativeOneToOne : 1;

        // Affecting VkPipelineRasterizationStateCreateInfo
        uint32_t depthClampEnable : 1;
        uint32_t polygonMode : 2;
        // Dynamic in VK_EXT_extended_dynamic_state
        uint32_t cullMode : 4;
        uint32_t frontFace : 4;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t rasterizerDiscardEnable : 1;
        uint32_t depthBiasEnable : 1;

        // Affecting VkPipelineTessellationStateCreateInfo
        uint32_t patchVertices : 6;

        // Affecting VkPipelineDepthStencilStateCreateInfo
        uint32_t depthBoundsTest : 1;
        // Dynamic in VK_EXT_extended_dynamic_state
        uint32_t depthTest : 1;
        uint32_t depthWrite : 1;
        uint32_t stencilTest : 1;
        uint32_t nonZeroStencilWriteMaskWorkaround : 1;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t depthCompareOp : 4;

        // Affecting specialization constants
        uint32_t surfaceRotation : 1;

        // Whether the pipeline is robust (shader stages copy)
        uint32_t isRobustContext : 1;
        // Whether the pipeline needs access to protected content (shader stages copy)
        uint32_t isProtectedContext : 1;
    } bits;

    // Affecting specialization constants
    static_assert(gl::IMPLEMENTATION_MAX_DRAW_BUFFERS <= 8,
                  "2 bits per draw buffer is needed for dither emulation");
    uint16_t emulatedDitherControl;
    uint16_t padding;

    // Affecting VkPipelineDepthStencilStateCreateInfo
    // Dynamic in VK_EXT_extended_dynamic_state
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedPreRasterizationAndFragmentStatesSize =
    sizeof(PackedPreRasterizationAndFragmentStates);
static_assert(kPackedPreRasterizationAndFragmentStatesSize == 12, "Size check failed");

struct PackedMultisampleAndSubpassState final
{
    struct
    {
        // Affecting VkPipelineMultisampleStateCreateInfo
        // Note: Only up to 16xMSAA is supported in the Vulkan backend.
        uint16_t sampleMask;
        // Stored as minus one so sample count 16 can fit in 4 bits.
        uint16_t rasterizationSamplesMinusOne : 4;
        uint16_t sampleShadingEnable : 1;
        uint16_t alphaToCoverageEnable : 1;
        uint16_t alphaToOneEnable : 1;
        // The subpass index affects both the shader stages and the fragment output similarly to
        // multisampled state, so they are grouped together.
        // Note: Currently only 2 subpasses possible.
        uint16_t subpass : 1;
        // 8-bit normalized instead of float to align the struct.
        uint16_t minSampleShading : 8;
    } bits;
};

constexpr size_t kPackedMultisampleAndSubpassStateSize = sizeof(PackedMultisampleAndSubpassState);
static_assert(kPackedMultisampleAndSubpassStateSize == 4, "Size check failed");

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PackedColorBlendState final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
};

constexpr size_t kPackedColorBlendStateSize = sizeof(PackedColorBlendState);
static_assert(kPackedColorBlendStateSize == 36, "Size check failed");

struct PackedBlendMaskAndLogicOpState final
{
    struct
    {
        uint32_t blendEnableMask : 8;
        uint32_t logicOpEnable : 1;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t logicOp : 4;

        // Whether the pipeline needs access to protected content (fragment output copy)
        uint32_t isProtectedContext : 1;

        // Output that is present in the framebuffer but is never written to in the shader.  Used by
        // GL_ANGLE_robust_fragment_shader_output which defines the behavior in this case (which is
        // to mask these outputs)
        uint32_t missingOutputsMask : gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;

        uint32_t padding : 18 - gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
    } bits;
};

constexpr size_t kPackedBlendMaskAndLogicOpStateSize = sizeof(PackedBlendMaskAndLogicOpState);
static_assert(kPackedBlendMaskAndLogicOpStateSize == 4, "Size check failed");

// The vertex input subset of the pipeline.
struct PipelineVertexInputState final
{
    PackedInputAssemblyState inputAssembly;
    PackedVertexInputAttributes vertex;
};

// The pre-rasterization and fragment shader subsets of the pipeline.  This is excluding
// multisampled and render pass states which are shared with fragment output.
struct PipelineShadersState final
{
    PackedPreRasterizationAndFragmentStates shaders;
};

// Multisampled and render pass states.
struct PipelineSharedNonVertexInputState final
{
    PackedMultisampleAndSubpassState multisample;
    RenderPassDesc renderPass;
};

// The fragment output subset of the pipeline.  This is excluding multisampled and render pass
// states which are shared with the shader subsets.
struct PipelineFragmentOutputState final
{
    PackedColorBlendState blend;
    PackedBlendMaskAndLogicOpState blendMaskAndLogic;
};

constexpr size_t kGraphicsPipelineVertexInputStateSize =
    kPackedVertexInputAttributesSize + kPackedInputAssemblyStateSize;
constexpr size_t kGraphicsPipelineShadersStateSize = kPackedPreRasterizationAndFragmentStatesSize;
constexpr size_t kGraphicsPipelineSharedNonVertexInputStateSize =
    kPackedMultisampleAndSubpassStateSize + kRenderPassDescSize;
constexpr size_t kGraphicsPipelineFragmentOutputStateSize =
    kPackedColorBlendStateSize + kPackedBlendMaskAndLogicOpStateSize;

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kGraphicsPipelineVertexInputStateSize + kGraphicsPipelineShadersStateSize +
    kGraphicsPipelineSharedNonVertexInputStateSize + kGraphicsPipelineFragmentOutputStateSize;

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;

GraphicsPipelineTransitionBits GetGraphicsPipelineTransitionBitsMask(GraphicsPipelineSubset subset);
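
// Illustrative sketch (not part of this header's API): each transition bit covers
// kGraphicsPipelineDirtyBitBytes (4) bytes of the packed description, so a field
// that changed at byte offset |offset| within the desc maps to dirty bit
// offset / kGraphicsPipelineDirtyBitBytes:
//
//     GraphicsPipelineTransitionBits transition;
//     size_t dirtyBit = offset / kGraphicsPipelineDirtyBitBytes;  // |offset| is hypothetical
//     transition.set(dirtyBit);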

// Disable padding warnings for a few helper structs that aggregate Vulkan state objects.  These are
// not used as hash keys, they just simplify passing them around to functions.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

struct GraphicsPipelineVertexInputVulkanStructs
{
    VkPipelineVertexInputStateCreateInfo vertexInputState       = {};
    VkPipelineInputAssemblyStateCreateInfo inputAssemblyState   = {};
    VkPipelineVertexInputDivisorStateCreateInfoEXT divisorState = {};

    // Support storage
    gl::AttribArray<VkVertexInputBindingDescription> bindingDescs;
    gl::AttribArray<VkVertexInputAttributeDescription> attributeDescs;
    gl::AttribArray<VkVertexInputBindingDivisorDescriptionEXT> divisorDesc;
};

struct GraphicsPipelineShadersVulkanStructs
{
    VkPipelineViewportStateCreateInfo viewportState                               = {};
    VkPipelineRasterizationStateCreateInfo rasterState                            = {};
    VkPipelineDepthStencilStateCreateInfo depthStencilState                       = {};
    VkPipelineTessellationStateCreateInfo tessellationState                       = {};
    VkPipelineTessellationDomainOriginStateCreateInfo domainOriginState           = {};
    VkPipelineViewportDepthClipControlCreateInfoEXT depthClipControl              = {};
    VkPipelineRasterizationLineStateCreateInfoEXT rasterLineState                 = {};
    VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provokingVertexState = {};
    VkPipelineRasterizationStateStreamCreateInfoEXT rasterStreamState             = {};
    VkSpecializationInfo specializationInfo                                       = {};

    // Support storage
    angle::FixedVector<VkPipelineShaderStageCreateInfo, 5> shaderStages;
    SpecializationConstantMap<VkSpecializationMapEntry> specializationEntries;
};

struct GraphicsPipelineSharedNonVertexInputVulkanStructs
{
    VkPipelineMultisampleStateCreateInfo multisampleState = {};

    // Support storage
    uint32_t sampleMask;
};

struct GraphicsPipelineFragmentOutputVulkanStructs
{
    VkPipelineColorBlendStateCreateInfo blendState = {};

    // Support storage
    gl::DrawBuffersArray<VkPipelineColorBlendAttachmentState> blendAttachmentState;
};

ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

using GraphicsPipelineDynamicStateList = angle::FixedVector<VkDynamicState, 23>;

enum class PipelineRobustness
{
    NonRobust,
    Robust,
};

enum class PipelineProtectedAccess
{
    Unprotected,
    Protected,
};

// Context state that can affect a compute pipeline
union ComputePipelineOptions final
{
    struct
    {
        // Whether VK_EXT_pipeline_robustness should be used to make the pipeline robust.  Note that
        // programs are allowed to be shared between robust and non-robust contexts, so different
        // pipelines can be created for the same compute program.
        uint8_t robustness : 1;
        // Whether VK_EXT_pipeline_protected_access should be used to make the pipeline
        // protected-only. Similar to robustness, EGL allows protected and unprotected to be in the
        // same share group.
        uint8_t protectedAccess : 1;
        uint8_t reserved : 6;  // must initialize to zero
    };
    uint8_t permutationIndex;
    static constexpr uint32_t kPermutationCount = 0x1 << 2;
};
static_assert(sizeof(ComputePipelineOptions) == 1, "Size check failed");
ComputePipelineOptions GetComputePipelineOptions(vk::PipelineRobustness robustness,
                                                 vk::PipelineProtectedAccess protectedAccess);
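
// Illustrative usage sketch (not part of this header's API): the two bits pack into
// |permutationIndex|, enumerating kPermutationCount (4) pipeline variants per
// compute program:
//
//     ComputePipelineOptions options = GetComputePipelineOptions(
//         vk::PipelineRobustness::Robust, vk::PipelineProtectedAccess::Unprotected);
//     // options.permutationIndex is in [0, ComputePipelineOptions::kPermutationCount)
//     // and can index a small per-program array of cached pipelines.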

// Compute Pipeline Description
class ComputePipelineDesc final
{
  public:
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    ComputePipelineDesc();
    ComputePipelineDesc(const ComputePipelineDesc &other);
    ComputePipelineDesc &operator=(const ComputePipelineDesc &other);

    ComputePipelineDesc(VkSpecializationInfo *specializationInfo,
                        vk::ComputePipelineOptions pipelineOptions);
    ~ComputePipelineDesc() = default;

    size_t hash() const;
    bool keyEqual(const ComputePipelineDesc &other) const;

    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    std::vector<uint32_t> getConstantIds() const { return mConstantIds; }
    std::vector<uint32_t> getConstants() const { return mConstants; }
    ComputePipelineOptions getPipelineOptions() const { return mPipelineOptions; }

  private:
    std::vector<uint32_t> mConstantIds, mConstants;
    ComputePipelineOptions mPipelineOptions = {};
    char mPadding[7]                        = {};
};

class PipelineHelper;

// When a graphics pipeline is created, the shaders state is either directly specified (monolithic
// pipeline) or is specified in a pipeline library.  This struct encapsulates the choices.
struct GraphicsPipelineShadersInfo final
{
  public:
    GraphicsPipelineShadersInfo(const ShaderModuleMap *shaders,
                                const SpecializationConstants *specConsts)
        : mShaders(shaders), mSpecConsts(specConsts)
    {}
    GraphicsPipelineShadersInfo(vk::PipelineHelper *pipelineLibrary)
        : mPipelineLibrary(pipelineLibrary)
    {}

    vk::PipelineHelper *pipelineLibrary() const { return mPipelineLibrary; }
    bool usePipelineLibrary() const { return mPipelineLibrary != nullptr; }

  private:
    // If the shaders state should be directly specified in the final pipeline.
    const ShaderModuleMap *mShaders            = nullptr;
    const SpecializationConstants *mSpecConsts = nullptr;

    // If the shaders state is provided via a pipeline library.
    vk::PipelineHelper *mPipelineLibrary = nullptr;

    friend class GraphicsPipelineDesc;
};

// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash(GraphicsPipelineSubset subset) const;
    bool keyEqual(const GraphicsPipelineDesc &other, GraphicsPipelineSubset subset) const;

    void initDefaults(const ErrorContext *context,
                      GraphicsPipelineSubset subset,
                      PipelineRobustness contextRobustness,
                      PipelineProtectedAccess contextProtectedAccess);

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    VkResult initializePipeline(ErrorContext *context,
                                PipelineCacheAccess *pipelineCache,
                                GraphicsPipelineSubset subset,
                                const RenderPass &compatibleRenderPass,
                                const PipelineLayout &pipelineLayout,
                                const GraphicsPipelineShadersInfo &shaders,
                                Pipeline *pipelineOut,
                                CacheLookUpFeedback *feedbackOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(ContextVk *contextVk,
                           GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           bool compressed,
                           GLuint relativeOffset);
    void setVertexShaderComponentTypes(gl::AttributesMask activeAttribLocations,
                                       gl::ComponentTypeMask componentTypeMask);
    void updateVertexShaderComponentTypes(GraphicsPipelineTransitionBits *transition,
                                          gl::AttributesMask activeAttribLocations,
                                          gl::ComponentTypeMask componentTypeMask);

    // Input assembly info
    void setTopology(gl::PrimitiveMode drawMode);
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Viewport states
    void updateDepthClipControl(GraphicsPipelineTransitionBits *transition, bool negativeOneToOne);

    // Raster states
    void updatePolygonMode(GraphicsPipelineTransitionBits *transition, gl::PolygonMode polygonMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mSharedNonVertexInput.renderPass; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const angle::FeaturesVk &features,
                              const RenderPassDesc &renderPassDesc,
                              FramebufferFetchMode framebufferFetchMode);
    void setRenderPassSampleCount(GLint samples);
    void setRenderPassFramebufferFetchMode(FramebufferFetchMode framebufferFetchMode);
    bool getRenderPassColorFramebufferFetchMode() const
    {
        return mSharedNonVertexInput.renderPass.hasColorFramebufferFetch();
    }
    bool getRenderPassDepthStencilFramebufferFetchMode() const
    {
        return mSharedNonVertexInput.renderPass.hasDepthStencilFramebufferFetch();
    }

    void setRenderPassFoveation(bool isFoveated);
    bool getRenderPassFoveation() const
    {
        return mSharedNonVertexInput.renderPass.hasFragmentShadingAttachment();
    }

    void setRenderPassColorAttachmentFormat(size_t colorIndexGL, angle::FormatID formatID);

    // Blend states
    void setSingleBlend(uint32_t colorIndexGL,
                        bool enabled,
                        VkBlendOp op,
                        VkBlendFactor srcFactor,
                        VkBlendFactor dstFactor);
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
                            gl::DrawBufferMask blendEnabledMask);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendStateExt &blendStateExt,
                          gl::DrawBufferMask attachmentMask);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendStateExt &blendStateExt,
                              gl::DrawBufferMask attachmentMask);
    void resetBlendFuncsAndEquations(GraphicsPipelineTransitionBits *transition,
                                     const gl::BlendStateExt &blendStateExt,
                                     gl::DrawBufferMask previousAttachmentsMask,
                                     gl::DrawBufferMask newAttachmentsMask);
    void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                            const gl::DrawBufferMask &alphaMask,
                            const gl::DrawBufferMask &enabledDrawBuffers);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
                               gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                               const gl::DrawBufferMask &alphaMask,
                               const gl::DrawBufferMask &enabledDrawBuffers);
    void updateMissingOutputsMask(GraphicsPipelineTransitionBits *transition,
                                  gl::DrawBufferMask missingOutputsMask);

    // Logic op
    void updateLogicOpEnabled(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateLogicOp(GraphicsPipelineTransitionBits *transition, VkLogicOp logicOp);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setDepthClampEnabled(bool enabled);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(VkCompareOp compareOp);
    void setStencilBackFuncs(VkCompareOp compareOp);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthClampEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);

    // Depth offset.
    void updatePolygonOffsetEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);

    // Tessellation
    void updatePatchVertices(GraphicsPipelineTransitionBits *transition, GLuint value);

    // Subpass
    void resetSubpass(GraphicsPipelineTransitionBits *transition);
    void nextSubpass(GraphicsPipelineTransitionBits *transition);
    void setSubpass(uint32_t subpass);
    uint32_t getSubpass() const;

    void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
                               bool isRotatedAspectRatio);
    bool getSurfaceRotation() const { return mShaders.shaders.bits.surfaceRotation; }

    void updateEmulatedDitherControl(GraphicsPipelineTransitionBits *transition, uint16_t value);
    uint32_t getEmulatedDitherControl() const { return mShaders.shaders.emulatedDitherControl; }

    bool isLegacyDitherEnabled() const
    {
        return mSharedNonVertexInput.renderPass.isLegacyDitherEnabled();
    }

    void updateNonZeroStencilWriteMaskWorkaround(GraphicsPipelineTransitionBits *transition,
                                                 bool enabled);

    void setSupportsDynamicStateForTest(bool supports)
    {
        mVertexInput.inputAssembly.bits.useVertexInputBindingStrideDynamicState = supports;
        mShaders.shaders.bits.nonZeroStencilWriteMaskWorkaround                 = false;
    }

    static VkFormat getPipelineVertexInputStateFormat(ErrorContext *context,
                                                      angle::FormatID formatID,
                                                      bool compressed,
                                                      const gl::ComponentType programAttribType,
                                                      uint32_t attribIndex);

    // Helpers to dump the state
    const PipelineVertexInputState &getVertexInputStateForLog() const { return mVertexInput; }
    const PipelineShadersState &getShadersStateForLog() const { return mShaders; }
    const PipelineSharedNonVertexInputState &getSharedNonVertexInputStateForLog() const
    {
        return mSharedNonVertexInput;
    }
    const PipelineFragmentOutputState &getFragmentOutputStateForLog() const
    {
        return mFragmentOutput;
    }

    bool hasPipelineProtectedAccess() const
    {
        ASSERT(mShaders.shaders.bits.isProtectedContext ==
               mVertexInput.inputAssembly.bits.isProtectedContext);
        ASSERT(mShaders.shaders.bits.isProtectedContext ==
               mFragmentOutput.blendMaskAndLogic.bits.isProtectedContext);

        return mShaders.shaders.bits.isProtectedContext;
    }

  private:
    void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);

    const void *getPipelineSubsetMemory(GraphicsPipelineSubset subset, size_t *sizeOut) const;

    void initializePipelineVertexInputState(
        ErrorContext *context,
        GraphicsPipelineVertexInputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineShadersState(
        ErrorContext *context,
        const ShaderModuleMap &shaders,
        const SpecializationConstants &specConsts,
        GraphicsPipelineShadersVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineSharedNonVertexInputState(
        ErrorContext *context,
        GraphicsPipelineSharedNonVertexInputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineFragmentOutputState(
        ErrorContext *context,
        GraphicsPipelineFragmentOutputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    PipelineShadersState mShaders;
    PipelineSharedNonVertexInputState mSharedNonVertexInput;
    PipelineFragmentOutputState mFragmentOutput;
    PipelineVertexInputState mVertexInput;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

// Values are based on data recorded here -> https://anglebug.com/42267114#comment5
constexpr size_t kDefaultDescriptorSetLayoutBindingsCount = 8;
constexpr size_t kDefaultImmutableSamplerBindingsCount    = 1;
using DescriptorSetLayoutBindingVector =
    angle::FastVector<VkDescriptorSetLayoutBinding, kDefaultDescriptorSetLayoutBindingsCount>;

// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void addBinding(uint32_t bindingIndex,
                    VkDescriptorType descriptorType,
                    uint32_t count,
                    VkShaderStageFlags stages,
                    const Sampler *immutableSampler);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

    bool empty() const { return mDescriptorSetLayoutBindings.empty(); }

  private:
    // There is a small risk of an issue if the sampler cache is evicted but not the descriptor
    // cache: we would have an invalid handle here. Hence the proposed follow-up work:
    // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
    union PackedDescriptorSetBinding
    {
        static constexpr uint8_t kInvalidType = 255;

        struct
        {
            uint8_t type;                      // Stores a packed VkDescriptorType descriptorType.
            uint8_t stages;                    // Stores a packed VkShaderStageFlags.
            uint16_t count : 15;               // Stores a packed uint32_t descriptorCount.
            uint16_t hasImmutableSampler : 1;  // Whether this binding has an immutable sampler.
        };
        uint32_t value;

        bool operator==(const PackedDescriptorSetBinding &other) const
        {
            return value == other.value;
        }
    };

    // 1x 32bit
    static_assert(sizeof(PackedDescriptorSetBinding) == 4, "Unexpected size");

    angle::FastVector<VkSampler, kDefaultImmutableSamplerBindingsCount> mImmutableSamplers;
    angle::FastVector<PackedDescriptorSetBinding, kDefaultDescriptorSetLayoutBindingsCount>
        mDescriptorSetLayoutBindings;

#if !defined(ANGLE_IS_64_BIT_CPU)
    ANGLE_MAYBE_UNUSED_PRIVATE_FIELD uint32_t mPadding = 0;
#endif
};
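
// Illustrative usage sketch (hypothetical bindings; not part of this header):
// a layout description is filled with addBinding() and later unpacked into the
// VkDescriptorSetLayoutBinding list that vkCreateDescriptorSetLayout consumes.
//
//   DescriptorSetLayoutDesc desc;
//   // One dynamic uniform buffer at binding 0, visible to the vertex stage.
//   desc.addBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
//                   VK_SHADER_STAGE_VERTEX_BIT, nullptr);
//   // A combined image sampler at binding 1, visible to the fragment stage.
//   desc.addBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
//                   VK_SHADER_STAGE_FRAGMENT_BIT, nullptr);
//
//   DescriptorSetLayoutBindingVector bindings;
//   desc.unpackBindings(&bindings);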

// The following are for caching descriptor set layouts. Limited to a maximum of
// kMaxDescriptorSetLayouts layouts (one per DescriptorSetIndex). This can be extended in the
// future.
constexpr size_t kMaxDescriptorSetLayouts = ToUnderlying(DescriptorSetIndex::EnumCount);

union PackedPushConstantRange
{
    struct
    {
        uint8_t offset;
        uint8_t size;
        uint16_t stageMask;
    };
    uint32_t value;

    bool operator==(const PackedPushConstantRange &other) const { return value == other.value; }
};

static_assert(sizeof(PackedPushConstantRange) == sizeof(uint32_t), "Unexpected Size");

template <typename T>
using DescriptorSetArray = angle::PackedEnumMap<DescriptorSetIndex, T>;
using DescriptorSetLayoutPointerArray = DescriptorSetArray<DescriptorSetLayoutPtr>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
                                   const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(VkShaderStageFlags stageMask, uint32_t offset, uint32_t size);

    const PackedPushConstantRange &getPushConstantRange() const { return mPushConstantRange; }

  private:
    DescriptorSetArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PackedPushConstantRange mPushConstantRange;
    ANGLE_MAYBE_UNUSED_PRIVATE_FIELD uint32_t mPadding;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) == sizeof(DescriptorSetArray<DescriptorSetLayoutDesc>) +
                                                sizeof(PackedPushConstantRange) + sizeof(uint32_t),
              "Unexpected Size");
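
// Illustrative usage sketch (|uniformsSetDesc| and |textureSetDesc| are
// hypothetical): a pipeline layout is keyed by its per-set layout descriptions
// plus a single packed push constant range.
//
//   PipelineLayoutDesc layoutDesc;
//   layoutDesc.updateDescriptorSetLayout(DescriptorSetIndex::UniformsAndXfb, uniformsSetDesc);
//   layoutDesc.updateDescriptorSetLayout(DescriptorSetIndex::Texture, textureSetDesc);
//   // 16 bytes of push constants starting at offset 0, visible to the vertex stage.
//   layoutDesc.updatePushConstantRange(VK_SHADER_STAGE_VERTEX_BIT, 0, 16);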

enum class YcbcrLinearFilterSupport
{
    Unsupported,
    Supported,
};

class YcbcrConversionDesc final
{
  public:
    YcbcrConversionDesc();
    ~YcbcrConversionDesc();
    YcbcrConversionDesc(const YcbcrConversionDesc &other);
    YcbcrConversionDesc &operator=(const YcbcrConversionDesc &other);

    size_t hash() const;
    bool operator==(const YcbcrConversionDesc &other) const;

    bool valid() const { return mExternalOrVkFormat != 0; }
    void reset();
    void update(Renderer *renderer,
                uint64_t externalFormat,
                VkSamplerYcbcrModelConversion conversionModel,
                VkSamplerYcbcrRange colorRange,
                VkChromaLocation xChromaOffset,
                VkChromaLocation yChromaOffset,
                VkFilter chromaFilter,
                VkComponentMapping components,
                angle::FormatID intendedFormatID,
                YcbcrLinearFilterSupport linearFilterSupported);
    VkFilter getChromaFilter() const { return static_cast<VkFilter>(mChromaFilter); }
    bool updateChromaFilter(Renderer *renderer, VkFilter filter);
    void updateConversionModel(VkSamplerYcbcrModelConversion conversionModel);
    uint64_t getExternalFormat() const { return mIsExternalFormat ? mExternalOrVkFormat : 0; }

    angle::Result init(ErrorContext *context, SamplerYcbcrConversion *conversionOut) const;

  private:
    // If the sampler needs to convert the image content (e.g. from YUV to RGB) then
    // mExternalOrVkFormat will be non-zero. The value is either the external format
    // as returned by vkGetAndroidHardwareBufferPropertiesANDROID or a YUV VkFormat.
    // For VkSamplerYcbcrConversion, mExternalOrVkFormat along with mIsExternalFormat,
    // mConversionModel and mColorRange works as a Serial() used elsewhere in ANGLE.
    uint64_t mExternalOrVkFormat;
    // 1 bit to identify if external format is used
    uint32_t mIsExternalFormat : 1;
    // 3 bits to identify conversion model
    uint32_t mConversionModel : 3;
    // 1 bit to identify color component range
    uint32_t mColorRange : 1;
    // 1 bit to identify x chroma location
    uint32_t mXChromaOffset : 1;
    // 1 bit to identify y chroma location
    uint32_t mYChromaOffset : 1;
    // 1 bit to identify chroma filtering
    uint32_t mChromaFilter : 1;
    // 3 bits to identify R component swizzle
    uint32_t mRSwizzle : 3;
    // 3 bits to identify G component swizzle
    uint32_t mGSwizzle : 3;
    // 3 bits to identify B component swizzle
    uint32_t mBSwizzle : 3;
    // 3 bits to identify A component swizzle
    uint32_t mASwizzle : 3;
    // 1 bit for whether linear filtering is supported (independent of whether currently enabled)
    uint32_t mLinearFilterSupported : 1;
    uint32_t mPadding : 11;
    uint32_t mReserved;
};

static_assert(sizeof(YcbcrConversionDesc) == 16, "Unexpected YcbcrConversionDesc size");
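
// Illustrative usage sketch (hypothetical values): describing the conversion
// for an Android external format, where |renderer| and |intendedFormatID| are
// assumed valid and |externalFormat| comes from
// vkGetAndroidHardwareBufferPropertiesANDROID.
//
//   YcbcrConversionDesc desc;
//   desc.update(renderer, externalFormat, VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601,
//               VK_SAMPLER_YCBCR_RANGE_ITU_NARROW, VK_CHROMA_LOCATION_MIDPOINT,
//               VK_CHROMA_LOCATION_MIDPOINT, VK_FILTER_LINEAR, VkComponentMapping{},
//               intendedFormatID, YcbcrLinearFilterSupport::Supported);
//   ASSERT(desc.valid());  // a non-zero external format makes the desc valid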

// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    SamplerDesc(ErrorContext *context,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    void update(Renderer *renderer,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    void reset();
    angle::Result init(ContextVk *contextVk, Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // 16*8 bits to uniquely identify a YCbCr conversion sampler.
    YcbcrConversionDesc mYcbcrConversionDesc;

    // 16 bits for modes + states.
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op. (8 possible values)
    uint16_t mCompareOp : 3;

    // Values from angle::ColorGeneric::Type. Float is 0 and others are 1.
    uint16_t mBorderColorType : 1;

    uint16_t mPadding : 15;

    // 16*8 bits for BorderColor
    angle::ColorF mBorderColor;

    // 32 bits reserved for future use.
    uint32_t mReserved;
};

static_assert(sizeof(SamplerDesc) == 56, "Unexpected SamplerDesc size");
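
// Illustrative usage sketch (|contextVk|, |samplerState|, |ycbcrConversionDesc|
// and |intendedFormatID| are assumed to exist): SamplerDesc doubles as the
// sampler cache key, so two GL sampler states that pack to identical bits share
// one VkSampler.
//
//   SamplerDesc desc(contextVk, samplerState, /*stencilMode=*/false,
//                    &ycbcrConversionDesc, intendedFormatID);
//   Sampler sampler;
//   ANGLE_TRY(desc.init(contextVk, &sampler));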

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
    // of the code faster. The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions. Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}
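
// Worked example for the matching above: with kGraphicsPipelineDirtyBitBytes
// equal to 4, dirty bit N covers the uint32_t at byte offset 4*N of the packed
// description, i.e. a state member at (hypothetical) byte offset |off| maps to:
//
//   size_t dirtyBit = off / kGraphicsPipelineDirtyBitBytes;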

// A class that encapsulates the vk::PipelineCache and associated mutex.  The mutex may be nullptr
// if synchronization is not necessary.
class PipelineCacheAccess
{
  public:
    PipelineCacheAccess()  = default;
    ~PipelineCacheAccess() = default;

    void init(const vk::PipelineCache *pipelineCache, angle::SimpleMutex *mutex)
    {
        mPipelineCache = pipelineCache;
        mMutex         = mutex;
    }

    VkResult createGraphicsPipeline(vk::ErrorContext *context,
                                    const VkGraphicsPipelineCreateInfo &createInfo,
                                    vk::Pipeline *pipelineOut);
    VkResult createComputePipeline(vk::ErrorContext *context,
                                   const VkComputePipelineCreateInfo &createInfo,
                                   vk::Pipeline *pipelineOut);

    VkResult getCacheData(vk::ErrorContext *context, size_t *cacheSize, void *cacheData);

    void merge(Renderer *renderer, const vk::PipelineCache &pipelineCache);

    bool isThreadSafe() const { return mMutex != nullptr; }

  private:
    std::unique_lock<angle::SimpleMutex> getLock();

    const vk::PipelineCache *mPipelineCache = nullptr;
    angle::SimpleMutex *mMutex              = nullptr;
};
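
// Illustrative usage sketch (|pipelineCache|, |pipelineCacheMutex|, |context|
// and |createInfo| are assumed to exist): a thread-shared cache is initialized
// with its mutex, while nullptr can be passed when no synchronization is needed.
//
//   PipelineCacheAccess access;
//   access.init(&pipelineCache, &pipelineCacheMutex);
//   ASSERT(access.isThreadSafe());
//   VkResult result = access.createGraphicsPipeline(context, createInfo, &pipelineOut);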

// Monolithic pipeline creation tasks are created as soon as a pipeline is created out of libraries.
// However, they are not immediately posted to the worker queue to allow pacing.  On each use of a
// pipeline, an attempt is made to post the task.
class CreateMonolithicPipelineTask : public ErrorContext, public angle::Closure
{
  public:
    CreateMonolithicPipelineTask(Renderer *renderer,
                                 const PipelineCacheAccess &pipelineCache,
                                 const PipelineLayout &pipelineLayout,
                                 const ShaderModuleMap &shaders,
                                 const SpecializationConstants &specConsts,
                                 const GraphicsPipelineDesc &desc);

    // The compatible render pass is set only when the task is ready to run.  This is because the
    // render pass cache may have been cleared since the task was created (e.g. to accommodate
    // framebuffer fetch).  Such render pass cache clears ensure there are no active tasks, so it's
    // safe to hold on to this pointer for the brief period between task post and completion.
    //
    // Not applicable to dynamic rendering.
    const RenderPassDesc &getRenderPassDesc() const { return mDesc.getRenderPassDesc(); }
    void setCompatibleRenderPass(const RenderPass *compatibleRenderPass);

    void operator()() override;

    VkResult getResult() const { return mResult; }
    Pipeline &getPipeline() { return mPipeline; }
    CacheLookUpFeedback getFeedback() const { return mFeedback; }

    void handleError(VkResult result,
                     const char *file,
                     const char *function,
                     unsigned int line) override;

  private:
    // Input to pipeline creation
    PipelineCacheAccess mPipelineCache;
    const RenderPass *mCompatibleRenderPass;
    const PipelineLayout &mPipelineLayout;
    const ShaderModuleMap &mShaders;
    SpecializationConstants mSpecConsts;
    GraphicsPipelineDesc mDesc;

    // Results
    VkResult mResult;
    Pipeline mPipeline;
    CacheLookUpFeedback mFeedback;
};

class WaitableMonolithicPipelineCreationTask
{
  public:
    ~WaitableMonolithicPipelineCreationTask();

    void setTask(std::shared_ptr<CreateMonolithicPipelineTask> &&task) { mTask = std::move(task); }
    void setRenderPass(const RenderPass *compatibleRenderPass)
    {
        mTask->setCompatibleRenderPass(compatibleRenderPass);
    }
    void onSchedule(const std::shared_ptr<angle::WaitableEvent> &waitableEvent)
    {
        mWaitableEvent = waitableEvent;
    }
    void reset()
    {
        mWaitableEvent.reset();
        mTask.reset();
    }

    bool isValid() const { return mTask.get() != nullptr; }
    bool isPosted() const { return mWaitableEvent.get() != nullptr; }
    bool isReady() { return mWaitableEvent->isReady(); }
    void wait() { return mWaitableEvent->wait(); }

    std::shared_ptr<CreateMonolithicPipelineTask> getTask() const { return mTask; }

  private:
    std::shared_ptr<angle::WaitableEvent> mWaitableEvent;
    std::shared_ptr<CreateMonolithicPipelineTask> mTask;
};
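
// Illustrative flow (simplified; |taskArgs|, |workerPool| and
// |compatibleRenderPass| are hypothetical): the wrapper above lets the share
// group pace when the monolithic-pipeline task is actually posted.
//
//   WaitableMonolithicPipelineCreationTask waitable;
//   waitable.setTask(std::make_shared<CreateMonolithicPipelineTask>(taskArgs...));
//   // Later, once the pacing logic decides to run the task:
//   waitable.setRenderPass(compatibleRenderPass);
//   waitable.onSchedule(workerPool->postWorkerTask(waitable.getTask()));
//   // On a subsequent use of the pipeline:
//   if (waitable.isPosted() && waitable.isReady())
//   {
//       // Swap the monolithic pipeline in and reset the waitable.
//   }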

class PipelineHelper final : public Resource
{
  public:
    PipelineHelper();
    ~PipelineHelper() override;
    inline explicit PipelineHelper(Pipeline &&pipeline, CacheLookUpFeedback feedback);
    PipelineHelper &operator=(PipelineHelper &&other);

    void destroy(VkDevice device);
    void release(ErrorContext *context);

    bool valid() const { return mPipeline.valid(); }
    const Pipeline &getPipeline() const { return mPipeline; }

    // Get the pipeline.  If there is a monolithic pipeline creation task pending, scheduling it is
    // attempted.  If that task is done, the pipeline is replaced with the results and the old
    // pipeline released.
    angle::Result getPreferredPipeline(ContextVk *contextVk, const Pipeline **pipelineOut);

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

    const std::vector<GraphicsPipelineTransition> &getTransitions() const { return mTransitions; }

    void setComputePipeline(Pipeline &&pipeline, CacheLookUpFeedback feedback)
    {
        ASSERT(!mPipeline.valid());
        mPipeline = std::move(pipeline);

        ASSERT(mCacheLookUpFeedback == CacheLookUpFeedback::None);
        mCacheLookUpFeedback = feedback;
    }
    CacheLookUpFeedback getCacheLookUpFeedback() const { return mCacheLookUpFeedback; }

    void setLinkedLibraryReferences(vk::PipelineHelper *shadersPipeline);

    void retainInRenderPass(RenderPassCommandBufferHelper *renderPassCommands);

    void setMonolithicPipelineCreationTask(std::shared_ptr<CreateMonolithicPipelineTask> &&task)
    {
        mMonolithicPipelineCreationTask.setTask(std::move(task));
    }

  private:
    void reset();

    std::vector<GraphicsPipelineTransition> mTransitions;
    Pipeline mPipeline;
    CacheLookUpFeedback mCacheLookUpFeedback           = CacheLookUpFeedback::None;
    CacheLookUpFeedback mMonolithicCacheLookUpFeedback = CacheLookUpFeedback::None;

    // The list of pipeline helpers that were referenced when creating a linked pipeline.  These
    // pipelines must be kept alive, so their serial is updated at the same time as this object.
    // The shaders pipeline is the only library so far.
    PipelineHelper *mLinkedShaders = nullptr;

    // If pipeline libraries are used and monolithic pipelines are created in parallel, this is the
    // temporary library created (previously in |mPipeline|) that is now replaced by the monolithic
    // one.  It is not immediately garbage collected when replaced, because there is currently a bug
    // with that.  http://anglebug.com/42266335
    Pipeline mLinkedPipelineToRelease;

    // An async task to create a monolithic pipeline.  Only used if the pipeline was originally
    // created as a linked library.  The |getPipeline()| call will attempt to schedule this task
    // through the share group, which manages and paces these tasks.  Once the task results are
    // ready, |mPipeline| is released and replaced by the result of this task.
    WaitableMonolithicPipelineCreationTask mMonolithicPipelineCreationTask;
};

class FramebufferHelper : public Resource
{
  public:
    FramebufferHelper();
    ~FramebufferHelper() override;

    FramebufferHelper(FramebufferHelper &&other);
    FramebufferHelper &operator=(FramebufferHelper &&other);

    angle::Result init(ErrorContext *context, const VkFramebufferCreateInfo &createInfo);
    void destroy(Renderer *renderer);
    void release(ContextVk *contextVk);

    bool valid() { return mFramebuffer.valid(); }

    const Framebuffer &getFramebuffer() const
    {
        ASSERT(mFramebuffer.valid());
        return mFramebuffer;
    }

    Framebuffer &getFramebuffer()
    {
        ASSERT(mFramebuffer.valid());
        return mFramebuffer;
    }

  private:
    // Vulkan object.
    Framebuffer mFramebuffer;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline, CacheLookUpFeedback feedback)
    : mPipeline(std::move(pipeline)), mCacheLookUpFeedback(feedback)
{}

ANGLE_INLINE PipelineHelper &PipelineHelper::operator=(PipelineHelper &&other)
{
    ASSERT(!mPipeline.valid());

    std::swap(mPipeline, other.mPipeline);
    mCacheLookUpFeedback = other.mCacheLookUpFeedback;

    return *this;
}

struct ImageSubresourceRange
{
    // GL max is 1000 (fits in 10 bits).
    uint32_t level : 10;
    // Max 31 levels (2 ** 5 - 1). Can store levelCount-1 if we need to save another bit.
    uint32_t levelCount : 5;
    // Implementation max is 4096 (12 bits).
    uint32_t layer : 12;
    // One of vk::LayerMode values.  If 0, it means all layers.  Otherwise it's the count of layers
    // which is usually 1, except for multiview in which case it can be up to
    // gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS.
    uint32_t layerMode : 3;
    // For reads: Values are either ImageViewColorspace::Linear or ImageViewColorspace::SRGB
    uint32_t readColorspace : 1;
    // For writes: Values are either ImageViewColorspace::Linear or ImageViewColorspace::SRGB
    uint32_t writeColorspace : 1;

    static_assert(gl::IMPLEMENTATION_MAX_TEXTURE_LEVELS < (1 << 5),
                  "Not enough bits for level count");
    static_assert(gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS <= (1 << 12),
                  "Not enough bits for layer index");
    static_assert(gl::IMPLEMENTATION_ANGLE_MULTIVIEW_MAX_VIEWS <= (1 << 3),
                  "Not enough bits for layer count");
};

static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t), "Size mismatch");

inline bool operator==(const ImageSubresourceRange &a, const ImageSubresourceRange &b)
{
    return a.level == b.level && a.levelCount == b.levelCount && a.layer == b.layer &&
           a.layerMode == b.layerMode && a.readColorspace == b.readColorspace &&
           a.writeColorspace == b.writeColorspace;
}

constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {0, 0, 0, 0, 0, 0};
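
// Illustrative sketch: the whole range packs into a single uint32_t.  For
// example, a single-layer view of level 2 that reads linear and writes sRGB
// (assuming ImageViewColorspace::Linear == 0 and ImageViewColorspace::SRGB == 1):
//
//   ImageSubresourceRange range = {};
//   range.level           = 2;
//   range.levelCount      = 1;
//   range.layer           = 0;
//   range.layerMode       = 1;  // one of vk::LayerMode values; here a single layer
//   range.readColorspace  = 0;  // ImageViewColorspace::Linear
//   range.writeColorspace = 1;  // ImageViewColorspace::SRGB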

struct ImageOrBufferViewSubresourceSerial
{
    ImageOrBufferViewSerial viewSerial;
    ImageSubresourceRange subresource;
};

inline bool operator==(const ImageOrBufferViewSubresourceSerial &a,
                       const ImageOrBufferViewSubresourceSerial &b)
{
    return a.viewSerial == b.viewSerial && a.subresource == b.subresource;
}

constexpr ImageOrBufferViewSubresourceSerial kInvalidImageOrBufferViewSubresourceSerial = {
    kInvalidImageOrBufferViewSerial, kInvalidImageSubresourceRange};

// Always starts with array element zero, with descriptorCount descriptors.
struct WriteDescriptorDesc
{
    uint8_t binding;              // Redundant: determined by the containing WriteDesc array.
    uint8_t descriptorCount;      // Number of array elements in this descriptor write.
    uint8_t descriptorType;       // Packed VkDescriptorType.
    uint8_t descriptorInfoIndex;  // Base index into an array of DescriptorInfoDescs.
};

static_assert(sizeof(WriteDescriptorDesc) == 4, "Size mismatch");

struct DescriptorInfoDesc
{
    uint32_t samplerOrBufferSerial;
    uint32_t imageViewSerialOrOffset;
    uint32_t imageLayoutOrRange;
    uint32_t imageSubresourceRange;
};

static_assert(sizeof(DescriptorInfoDesc) == 16, "Size mismatch");

// Generic description of a descriptor set. Used as a key when indexing descriptor set caches. The
// key storage is an angle::FastVector. Beyond a certain fixed size we'll end up using heap memory
// to store keys. Currently we specialize the structure for three use cases: uniforms, textures,
// and other shader resources. Because of the way the specialization works we can't currently cache
// programs that use some types of resources.
static constexpr size_t kFastDescriptorSetDescLimit = 8;

struct DescriptorDescHandles
{
    VkBuffer buffer;
    VkSampler sampler;
    VkImageView imageView;
    VkBufferView bufferView;
};

class WriteDescriptorDescs
{
  public:
    void reset()
    {
        mDescs.clear();
        mDynamicDescriptorSetCount = 0;
        mCurrentInfoIndex          = 0;
    }

    void updateShaderBuffers(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                             const std::vector<gl::InterfaceBlock> &blocks,
                             VkDescriptorType descriptorType);

    void updateAtomicCounters(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers);

    void updateImages(const gl::ProgramExecutable &executable,
                      const ShaderInterfaceVariableInfoMap &variableInfoMap);

    void updateInputAttachments(const gl::ProgramExecutable &executable,
                                const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                FramebufferVk *framebufferVk);

    void updateExecutableActiveTextures(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                        const gl::ProgramExecutable &executable);

    void updateDefaultUniform(gl::ShaderBitSet shaderTypes,
                              const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const gl::ProgramExecutable &executable);

    void updateTransformFeedbackWrite(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                      const gl::ProgramExecutable &executable);

    void updateDynamicDescriptorsCount();

    size_t size() const { return mDescs.size(); }
    bool empty() const { return mDescs.size() == 0; }

    const WriteDescriptorDesc &operator[](uint32_t bindingIndex) const
    {
        return mDescs[bindingIndex];
    }

    size_t getTotalDescriptorCount() const { return mCurrentInfoIndex; }
    size_t getDynamicDescriptorSetCount() const { return mDynamicDescriptorSetCount; }

  private:
    bool hasWriteDescAtIndex(uint32_t bindingIndex) const
    {
        return bindingIndex < mDescs.size() && mDescs[bindingIndex].descriptorCount > 0;
    }

    void incrementDescriptorCount(uint32_t bindingIndex, uint32_t count)
    {
        // Validate we have no subsequent writes.
        ASSERT(hasWriteDescAtIndex(bindingIndex));
        mDescs[bindingIndex].descriptorCount += count;
    }

    void updateWriteDesc(uint32_t bindingIndex,
                         VkDescriptorType descriptorType,
                         uint32_t descriptorCount);

    // After a preliminary minimum size, use heap memory.
    angle::FastMap<WriteDescriptorDesc, kFastDescriptorSetDescLimit> mDescs;
    size_t mDynamicDescriptorSetCount = 0;
    uint32_t mCurrentInfoIndex        = 0;
};
std::ostream &operator<<(std::ostream &os, const WriteDescriptorDescs &desc);

class DescriptorSetDesc
{
  public:
    DescriptorSetDesc()  = default;
    ~DescriptorSetDesc() = default;

    DescriptorSetDesc(const DescriptorSetDesc &other) : mDescriptorInfos(other.mDescriptorInfos) {}

    DescriptorSetDesc &operator=(const DescriptorSetDesc &other)
    {
        mDescriptorInfos = other.mDescriptorInfos;
        return *this;
    }

    size_t hash() const;

    size_t size() const { return mDescriptorInfos.size(); }
    void resize(size_t count) { mDescriptorInfos.resize(count); }

    size_t getKeySizeBytes() const { return mDescriptorInfos.size() * sizeof(DescriptorInfoDesc); }

    bool operator==(const DescriptorSetDesc &other) const
    {
        return mDescriptorInfos.size() == other.mDescriptorInfos.size() &&
               memcmp(mDescriptorInfos.data(), other.mDescriptorInfos.data(),
                      mDescriptorInfos.size() * sizeof(DescriptorInfoDesc)) == 0;
    }

    DescriptorInfoDesc &getInfoDesc(uint32_t infoDescIndex)
    {
        return mDescriptorInfos[infoDescIndex];
    }

    const DescriptorInfoDesc &getInfoDesc(uint32_t infoDescIndex) const
    {
        return mDescriptorInfos[infoDescIndex];
    }

    void updateDescriptorSet(Renderer *renderer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             UpdateDescriptorSetsBuilder *updateBuilder,
                             const DescriptorDescHandles *handles,
                             VkDescriptorSet descriptorSet) const;

  private:
    // After a preliminary minimum size, use heap memory.
    angle::FastVector<DescriptorInfoDesc, kFastDescriptorSetDescLimit> mDescriptorInfos;
};
std::ostream &operator<<(std::ostream &os, const DescriptorSetDesc &desc);

class DescriptorPoolHelper;

// SharedDescriptorSetCacheKey.
// Because DescriptorSet must associate with a pool, we need to define a structure that wraps both.
class DescriptorSetDescAndPool final
{
  public:
    DescriptorSetDescAndPool() : mPool(nullptr) {}
    DescriptorSetDescAndPool(const DescriptorSetDesc &desc, DynamicDescriptorPool *pool)
        : mDesc(desc), mPool(pool)
    {}
    DescriptorSetDescAndPool(DescriptorSetDescAndPool &&other)
        : mDesc(other.mDesc), mPool(other.mPool)
    {
        other.mPool = nullptr;
    }
    ~DescriptorSetDescAndPool() { ASSERT(!valid()); }
    void destroy(VkDevice /*device*/) { mPool = nullptr; }

    void destroyCachedObject(Renderer *renderer);
    void releaseCachedObject(ContextVk *contextVk) { UNREACHABLE(); }
    void releaseCachedObject(Renderer *renderer);
    bool valid() const { return mPool != nullptr; }
    const DescriptorSetDesc &getDesc() const
    {
        ASSERT(valid());
        return mDesc;
    }
    bool operator==(const DescriptorSetDescAndPool &other) const
    {
        return mDesc == other.mDesc && mPool == other.mPool;
    }

    // Returns true if the key/value can be found in the cache.
    bool hasValidCachedObject(ContextVk *contextVk) const;

  private:
    DescriptorSetDesc mDesc;
    DynamicDescriptorPool *mPool;
};
using SharedDescriptorSetCacheKey = SharedPtr<DescriptorSetDescAndPool>;
ANGLE_INLINE const SharedDescriptorSetCacheKey
CreateSharedDescriptorSetCacheKey(const DescriptorSetDesc &desc, DynamicDescriptorPool *pool)
{
    return SharedDescriptorSetCacheKey::MakeShared(VK_NULL_HANDLE, desc, pool);
}

constexpr VkDescriptorType kStorageBufferDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

// Manages a descriptor set desc with a few helper routines and also stores object handles.
class DescriptorSetDescBuilder final
{
  public:
    DescriptorSetDescBuilder();
    DescriptorSetDescBuilder(size_t descriptorCount);
    ~DescriptorSetDescBuilder();

    DescriptorSetDescBuilder(const DescriptorSetDescBuilder &other);
    DescriptorSetDescBuilder &operator=(const DescriptorSetDescBuilder &other);

    const DescriptorSetDesc &getDesc() const { return mDesc; }

    void resize(size_t descriptorCount)
    {
        mDesc.resize(descriptorCount);
        mHandles.resize(descriptorCount);
        mDynamicOffsets.resize(descriptorCount);
    }

    // Specific helpers for uniforms/xfb descriptors.
    void updateUniformBuffer(uint32_t shaderIndex,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             const BufferHelper &bufferHelper,
                             VkDeviceSize bufferRange);

    void updateTransformFeedbackBuffer(const Context *context,
                                       const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                       const WriteDescriptorDescs &writeDescriptorDescs,
                                       uint32_t xfbBufferIndex,
                                       const BufferHelper &bufferHelper,
                                       VkDeviceSize bufferOffset,
                                       VkDeviceSize bufferRange);

    void updateUniformsAndXfb(Context *context,
                              const gl::ProgramExecutable &executable,
                              const WriteDescriptorDescs &writeDescriptorDescs,
                              const BufferHelper *currentUniformBuffer,
                              const BufferHelper &emptyBuffer,
                              bool activeUnpaused,
                              TransformFeedbackVk *transformFeedbackVk);

    // Specific helpers for shader resource descriptors.
    template <typename CommandBufferT>
    void updateOneShaderBuffer(Context *context,
                               CommandBufferT *commandBufferHelper,
                               const ShaderInterfaceVariableInfoMap &variableInfoMap,
                               const gl::BufferVector &buffers,
                               const gl::InterfaceBlock &block,
                               uint32_t bufferIndex,
                               VkDescriptorType descriptorType,
                               VkDeviceSize maxBoundBufferRange,
                               const BufferHelper &emptyBuffer,
                               const WriteDescriptorDescs &writeDescriptorDescs,
                               const GLbitfield memoryBarrierBits);
    template <typename CommandBufferT>
    void updateShaderBuffers(Context *context,
                             CommandBufferT *commandBufferHelper,
                             const gl::ProgramExecutable &executable,
                             const ShaderInterfaceVariableInfoMap &variableInfoMap,
                             const gl::BufferVector &buffers,
                             const std::vector<gl::InterfaceBlock> &blocks,
                             VkDescriptorType descriptorType,
                             VkDeviceSize maxBoundBufferRange,
                             const BufferHelper &emptyBuffer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             const GLbitfield memoryBarrierBits);
    template <typename CommandBufferT>
    void updateAtomicCounters(Context *context,
                              CommandBufferT *commandBufferHelper,
                              const gl::ProgramExecutable &executable,
                              const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const gl::BufferVector &buffers,
                              const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers,
                              const VkDeviceSize requiredOffsetAlignment,
                              const BufferHelper &emptyBuffer,
                              const WriteDescriptorDescs &writeDescriptorDescs);
    angle::Result updateImages(Context *context,
                               const gl::ProgramExecutable &executable,
                               const ShaderInterfaceVariableInfoMap &variableInfoMap,
                               const gl::ActiveTextureArray<TextureVk *> &activeImages,
                               const std::vector<gl::ImageUnit> &imageUnits,
                               const WriteDescriptorDescs &writeDescriptorDescs);
    angle::Result updateInputAttachments(vk::Context *context,
                                         const gl::ProgramExecutable &executable,
                                         const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                         FramebufferVk *framebufferVk,
                                         const WriteDescriptorDescs &writeDescriptorDescs);

    // Specialized update for textures.
    void updatePreCacheActiveTextures(Context *context,
                                      const gl::ProgramExecutable &executable,
                                      const gl::ActiveTextureArray<TextureVk *> &textures,
                                      const gl::SamplerBindingVector &samplers);

    void updateDescriptorSet(Renderer *renderer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             UpdateDescriptorSetsBuilder *updateBuilder,
                             VkDescriptorSet descriptorSet) const;

    const uint32_t *getDynamicOffsets() const { return mDynamicOffsets.data(); }
    size_t getDynamicOffsetsSize() const { return mDynamicOffsets.size(); }

  private:
    void updateInputAttachment(Context *context,
                               uint32_t binding,
                               VkImageLayout layout,
                               const vk::ImageView *imageView,
                               ImageOrBufferViewSubresourceSerial serial,
                               const WriteDescriptorDescs &writeDescriptorDescs);

    void setEmptyBuffer(uint32_t infoDescIndex,
                        VkDescriptorType descriptorType,
                        const BufferHelper &emptyBuffer);

    DescriptorSetDesc mDesc;
    angle::FastVector<DescriptorDescHandles, kFastDescriptorSetDescLimit> mHandles;
    angle::FastVector<uint32_t, kFastDescriptorSetDescLimit> mDynamicOffsets;
};

// In the FramebufferDesc object:
//  - Depth/stencil serial is at index 0
//  - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
//  - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
//  - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
//                                        gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
//  - Fragment shading rate attachment serial is at index
//    (gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1)+1
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
constexpr size_t kFramebufferDescColorIndexOffset  = kFramebufferDescDepthStencilIndex + 1;
constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
    kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
constexpr size_t kFramebufferDescColorResolveIndexOffset =
    kFramebufferDescDepthStencilResolveIndexOffset + 1;
constexpr size_t kFramebufferDescFragmentShadingRateAttachmentIndexOffset =
    kFramebufferDescColorResolveIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
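
// Worked example with gl::IMPLEMENTATION_MAX_DRAW_BUFFERS == 8: the depth/stencil
// serial lands at index 0, color attachments at [1, 8], the depth/stencil
// resolve attachment at 9, color resolve attachments at [10, 17], and the
// fragment shading rate attachment at 18, which is also why
// FramebufferDesc::mMaxIndex below tops out at 18.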

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void updateColor(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateColorResolve(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
    void updateDepthStencil(ImageOrBufferViewSubresourceSerial serial);
    void updateDepthStencilResolve(ImageOrBufferViewSubresourceSerial serial);
    ANGLE_INLINE void setWriteControlMode(gl::SrgbWriteControlMode mode)
    {
        mSrgbWriteControlMode = static_cast<uint16_t>(mode);
    }
    void updateIsMultiview(bool isMultiview) { mIsMultiview = isMultiview; }
    size_t hash() const;

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

    ImageOrBufferViewSubresourceSerial getColorImageViewSerial(uint32_t index)
    {
        ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
        return mSerials[kFramebufferDescColorIndexOffset + index];
    }

    FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;
    ANGLE_INLINE gl::SrgbWriteControlMode getWriteControlMode() const
    {
        return (mSrgbWriteControlMode == 1) ? gl::SrgbWriteControlMode::Linear
                                            : gl::SrgbWriteControlMode::Default;
    }

    void updateLayerCount(uint32_t layerCount);
    uint32_t getLayerCount() const { return mLayerCount; }
    void setColorFramebufferFetchMode(bool hasColorFramebufferFetch);
    bool hasColorFramebufferFetch() const { return mHasColorFramebufferFetch; }

    bool isMultiview() const { return mIsMultiview; }

    void updateRenderToTexture(bool isRenderToTexture);

    void updateFragmentShadingRate(ImageOrBufferViewSubresourceSerial serial);
    bool hasFragmentShadingRateAttachment() const;

    // Used by SharedFramebufferCacheKey
    void destroy(VkDevice /*device*/) { SetBitField(mIsValid, 0); }
    void destroyCachedObject(Renderer *renderer);
    void releaseCachedObject(Renderer *renderer) { UNREACHABLE(); }
    void releaseCachedObject(ContextVk *contextVk);
    bool valid() const { return mIsValid; }
    bool hasValidCachedObject(ContextVk *contextVk) const;

  private:
    void reset();
    void update(uint32_t index, ImageOrBufferViewSubresourceSerial serial);

    // Note: this is an exclusive index. If there is one index it will be "1".
    // Maximum value is 18
    uint16_t mMaxIndex : 5;

    // Whether the render pass has input attachments or not.
    // Note that depth/stencil framebuffer fetch is only implemented for dynamic rendering, and so
    // does not interact with this class.
    uint16_t mHasColorFramebufferFetch : 1;
    static_assert(gl::IMPLEMENTATION_MAX_FRAMEBUFFER_LAYERS < (1 << 9) - 1,
                  "Not enough bits for mLayerCount");

    uint16_t mLayerCount : 9;

    uint16_t mSrgbWriteControlMode : 1;

    // If the render pass contains an initial subpass to unresolve a number of attachments, the
    // subpass description is derived from the following mask, specifying which attachments need
    // to be unresolved.  Includes both color and depth/stencil attachments.
    uint16_t mUnresolveAttachmentMask : kMaxFramebufferNonResolveAttachments;

    // Whether this is a multisampled-render-to-single-sampled framebuffer.  Only used when using
    // VK_EXT_multisampled_render_to_single_sampled.  Only one bit is used and the rest is padding.
    uint16_t mIsRenderToTexture : 14 - kMaxFramebufferNonResolveAttachments;

    uint16_t mIsMultiview : 1;
    // Used by SharedFramebufferCacheKey to indicate if this cache key is valid or not.
    uint16_t mIsValid : 1;

    FramebufferAttachmentArray<ImageOrBufferViewSubresourceSerial> mSerials;
};

constexpr size_t kFramebufferDescSize = sizeof(FramebufferDesc);
static_assert(kFramebufferDescSize == 156, "Size check failed");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

using SharedFramebufferCacheKey = SharedPtr<FramebufferDesc>;
ANGLE_INLINE const SharedFramebufferCacheKey
CreateSharedFramebufferCacheKey(const FramebufferDesc &desc)
{
    return SharedFramebufferCacheKey::MakeShared(VK_NULL_HANDLE, desc);
}

// The SamplerHelper allows a Sampler to be coupled with a serial.
// Must be included before we declare SamplerCache.
class SamplerHelper final : angle::NonCopyable
{
  public:
    SamplerHelper() = default;
    ~SamplerHelper() { ASSERT(!valid()); }

    explicit SamplerHelper(SamplerHelper &&samplerHelper);
    SamplerHelper &operator=(SamplerHelper &&rhs);

    angle::Result init(ErrorContext *context, const VkSamplerCreateInfo &createInfo);
    angle::Result init(ContextVk *contextVk, const SamplerDesc &desc);
    void destroy(VkDevice device) { mSampler.destroy(device); }
    void destroy() { ASSERT(!valid()); }
    bool valid() const { return mSampler.valid(); }
    const Sampler &get() const { return mSampler; }
    SamplerSerial getSamplerSerial() const { return mSamplerSerial; }

  private:
    Sampler mSampler;
    SamplerSerial mSamplerSerial;
};

using SharedSamplerPtr = SharedPtr<SamplerHelper>;

class RenderPassHelper final : angle::NonCopyable
{
  public:
    RenderPassHelper();
    ~RenderPassHelper();

    RenderPassHelper(RenderPassHelper &&other);
    RenderPassHelper &operator=(RenderPassHelper &&other);

    void destroy(VkDevice device);
    void release(ContextVk *contextVk);

    const RenderPass &getRenderPass() const;
    RenderPass &getRenderPass();

    const RenderPassPerfCounters &getPerfCounters() const;
    RenderPassPerfCounters &getPerfCounters();

  private:
    RenderPass mRenderPass;
    RenderPassPerfCounters mPerfCounters;
};

// Helper class that manages the lifetime of various cache objects so that the cache entry can be
// destroyed when one of the components becomes invalid.
template <class SharedCacheKeyT>
class SharedCacheKeyManager
{
  public:
    SharedCacheKeyManager() = default;
    ~SharedCacheKeyManager() { ASSERT(empty()); }
    // Stores the pointer to the cache key and retains it.
    void addKey(const SharedCacheKeyT &key);
    // Iterate over the descriptor array and release the descriptor and cache.
    void releaseKeys(ContextVk *contextVk);
    void releaseKeys(Renderer *renderer);
    // Iterate over the descriptor array and destroy the descriptor and cache.
    void destroyKeys(Renderer *renderer);
    void clear();

    // The following APIs are expected to be used for assertions only.
    bool empty() const { return mSharedCacheKeys.empty(); }
    bool allValidEntriesAreCached(ContextVk *contextVk) const;

  private:
    size_t updateEmptySlotBits();
    void addKeyImpl(const SharedCacheKeyT &key);

    bool containsKeyWithOwnerEqual(const SharedCacheKeyT &key) const;
    void assertAllEntriesDestroyed() const;

    // Tracks an array of cache keys with refcounting. Note this owns one refcount of each
    // SharedCacheKeyT object.
    std::deque<SharedCacheKeyT> mSharedCacheKeys;
    SharedCacheKeyT mLastAddedSharedCacheKey;

    // To speed up searching for an available slot in mSharedCacheKeys, we use a bitset to track
    // available (i.e., empty) slots.
    static constexpr size_t kInvalidSlot  = -1;
    static constexpr size_t kSlotBitCount = 64;
    using SlotBitMask                     = angle::BitSet64<kSlotBitCount>;
    std::vector<SlotBitMask> mEmptySlotBits;
};
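
// Illustrative usage sketch (|sharedKey| and |contextVk| are assumed to
// exist): an object that owns cache entries retains their keys so that
// releasing the object also purges the corresponding cache entries.
//
//   FramebufferCacheManager manager;
//   manager.addKey(sharedKey);       // retains one refcount on the key
//   ...
//   manager.releaseKeys(contextVk);  // releases the keys and their cache entries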

using FramebufferCacheManager   = SharedCacheKeyManager<SharedFramebufferCacheKey>;
template <>
void FramebufferCacheManager::addKey(const SharedFramebufferCacheKey &key);

using DescriptorSetCacheManager = SharedCacheKeyManager<SharedDescriptorSetCacheKey>;
template <>
void DescriptorSetCacheManager::addKey(const SharedDescriptorSetCacheKey &key);
}  // namespace vk
}  // namespace rx

// Introduce std::hash for the above classes.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::ImageSubresourceRange>
{
    size_t operator()(const rx::vk::ImageSubresourceRange &key) const
    {
        return *reinterpret_cast<const uint32_t *>(&key);
    }
};

template <>
struct hash<rx::vk::DescriptorSetDesc>
{
    size_t operator()(const rx::vk::DescriptorSetDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::YcbcrConversionDesc>
{
    size_t operator()(const rx::vk::YcbcrConversionDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};

// See Resource Serial types defined in vk_utils.h.
#define ANGLE_HASH_VK_SERIAL(Type)                               \
    template <>                                                  \
    struct hash<rx::vk::Type##Serial>                            \
    {                                                            \
        size_t operator()(const rx::vk::Type##Serial &key) const \
        {                                                        \
            return key.getValue();                               \
        }                                                        \
    };

ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)

}  // namespace std
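
// With the std::hash specializations above, the packed description types can key standard
// unordered containers directly.  A minimal sketch (editor's illustration; the map and lookup
// below are hypothetical):
//
//   std::unordered_map<rx::vk::FramebufferDesc, rx::vk::FramebufferHelper> cache;
//   auto iter = cache.find(desc);  // hashes via rx::vk::FramebufferDesc::hash()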

namespace rx
{
// Cache types for various Vulkan objects
enum class VulkanCacheType
{
    CompatibleRenderPass,
    RenderPassWithOps,
    GraphicsPipeline,
    ComputePipeline,
    PipelineLayout,
    Sampler,
    SamplerYcbcrConversion,
    DescriptorSetLayout,
    DriverUniformsDescriptors,
    TextureDescriptors,
    UniformsAndXfbDescriptors,
    ShaderResourcesDescriptors,
    Framebuffer,
    DescriptorMetaCache,
    EnumCount
};

// Cache hit and miss counters, embedded in each of the Vulkan object caches below.
class CacheStats final : angle::NonCopyable
{
  public:
    CacheStats() { reset(); }
    ~CacheStats() {}

    CacheStats(const CacheStats &rhs)
        : mHitCount(rhs.mHitCount), mMissCount(rhs.mMissCount), mSize(rhs.mSize)
    {}

    CacheStats &operator=(const CacheStats &rhs)
    {
        mHitCount  = rhs.mHitCount;
        mMissCount = rhs.mMissCount;
        mSize      = rhs.mSize;
        return *this;
    }

    ANGLE_INLINE void hit() { mHitCount++; }
    ANGLE_INLINE void miss() { mMissCount++; }
    ANGLE_INLINE void incrementSize() { mSize++; }
    ANGLE_INLINE void decrementSize() { mSize--; }
    ANGLE_INLINE void missAndIncrementSize()
    {
        mMissCount++;
        mSize++;
    }
    ANGLE_INLINE void accumulate(const CacheStats &stats)
    {
        mHitCount += stats.mHitCount;
        mMissCount += stats.mMissCount;
        mSize += stats.mSize;
    }

    uint32_t getHitCount() const { return mHitCount; }
    uint32_t getMissCount() const { return mMissCount; }

    ANGLE_INLINE double getHitRatio() const
    {
        if (mHitCount + mMissCount == 0)
        {
            return 0;
        }
        else
        {
            return static_cast<double>(mHitCount) / (mHitCount + mMissCount);
        }
    }

    ANGLE_INLINE uint32_t getSize() const { return mSize; }
    ANGLE_INLINE void setSize(uint32_t size) { mSize = size; }

    void reset()
    {
        mHitCount  = 0;
        mMissCount = 0;
        mSize      = 0;
    }

    void resetHitAndMissCount()
    {
        mHitCount  = 0;
        mMissCount = 0;
    }

    void accumulateCacheStats(VulkanCacheType cacheType, const CacheStats &cacheStats)
    {
        mHitCount += cacheStats.getHitCount();
        mMissCount += cacheStats.getMissCount();
    }

  private:
    uint32_t mHitCount;
    uint32_t mMissCount;
    uint32_t mSize;
};
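
// Usage sketch (editor's illustration): a cache lookup records a hit on success, or a miss plus
// a size increment when a new entry is inserted.
//
//   CacheStats stats;
//   stats.hit();                          // entry was found in the cache
//   stats.missAndIncrementSize();         // entry was created and inserted
//   double ratio = stats.getHitRatio();   // 1 hit out of 2 lookups == 0.5
//   ASSERT(stats.getSize() == 1);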

template <VulkanCacheType CacheType>
class HasCacheStats : angle::NonCopyable
{
  public:
    template <typename Accumulator>
    void accumulateCacheStats(Accumulator *accum)
    {
        accum->accumulateCacheStats(CacheType, mCacheStats);
        mCacheStats.reset();
    }

    void getCacheStats(CacheStats *accum) const { accum->accumulate(mCacheStats); }

  protected:
    HasCacheStats()          = default;
    virtual ~HasCacheStats() = default;

    CacheStats mCacheStats;
};

using VulkanCacheStats = angle::PackedEnumMap<VulkanCacheType, CacheStats>;
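
// The Accumulator parameter of HasCacheStats::accumulateCacheStats is duck-typed: anything with
// an accumulateCacheStats(VulkanCacheType, const CacheStats &) method works.  A minimal sketch
// (editor's illustration; PerfAccumulator and samplerCache are hypothetical):
//
//   struct PerfAccumulator
//   {
//       void accumulateCacheStats(VulkanCacheType type, const CacheStats &stats)
//       {
//           vulkanCacheStats[type].accumulate(stats);
//       }
//       VulkanCacheStats vulkanCacheStats;
//   };
//
//   PerfAccumulator accumulator;
//   samplerCache.accumulateCacheStats(&accumulator);  // also resets the cache's own stats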

// FramebufferVk Cache
class FramebufferCache final : angle::NonCopyable
{
  public:
    FramebufferCache() = default;
    ~FramebufferCache() { ASSERT(mPayload.empty()); }

    void destroy(vk::Renderer *renderer);

    bool get(ContextVk *contextVk, const vk::FramebufferDesc &desc, vk::Framebuffer &framebuffer);
    void insert(ContextVk *contextVk,
                const vk::FramebufferDesc &desc,
                vk::FramebufferHelper &&framebufferHelper);
    void erase(ContextVk *contextVk, const vk::FramebufferDesc &desc);

    size_t getSize() const { return mPayload.size(); }
    bool empty() const { return mPayload.empty(); }

  private:
    angle::HashMap<vk::FramebufferDesc, vk::FramebufferHelper> mPayload;
    CacheStats mCacheStats;
};
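
// Usage sketch (editor's illustration; contextVk, desc, and createFramebuffer are hypothetical):
//
//   vk::Framebuffer framebuffer;
//   if (!framebufferCache.get(contextVk, desc, framebuffer))
//   {
//       vk::FramebufferHelper newFramebuffer;
//       ANGLE_TRY(createFramebuffer(contextVk, desc, &newFramebuffer));
//       framebufferCache.insert(contextVk, desc, std::move(newFramebuffer));
//   }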

// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(ContextVk *contextVk);
    void clear(ContextVk *contextVk);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       const vk::RenderPassDesc &desc,
                                                       const vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            *renderPassOut = &innerCache.begin()->second.getRenderPass();
            mCompatibleRenderPassCacheStats.hit();
            return angle::Result::Continue;
        }

        mCompatibleRenderPassCacheStats.missAndIncrementSize();
        return addCompatibleRenderPass(contextVk, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(ContextVk *contextVk,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       const vk::RenderPass **renderPassOut);

    static void InitializeOpsForCompatibleRenderPass(const vk::RenderPassDesc &desc,
                                                     vk::AttachmentOpsArray *opsOut);
    static angle::Result MakeRenderPass(vk::ErrorContext *context,
                                        const vk::RenderPassDesc &desc,
                                        const vk::AttachmentOpsArray &ops,
                                        vk::RenderPass *renderPass,
                                        vk::RenderPassPerfCounters *renderPassCounters);

  private:
    angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
                                           const vk::RenderPassDesc &desc,
                                           const vk::AttachmentOpsArray &attachmentOps,
                                           bool updatePerfCounters,
                                           const vk::RenderPass **renderPassOut);

    angle::Result addCompatibleRenderPass(ContextVk *contextVk,
                                          const vk::RenderPassDesc &desc,
                                          const vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    // std::unordered_map is used (rather than angle::HashMap) to retain pointer stability, since
    // pointers into the cached entries are handed out above.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassHelper>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
    CacheStats mCompatibleRenderPassCacheStats;
    CacheStats mRenderPassWithOpsCacheStats;
};
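
// Usage sketch (editor's illustration): pipeline creation only needs a render pass that is
// "compatible" with the framebuffer, so any ops variation will do; actually beginning a render
// pass needs the exact load/store ops and layouts.
//
//   const vk::RenderPass *compatibleRenderPass = nullptr;
//   ANGLE_TRY(renderPassCache.getCompatibleRenderPass(contextVk, desc, &compatibleRenderPass));
//
//   vk::AttachmentOpsArray ops;
//   RenderPassCache::InitializeOpsForCompatibleRenderPass(desc, &ops);
//   const vk::RenderPass *renderPass = nullptr;
//   ANGLE_TRY(renderPassCache.getRenderPassWithOps(contextVk, desc, ops, &renderPass));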

enum class PipelineSource
{
    // Pipeline created when warming up the program's pipeline cache
    WarmUp,
    // Monolithic pipeline created at draw time
    Draw,
    // Pipeline created at draw time by linking partial pipeline libraries
    DrawLinked,
    // Pipeline created for UtilsVk
    Utils,
    // Pipeline created at dispatch time
    Dispatch
};

struct ComputePipelineDescHash
{
    size_t operator()(const rx::vk::ComputePipelineDesc &key) const { return key.hash(); }
};
struct GraphicsPipelineDescCompleteHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::Complete);
    }
};
struct GraphicsPipelineDescShadersHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::Shaders);
    }
};

struct ComputePipelineDescKeyEqual
{
    bool operator()(const rx::vk::ComputePipelineDesc &first,
                    const rx::vk::ComputePipelineDesc &second) const
    {
        return first.keyEqual(second);
    }
};
struct GraphicsPipelineDescCompleteKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::Complete);
    }
};
struct GraphicsPipelineDescShadersKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::Shaders);
    }
};

// Derive the KeyEqual and GraphicsPipelineSubset enum from the Hash struct
template <typename Hash>
struct GraphicsPipelineCacheTypeHelper
{
    using KeyEqual                                      = GraphicsPipelineDescCompleteKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::Complete;
};

template <>
struct GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescShadersHash>
{
    using KeyEqual                                      = GraphicsPipelineDescShadersKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::Shaders;
};
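
// A compile-time sketch of what the helper selects (editor's illustration):
//
//   static_assert(GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescCompleteHash>::kSubset ==
//                 vk::GraphicsPipelineSubset::Complete);
//   static_assert(GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescShadersHash>::kSubset ==
//                 vk::GraphicsPipelineSubset::Shaders);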

// Compute Pipeline Cache implementation
// TODO(aannestrand): Add cache trimming/eviction.
// http://anglebug.com/391672281
class ComputePipelineCache final : HasCacheStats<rx::VulkanCacheType::ComputePipeline>
{
  public:
    ComputePipelineCache() = default;
    ~ComputePipelineCache() override { ASSERT(mPayload.empty()); }

    void destroy(vk::ErrorContext *context);
    void release(vk::ErrorContext *context);

    angle::Result getOrCreatePipeline(vk::ErrorContext *context,
                                      vk::PipelineCacheAccess *pipelineCache,
                                      const vk::PipelineLayout &pipelineLayout,
                                      vk::ComputePipelineOptions &pipelineOptions,
                                      PipelineSource source,
                                      vk::PipelineHelper **pipelineOut,
                                      const char *shaderName,
                                      VkSpecializationInfo *specializationInfo,
                                      const vk::ShaderModuleMap &shaderModuleMap);

  private:
    angle::Result createPipeline(vk::ErrorContext *context,
                                 vk::PipelineCacheAccess *pipelineCache,
                                 const vk::PipelineLayout &pipelineLayout,
                                 vk::ComputePipelineOptions &pipelineOptions,
                                 PipelineSource source,
                                 const char *shaderName,
                                 const vk::ShaderModule &shaderModule,
                                 VkSpecializationInfo *specializationInfo,
                                 const vk::ComputePipelineDesc &desc,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::ComputePipelineDesc,
                       vk::PipelineHelper,
                       ComputePipelineDescHash,
                       ComputePipelineDescKeyEqual>
        mPayload;
};
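
// Usage sketch (editor's illustration; all arguments are hypothetical): returns the cached
// pipeline for the packed description derived from these arguments, creating it through the
// VkPipelineCache on a miss.
//
//   vk::PipelineHelper *pipeline = nullptr;
//   ANGLE_TRY(computePipelineCache.getOrCreatePipeline(
//       context, &pipelineCacheAccess, pipelineLayout, pipelineOptions,
//       PipelineSource::Dispatch, &pipeline, "ComputeShader", specializationInfo,
//       shaderModuleMap));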

// TODO(jmadill): Add cache trimming/eviction.
template <typename Hash>
class GraphicsPipelineCache final : public HasCacheStats<VulkanCacheType::GraphicsPipeline>
{
  public:
    GraphicsPipelineCache() = default;
    ~GraphicsPipelineCache() override { ASSERT(mPayload.empty()); }

    void destroy(vk::ErrorContext *context);
    void release(vk::ErrorContext *context);

    void populate(const vk::GraphicsPipelineDesc &desc,
                  vk::Pipeline &&pipeline,
                  vk::PipelineHelper **pipelineHelperOut);

    // Get a pipeline from the cache, if it exists
    ANGLE_INLINE bool getPipeline(const vk::GraphicsPipelineDesc &desc,
                                  const vk::GraphicsPipelineDesc **descPtrOut,
                                  vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item == mPayload.end())
        {
            return false;
        }

        *descPtrOut  = &item->first;
        *pipelineOut = &item->second;

        mCacheStats.hit();

        return true;
    }

    angle::Result createPipeline(vk::ErrorContext *context,
                                 vk::PipelineCacheAccess *pipelineCache,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const vk::GraphicsPipelineShadersInfo &shaders,
                                 PipelineSource source,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    // Helper for VulkanPipelineCachePerf that resets the cache without destroying any Vulkan
    // objects.
    void reset() { mPayload.clear(); }

  private:
    void addToCache(PipelineSource source,
                    const vk::GraphicsPipelineDesc &desc,
                    vk::Pipeline &&pipeline,
                    vk::CacheLookUpFeedback feedback,
                    const vk::GraphicsPipelineDesc **descPtrOut,
                    vk::PipelineHelper **pipelineOut);

    using KeyEqual = typename GraphicsPipelineCacheTypeHelper<Hash>::KeyEqual;
    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper, Hash, KeyEqual> mPayload;
};

using CompleteGraphicsPipelineCache = GraphicsPipelineCache<GraphicsPipelineDescCompleteHash>;
using ShadersGraphicsPipelineCache  = GraphicsPipelineCache<GraphicsPipelineDescShadersHash>;
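
// Usage sketch (editor's illustration): the draw path tries the in-memory lookup first and only
// falls back to Vulkan pipeline creation on a miss.
//
//   const vk::GraphicsPipelineDesc *descPtr = nullptr;
//   vk::PipelineHelper *pipeline            = nullptr;
//   if (!pipelineCache.getPipeline(desc, &descPtr, &pipeline))
//   {
//       ANGLE_TRY(pipelineCache.createPipeline(context, &pipelineCacheAccess,
//                                              compatibleRenderPass, pipelineLayout, shaders,
//                                              PipelineSource::Draw, desc, &descPtr, &pipeline));
//   }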

class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(vk::Renderer *renderer);

    angle::Result getDescriptorSetLayout(vk::ErrorContext *context,
                                         const vk::DescriptorSetLayoutDesc &desc,
                                         vk::DescriptorSetLayoutPtr *descriptorSetLayoutOut);

    // Helpers for white box tests
    size_t getCacheHitCount() const { return mCacheStats.getHitCount(); }
    size_t getCacheMissCount() const { return mCacheStats.getMissCount(); }

  private:
    mutable angle::SimpleMutex mMutex;
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::DescriptorSetLayoutPtr> mPayload;
    CacheStats mCacheStats;
};

class PipelineLayoutCache final : public HasCacheStats<VulkanCacheType::PipelineLayout>
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache() override;

    void destroy(vk::Renderer *renderer);

    angle::Result getPipelineLayout(vk::ErrorContext *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::PipelineLayoutPtr *pipelineLayoutOut);

  private:
    mutable angle::SimpleMutex mMutex;
    std::unordered_map<vk::PipelineLayoutDesc, vk::PipelineLayoutPtr> mPayload;
};

class SamplerCache final : public HasCacheStats<VulkanCacheType::Sampler>
{
  public:
    SamplerCache();
    ~SamplerCache() override;

    void destroy(vk::Renderer *renderer);

    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::SharedSamplerPtr *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::SharedSamplerPtr> mPayload;
};

// YuvConversion Cache
class SamplerYcbcrConversionCache final
    : public HasCacheStats<VulkanCacheType::SamplerYcbcrConversion>
{
  public:
    SamplerYcbcrConversionCache();
    ~SamplerYcbcrConversionCache() override;

    void destroy(vk::Renderer *renderer);

    angle::Result getSamplerYcbcrConversion(vk::ErrorContext *context,
                                            const vk::YcbcrConversionDesc &ycbcrConversionDesc,
                                            VkSamplerYcbcrConversion *vkSamplerYcbcrConversionOut);

  private:
    using SamplerYcbcrConversionMap =
        std::unordered_map<vk::YcbcrConversionDesc, vk::SamplerYcbcrConversion>;
    SamplerYcbcrConversionMap mExternalFormatPayload;
    SamplerYcbcrConversionMap mVkFormatPayload;
};

// Descriptor Set Cache
template <typename T>
class DescriptorSetCache final : angle::NonCopyable
{
  public:
    DescriptorSetCache() = default;
    ~DescriptorSetCache() { ASSERT(mPayload.empty()); }

    DescriptorSetCache(DescriptorSetCache &&other) : DescriptorSetCache()
    {
        *this = std::move(other);
    }

    DescriptorSetCache &operator=(DescriptorSetCache &&other)
    {
        std::swap(mPayload, other.mPayload);
        return *this;
    }

    void clear() { mPayload.clear(); }

    bool getDescriptorSet(const vk::DescriptorSetDesc &desc, T *descriptorSetOut) const
    {
        auto iter = mPayload.find(desc);
        if (iter != mPayload.end())
        {
            *descriptorSetOut = iter->second;
            return true;
        }
        return false;
    }

    void insertDescriptorSet(const vk::DescriptorSetDesc &desc, const T &descriptorSetHelper)
    {
        mPayload.emplace(desc, descriptorSetHelper);
    }

    bool eraseDescriptorSet(const vk::DescriptorSetDesc &desc, T *descriptorSetOut)
    {
        auto iter = mPayload.find(desc);
        if (iter != mPayload.end())
        {
            *descriptorSetOut = std::move(iter->second);
            mPayload.erase(iter);
            return true;
        }
        return false;
    }

    bool eraseDescriptorSet(const vk::DescriptorSetDesc &desc)
    {
        auto iter = mPayload.find(desc);
        if (iter != mPayload.end())
        {
            mPayload.erase(iter);
            return true;
        }
        return false;
    }

    size_t getTotalCacheSize() const { return mPayload.size(); }

    size_t getTotalCacheKeySizeBytes() const
    {
        size_t totalSize = 0;
        for (const auto &iter : mPayload)
        {
            const vk::DescriptorSetDesc &desc = iter.first;
            totalSize += desc.getKeySizeBytes();
        }
        return totalSize;
    }

    bool empty() const { return mPayload.empty(); }

  private:
    angle::HashMap<vk::DescriptorSetDesc, T> mPayload;
};
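
// Usage sketch (editor's illustration; the T, desc, and allocation helper are hypothetical):
//
//   DescriptorSetCache<vk::DescriptorSetPointer> cache;
//   vk::DescriptorSetPointer descriptorSet;
//   if (!cache.getDescriptorSet(desc, &descriptorSet))
//   {
//       ANGLE_TRY(allocateDescriptorSet(contextVk, &descriptorSet));
//       cache.insertDescriptorSet(desc, descriptorSet);
//   }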

// There is 1 default uniform binding used per stage.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;

class UpdateDescriptorSetsBuilder final : angle::NonCopyable
{
  public:
    UpdateDescriptorSetsBuilder();
    ~UpdateDescriptorSetsBuilder();

    VkDescriptorBufferInfo *allocDescriptorBufferInfos(uint32_t count)
    {
        return mDescriptorBufferInfos.allocate(count);
    }
    VkDescriptorImageInfo *allocDescriptorImageInfos(uint32_t count)
    {
        return mDescriptorImageInfos.allocate(count);
    }
    VkWriteDescriptorSet *allocWriteDescriptorSets(uint32_t count)
    {
        return mWriteDescriptorSets.allocate(count);
    }
    VkBufferView *allocBufferViews(uint32_t count) { return mBufferViews.allocate(count); }

    VkDescriptorBufferInfo &allocDescriptorBufferInfo() { return *allocDescriptorBufferInfos(1); }
    VkDescriptorImageInfo &allocDescriptorImageInfo() { return *allocDescriptorImageInfos(1); }
    VkWriteDescriptorSet &allocWriteDescriptorSet() { return *allocWriteDescriptorSets(1); }
    VkBufferView &allocBufferView() { return *allocBufferViews(1); }

    // Returns the number of written descriptor sets.
    uint32_t flushDescriptorSetUpdates(VkDevice device);

  private:
    // Manages the storage for VkDescriptorBufferInfo and VkDescriptorImageInfo. The storage as a
    // whole is not required to be contiguous, but each allocation requested through allocate()
    // must be contiguous. The actual storage grows as needed.
    template <typename T>
    class DescriptorInfoAllocator : angle::NonCopyable
    {
      public:
        void init(uint32_t initialVectorCapacity)
        {
            mVectorCapacity = initialVectorCapacity;
            mDescriptorInfos.emplace_back();
            mDescriptorInfos.back().reserve(mVectorCapacity);
            mCurrentVector = mDescriptorInfos.begin();
            mTotalSize     = 0;
        }
        void clear()
        {
            mDescriptorInfos.resize(1);
            mDescriptorInfos.front().clear();
            // Grow the first vector's capacity enough to hold everything that was allocated in
            // the previous cycle, so the next cycle likely needs only one vector.
            mVectorCapacity = std::max(mTotalSize, mVectorCapacity);
            mDescriptorInfos.front().reserve(mVectorCapacity);
            mCurrentVector = mDescriptorInfos.begin();
            mTotalSize     = 0;
        }
        T *allocate(uint32_t count);

        bool empty() const { return mTotalSize == 0; }

      protected:
        uint32_t mVectorCapacity = 16;
        std::deque<std::vector<T>> mDescriptorInfos;
        typename std::deque<std::vector<T>>::iterator mCurrentVector;
        uint32_t mTotalSize = 0;
    };
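
    // allocate() is defined out of line.  A minimal sketch of the behavior implied by the
    // comment above (editor's assumption, not the actual implementation): never reallocate an
    // existing vector in place, since that would invalidate pointers returned from earlier
    // allocate() calls.
    //
    //   T *allocate(uint32_t count)
    //   {
    //       if (mCurrentVector->size() + count > mCurrentVector->capacity())
    //       {
    //           mVectorCapacity = std::max(count, mVectorCapacity);
    //           mDescriptorInfos.emplace_back();
    //           mDescriptorInfos.back().reserve(mVectorCapacity);
    //           mCurrentVector = std::prev(mDescriptorInfos.end());
    //       }
    //       size_t offset = mCurrentVector->size();
    //       mCurrentVector->resize(offset + count);
    //       mTotalSize += count;
    //       return mCurrentVector->data() + offset;
    //   }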

    class WriteDescriptorSetAllocator final : public DescriptorInfoAllocator<VkWriteDescriptorSet>
    {
      public:
        uint32_t updateDescriptorSets(VkDevice device) const;
    };

    DescriptorInfoAllocator<VkDescriptorBufferInfo> mDescriptorBufferInfos;
    DescriptorInfoAllocator<VkDescriptorImageInfo> mDescriptorImageInfos;
    DescriptorInfoAllocator<VkBufferView> mBufferViews;
    WriteDescriptorSetAllocator mWriteDescriptorSets;
};
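
// Usage sketch (editor's illustration; device, buffer, and descriptorSet are hypothetical): the
// builder keeps the allocated structs at stable addresses until the flush submits them in a
// single vkUpdateDescriptorSets call.
//
//   VkDescriptorBufferInfo &bufferInfo = builder.allocDescriptorBufferInfo();
//   bufferInfo = {buffer, 0, VK_WHOLE_SIZE};
//
//   VkWriteDescriptorSet &write = builder.allocWriteDescriptorSet();
//   write                 = {};
//   write.sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
//   write.dstSet          = descriptorSet;
//   write.dstBinding      = 0;
//   write.descriptorCount = 1;
//   write.descriptorType  = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
//   write.pBufferInfo     = &bufferInfo;
//
//   uint32_t writtenSets = builder.flushDescriptorSetUpdates(device);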

}  // namespace rx

#endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_