• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2018 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // vk_cache_utils.h:
7 //    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
8 //    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
9 //
10 
11 #ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
12 #define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
13 
14 #include "common/Color.h"
15 #include "common/FixedVector.h"
16 #include "libANGLE/renderer/vulkan/ResourceVk.h"
17 #include "libANGLE/renderer/vulkan/vk_utils.h"
18 
19 namespace rx
20 {
21 
22 // Some descriptor set and pipeline layout constants.
23 //
24 // The set/binding assignment is done as follows:
25 //
26 // - Set 0 contains the ANGLE driver uniforms at binding 0.  Note that driver uniforms are updated
27 //   only under rare circumstances, such as viewport or depth range change.  However, there is only
28 //   one binding in this set.  This set is placed before Set 1 containing transform feedback
29 //   buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this set.
30 //   Otherwise, as the layout of Set 1 changes (due to addition and removal of xfb buffers), and all
31 //   subsequent sets need to be rebound (due to Vulkan pipeline layout validation rules), we would
32 //   have needed to invalidateGraphicsDriverUniforms().
33 // - Set 1 contains uniform blocks created to encompass default uniforms.  1 binding is used per
34 //   pipeline stage.  Additionally, transform feedback buffers are bound from binding 2 and up.
35 // - Set 2 contains all textures (including texture buffers).
36 // - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
37 //   buffers, images and image buffers.
38 
// Descriptor set indices used by ANGLE's pipeline layouts, in binding order.  The rationale for
// this ordering is described in the comment block above.
enum class DescriptorSetIndex : uint32_t
{
    Internal,        // ANGLE driver uniforms or internal shaders
    UniformsAndXfb,  // Uniforms set index
    Texture,         // Textures set index
    ShaderResource,  // Other shader resources set index

    InvalidEnum,
    EnumCount = InvalidEnum,
};
49 
50 namespace vk
51 {
// Forward declarations of Vulkan-backend types used by the caches below.
class DynamicDescriptorPool;
class ImageHelper;
enum class ImageLayout;

// Ref-counted wrappers so that cached layout/conversion objects can be shared between caches
// and outstanding users.
using RefCountedDescriptorSetLayout    = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout         = RefCounted<PipelineLayout>;
using RefCountedSamplerYcbcrConversion = RefCounted<SamplerYcbcrConversion>;
59 
60 // Helper macro that casts to a bitfield type then verifies no bits were dropped.
// Helper macro that casts to a bitfield type then verifies no bits were dropped.
//
// |rhs| is evaluated exactly once (captured into ANGLE_LOCAL_VAR) and is parenthesized so that
// expressions containing low-precedence operators expand safely.  The ASSERT round-trips the
// stored value back to the source type to detect truncation by the narrower destination.
#define SetBitField(lhs, rhs)                                                         \
    do                                                                                \
    {                                                                                 \
        auto ANGLE_LOCAL_VAR = (rhs);                                                 \
        lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
        ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR);       \
    } while (0)
68 
69 // Packed Vk resource descriptions.
70 // Most Vk types use many more bits than required to represent the underlying data.
71 // Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
72 // hashing (and also needs to check equality) we can optimize these operations by
73 // using fewer bits. Hence the packed types.
74 //
75 // One implementation note: these types could potentially be improved by using even
76 // fewer bits. For example, boolean values could be represented by a single bit instead
77 // of a uint8_t. However at the current time there are concerns about the portability
78 // of bitfield operators, and complexity issues with using bit mask operations. This is
79 // something we will likely want to investigate as the Vulkan implementation progresses.
80 //
81 // Second implementation note: the struct packing is also a bit fragile, and some of the
82 // packing requirements depend on using alignas and field ordering to get the result of
83 // packing nicely into the desired space. This is something we could also potentially fix
84 // with a redesign to use bitfields or bit mask operations.
85 
86 // Enable struct padding warnings for the code below since it is used in caches.
87 ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
88 
// How a resource is accessed, ordered by increasing "strength": a write supersedes a read-only
// access, which supersedes no use at all.  The ordering is relied on by UpdateAccess below.
enum ResourceAccess
{
    Unused,
    ReadOnly,
    Write,
};

// Promote |*oldAccess| to |newAccess| if the new access is stronger; weaker or equal accesses
// leave the current value unchanged.
inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
{
    *oldAccess = (newAccess > *oldAccess) ? newAccess : *oldAccess;
}
103 
// Packed load/store ops.  Load, Clear, DontCare and Store intentionally equal the corresponding
// core Vulkan enum values so they can be cast directly.  None is an extra internal value
// (NOTE(review): presumably translated to VK_EXT_load_store_op_none where supported — confirm at
// the point these are converted to Vulkan values).
enum class RenderPassLoadOp
{
    Load     = VK_ATTACHMENT_LOAD_OP_LOAD,
    Clear    = VK_ATTACHMENT_LOAD_OP_CLEAR,
    DontCare = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    None,
};
enum class RenderPassStoreOp
{
    Store    = VK_ATTACHMENT_STORE_OP_STORE,
    DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
    None,
};

// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus one
// depth/stencil attachment and one depth/stencil resolve attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 2;
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
using FramebufferAttachmentMask    = angle::BitSet<kMaxFramebufferAttachments>;

// Attachment count when resolve attachments are excluded: all color attachments plus one
// depth/stencil attachment.
constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
using FramebufferNonResolveAttachmentMask  = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;
131 
// Packed description of a render pass, used as a cache key.  The layout is hashed byte-for-byte
// (see hash()), so the struct is tightly packed and its size is locked by the static_assert
// below — do not reorder fields without updating that assert.
class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    // Update how the depth/stencil attachment is accessed (see ResourceAccess).
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment.  Used when optimizing blit through resolve attachment to
    // temporarily pack a resolve attachment and then remove it.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthStencilResolveAttachment();
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthStencilUnresolveAttachment(bool unresolveDepth, bool unresolveStencil);
    void removeDepthStencilUnresolveAttachment();

    // Select default vs. sRGB-overridden writes (stored in mSrgbWriteControl).
    void setWriteControlMode(gl::SrgbWriteControlMode mode);

    // Hash of the packed bytes; used by the render pass cache.
    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const;
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }
    bool hasDepthStencilResolveAttachment() const { return mResolveDepthStencil; }
    bool hasDepthStencilUnresolveAttachment() const { return mUnresolveDepth || mUnresolveStencil; }
    bool hasDepthUnresolveAttachment() const { return mUnresolveDepth; }
    bool hasStencilUnresolveAttachment() const { return mUnresolveStencil; }
    gl::SrgbWriteControlMode getSRGBWriteControlMode() const
    {
        return static_cast<gl::SrgbWriteControlMode>(mSrgbWriteControl);
    }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples) { mSamples = static_cast<uint8_t>(samples); }
    uint8_t samples() const { return mSamples; }

    void setViewCount(GLsizei viewCount) { mViewCount = static_cast<uint8_t>(viewCount); }
    uint8_t viewCount() const { return mViewCount; }

    void setFramebufferFetchMode(bool hasFramebufferFetch)
    {
        mHasFramebufferFetch = hasFramebufferFetch;
    }
    bool getFramebufferFetchMode() const { return mHasFramebufferFetch; }

    void updateRenderToTexture(bool isRenderToTexture) { mIsRenderToTexture = isRenderToTexture; }
    bool isRenderToTexture() const { return mIsRenderToTexture; }

    // Returns the packed format of the attachment at |index| (GL index space; see
    // mAttachmentFormats below).
    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange;

    // Multiview
    uint8_t mViewCount;

    // sRGB
    uint8_t mSrgbWriteControl : 1;

    // Framebuffer fetch
    uint8_t mHasFramebufferFetch : 1;

    // Multisampled render to texture
    uint8_t mIsRenderToTexture : 1;
    uint8_t mResolveDepthStencil : 1;
    uint8_t mUnresolveDepth : 1;
    uint8_t mUnresolveStencil : 1;

    // Available space for expansion.
    uint8_t mPadding1 : 2;
    uint8_t mPadding2;

    // Whether each color attachment has a corresponding resolve attachment.  Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass.  This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets.  This operation is referred to as "unresolve".
    //
    // Unused when VK_EXT_multisampled_render_to_single_sampled is available.
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments.  They use the same
    // formats, so they are not specified in this array.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

// The cache key must stay exactly 16 bytes; the alignas(4) and field packing above depend on it.
constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 16, "Size check failed");
284 
// Load/store ops and initial/final layouts for a single render pass attachment, packed into
// 4 bytes (two uint16_t bitfield units; see the static_assert below).
struct PackedAttachmentOpsDesc final
{
    // RenderPassLoadOp is in range [0, 3], and RenderPassStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not.  This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether.  Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules.
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    uint16_t padding1 : 6;

    // 4-bits to force pad the structure to exactly 2 bytes.  Note that we currently don't support
    // any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 4;
    uint16_t finalLayout : 4;
    uint16_t padding2 : 8;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");

// Strongly-typed index into the packed (Vulkan) attachment list; defined elsewhere.
class PackedAttachmentIndex;
312 
// Per-attachment load/store ops and layouts for an entire render pass, indexed by packed
// (Vulkan-side) attachment index.  Hashed as part of the render pass cache key.
class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const;
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    // Set only the initial/final layouts of the attachment.
    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    // Set the load/store ops of the color/depth aspect.
    void setOps(PackedAttachmentIndex index, RenderPassLoadOp loadOp, RenderPassStoreOp storeOp);
    // Set the load/store ops of the stencil aspect.
    void setStencilOps(PackedAttachmentIndex index,
                       RenderPassLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    // Convenience helpers; presumably switch the (stencil) load op to Clear — see the
    // implementation in vk_cache_utils.cpp to confirm.
    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");
349 
// Packed description of a single vertex attribute (format, divisor, offset, stride).
struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // NOTE(review): the original comment here ("Desktop drivers support") appears truncated.
    // The relative offset is packed into kAttributeOffsetMaxBits bits.
    uint16_t offset : kAttributeOffsetMaxBits;

    uint16_t compressed : 1;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful. Note that stride limits are
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

// Vertex input portion of the packed graphics pipeline description: one entry per possible
// vertex attribute.
struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");
376 
// Rasterization and multisample booleans/enums packed into a single 32-bit word.  Field names
// mirror the corresponding Vulkan pipeline create-info fields they are unpacked into.
struct RasterizationStateBits final
{
    // Note: Currently only 2 subpasses possible, so there are 5 bits in subpass that can be
    // repurposed.
    uint32_t subpass : 6;
    uint32_t depthClampEnable : 1;
    uint32_t rasterizationDiscardEnable : 1;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

// Rasterization/multisample state of the packed pipeline description.
struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there are no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");
413 
// Stencil fail/pass/depth-fail ops and compare function for one face, 4 bits each.
struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

// Full per-face stencil state: ops plus compare and write masks.
struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");
434 
// Enable bits for the depth/stencil state.  viewportNegativeOneToOne appears to be borrowed
// here despite being a viewport property, similar to surfaceRotation below.
struct DepthStencilEnableFlags final
{
    uint8_t viewportNegativeOneToOne : 1;

    uint8_t depthTest : 1;
    uint8_t depthWrite : 2;  // these only need one bit each. the extra is used as padding.
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

// We are borrowing three bits here for surface rotation, even though it has nothing to do with
// depth stencil.
struct DepthCompareOpAndSurfaceRotation final
{
    uint8_t depthCompareOp : 4;
    uint8_t surfaceRotation : 3;
    uint8_t padding : 1;
};
constexpr size_t kDepthCompareOpAndSurfaceRotationSize = sizeof(DepthCompareOpAndSurfaceRotation);
static_assert(kDepthCompareOpAndSurfaceRotationSize == 1, "Size check failed");
458 
// Depth/stencil portion of the packed pipeline description (20 bytes; see static_assert below).
struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    DepthCompareOpAndSurfaceRotation depthCompareOpAndSurfaceRotation;

    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
// surfaceRotation above is only 3 bits wide, so the rotation enum must fit in [0, 8).
static_assert(static_cast<int>(SurfaceRotation::EnumCount) <= 8, "Size check failed");
475 
// Logic op enable bit plus the op itself, packed into one byte.
struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

// Blend factors and equations for a single color attachment, packed into 32 bits.
struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

// Input assembly state: primitive topology, tessellation patch size, and primitive restart.
struct PrimitiveState final
{
    uint16_t topology : 9;
    uint16_t patchVertices : 6;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");
507 
// Input assembly and blend portion of the packed pipeline description.
struct PackedInputAssemblyAndColorBlendStateInfo final
{
    // One 4-bit color write mask per draw buffer, two masks per byte.
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

// Framebuffer dimensions packed into 4 bytes.
struct PackedExtent final
{
    uint16_t width;
    uint16_t height;
};

// Dither emulation state, 2 bits per draw buffer (see static_assert).
struct PackedDither final
{
    static_assert(gl::IMPLEMENTATION_MAX_DRAW_BUFFERS <= 8,
                  "2 bits per draw buffer is needed for dither emulation");
    uint16_t emulatedDitherControl;
    uint16_t unused;
};
531 
constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");

// Total byte size of the packed graphics pipeline description.  NOTE(review): assumed to be a
// multiple of kGraphicsPipelineDirtyBitBytes — the division below truncates, so a non-multiple
// would leave trailing bytes without a dirty bit.  TODO confirm with a static_assert.
constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize +
    sizeof(PackedExtent) + sizeof(PackedDither);

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
549 
550 // State changes are applied through the update methods. Each update method can also have a
551 // sibling method that applies the update without marking a state transition. The non-transition
552 // update methods are used for internal shader pipelines. Not every non-transition update method
553 // is implemented yet as not every state is used in internal shaders.
554 class GraphicsPipelineDesc final
555 {
556   public:
557     // Use aligned allocation and free so we can use the alignas keyword.
558     void *operator new(std::size_t size);
559     void operator delete(void *ptr);
560 
561     GraphicsPipelineDesc();
562     ~GraphicsPipelineDesc();
563     GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
564     GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);
565 
566     size_t hash() const;
567     bool operator==(const GraphicsPipelineDesc &other) const;
568 
569     void initDefaults(const ContextVk *contextVk);
570 
571     // For custom comparisons.
572     template <typename T>
getPtr()573     const T *getPtr() const
574     {
575         return reinterpret_cast<const T *>(this);
576     }
577 
578     angle::Result initializePipeline(ContextVk *contextVk,
579                                      const PipelineCache &pipelineCacheVk,
580                                      const RenderPass &compatibleRenderPass,
581                                      const PipelineLayout &pipelineLayout,
582                                      const gl::AttributesMask &activeAttribLocationsMask,
583                                      const gl::ComponentTypeMask &programAttribsTypeMask,
584                                      const gl::DrawBufferMask &missingOutputsMask,
585                                      const ShaderAndSerialMap &shaders,
586                                      const SpecializationConstants &specConsts,
587                                      Pipeline *pipelineOut) const;
588 
589     // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
590     void updateVertexInput(GraphicsPipelineTransitionBits *transition,
591                            uint32_t attribIndex,
592                            GLuint stride,
593                            GLuint divisor,
594                            angle::FormatID format,
595                            bool compressed,
596                            GLuint relativeOffset);
597 
598     // Input assembly info
599     void setTopology(gl::PrimitiveMode drawMode);
600     void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
601     void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
602                                        bool primitiveRestartEnabled);
603 
604     // Viewport states
605     void updateDepthClipControl(GraphicsPipelineTransitionBits *transition, bool negativeOneToOne);
606 
607     // Raster states
608     void setCullMode(VkCullModeFlagBits cullMode);
609     void updateCullMode(GraphicsPipelineTransitionBits *transition,
610                         const gl::RasterizerState &rasterState);
611     void updateFrontFace(GraphicsPipelineTransitionBits *transition,
612                          const gl::RasterizerState &rasterState,
613                          bool invertFrontFace);
614     void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
615     void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
616                                         bool rasterizerDiscardEnabled);
617 
618     // Multisample states
619     uint32_t getRasterizationSamples() const;
620     void setRasterizationSamples(uint32_t rasterizationSamples);
621     void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
622                                     uint32_t rasterizationSamples);
623     void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
624     void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
625     void updateSampleMask(GraphicsPipelineTransitionBits *transition,
626                           uint32_t maskNumber,
627                           uint32_t mask);
628 
629     void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);
630 
631     // RenderPass description.
getRenderPassDesc()632     const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }
633 
634     void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
635     void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
636                               const RenderPassDesc &renderPassDesc);
637     void setRenderPassSampleCount(GLint samples);
638     void setRenderPassColorAttachmentFormat(size_t colorIndexGL, angle::FormatID formatID);
639 
640     // Blend states
641     void setSingleBlend(uint32_t colorIndexGL,
642                         bool enabled,
643                         VkBlendOp op,
644                         VkBlendFactor srcFactor,
645                         VkBlendFactor dstFactor);
646     void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
647                             gl::DrawBufferMask blendEnabledMask);
    // Blend state.  The update* variants also record the touched dirty bits in |transition| so
    // the pipeline transition cache can tell which cached pipelines remain compatible; the set*
    // variants write the packed state without recording transition bits.
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendStateExt &blendStateExt,
                          gl::DrawBufferMask attachmentMask);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendStateExt &blendStateExt,
                              gl::DrawBufferMask attachmentMask);
    void resetBlendFuncsAndEquations(GraphicsPipelineTransitionBits *transition,
                                     const gl::BlendStateExt &blendStateExt,
                                     gl::DrawBufferMask previousAttachmentsMask,
                                     gl::DrawBufferMask newAttachmentsMask);
    void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                            const gl::DrawBufferMask &alphaMask,
                            const gl::DrawBufferMask &enabledDrawBuffers);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
                               gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                               const gl::DrawBufferMask &alphaMask,
                               const gl::DrawBufferMask &enabledDrawBuffers);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setDepthClampEnabled(bool enabled);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    // The update* depth/stencil variants take the draw framebuffer where the effective state
    // depends on the attachments actually present — TODO(review): confirm against definitions.
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Tessellation
    void updatePatchVertices(GraphicsPipelineTransitionBits *transition, GLuint value);

    // Subpass
    void resetSubpass(GraphicsPipelineTransitionBits *transition);
    void nextSubpass(GraphicsPipelineTransitionBits *transition);
    void setSubpass(uint32_t subpass);
    uint32_t getSubpass() const;

    void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
                               const SurfaceRotation surfaceRotation);
    // The rotation is packed alongside the depth compare op in the depth/stencil state; unpack
    // it back into the enum here.
    SurfaceRotation getSurfaceRotation() const
    {
        return static_cast<SurfaceRotation>(
            mDepthStencilStateInfo.depthCompareOpAndSurfaceRotation.surfaceRotation);
    }

    void updateDrawableSize(GraphicsPipelineTransitionBits *transition,
                            uint32_t width,
                            uint32_t height);
    const PackedExtent &getDrawableSize() const { return mDrawableSize; }

    // Note: the setter takes a uint16_t; the getter widens the stored value to uint32_t.
    void updateEmulatedDitherControl(GraphicsPipelineTransitionBits *transition, uint16_t value);
    uint32_t getEmulatedDitherControl() const { return mDither.emulatedDitherControl; }

  private:
    void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);

    // Packed pipeline state.  The packing (no gaps, fixed offsets) is what allows hashing and
    // memcmp-based comparison of the whole description; see the static_assert below the class.
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    PackedExtent mDrawableSize;
    PackedDither mDither;
748 };
749 
// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

// A descriptor set layout needs room for the larger of the texture and uniform-buffer binding
// limits, since either kind of set may be described.
constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

// Fixed-capacity storage for unpacked VkDescriptorSetLayoutBinding entries; avoids heap
// allocation when creating a descriptor set layout.
using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;
763 
// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    // Hash/equality so the desc can serve as a cache key.
    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    // Records the descriptor at |bindingIndex|.  |immutableSampler| may be null when the
    // binding does not use an immutable sampler.
    void update(uint32_t bindingIndex,
                VkDescriptorType descriptorType,
                uint32_t count,
                VkShaderStageFlags stages,
                const Sampler *immutableSampler);

    // Expands the packed representation back into the Vulkan binding structs needed to create
    // the VkDescriptorSetLayout.
    void unpackBindings(DescriptorSetLayoutBindingVector *bindings,
                        std::vector<VkSampler> *immutableSamplers) const;

  private:
    // There is a small risk of an issue if the sampler cache is evicted but not the descriptor
    // cache we would have an invalid handle here. Thus propose follow-up work:
    // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
        uint32_t pad;    // Explicit padding so the raw VkSampler handle below stays aligned.
        VkSampler immutableSampler;
    };

    // 4x 32bit
    static_assert(sizeof(PackedDescriptorSetBinding) == 16, "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
807 
// The following are for caching descriptor set layouts. Limited to max four descriptor set layouts.
// This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

// A VkPushConstantRange squeezed into a single 32-bit word: 8-bit offset and size, with the
// shader stage flags packed into the remaining 16 bits.
struct PackedPushConstantRange
{
    uint8_t offset;
    uint8_t size;
    uint16_t stageMask;
};

static_assert(sizeof(PackedPushConstantRange) == sizeof(uint32_t), "Unexpected Size");

// Convenience arrays indexed by DescriptorSetIndex.
template <typename T>
using DescriptorSetArray              = angle::PackedEnumMap<DescriptorSetIndex, T>;
using DescriptorSetLayoutPointerArray = DescriptorSetArray<BindingPointer<DescriptorSetLayout>>;
824 
// A packed description of a full pipeline layout: one DescriptorSetLayoutDesc per set slot plus
// a single packed push-constant range.  Tightly packed so it can be hashed and compared as a
// cache key (see the static_asserts).
class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    // Overwrites the layout description for the given set slot.
    void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
                                   const DescriptorSetLayoutDesc &desc);
    // Records the (single) push constant range used by the layout.
    void updatePushConstantRange(VkShaderStageFlags stageMask, uint32_t offset, uint32_t size);

    const PackedPushConstantRange &getPushConstantRange() const { return mPushConstantRange; }

  private:
    DescriptorSetArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PackedPushConstantRange mPushConstantRange;
    // Keeps the struct size a multiple of 8 bytes; never read.
    ANGLE_MAYBE_UNUSED uint32_t mPadding;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) == sizeof(DescriptorSetArray<DescriptorSetLayoutDesc>) +
                                                sizeof(PackedPushConstantRange) + sizeof(uint32_t),
              "Unexpected Size");
857 
// Packed description of a VkSamplerYcbcrConversion, used as a key for caching conversions.  All
// conversion parameters are squeezed into 16 bytes (see static_assert below).
struct YcbcrConversionDesc final
{
    YcbcrConversionDesc();
    ~YcbcrConversionDesc();
    YcbcrConversionDesc(const YcbcrConversionDesc &other);
    YcbcrConversionDesc &operator=(const YcbcrConversionDesc &other);

    size_t hash() const;
    bool operator==(const YcbcrConversionDesc &other) const;

    // A desc is valid once update() has recorded a non-zero external/Vulkan format.
    bool valid() const { return mExternalOrVkFormat != 0; }
    // Clears the desc back to the invalid (all-zero format) state.
    void reset();
    // Packs the given conversion parameters into the bitfields below.
    void update(RendererVk *rendererVk,
                uint64_t externalFormat,
                VkSamplerYcbcrModelConversion conversionModel,
                VkSamplerYcbcrRange colorRange,
                VkChromaLocation xChromaOffset,
                VkChromaLocation yChromaOffset,
                VkFilter chromaFilter,
                VkComponentMapping components,
                angle::FormatID intendedFormatID);

    // If the sampler needs to convert the image content (e.g. from YUV to RGB) then
    // mExternalOrVkFormat will be non-zero. The value is either the external format
    // as returned by vkGetAndroidHardwareBufferPropertiesANDROID or a YUV VkFormat.
    // For VkSamplerYcbcrConversion, mExternalOrVkFormat along with mIsExternalFormat,
    // mConversionModel and mColorRange works as a Serial() used elsewhere in ANGLE.
    uint64_t mExternalOrVkFormat;
    // 1 bit to identify if external format is used
    uint32_t mIsExternalFormat : 1;
    // 3 bits to identify conversion model
    uint32_t mConversionModel : 3;
    // 1 bit to identify color component range
    uint32_t mColorRange : 1;
    // 1 bit to identify x chroma location
    uint32_t mXChromaOffset : 1;
    // 1 bit to identify y chroma location
    uint32_t mYChromaOffset : 1;
    // 1 bit to identify chroma filtering
    uint32_t mChromaFilter : 1;
    // 3 bit to identify R component swizzle
    uint32_t mRSwizzle : 3;
    // 3 bit to identify G component swizzle
    uint32_t mGSwizzle : 3;
    // 3 bit to identify B component swizzle
    uint32_t mBSwizzle : 3;
    // 3 bit to identify A component swizzle
    uint32_t mASwizzle : 3;
    // Unused bits; kept zero so hashing/comparison are well defined.
    uint32_t mPadding : 12;
    uint32_t mReserved;
};

static_assert(sizeof(YcbcrConversionDesc) == 16, "Unexpected YcbcrConversionDesc size");
911 
// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    // Equivalent to default-construction followed by update() with the same arguments.
    SamplerDesc(ContextVk *contextVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    // Packs the GL sampler state (plus optional YCbCr conversion) into the fields below.
    void update(ContextVk *contextVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    // Restores the zero-initialized state.
    void reset();
    // Creates the Vulkan sampler described by this desc.
    angle::Result init(ContextVk *contextVk, Sampler *sampler) const;

    // Hash/equality so the desc can serve as a sampler cache key.
    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // 16*8 bits to uniquely identify a YCbCr conversion sampler.
    YcbcrConversionDesc mYcbcrConversionDesc;

    // 16 bits for modes + states.
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op. (8 possible values)
    uint16_t mCompareOp : 3;

    // Values from angle::ColorGeneric::Type. Float is 0 and others are 1.
    uint16_t mBorderColorType : 1;

    // Unused bits; kept zero so hashing/comparison are well defined.
    uint16_t mPadding : 15;

    // 16*8 bits for BorderColor
    angle::ColorF mBorderColor;

    // 32 bits reserved for future use.
    uint32_t mReserved;
};

static_assert(sizeof(SamplerDesc) == 56, "Unexpected SamplerDesc size");
979 
// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

// One edge in the pipeline transition cache: |bits| are the dirty bits of the change, |desc| is
// the destination description, and |target| is the cached pipeline to switch to.  See
// GraphicsPipelineTransitionMatch and PipelineHelper::findTransition.
struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    // Note: non-owning pointers; the desc and pipeline are owned elsewhere.
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};
997 
// Out-of-class inline definitions for GraphicsPipelineTransition's constructors.
ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}
1009 
GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,GraphicsPipelineTransitionBits bitsB,const GraphicsPipelineDesc & descA,const GraphicsPipelineDesc & descB)1010 ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
1011                                                   GraphicsPipelineTransitionBits bitsB,
1012                                                   const GraphicsPipelineDesc &descA,
1013                                                   const GraphicsPipelineDesc &descB)
1014 {
1015     if (bitsA != bitsB)
1016         return false;
1017 
1018     // We currently mask over 4 bytes of the pipeline description with each dirty bit.
1019     // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
1020     // of the code faster. The for loop below would scan over twice as many bits per iteration.
1021     // But there may be more collisions between the same dirty bit masks leading to different
1022     // transitions. Thus there may be additional cost when applications use many transitions.
1023     // We should revisit this in the future and investigate using different bit widths.
1024     static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
1025 
1026     const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
1027     const uint32_t *rawPtrB = descB.getPtr<uint32_t>();
1028 
1029     for (size_t dirtyBit : bitsA)
1030     {
1031         if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
1032             return false;
1033     }
1034 
1035     return true;
1036 }
1037 
// Owns a compiled VkPipeline plus the list of known-good transitions out of it.  Derives from
// Resource so in-flight use of the pipeline is tracked before destruction.
class PipelineHelper final : public Resource
{
  public:
    PipelineHelper();
    ~PipelineHelper() override;
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    bool valid() const { return mPipeline.valid(); }
    Pipeline &getPipeline() { return mPipeline; }

    // Looks for a previously-recorded transition matching (bits, desc); on success writes the
    // cached pipeline to |pipelineOut| and returns true.
    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    // Records a new transition edge; no de-duplication is performed here.
    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Pipeline mPipeline;
};

// Takes ownership of an already-created pipeline.
ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}
1077 
// A 32-bit packed description of an image view's subresource range (mip levels, layers, sRGB
// modes), used as part of cache keys for image views.
struct ImageSubresourceRange
{
    // GL max is 1000 (fits in 10 bits).
    uint32_t level : 10;
    // Max 31 levels (2 ** 5 - 1). Can store levelCount-1 if we need to save another bit.
    uint32_t levelCount : 5;
    // Implementation max is 2048 (11 bits).
    uint32_t layer : 12;
    // One of vk::LayerMode values.  If 0, it means all layers.  Otherwise it's the count of layers
    // which is usually 1, except for multiview in which case it can be up to
    // gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS.
    uint32_t layerMode : 3;
    // Values from vk::SrgbDecodeMode.  Unused with draw views.
    uint32_t srgbDecodeMode : 1;
    // For read views: Values from gl::SrgbOverride, either Default or SRGB.
    // For draw views: Values from gl::SrgbWriteControlMode.
    uint32_t srgbMode : 1;

    static_assert(gl::IMPLEMENTATION_MAX_TEXTURE_LEVELS < (1 << 5),
                  "Not enough bits for level count");
    static_assert(gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS <= (1 << 12),
                  "Not enough bits for layer index");
    static_assert(gl::IMPLEMENTATION_ANGLE_MULTIVIEW_MAX_VIEWS <= (1 << 3),
                  "Not enough bits for layer count");
};

static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t), "Size mismatch");
1105 
1106 inline bool operator==(const ImageSubresourceRange &a, const ImageSubresourceRange &b)
1107 {
1108     return a.level == b.level && a.levelCount == b.levelCount && a.layer == b.layer &&
1109            a.layerMode == b.layerMode && a.srgbDecodeMode == b.srgbDecodeMode &&
1110            a.srgbMode == b.srgbMode;
1111 }
1112 
// All-zero range used to mark "no subresource".
constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {0, 0, 0, 0, 0, 0};

// Pairs a view serial with the packed subresource range it was created for, uniquely
// identifying an image or buffer view for caching purposes.
struct ImageOrBufferViewSubresourceSerial
{
    ImageOrBufferViewSerial viewSerial;
    ImageSubresourceRange subresource;
};

inline bool operator==(const ImageOrBufferViewSubresourceSerial &a,
                       const ImageOrBufferViewSubresourceSerial &b)
{
    return a.viewSerial == b.viewSerial && a.subresource == b.subresource;
}

// Sentinel combining the invalid serial and the invalid subresource range.
constexpr ImageOrBufferViewSubresourceSerial kInvalidImageOrBufferViewSubresourceSerial = {
    kInvalidImageOrBufferViewSerial, kInvalidImageSubresourceRange};
1129 
1130 // Generic description of a descriptor set. Used as a key when indexing descriptor set caches. The
1131 // key storage is an angle:FixedVector. Beyond a certain fixed size we'll end up using heap memory
1132 // to store keys. Currently we specialize the structure for three use cases: uniforms, textures,
1133 // and other shader resources. Because of the way the specialization works we can't currently cache
1134 // programs that use some types of resources.
1135 class DescriptorSetDesc
1136 {
1137   public:
1138     DescriptorSetDesc()  = default;
1139     ~DescriptorSetDesc() = default;
1140 
DescriptorSetDesc(const DescriptorSetDesc & other)1141     DescriptorSetDesc(const DescriptorSetDesc &other) : mPayload(other.mPayload) {}
1142 
1143     DescriptorSetDesc &operator=(const DescriptorSetDesc &other)
1144     {
1145         mPayload = other.mPayload;
1146         return *this;
1147     }
1148 
1149     size_t hash() const;
1150 
reset()1151     void reset() { mPayload.clear(); }
1152 
getKeySizeBytes()1153     size_t getKeySizeBytes() const { return mPayload.size() * sizeof(uint32_t); }
1154 
1155     bool operator==(const DescriptorSetDesc &other) const
1156     {
1157         return (mPayload.size() == other.mPayload.size()) &&
1158                (memcmp(mPayload.data(), other.mPayload.data(),
1159                        mPayload.size() * sizeof(mPayload[0])) == 0);
1160     }
1161 
1162     // Specific helpers for uniforms/xfb descriptors.
1163     static constexpr size_t kDefaultUniformBufferWordOffset = 0;
1164     static constexpr size_t kXfbBufferSerialWordOffset      = 1;
1165     static constexpr size_t kXfbBufferOffsetWordOffset      = 2;
1166     static constexpr size_t kXfbWordStride                  = 2;
1167 
updateDefaultUniformBuffer(BufferSerial bufferSerial)1168     void updateDefaultUniformBuffer(BufferSerial bufferSerial)
1169     {
1170         setBufferSerial(kDefaultUniformBufferWordOffset, 1, 0, bufferSerial);
1171     }
1172 
updateTransformFeedbackBuffer(size_t xfbIndex,BufferSerial bufferSerial,VkDeviceSize bufferOffset)1173     void updateTransformFeedbackBuffer(size_t xfbIndex,
1174                                        BufferSerial bufferSerial,
1175                                        VkDeviceSize bufferOffset)
1176     {
1177         setBufferSerial(kXfbBufferSerialWordOffset, kXfbWordStride, xfbIndex, bufferSerial);
1178         setClamped64BitValue(kXfbBufferOffsetWordOffset, kXfbWordStride, xfbIndex, bufferOffset);
1179     }
1180 
1181     // Specific helpers for texture descriptors.
1182     static constexpr size_t kImageOrBufferViewWordOffset     = 0;
1183     static constexpr size_t kImageSubresourceRangeWordOffset = 1;
1184     static constexpr size_t kSamplerSerialWordOffset         = 2;
1185     static constexpr size_t kTextureWordStride               = 3;
updateTexture(size_t textureUnit,ImageOrBufferViewSubresourceSerial imageOrBufferViewSubresource,SamplerSerial samplerSerial)1186     void updateTexture(size_t textureUnit,
1187                        ImageOrBufferViewSubresourceSerial imageOrBufferViewSubresource,
1188                        SamplerSerial samplerSerial)
1189     {
1190         setImageOrBufferViewSerial(kImageOrBufferViewWordOffset, kTextureWordStride, textureUnit,
1191                                    imageOrBufferViewSubresource.viewSerial);
1192         setImageSubresourceRange(kImageSubresourceRangeWordOffset, kTextureWordStride, textureUnit,
1193                                  imageOrBufferViewSubresource.subresource);
1194         setSamplerSerial(kSamplerSerialWordOffset, kTextureWordStride, textureUnit, samplerSerial);
1195     }
1196 
1197     // Specific helpers for the shader resources descriptors.
appendBufferSerial(BufferSerial bufferSerial)1198     void appendBufferSerial(BufferSerial bufferSerial)
1199     {
1200         append32BitValue(bufferSerial.getValue());
1201     }
1202 
append32BitValue(uint32_t value)1203     void append32BitValue(uint32_t value) { mPayload.push_back(value); }
1204 
appendClamped64BitValue(uint64_t value)1205     void appendClamped64BitValue(uint64_t value)
1206     {
1207         ASSERT(value <= static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
1208         append32BitValue(static_cast<uint32_t>(value));
1209     }
1210 
1211   private:
setBufferSerial(size_t wordOffset,size_t wordStride,size_t elementIndex,BufferSerial bufferSerial)1212     void setBufferSerial(size_t wordOffset,
1213                          size_t wordStride,
1214                          size_t elementIndex,
1215                          BufferSerial bufferSerial)
1216     {
1217         set32BitValue(wordOffset, wordStride, elementIndex, bufferSerial.getValue());
1218     }
1219 
setImageOrBufferViewSerial(size_t wordOffset,size_t wordStride,size_t elementIndex,ImageOrBufferViewSerial imageOrBufferViewSerial)1220     void setImageOrBufferViewSerial(size_t wordOffset,
1221                                     size_t wordStride,
1222                                     size_t elementIndex,
1223                                     ImageOrBufferViewSerial imageOrBufferViewSerial)
1224     {
1225         set32BitValue(wordOffset, wordStride, elementIndex, imageOrBufferViewSerial.getValue());
1226     }
1227 
setImageSubresourceRange(size_t wordOffset,size_t wordStride,size_t elementIndex,ImageSubresourceRange subresourceRange)1228     void setImageSubresourceRange(size_t wordOffset,
1229                                   size_t wordStride,
1230                                   size_t elementIndex,
1231                                   ImageSubresourceRange subresourceRange)
1232     {
1233         static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t));
1234 
1235         uint32_t value32bits;
1236         memcpy(&value32bits, &subresourceRange, sizeof(uint32_t));
1237         set32BitValue(wordOffset, wordStride, elementIndex, value32bits);
1238     }
1239 
setSamplerSerial(size_t wordOffset,size_t wordStride,size_t elementIndex,SamplerSerial samplerSerial)1240     void setSamplerSerial(size_t wordOffset,
1241                           size_t wordStride,
1242                           size_t elementIndex,
1243                           SamplerSerial samplerSerial)
1244     {
1245         set32BitValue(wordOffset, wordStride, elementIndex, samplerSerial.getValue());
1246     }
1247 
set32BitValue(size_t wordOffset,size_t wordStride,size_t elementIndex,uint32_t value)1248     void set32BitValue(size_t wordOffset, size_t wordStride, size_t elementIndex, uint32_t value)
1249     {
1250         size_t wordIndex = wordOffset + wordStride * elementIndex;
1251         ensureCapacity(wordIndex + 1);
1252         mPayload[wordIndex] = value;
1253     }
1254 
setClamped64BitValue(size_t wordOffset,size_t wordStride,size_t elementIndex,uint64_t value)1255     void setClamped64BitValue(size_t wordOffset,
1256                               size_t wordStride,
1257                               size_t elementIndex,
1258                               uint64_t value)
1259     {
1260         ASSERT(value <= static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
1261         set32BitValue(wordOffset, wordStride, elementIndex, static_cast<uint32_t>(value));
1262     }
1263 
ensureCapacity(size_t capacity)1264     void ensureCapacity(size_t capacity)
1265     {
1266         if (mPayload.size() < capacity)
1267         {
1268             mPayload.resize(capacity, 0);
1269         }
1270     }
1271 
1272     // After a preliminary minimum size, use heap memory.
1273     static constexpr size_t kFastBufferWordLimit = 32;
1274     angle::FastVector<uint32_t, kFastBufferWordLimit> mPayload;
1275 };
1276 
// In the FramebufferDesc object:
//  - Depth/stencil serial is at index 0
//  - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
//  - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
//  - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
//                                        gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
constexpr size_t kFramebufferDescColorIndexOffset  = kFramebufferDescDepthStencilIndex + 1;
constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
    kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
constexpr size_t kFramebufferDescColorResolveIndexOffset =
    kFramebufferDescDepthStencilResolveIndexOffset + 1;

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
1292 
1293 class FramebufferDesc
1294 {
1295   public:
1296     FramebufferDesc();
1297     ~FramebufferDesc();
1298 
1299     FramebufferDesc(const FramebufferDesc &other);
1300     FramebufferDesc &operator=(const FramebufferDesc &other);
1301 
1302     void updateColor(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
1303     void updateColorResolve(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
1304     void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
1305     void updateDepthStencil(ImageOrBufferViewSubresourceSerial serial);
1306     void updateDepthStencilResolve(ImageOrBufferViewSubresourceSerial serial);
setWriteControlMode(gl::SrgbWriteControlMode mode)1307     ANGLE_INLINE void setWriteControlMode(gl::SrgbWriteControlMode mode)
1308     {
1309         mSrgbWriteControlMode = static_cast<uint16_t>(mode);
1310     }
updateIsMultiview(bool isMultiview)1311     void updateIsMultiview(bool isMultiview) { mIsMultiview = isMultiview; }
1312     size_t hash() const;
1313 
1314     bool operator==(const FramebufferDesc &other) const;
1315 
1316     uint32_t attachmentCount() const;
1317 
getColorImageViewSerial(uint32_t index)1318     ImageOrBufferViewSubresourceSerial getColorImageViewSerial(uint32_t index)
1319     {
1320         ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
1321         return mSerials[kFramebufferDescColorIndexOffset + index];
1322     }
1323 
    FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;
    // Decodes the single sRGB write-control bit back into the enum (1 -> Linear,
    // otherwise Default).
    ANGLE_INLINE gl::SrgbWriteControlMode getWriteControlMode() const
    {
        return (mSrgbWriteControlMode == 1) ? gl::SrgbWriteControlMode::Linear
                                            : gl::SrgbWriteControlMode::Default;
    }

    void updateLayerCount(uint32_t layerCount);
    uint32_t getLayerCount() const { return mLayerCount; }
    void updateFramebufferFetchMode(bool hasFramebufferFetch);
    bool hasFramebufferFetch() const { return mHasFramebufferFetch; }

    bool isMultiview() const { return mIsMultiview; }

    void updateRenderToTexture(bool isRenderToTexture);
1339 
  private:
    void reset();
    void update(uint32_t index, ImageOrBufferViewSubresourceSerial serial);

    // Note: this is an exclusive index. If there is one index it will be "1".
    // Maximum value is 18
    uint16_t mMaxIndex : 5;
    uint16_t mHasFramebufferFetch : 1;
    static_assert(gl::IMPLEMENTATION_MAX_FRAMEBUFFER_LAYERS < (1 << 9) - 1,
                  "Not enough bits for mLayerCount");

    uint16_t mLayerCount : 9;

    // Single bit holding the gl::SrgbWriteControlMode value; see
    // setWriteControlMode()/getWriteControlMode().
    uint16_t mSrgbWriteControlMode : 1;

    // If the render pass contains an initial subpass to unresolve a number of attachments, the
    // subpass description is derived from the following mask, specifying which attachments need
    // to be unresolved.  Includes both color and depth/stencil attachments.
    uint16_t mUnresolveAttachmentMask : kMaxFramebufferNonResolveAttachments;

    // Whether this is a multisampled-render-to-single-sampled framebuffer.  Only used when using
    // VK_EXT_multisampled_render_to_single_sampled.  Only one bit is used and the rest is padding.
    uint16_t mIsRenderToTexture : 15 - kMaxFramebufferNonResolveAttachments;

    uint16_t mIsMultiview : 1;

    // Attachment view serials; color entries are indexed starting at
    // kFramebufferDescColorIndexOffset (see getColorImageViewSerial()).
    FramebufferAttachmentArray<ImageOrBufferViewSubresourceSerial> mSerials;
};
1368 
// Lock in the packed size of FramebufferDesc; update the expected value whenever
// fields are added or the bitfield layout changes.
constexpr size_t kFramebufferDescSize = sizeof(FramebufferDesc);
static_assert(kFramebufferDescSize == 148, "Size check failed");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
1374 
// The SamplerHelper allows a Sampler to be coupled with a serial.
// Must be included before we declare SamplerCache.
class SamplerHelper final : angle::NonCopyable
{
  public:
    SamplerHelper(ContextVk *contextVk);
    ~SamplerHelper();

    // Move-only, so instances can be stored in cache containers.
    explicit SamplerHelper(SamplerHelper &&samplerHelper);
    SamplerHelper &operator=(SamplerHelper &&rhs);

    bool valid() const { return mSampler.valid(); }
    const Sampler &get() const { return mSampler; }
    Sampler &get() { return mSampler; }
    SamplerSerial getSamplerSerial() const { return mSamplerSerial; }

  private:
    Sampler mSampler;
    SamplerSerial mSamplerSerial;
};

// Ref-counted / bindable wrappers around SamplerHelper, used by SamplerCache below.
using RefCountedSampler = RefCounted<SamplerHelper>;
using SamplerBinding    = BindingPointer<SamplerHelper>;
1398 
// Couples a RenderPass with the per-render-pass performance counters gathered for it.
class RenderPassHelper final : angle::NonCopyable
{
  public:
    RenderPassHelper();
    ~RenderPassHelper();

    RenderPassHelper(RenderPassHelper &&other);
    RenderPassHelper &operator=(RenderPassHelper &&other);

    // Destroys the wrapped render pass handle.
    void destroy(VkDevice device);

    const RenderPass &getRenderPass() const;
    RenderPass &getRenderPass();

    const RenderPassPerfCounters &getPerfCounters() const;
    RenderPassPerfCounters &getPerfCounters();

  private:
    RenderPass mRenderPass;
    RenderPassPerfCounters mPerfCounters;
};
1420 }  // namespace vk
1421 }  // namespace rx
1422 
// Introduce std::hash for the above classes.  Each specialization simply forwards
// to the type's own hash() implementation.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::ImageSubresourceRange>
{
    size_t operator()(const rx::vk::ImageSubresourceRange &key) const
    {
        // NOTE(review): this type-puns the struct (assumed to be exactly 32 bits)
        // through reinterpret_cast, which violates strict aliasing.  std::memcpy
        // into a uint32_t (or C++20 std::bit_cast) would be the well-defined
        // equivalent -- confirm struct size/packing before changing.
        return *reinterpret_cast<const uint32_t *>(&key);
    }
};

template <>
struct hash<rx::vk::DescriptorSetDesc>
{
    size_t operator()(const rx::vk::DescriptorSetDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::YcbcrConversionDesc>
{
    size_t operator()(const rx::vk::YcbcrConversionDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};

// See Resource Serial types defined in vk_utils.h.
#define ANGLE_HASH_VK_SERIAL(Type)                                                          \
    template <>                                                                             \
    struct hash<rx::vk::Type##Serial>                                                       \
    {                                                                                       \
        size_t operator()(const rx::vk::Type##Serial &key) const { return key.getValue(); } \
    };

ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)

}  // namespace std
1500 
1501 namespace rx
1502 {
// Cache types for various Vulkan objects.  Used as the template parameter of
// HasCacheStats below so each cache's hit/miss counters are tagged with its type.
enum class VulkanCacheType
{
    CompatibleRenderPass,
    RenderPassWithOps,
    GraphicsPipeline,
    PipelineLayout,
    Sampler,
    SamplerYcbcrConversion,
    DescriptorSetLayout,
    DriverUniformsDescriptors,
    TextureDescriptors,
    UniformsAndXfbDescriptors,
    ShaderBuffersDescriptors,
    Framebuffer,
    EnumCount
};
1520 
1521 // Base class for all caches. Provides cache hit and miss counters.
1522 class CacheStats final : angle::NonCopyable
1523 {
1524   public:
1525     CacheStats() { reset(); }
1526     ~CacheStats() {}
1527 
1528     CacheStats(const CacheStats &rhs)
1529         : mHitCount(rhs.mHitCount), mMissCount(rhs.mMissCount), mSize(rhs.mSize)
1530     {}
1531 
1532     CacheStats &operator=(const CacheStats &rhs)
1533     {
1534         mHitCount  = rhs.mHitCount;
1535         mMissCount = rhs.mMissCount;
1536         mSize      = rhs.mSize;
1537         return *this;
1538     }
1539 
1540     ANGLE_INLINE void hit() { mHitCount++; }
1541     ANGLE_INLINE void miss() { mMissCount++; }
1542     ANGLE_INLINE void accumulate(const CacheStats &stats)
1543     {
1544         mHitCount += stats.mHitCount;
1545         mMissCount += stats.mMissCount;
1546         mSize = stats.mSize;
1547     }
1548 
1549     uint64_t getHitCount() const { return mHitCount; }
1550     uint64_t getMissCount() const { return mMissCount; }
1551 
1552     ANGLE_INLINE double getHitRatio() const
1553     {
1554         if (mHitCount + mMissCount == 0)
1555         {
1556             return 0;
1557         }
1558         else
1559         {
1560             return static_cast<double>(mHitCount) / (mHitCount + mMissCount);
1561         }
1562     }
1563 
1564     ANGLE_INLINE void incrementSize() { ++mSize; }
1565 
1566     ANGLE_INLINE uint64_t getSize() const { return mSize; }
1567 
1568     void reset()
1569     {
1570         mHitCount  = 0;
1571         mMissCount = 0;
1572         mSize      = 0;
1573     }
1574 
1575     void resetHitAndMissCount()
1576     {
1577         mHitCount  = 0;
1578         mMissCount = 0;
1579     }
1580 
1581   private:
1582     uint64_t mHitCount;
1583     uint64_t mMissCount;
1584     uint64_t mSize;
1585 };
1586 
// Mixin that gives a cache class a CacheStats instance tagged with its
// VulkanCacheType.
template <VulkanCacheType CacheType>
class HasCacheStats : angle::NonCopyable
{
  public:
    // Adds this cache's counters to |accum| and resets the local counters.
    template <typename Accumulator>
    void accumulateCacheStats(Accumulator *accum)
    {
        accum->accumulateCacheStats(CacheType, mCacheStats);
        mCacheStats.reset();
    }

  protected:
    HasCacheStats()          = default;
    virtual ~HasCacheStats() = default;

    CacheStats mCacheStats;
};
1604 
// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(RendererVk *rendererVk);

    // Returns any render pass compatible with |desc|, creating one on a cache
    // miss.  Any entry of the inner cache works, since entries there differ only
    // in load/store ops and layouts (see InnerCache below).
    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       const vk::RenderPassDesc &desc,
                                                       vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            *renderPassOut = &innerCache.begin()->second.getRenderPass();
            mCompatibleRenderPassCacheStats.hit();
            return angle::Result::Continue;
        }

        mCompatibleRenderPassCacheStats.miss();
        return addRenderPass(contextVk, desc, renderPassOut);
    }

    // Returns a render pass matching both |desc| and the exact |attachmentOps|.
    angle::Result getRenderPassWithOps(ContextVk *contextVk,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       vk::RenderPass **renderPassOut);

  private:
    angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
                                           const vk::RenderPassDesc &desc,
                                           const vk::AttachmentOpsArray &attachmentOps,
                                           bool updatePerfCounters,
                                           vk::RenderPass **renderPassOut);

    angle::Result addRenderPass(ContextVk *contextVk,
                                const vk::RenderPassDesc &desc,
                                vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    // Switch to `std::unordered_map` to retain pointer stability.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassHelper>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
    CacheStats mCompatibleRenderPassCacheStats;
    CacheStats mRenderPassWithOpsCacheStats;
};
1660 
// TODO(jmadill): Add cache trimming/eviction.
class GraphicsPipelineCache final : public HasCacheStats<VulkanCacheType::GraphicsPipeline>
{
  public:
    GraphicsPipelineCache();
    ~GraphicsPipelineCache() override;

    void destroy(RendererVk *rendererVk);
    void release(ContextVk *context);

    // Seeds the cache with an externally-created pipeline for |desc|.
    void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);

    // Returns the pipeline matching |desc|, creating and inserting one on a cache
    // miss.  |descPtrOut| is set to the key stored in the cache itself.
    ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
                                           const vk::PipelineCache &pipelineCacheVk,
                                           const vk::RenderPass &compatibleRenderPass,
                                           const vk::PipelineLayout &pipelineLayout,
                                           const gl::AttributesMask &activeAttribLocationsMask,
                                           const gl::ComponentTypeMask &programAttribsTypeMask,
                                           const gl::DrawBufferMask &missingOutputsMask,
                                           const vk::ShaderAndSerialMap &shaders,
                                           const vk::SpecializationConstants &specConsts,
                                           const vk::GraphicsPipelineDesc &desc,
                                           const vk::GraphicsPipelineDesc **descPtrOut,
                                           vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item != mPayload.end())
        {
            *descPtrOut  = &item->first;
            *pipelineOut = &item->second;
            mCacheStats.hit();
            return angle::Result::Continue;
        }

        mCacheStats.miss();
        return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
                              activeAttribLocationsMask, programAttribsTypeMask, missingOutputsMask,
                              shaders, specConsts, desc, descPtrOut, pipelineOut);
    }

  private:
    // Creates the pipeline on a cache miss and stores it under |desc|.
    angle::Result insertPipeline(ContextVk *contextVk,
                                 const vk::PipelineCache &pipelineCacheVk,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const gl::AttributesMask &activeAttribLocationsMask,
                                 const gl::ComponentTypeMask &programAttribsTypeMask,
                                 const gl::DrawBufferMask &missingOutputsMask,
                                 const vk::ShaderAndSerialMap &shaders,
                                 const vk::SpecializationConstants &specConsts,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};
1717 
// Cache of ref-counted descriptor set layouts, keyed by their packed description.
class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(RendererVk *rendererVk);

    // Returns (creating if necessary) the layout matching |desc| through a
    // binding pointer that shares ownership with the cache.
    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
    CacheStats mCacheStats;
};
1735 
// Cache of ref-counted pipeline layouts, keyed by their packed description.
class PipelineLayoutCache final : public HasCacheStats<VulkanCacheType::PipelineLayout>
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache() override;

    void destroy(RendererVk *rendererVk);

    // Returns (creating if necessary) the pipeline layout matching |desc|, built
    // from the given descriptor set layouts.
    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};
1752 
// Cache of ref-counted samplers, keyed by their packed description.
class SamplerCache final : public HasCacheStats<VulkanCacheType::Sampler>
{
  public:
    SamplerCache();
    ~SamplerCache() override;

    void destroy(RendererVk *rendererVk);

    // Returns (creating if necessary) the sampler matching |desc| through a
    // binding that shares ownership with the cache.
    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::SamplerBinding *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
};
1768 
// YuvConversion Cache
class SamplerYcbcrConversionCache final
    : public HasCacheStats<VulkanCacheType::SamplerYcbcrConversion>
{
  public:
    SamplerYcbcrConversionCache();
    ~SamplerYcbcrConversionCache() override;

    void destroy(RendererVk *rendererVk);

    // Returns (creating if necessary) the VkSamplerYcbcrConversion handle
    // matching |ycbcrConversionDesc|.
    angle::Result getSamplerYcbcrConversion(vk::Context *context,
                                            const vk::YcbcrConversionDesc &ycbcrConversionDesc,
                                            VkSamplerYcbcrConversion *vkSamplerYcbcrConversionOut);

  private:
    using SamplerYcbcrConversionMap =
        std::unordered_map<vk::YcbcrConversionDesc, vk::SamplerYcbcrConversion>;
    // Separate payloads for external formats vs regular Vulkan formats
    // (presumably keyed differently -- see the .cpp for how a desc is routed).
    SamplerYcbcrConversionMap mExternalFormatPayload;
    SamplerYcbcrConversionMap mVkFormatPayload;
};
1789 
1790 // DescriptorSet Cache
1791 class DriverUniformsDescriptorSetCache final
1792     : public HasCacheStats<VulkanCacheType::DriverUniformsDescriptors>
1793 {
1794   public:
1795     DriverUniformsDescriptorSetCache() = default;
1796     ~DriverUniformsDescriptorSetCache() override { ASSERT(mPayload.empty()); }
1797 
1798     void destroy(RendererVk *rendererVk);
1799 
1800     ANGLE_INLINE bool get(uint32_t serial, VkDescriptorSet *descriptorSet)
1801     {
1802         if (mPayload.get(serial, descriptorSet))
1803         {
1804             mCacheStats.hit();
1805             return true;
1806         }
1807         mCacheStats.miss();
1808         return false;
1809     }
1810 
1811     ANGLE_INLINE void insert(uint32_t serial, VkDescriptorSet descriptorSet)
1812     {
1813         mPayload.insert(serial, descriptorSet);
1814     }
1815 
1816     ANGLE_INLINE void clear() { mPayload.clear(); }
1817 
1818     size_t getSize() const { return mPayload.size(); }
1819 
1820   private:
1821     static constexpr uint32_t kFlatMapSize = 16;
1822     angle::FlatUnorderedMap<uint32_t, VkDescriptorSet, kFlatMapSize> mPayload;
1823 };
1824 
1825 // Templated Descriptors Cache
1826 class DescriptorSetCache final : angle::NonCopyable
1827 {
1828   public:
1829     DescriptorSetCache() = default;
1830     ~DescriptorSetCache() { ASSERT(mPayload.empty()); }
1831 
1832     ANGLE_INLINE void clear() { mPayload.clear(); }
1833 
1834     ANGLE_INLINE bool get(const vk::DescriptorSetDesc &desc,
1835                           VkDescriptorSet *descriptorSet,
1836                           CacheStats *cacheStats)
1837     {
1838         auto iter = mPayload.find(desc);
1839         if (iter != mPayload.end())
1840         {
1841             *descriptorSet = iter->second;
1842             cacheStats->hit();
1843             return true;
1844         }
1845         cacheStats->miss();
1846         return false;
1847     }
1848 
1849     ANGLE_INLINE void insert(const vk::DescriptorSetDesc &desc,
1850                              VkDescriptorSet descriptorSet,
1851                              CacheStats *cacheStats)
1852     {
1853         mPayload.emplace(desc, descriptorSet);
1854         cacheStats->incrementSize();
1855     }
1856 
1857     size_t getTotalCacheKeySizeBytes() const
1858     {
1859         size_t totalSize = 0;
1860         for (const auto &iter : mPayload)
1861         {
1862             const vk::DescriptorSetDesc &desc = iter.first;
1863             totalSize += desc.getKeySizeBytes();
1864         }
1865         return totalSize;
1866     }
1867 
1868   private:
1869     angle::HashMap<vk::DescriptorSetDesc, VkDescriptorSet> mPayload;
1870 };
1871 
// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage.  Currently, a maximum of three stages are
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
// Total default uniform bindings reserved: one per supported stage (presumably
// vertex/fragment/geometry -- confirm against the pipeline layout setup).
constexpr uint32_t kReservedDefaultUniformBindingCount         = 3;
1878 }  // namespace rx
1879 
1880 #endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
1881