1 //
2 // Copyright 2018 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // vk_cache_utils.h:
7 // Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
8 // Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
9 //
10
11 #ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
12 #define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
13
14 #include "common/Color.h"
15 #include "common/FixedVector.h"
16 #include "libANGLE/renderer/vulkan/vk_utils.h"
17
18 namespace rx
19 {
20
21 // Some descriptor set and pipeline layout constants.
22 //
23 // The set/binding assignment is done as following:
24 //
25 // - Set 0 contains the ANGLE driver uniforms at binding 0. Note that driver uniforms are updated
26 // only under rare circumstances, such as viewport or depth range change. However, there is only
27 // one binding in this set. This set is placed before Set 1 containing transform feedback
28 // buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this set.
29 // Otherwise, as the layout of Set 1 changes (due to addition and removal of xfb buffers), and all
30 // subsequent sets need to be rebound (due to Vulkan pipeline layout validation rules), we would
31 // have needed to invalidateGraphicsDriverUniforms().
32 // - Set 1 contains uniform blocks created to encompass default uniforms. 1 binding is used per
33 // pipeline stage. Additionally, transform feedback buffers are bound from binding 2 and up.
34 // - Set 2 contains all textures (including texture buffers).
35 // - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
36 // buffers, images and image buffers.
37
// Descriptor set indices used by the pipeline layouts.  See the comment block above for the
// rationale behind this particular ordering.  The values are also used to index arrays of
// descriptor set data, hence the contiguous 0-based values and EnumCount.
enum class DescriptorSetIndex : uint32_t
{
    Internal,        // ANGLE driver uniforms or internal shaders
    UniformsAndXfb,  // Uniforms set index
    Texture,         // Textures set index
    ShaderResource,  // Other shader resources set index

    InvalidEnum,
    EnumCount = InvalidEnum,
};
48
49 namespace vk
50 {
class DynamicDescriptorPool;
class ImageHelper;
enum class ImageLayout;

// A Pipeline bundled with a serial (see ObjectAndSerial in vk_utils.h).
using PipelineAndSerial = ObjectAndSerial<Pipeline>;

// Ref-counted handles so cached layouts/conversions can be shared between their users.
using RefCountedDescriptorSetLayout = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout = RefCounted<PipelineLayout>;
using RefCountedSamplerYcbcrConversion = RefCounted<SamplerYcbcrConversion>;
60
// Helper macro that casts to a bitfield type then verifies no bits were dropped.
//
// Implemented as a macro (rather than a function template) so that |lhs| may be a bitfield
// member, which cannot bind to a reference.  The ASSERT round-trips the stored value back to the
// source type to detect truncation.
#define SetBitField(lhs, rhs)                                                         \
    do                                                                                \
    {                                                                                 \
        auto ANGLE_LOCAL_VAR = rhs;                                                   \
        lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
        ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR);       \
    } while (0)
69
70 // Packed Vk resource descriptions.
71 // Most Vk types use many more bits than required to represent the underlying data.
72 // Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
73 // hashing (and also needs to check equality) we can optimize these operations by
74 // using fewer bits. Hence the packed types.
75 //
76 // One implementation note: these types could potentially be improved by using even
77 // fewer bits. For example, boolean values could be represented by a single bit instead
78 // of a uint8_t. However at the current time there are concerns about the portability
79 // of bitfield operators, and complexity issues with using bit mask operations. This is
80 // something we will likely want to investigate as the Vulkan implementation progresses.
81 //
82 // Second implementation note: the struct packing is also a bit fragile, and some of the
83 // packing requirements depend on using alignas and field ordering to get the result of
84 // packing nicely into the desired space. This is something we could also potentially fix
85 // with a redesign to use bitfields or bit mask operations.
86
87 // Enable struct padding warnings for the code below since it is used in caches.
88 ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
89
// How a resource (e.g. a render pass attachment) is accessed.  The enumerators are ordered by
// increasing access, so that UpdateAccess() below can combine two accesses by taking the maximum.
enum ResourceAccess
{
    Unused,
    ReadOnly,
    Write,
};
96
UpdateAccess(ResourceAccess * oldAccess,ResourceAccess newAccess)97 inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
98 {
99 if (newAccess > *oldAccess)
100 {
101 *oldAccess = newAccess;
102 }
103 }
104
105 enum RenderPassStoreOp
106 {
107 Store = VK_ATTACHMENT_STORE_OP_STORE,
108 DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
109 NoneQCOM,
110 };
111 // ConvertRenderPassStoreOpToVkStoreOp rely on the fact that only NoneQCOM is different from VK
112 // enums.
113 static_assert(RenderPassStoreOp::NoneQCOM == 2, "ConvertRenderPassStoreOpToVkStoreOp must updated");
114
ConvertRenderPassStoreOpToVkStoreOp(RenderPassStoreOp storeOp)115 inline VkAttachmentStoreOp ConvertRenderPassStoreOpToVkStoreOp(RenderPassStoreOp storeOp)
116 {
117 return storeOp == RenderPassStoreOp::NoneQCOM ? VK_ATTACHMENT_STORE_OP_NONE_QCOM
118 : static_cast<VkAttachmentStoreOp>(storeOp);
119 }
120
// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus one
// depth/stencil attachment and one depth/stencil resolve attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 2;
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
using FramebufferAttachmentMask = angle::BitSet<kMaxFramebufferAttachments>;

// Non-resolve attachments only: IMPLEMENTATION_MAX_DRAW_BUFFERS color attachments plus one
// depth/stencil attachment.
constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
using FramebufferNonResolveAttachmentMask = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;
134
// Packed description of a render pass: attachment formats, sample and view counts, resolve and
// unresolve attachment masks, and assorted flags.  Instances are hashed and compared as raw
// memory to key the render pass cache, so the layout is kept tightly packed (see the 16-byte
// static_assert below the class) and must contain no uninitialized padding.
class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment.  Used when optimizing blit through resolve attachment to
    // temporarily pack a resolve attachment and then remove it.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthStencilResolveAttachment();
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthStencilUnresolveAttachment(bool unresolveDepth, bool unresolveStencil);
    void removeDepthStencilUnresolveAttachment();

    void setWriteControlMode(gl::SrgbWriteControlMode mode);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    // The depth/stencil attachment, if present, immediately follows the color attachments.
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const;
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }
    bool hasDepthStencilResolveAttachment() const { return mResolveDepthStencil; }
    bool hasDepthStencilUnresolveAttachment() const { return mUnresolveDepth || mUnresolveStencil; }
    bool hasDepthUnresolveAttachment() const { return mUnresolveDepth; }
    bool hasStencilUnresolveAttachment() const { return mUnresolveStencil; }
    gl::SrgbWriteControlMode getSRGBWriteControlMode() const
    {
        return static_cast<gl::SrgbWriteControlMode>(mSrgbWriteControl);
    }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples) { mSamples = static_cast<uint8_t>(samples); }
    uint8_t samples() const { return mSamples; }

    void setViewCount(GLsizei viewCount) { mViewCount = static_cast<uint8_t>(viewCount); }
    uint8_t viewCount() const { return mViewCount; }

    void setFramebufferFetchMode(bool hasFramebufferFetch)
    {
        mHasFramebufferFetch = hasFramebufferFetch;
    }
    bool getFramebufferFetchMode() const { return mHasFramebufferFetch; }

    void updateRenderToTexture(bool isRenderToTexture) { mIsRenderToTexture = isRenderToTexture; }
    bool isRenderToTexture() const { return mIsRenderToTexture; }

    // Return the format of the attachment at the given GL index (see mAttachmentFormats below).
    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange;

    // Multiview
    uint8_t mViewCount;

    // sRGB
    uint8_t mSrgbWriteControl : 1;

    // Framebuffer fetch
    uint8_t mHasFramebufferFetch : 1;

    // Multisampled render to texture
    uint8_t mIsRenderToTexture : 1;
    uint8_t mResolveDepthStencil : 1;
    uint8_t mUnresolveDepth : 1;
    uint8_t mUnresolveStencil : 1;

    // Available space for expansion.
    uint8_t mPadding1 : 2;
    uint8_t mPadding2;

    // Whether each color attachment has a corresponding resolve attachment.  Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass.  This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets.  This operation is referred to as "unresolve".
    //
    // Unused when VK_EXT_multisampled_render_to_single_sampled is available.
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments.  They use the same
    // formats, so they are not specified in this array.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;
};
282
bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

// RenderPassDesc is hashed/compared as raw memory; lock its size down so any member addition
// that grows it is caught at compile time.
constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 16, "Size check failed");

// Load/store ops and initial/final layouts for a single render pass attachment, packed in 4 bytes.
struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not.  This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether.  Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules.
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    uint16_t padding1 : 6;

    // 4-bits to force pad the structure to exactly 2 bytes.  Note that we currently don't support
    // any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 4;
    uint16_t finalLayout : 4;
    uint16_t padding2 : 8;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");
313
class PackedAttachmentIndex;

// Load/store ops for every attachment of a render pass, indexed by packed (Vulkan) attachment
// index.  Hashed alongside RenderPassDesc when looking up cached render passes.
class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const;
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    void setOps(PackedAttachmentIndex index, VkAttachmentLoadOp loadOp, RenderPassStoreOp storeOp);
    void setStencilOps(PackedAttachmentIndex index,
                       VkAttachmentLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");
352
// A single vertex attribute, packed into 6 bytes: format, instancing divisor, relative offset,
// compressed flag and stride.
struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // Desktop drivers support
    uint16_t offset : kAttributeOffsetMaxBits;

    uint16_t compressed : 1;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful.  Note that stride limits are
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

// All vertex attributes; the first section of the packed graphics pipeline description.
struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");
379
// Rasterization and multisample flags, packed into a single 32-bit word.
struct RasterizationStateBits final
{
    // Note: Currently only 2 subpasses possible, so there are 5 bits in subpass that can be
    // repurposed.
    uint32_t subpass : 6;
    uint32_t depthClampEnable : 1;
    uint32_t rasterizationDiscardEnable : 1;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

// Rasterization and multisample pipeline state; part of the packed pipeline description.
struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there's no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");
416
// Stencil fail/pass/depth-fail ops and the compare function for one face, packed in 2 bytes.
struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

// Per-face stencil ops plus the compare and write masks, packed in 4 bytes.
struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");
437
// Depth/stencil test enable flags, packed in one byte.
struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // these only need one bit each. the extra is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

// We are borrowing three bits here for surface rotation, even though it has nothing to do with
// depth stencil.
struct DepthCompareOpAndSurfaceRotation final
{
    uint8_t depthCompareOp : 4;
    uint8_t surfaceRotation : 3;
    uint8_t padding : 1;
};
constexpr size_t kDepthCompareOpAndSurfaceRotationSize = sizeof(DepthCompareOpAndSurfaceRotation);
static_assert(kDepthCompareOpAndSurfaceRotationSize == 1, "Size check failed");
459
// Depth/stencil pipeline state (plus the borrowed surface rotation bits); part of the packed
// pipeline description.
struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    DepthCompareOpAndSurfaceRotation depthCompareOpAndSurfaceRotation;

    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
// Every SurfaceRotation value must fit in the 3-bit surfaceRotation field above.
static_assert(static_cast<int>(SurfaceRotation::EnumCount) <= 8, "Size check failed");
476
// Logic op enable bit and op, packed in one byte.
struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

// Per-attachment blend factors and equations, packed in 4 bytes.
struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");
498
// Primitive topology, tessellation patch size and primitive restart, packed in 2 bytes.
struct PrimitiveState final
{
    uint16_t topology : 9;
    uint16_t patchVertices : 6;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");

// Input assembly and blend pipeline state; part of the packed pipeline description.
struct PackedInputAssemblyAndColorBlendStateInfo final
{
    // 4 bits of color write mask per draw buffer; two masks packed per byte.
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

// Drawable dimensions, packed in 4 bytes.
struct PackedExtent final
{
    uint16_t width;
    uint16_t height;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
528
// Sum of the sizes of all sub-structures of the packed pipeline description; used both to verify
// that GraphicsPipelineDesc has no padding and to size the transition bit set below.
constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize +
    sizeof(PackedExtent);

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
542
// State changes are applied through the update methods.  Each update method can also have a
// sibling method that applies the update without marking a state transition.  The non-transition
// update methods are used for internal shader pipelines.  Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
//
// The description is hashed and compared as raw memory (see getPtr and the size static_assert
// below the class), so members must stay tightly packed with no padding.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults(const ContextVk *contextVk);

    // For custom comparisons.  Valid because the description is tightly packed.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    // Create the VkPipeline described by this object.
    angle::Result initializePipeline(ContextVk *contextVk,
                                     const PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     const ShaderModule *geometryModule,
                                     const ShaderModule *tessControlModule,
                                     const ShaderModule *tessEvaluationModule,
                                     const SpecializationConstants &specConsts,
                                     Pipeline *pipelineOut) const;

    // Vertex input state.  For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           bool compressed,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
                            gl::DrawBufferMask blendEnabledMask);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendStateExt &blendStateExt);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendStateExt &blendStateExt);
    void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                            const gl::DrawBufferMask &alphaMask,
                            const gl::DrawBufferMask &enabledDrawBuffers);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
                               gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                               const gl::DrawBufferMask &alphaMask,
                               const gl::DrawBufferMask &enabledDrawBuffers);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setDepthClampEnabled(bool enabled);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Tessellation
    void updatePatchVertices(GraphicsPipelineTransitionBits *transition, GLuint value);

    // Subpass
    void resetSubpass(GraphicsPipelineTransitionBits *transition);
    void nextSubpass(GraphicsPipelineTransitionBits *transition);
    void setSubpass(uint32_t subpass);
    uint32_t getSubpass() const;

    void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
                               const SurfaceRotation surfaceRotation);
    SurfaceRotation getSurfaceRotation() const
    {
        return static_cast<SurfaceRotation>(
            mDepthStencilStateInfo.depthCompareOpAndSurfaceRotation.surfaceRotation);
    }

    void updateDrawableSize(GraphicsPipelineTransitionBits *transition,
                            uint32_t width,
                            uint32_t height);
    const PackedExtent &getDrawableSize() const { return mDrawableSize; }

  private:
    void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);

    // Note: the members below must collectively add up to kGraphicsPipelineDescSumOfSizes with no
    // padding; this is verified by the static_assert following the class.
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    PackedExtent mDrawableSize;
};
724
// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

// Upper bound on the number of bindings in a single descriptor set layout.
constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;
738
// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    // Hash/equality so this desc can key a cache map (see DescriptorSetLayoutCache).
    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    // Record one binding at |bindingIndex| in the packed layout.
    void update(uint32_t bindingIndex,
                VkDescriptorType type,
                uint32_t count,
                VkShaderStageFlags stages,
                const Sampler *immutableSampler);

    // Expand the packed representation back into Vulkan-consumable arrays.
    void unpackBindings(DescriptorSetLayoutBindingVector *bindings,
                        std::vector<VkSampler> *immutableSamplers) const;

  private:
    // There is a small risk of an issue if the sampler cache is evicted but not the descriptor
    // cache we would have an invalid handle here. Thus propose follow-up work:
    // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
        uint32_t pad;    // Explicit padding so the struct is gap-free at 16 bytes.
        VkSampler immutableSampler;
    };

    // 4x 32bit
    static_assert(sizeof(PackedDescriptorSetBinding) == 16, "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
782
// The following are for caching descriptor set layouts. Limited to max four descriptor set layouts.
// This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

// Packed push constant range; the owning shader stage is implied by the slot in
// PushConstantRangeArray (a per-shader-stage map).
struct PackedPushConstantRange
{
    uint32_t offset;
    uint32_t size;
};

template <typename T>
using DescriptorSetArray = angle::PackedEnumMap<DescriptorSetIndex, T>;
using DescriptorSetLayoutPointerArray = DescriptorSetArray<BindingPointer<DescriptorSetLayout>>;
template <typename T>
using PushConstantRangeArray = gl::ShaderMap<T>;
798
// Packed description of a pipeline layout: one descriptor set layout desc per set index plus
// per-stage push constant ranges.  Used as the key of PipelineLayoutCache.
class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
                                   const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);

    const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;

  private:
    DescriptorSetArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
    static_assert(sizeof(decltype(mPushConstantRanges)) ==
                      (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) == (sizeof(DescriptorSetArray<DescriptorSetLayoutDesc>) +
                                             sizeof(gl::ShaderMap<PackedPushConstantRange>)),
              "Unexpected Size");
833
// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    SamplerDesc(ContextVk *contextVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                uint64_t externalFormat,
                angle::FormatID formatID);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    // Repack this desc from the given GL sampler state.
    void update(ContextVk *contextVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                uint64_t externalFormat,
                angle::FormatID formatID);
    void reset();
    // Create the corresponding vk::Sampler object from this description.
    angle::Result init(ContextVk *contextVk, Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // If the sampler needs to convert the image content (e.g. from YUV to RGB) then
    // mExternalOrVkFormat will be non-zero. The value is either the external format
    // as returned by vkGetAndroidHardwareBufferPropertiesANDROID or a YUV VkFormat.
    // The format is guaranteed to be unique in that any image with the same mExternalOrVkFormat
    // can use the same conversion sampler. Thus mExternalOrVkFormat along with mIsExternalFormat
    // works as a Serial() used elsewhere in ANGLE.
    uint64_t mExternalOrVkFormat;

    // 16 bits for modes + states.
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op. (8 possible values)
    uint16_t mCompareOp : 3;

    // 1 bit to identify if external format is used
    uint16_t mIsExternalFormat : 1;

    uint16_t mPadding : 14;

    // Values from angle::ColorGeneric::Type. Float is 0 and others are 1.
    uint16_t mBorderColorType : 1;

    // 16*8 bits for BorderColor
    angle::ColorF mBorderColor;

    // 32 bits reserved for future use.
    uint32_t mReserved;
};

// Total: 16 (floats) + 8 (format) + 4 (bitfields) + 16 (border color) + 4 (reserved).
static_assert(sizeof(SamplerDesc) == 48, "Unexpected SamplerDesc size");
909
// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

// One cached pipeline transition: the dirty |bits|, the destination description |desc|, and
// the |target| pipeline.  Searched by PipelineHelper::findTransition.
struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}
939
GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,GraphicsPipelineTransitionBits bitsB,const GraphicsPipelineDesc & descA,const GraphicsPipelineDesc & descB)940 ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
941 GraphicsPipelineTransitionBits bitsB,
942 const GraphicsPipelineDesc &descA,
943 const GraphicsPipelineDesc &descB)
944 {
945 if (bitsA != bitsB)
946 return false;
947
948 // We currently mask over 4 bytes of the pipeline description with each dirty bit.
949 // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
950 // of the code faster. The for loop below would scan over twice as many bits per iteration.
951 // But there may be more collisions between the same dirty bit masks leading to different
952 // transitions. Thus there may be additional cost when applications use many transitions.
953 // We should revisit this in the future and investigate using different bit widths.
954 static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
955
956 const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
957 const uint32_t *rawPtrB = descB.getPtr<uint32_t>();
958
959 for (size_t dirtyBit : bitsA)
960 {
961 if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
962 return false;
963 }
964
965 return true;
966 }
967
// Owns a vk::Pipeline plus the list of known transitions out of it, so state changes can be
// resolved to an existing pipeline without re-hashing the full description.
class PipelineHelper final : angle::NonCopyable
{
  public:
    PipelineHelper();
    ~PipelineHelper();
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    void updateSerial(Serial serial) { mSerial = serial; }
    bool valid() const { return mPipeline.valid(); }
    Serial getSerial() const { return mSerial; }
    Pipeline &getPipeline() { return mPipeline; }

    // Linear search for a cached transition matching |bits|/|desc|.  On success, writes the
    // destination pipeline to |pipelineOut| and returns true.
    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Serial mSerial;
    Pipeline mPipeline;
};
1008
PipelineHelper(Pipeline && pipeline)1009 ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}
1010
// A 32-bit packed description of an image view's subresource range, used in cache keys.
struct ImageSubresourceRange
{
    // GL max is 1000 (fits in 10 bits).
    uint32_t level : 10;
    // Max 31 levels (2 ** 5 - 1). Can store levelCount-1 if we need to save another bit.
    uint32_t levelCount : 5;
    // Implementation max is 2048 (11 bits).
    uint32_t layer : 12;
    // One of vk::LayerMode values. If 0, it means all layers. Otherwise it's the count of layers
    // which is usually 1, except for multiview in which case it can be up to
    // gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS.
    uint32_t layerMode : 3;
    // Values from vk::SrgbDecodeMode. Unused with draw views.
    uint32_t srgbDecodeMode : 1;
    // For read views: Values from gl::SrgbOverride, either Default or SRGB.
    // For draw views: Values from gl::SrgbWriteControlMode.
    uint32_t srgbMode : 1;

    static_assert(gl::IMPLEMENTATION_MAX_TEXTURE_LEVELS < (1 << 5),
                  "Not enough bits for level count");
    static_assert(gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS <= (1 << 12),
                  "Not enough bits for layer index");
    static_assert(gl::IMPLEMENTATION_ANGLE_MULTIVIEW_MAX_VIEWS <= (1 << 3),
                  "Not enough bits for layer count");
};

// The bitfields above must sum to exactly 32 bits; std::hash below relies on this.
static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t), "Size mismatch");
1038
1039 inline bool operator==(const ImageSubresourceRange &a, const ImageSubresourceRange &b)
1040 {
1041 return a.level == b.level && a.levelCount == b.levelCount && a.layer == b.layer &&
1042 a.layerMode == b.layerMode && a.srgbDecodeMode == b.srgbDecodeMode &&
1043 a.srgbMode == b.srgbMode;
1044 }
1045
// All-zero sentinel used to mark an unset/invalid subresource range.
constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {0, 0, 0, 0, 0, 0};

// Pairs a view serial with its subresource range; together they uniquely identify a view.
struct ImageOrBufferViewSubresourceSerial
{
    ImageOrBufferViewSerial viewSerial;
    ImageSubresourceRange subresource;
};

// Must stay 64 bits so it can be stored and compared cheaply in cache keys.
static_assert(sizeof(ImageOrBufferViewSubresourceSerial) == sizeof(uint64_t), "Size mismatch");

constexpr ImageOrBufferViewSubresourceSerial kInvalidImageOrBufferViewSubresourceSerial = {
    kInvalidImageOrBufferViewSerial, kInvalidImageSubresourceRange};
1058
// Key describing the set of bound textures (view serial + sampler serial per texture unit),
// used to cache texture descriptor sets.
class TextureDescriptorDesc
{
  public:
    TextureDescriptorDesc();
    ~TextureDescriptorDesc();

    TextureDescriptorDesc(const TextureDescriptorDesc &other);
    TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);

    // Record the view/sampler serials for texture unit |index|.
    void update(size_t index,
                ImageOrBufferViewSubresourceSerial viewSerial,
                SamplerSerial samplerSerial);
    size_t hash() const;
    void reset();

    bool operator==(const TextureDescriptorDesc &other) const;

    // Note: this is an exclusive index. If there is one index it will return "1".
    uint32_t getMaxIndex() const { return mMaxIndex; }

  private:
    uint32_t mMaxIndex;

    ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
    struct TexUnitSerials
    {
        ImageOrBufferViewSubresourceSerial view;
        SamplerSerial sampler;
    };
    gl::ActiveTextureArray<TexUnitSerials> mSerials;
    ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
};
1091
// Key describing the default-uniform buffer plus transform feedback buffers bound for the
// UniformsAndXfb descriptor set.
class UniformsAndXfbDescriptorDesc
{
  public:
    UniformsAndXfbDescriptorDesc();
    ~UniformsAndXfbDescriptorDesc();

    UniformsAndXfbDescriptorDesc(const UniformsAndXfbDescriptorDesc &other);
    UniformsAndXfbDescriptorDesc &operator=(const UniformsAndXfbDescriptorDesc &other);

    BufferSerial getDefaultUniformBufferSerial() const
    {
        return mBufferSerials[kDefaultUniformBufferIndex];
    }
    void updateDefaultUniformBuffer(BufferSerial bufferSerial)
    {
        mBufferSerials[kDefaultUniformBufferIndex] = bufferSerial;
        // The default uniform buffer always occupies slot 0, so the count is at least 1.
        mBufferCount = std::max(mBufferCount, static_cast<uint32_t>(1));
    }
    void updateTransformFeedbackBuffer(size_t xfbIndex,
                                       BufferSerial bufferSerial,
                                       VkDeviceSize bufferOffset)
    {
        // Xfb buffers are stored after the default uniform buffer (slot 0).
        uint32_t bufferIndex = static_cast<uint32_t>(xfbIndex) + 1;
        mBufferSerials[bufferIndex] = bufferSerial;

        // Offsets are packed into 32 bits; the offset must fit.
        ASSERT(static_cast<uint64_t>(bufferOffset) <=
               static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
        mXfbBufferOffsets[xfbIndex] = static_cast<uint32_t>(bufferOffset);

        mBufferCount = std::max(mBufferCount, (bufferIndex + 1));
    }
    size_t hash() const;
    void reset();

    bool operator==(const UniformsAndXfbDescriptorDesc &other) const;

  private:
    uint32_t mBufferCount;
    // The array index 0 is used for default uniform buffer
    static constexpr size_t kDefaultUniformBufferIndex = 0;
    static constexpr size_t kDefaultUniformBufferCount = 1;
    static constexpr size_t kMaxBufferCount =
        kDefaultUniformBufferCount + gl::IMPLEMENTATION_MAX_TRANSFORM_FEEDBACK_BUFFERS;
    std::array<BufferSerial, kMaxBufferCount> mBufferSerials;
    std::array<uint32_t, gl::IMPLEMENTATION_MAX_TRANSFORM_FEEDBACK_BUFFERS> mXfbBufferOffsets;
};
1138
// Key describing the shader-resource buffers (uniform/storage blocks etc.) as an append-only
// stream of 32-bit words; callers serialize their bindings via the append helpers.
class ShaderBuffersDescriptorDesc
{
  public:
    ShaderBuffersDescriptorDesc();
    ~ShaderBuffersDescriptorDesc();

    ShaderBuffersDescriptorDesc(const ShaderBuffersDescriptorDesc &other);
    ShaderBuffersDescriptorDesc &operator=(const ShaderBuffersDescriptorDesc &other);

    size_t hash() const;
    void reset();

    bool operator==(const ShaderBuffersDescriptorDesc &other) const;

    ANGLE_INLINE void appendBufferSerial(BufferSerial bufferSerial)
    {
        mPayload.push_back(bufferSerial.getValue());
    }
    ANGLE_INLINE void append32BitValue(uint32_t value) { mPayload.push_back(value); }

  private:
    // After a preliminary minimum size, use heap memory.
    static constexpr size_t kFastBufferWordLimit = 32;
    angle::FastVector<uint32_t, kFastBufferWordLimit> mPayload;
};
1164
// In the FramebufferDesc object:
//  - Depth/stencil serial is at index 0
//  - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
//  - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
//  - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
//                                        gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
constexpr size_t kFramebufferDescColorIndexOffset  = kFramebufferDescDepthStencilIndex + 1;
constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
    kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
constexpr size_t kFramebufferDescColorResolveIndexOffset =
    kFramebufferDescDepthStencilResolveIndexOffset + 1;

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
1180
// Packed description of a framebuffer's attachments (see the index layout above), used as the
// key of the framebuffer cache.
class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void updateColor(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateColorResolve(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
    void updateDepthStencil(ImageOrBufferViewSubresourceSerial serial);
    void updateDepthStencilResolve(ImageOrBufferViewSubresourceSerial serial);
    ANGLE_INLINE void setWriteControlMode(gl::SrgbWriteControlMode mode)
    {
        mSrgbWriteControlMode = static_cast<uint16_t>(mode);
    }
    void updateIsMultiview(bool isMultiview) { mIsMultiview = isMultiview; }
    size_t hash() const;

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

    ImageOrBufferViewSubresourceSerial getColorImageViewSerial(uint32_t index)
    {
        ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
        return mSerials[kFramebufferDescColorIndexOffset + index];
    }

    FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;
    // The packed bit maps 1 -> Linear; any other value means Default.
    ANGLE_INLINE gl::SrgbWriteControlMode getWriteControlMode() const
    {
        return (mSrgbWriteControlMode == 1) ? gl::SrgbWriteControlMode::Linear
                                            : gl::SrgbWriteControlMode::Default;
    }

    void updateLayerCount(uint32_t layerCount);
    uint32_t getLayerCount() const { return mLayerCount; }
    void updateFramebufferFetchMode(bool hasFramebufferFetch);

    bool isMultiview() const { return mIsMultiview; }

    void updateRenderToTexture(bool isRenderToTexture);

  private:
    void reset();
    void update(uint32_t index, ImageOrBufferViewSubresourceSerial serial);

    // Note: this is an exclusive index. If there is one index it will be "1".
    // Maximum value is 18
    uint16_t mMaxIndex : 5;
    uint16_t mHasFramebufferFetch : 1;
    static_assert(gl::IMPLEMENTATION_MAX_FRAMEBUFFER_LAYERS < (1 << 9) - 1,
                  "Not enough bits for mLayerCount");

    uint16_t mLayerCount : 9;

    uint16_t mSrgbWriteControlMode : 1;

    // If the render pass contains an initial subpass to unresolve a number of attachments, the
    // subpass description is derived from the following mask, specifying which attachments need
    // to be unresolved. Includes both color and depth/stencil attachments.
    uint16_t mUnresolveAttachmentMask : kMaxFramebufferNonResolveAttachments;

    // Whether this is a multisampled-render-to-single-sampled framebuffer. Only used when using
    // VK_EXT_multisampled_render_to_single_sampled. Only one bit is used and the rest is padding.
    uint16_t mIsRenderToTexture : 15 - kMaxFramebufferNonResolveAttachments;

    uint16_t mIsMultiview : 1;

    FramebufferAttachmentArray<ImageOrBufferViewSubresourceSerial> mSerials;
};

constexpr size_t kFramebufferDescSize = sizeof(FramebufferDesc);
static_assert(kFramebufferDescSize == 148, "Size check failed");
1258
// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

// The SamplerHelper allows a Sampler to be coupled with a serial.
// Must be included before we declare SamplerCache.
class SamplerHelper final : angle::NonCopyable
{
  public:
    SamplerHelper(ContextVk *contextVk);
    ~SamplerHelper();

    // Move-only, matching the underlying vk::Sampler ownership.
    explicit SamplerHelper(SamplerHelper &&samplerHelper);
    SamplerHelper &operator=(SamplerHelper &&rhs);

    bool valid() const { return mSampler.valid(); }
    const Sampler &get() const { return mSampler; }
    Sampler &get() { return mSampler; }
    SamplerSerial getSamplerSerial() const { return mSamplerSerial; }

  private:
    Sampler mSampler;
    SamplerSerial mSamplerSerial;
};

using RefCountedSampler = RefCounted<SamplerHelper>;
using SamplerBinding    = BindingPointer<SamplerHelper>;
1285
// Couples a RenderPass with the perf counters gathered for it; stored in RenderPassCache.
class RenderPassHelper final : angle::NonCopyable
{
  public:
    RenderPassHelper();
    ~RenderPassHelper();

    RenderPassHelper(RenderPassHelper &&other);
    RenderPassHelper &operator=(RenderPassHelper &&other);

    void destroy(VkDevice device);

    const RenderPass &getRenderPass() const;
    RenderPass &getRenderPass();

    const RenderPassPerfCounters &getPerfCounters() const;
    RenderPassPerfCounters &getPerfCounters();

  private:
    RenderPass mRenderPass;
    RenderPassPerfCounters mPerfCounters;
};
1307 } // namespace vk
1308 } // namespace rx
1309
// Introduce std::hash for the above classes.  Each desc class supplies its own hash(); these
// specializations simply forward to it so the descs can key std/angle unordered maps.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::ImageSubresourceRange>
{
    size_t operator()(const rx::vk::ImageSubresourceRange &key) const
    {
        // The struct is exactly 32 bits (static_assert at its definition); hash its raw bits.
        return *reinterpret_cast<const uint32_t *>(&key);
    }
};

template <>
struct hash<rx::vk::TextureDescriptorDesc>
{
    size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::UniformsAndXfbDescriptorDesc>
{
    size_t operator()(const rx::vk::UniformsAndXfbDescriptorDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::ShaderBuffersDescriptorDesc>
{
    size_t operator()(const rx::vk::ShaderBuffersDescriptorDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};

// See Resource Serial types defined in vk_utils.h.
#define ANGLE_HASH_VK_SERIAL(Type)                                                          \
    template <>                                                                             \
    struct hash<rx::vk::Type##Serial>                                                       \
    {                                                                                       \
        size_t operator()(const rx::vk::Type##Serial &key) const { return key.getValue(); } \
    };

ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)

}  // namespace std
1393
1394 namespace rx
1395 {
// Cache types for various Vulkan objects.  Used as the template argument of HasCacheStats so
// each cache reports its hit/miss counters under the right label.
enum class VulkanCacheType
{
    CompatibleRenderPass,
    RenderPassWithOps,
    GraphicsPipeline,
    PipelineLayout,
    Sampler,
    SamplerYcbcrConversion,
    DescriptorSetLayout,
    DriverUniformsDescriptors,
    TextureDescriptors,
    UniformsAndXfbDescriptors,
    ShaderBuffersDescriptors,
    Framebuffer,
    EnumCount
};
1413
1414 // Base class for all caches. Provides cache hit and miss counters.
1415 class CacheStats final : angle::NonCopyable
1416 {
1417 public:
1418 CacheStats() { reset(); }
1419 ~CacheStats() {}
1420
1421 ANGLE_INLINE void hit() { mHitCount++; }
1422 ANGLE_INLINE void miss() { mMissCount++; }
1423 ANGLE_INLINE void accumulate(const CacheStats &stats)
1424 {
1425 mHitCount += stats.mHitCount;
1426 mMissCount += stats.mMissCount;
1427 }
1428
1429 uint64_t getHitCount() const { return mHitCount; }
1430 uint64_t getMissCount() const { return mMissCount; }
1431
1432 ANGLE_INLINE double getHitRatio() const
1433 {
1434 if (mHitCount + mMissCount == 0)
1435 {
1436 return 0;
1437 }
1438 else
1439 {
1440 return static_cast<double>(mHitCount) / (mHitCount + mMissCount);
1441 }
1442 }
1443
1444 void reset()
1445 {
1446 mHitCount = 0;
1447 mMissCount = 0;
1448 }
1449
1450 private:
1451 uint64_t mHitCount;
1452 uint64_t mMissCount;
1453 };
1454
// Mixin that gives a cache a CacheStats member and a way to hand the accumulated counters to a
// collector (resetting them afterwards).
template <VulkanCacheType CacheType>
class HasCacheStats : angle::NonCopyable
{
  public:
    template <typename Accumulator>
    void accumulateCacheStats(Accumulator *accum)
    {
        accum->accumulateCacheStats(CacheType, mCacheStats);
        // Counters are cumulative in the accumulator; restart local counting.
        mCacheStats.reset();
    }

  protected:
    HasCacheStats() = default;
    virtual ~HasCacheStats() = default;

    CacheStats mCacheStats;
};
1472
// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(RendererVk *rendererVk);

    // Returns any render pass compatible with |desc|, creating one on a cache miss.
    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       const vk::RenderPassDesc &desc,
                                                       vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.  Any entry is compatible: the inner cache
            // only varies by load/store ops and layouts.
            *renderPassOut = &innerCache.begin()->second.getRenderPass();
            mCompatibleRenderPassCacheStats.hit();
            return angle::Result::Continue;
        }

        mCompatibleRenderPassCacheStats.miss();
        return addRenderPass(contextVk, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(ContextVk *contextVk,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       vk::RenderPass **renderPassOut);

  private:
    angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
                                           const vk::RenderPassDesc &desc,
                                           const vk::AttachmentOpsArray &attachmentOps,
                                           bool updatePerfCounters,
                                           vk::RenderPass **renderPassOut);

    angle::Result addRenderPass(ContextVk *contextVk,
                                const vk::RenderPassDesc &desc,
                                vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    using InnerCache = angle::HashMap<vk::AttachmentOpsArray, vk::RenderPassHelper>;
    using OuterCache = angle::HashMap<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
    CacheStats mCompatibleRenderPassCacheStats;
    CacheStats mRenderPassWithOpsCacheStats;
};
1527
// TODO(jmadill): Add cache trimming/eviction.
class GraphicsPipelineCache final : public HasCacheStats<VulkanCacheType::GraphicsPipeline>
{
  public:
    GraphicsPipelineCache();
    ~GraphicsPipelineCache() override;

    void destroy(RendererVk *rendererVk);
    void release(ContextVk *context);

    // Pre-seed the cache with an externally created pipeline.
    void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);

    // Look up the pipeline keyed by |desc|, creating and inserting it on a miss.  On success
    // returns both the cached desc pointer (stable map key) and the pipeline helper.
    ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
                                           const vk::PipelineCache &pipelineCacheVk,
                                           const vk::RenderPass &compatibleRenderPass,
                                           const vk::PipelineLayout &pipelineLayout,
                                           const gl::AttributesMask &activeAttribLocationsMask,
                                           const gl::ComponentTypeMask &programAttribsTypeMask,
                                           const vk::ShaderModule *vertexModule,
                                           const vk::ShaderModule *fragmentModule,
                                           const vk::ShaderModule *geometryModule,
                                           const vk::ShaderModule *tessControlModule,
                                           const vk::ShaderModule *tessEvaluationModule,
                                           const vk::SpecializationConstants &specConsts,
                                           const vk::GraphicsPipelineDesc &desc,
                                           const vk::GraphicsPipelineDesc **descPtrOut,
                                           vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item != mPayload.end())
        {
            *descPtrOut  = &item->first;
            *pipelineOut = &item->second;
            mCacheStats.hit();
            return angle::Result::Continue;
        }

        mCacheStats.miss();
        return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
                              activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
                              fragmentModule, geometryModule, tessControlModule,
                              tessEvaluationModule, specConsts, desc, descPtrOut, pipelineOut);
    }

  private:
    angle::Result insertPipeline(ContextVk *contextVk,
                                 const vk::PipelineCache &pipelineCacheVk,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const gl::AttributesMask &activeAttribLocationsMask,
                                 const gl::ComponentTypeMask &programAttribsTypeMask,
                                 const vk::ShaderModule *vertexModule,
                                 const vk::ShaderModule *fragmentModule,
                                 const vk::ShaderModule *geometryModule,
                                 const vk::ShaderModule *tessControlModule,
                                 const vk::ShaderModule *tessEvaluationModule,
                                 const vk::SpecializationConstants &specConsts,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};
1591
// Cache of ref-counted descriptor set layouts keyed by DescriptorSetLayoutDesc.
class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(RendererVk *rendererVk);

    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
    CacheStats mCacheStats;
};
1609
// Cache of ref-counted pipeline layouts keyed by PipelineLayoutDesc.
class PipelineLayoutCache final : public HasCacheStats<VulkanCacheType::PipelineLayout>
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache() override;

    void destroy(RendererVk *rendererVk);

    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};
1626
// Cache of ref-counted Vulkan samplers, keyed by the packed SamplerDesc.
class SamplerCache final : public HasCacheStats<VulkanCacheType::Sampler>
{
  public:
    SamplerCache();
    ~SamplerCache() override;

    // Releases all cached entries (implementation in the .cpp).
    void destroy(RendererVk *rendererVk);

    // Returns the sampler matching |desc| through |samplerOut|.
    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::SamplerBinding *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
};
1642
// Cache of ref-counted VkSamplerYcbcrConversion objects.  Conversions are keyed either by an
// external format (uint64_t, e.g. from Android AHardwareBuffers) or by a regular VkFormat;
// |isExternalFormat| on the public entry points selects which of the two maps below is used.
class SamplerYcbcrConversionCache final
    : public HasCacheStats<VulkanCacheType::SamplerYcbcrConversion>
{
  public:
    SamplerYcbcrConversionCache();
    ~SamplerYcbcrConversionCache() override;

    // Releases all cached entries (implementation in the .cpp).
    void destroy(RendererVk *rendererVk);

    // Returns the conversion matching |externalOrVkFormat| through |yuvConversionOut|,
    // creating it from |yuvConversionCreateInfo| as needed (implementation in the .cpp).
    angle::Result getYuvConversion(
        vk::Context *context,
        uint64_t externalOrVkFormat,
        bool isExternalFormat,
        const VkSamplerYcbcrConversionCreateInfo &yuvConversionCreateInfo,
        vk::BindingPointer<vk::SamplerYcbcrConversion> *yuvConversionOut);

    // Lookup-only variant returning the raw Vulkan handle for an already-cached conversion.
    VkSamplerYcbcrConversion getSamplerYcbcrConversion(uint64_t externalOrVkFormat,
                                                       bool isExternalFormat) const;

  private:
    template <typename T>
    using SamplerYcbcrConversionMap = std::unordered_map<T, vk::RefCountedSamplerYcbcrConversion>;

    // Shared implementation of getYuvConversion() for either key type.
    template <typename T>
    angle::Result getYuvConversionImpl(
        vk::Context *context,
        T format,
        SamplerYcbcrConversionMap<T> *payload,
        const VkSamplerYcbcrConversionCreateInfo &yuvConversionCreateInfo,
        vk::BindingPointer<vk::SamplerYcbcrConversion> *yuvConversionOut);

    // Shared implementation of getSamplerYcbcrConversion() for either key type.
    template <typename T>
    VkSamplerYcbcrConversion getSamplerYcbcrConversionImpl(
        T format,
        const SamplerYcbcrConversionMap<T> &payload) const;

    // Conversions keyed by external format.
    SamplerYcbcrConversionMap<uint64_t> mExternalFormatPayload;
    // Conversions keyed by regular VkFormat.
    SamplerYcbcrConversionMap<VkFormat> mVkFormatPayload;
};
1682
1683 // DescriptorSet Cache
1684 class DriverUniformsDescriptorSetCache final
1685 : public HasCacheStats<VulkanCacheType::DriverUniformsDescriptors>
1686 {
1687 public:
1688 DriverUniformsDescriptorSetCache() = default;
1689 ~DriverUniformsDescriptorSetCache() override { ASSERT(mPayload.empty()); }
1690
1691 void destroy(RendererVk *rendererVk);
1692
1693 ANGLE_INLINE bool get(uint32_t serial, VkDescriptorSet *descriptorSet)
1694 {
1695 if (mPayload.get(serial, descriptorSet))
1696 {
1697 mCacheStats.hit();
1698 return true;
1699 }
1700 mCacheStats.miss();
1701 return false;
1702 }
1703
1704 ANGLE_INLINE void insert(uint32_t serial, VkDescriptorSet descriptorSet)
1705 {
1706 mPayload.insert(serial, descriptorSet);
1707 }
1708
1709 ANGLE_INLINE void clear() { mPayload.clear(); }
1710
1711 private:
1712 angle::FastIntegerMap<VkDescriptorSet> mPayload;
1713 };
1714
1715 // Templated Descriptors Cache
1716 template <typename Key, VulkanCacheType CacheType>
1717 class DescriptorSetCache final : public HasCacheStats<CacheType>
1718 {
1719 public:
1720 DescriptorSetCache() = default;
1721 ~DescriptorSetCache() override { ASSERT(mPayload.empty()); }
1722
1723 void destroy(RendererVk *rendererVk);
1724
1725 ANGLE_INLINE bool get(const Key &desc, VkDescriptorSet *descriptorSet)
1726 {
1727 auto iter = mPayload.find(desc);
1728 if (iter != mPayload.end())
1729 {
1730 *descriptorSet = iter->second;
1731 this->mCacheStats.hit();
1732 return true;
1733 }
1734 this->mCacheStats.miss();
1735 return false;
1736 }
1737
1738 ANGLE_INLINE void insert(const Key &desc, VkDescriptorSet descriptorSet)
1739 {
1740 mPayload.emplace(desc, descriptorSet);
1741 }
1742
1743 private:
1744 angle::HashMap<Key, VkDescriptorSet> mPayload;
1745 };
1746
// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage.  Currently, a maximum of three stages is
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
constexpr uint32_t kReservedDefaultUniformBindingCount = 3;
1753 } // namespace rx
1754
1755 #endif // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
1756