//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//
10
11 #ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
12 #define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
13
14 #include "common/Color.h"
15 #include "common/FixedVector.h"
16 #include "libANGLE/renderer/vulkan/ResourceVk.h"
17 #include "libANGLE/renderer/vulkan/vk_utils.h"
18
19 namespace rx
20 {
21
// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as following:
//
// - Set 0 contains the ANGLE driver uniforms at binding 0.  Note that driver uniforms are updated
//   only under rare circumstances, such as viewport or depth range change.  However, there is only
//   one binding in this set.  This set is placed before Set 1 containing transform feedback
//   buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this set.
//   Otherwise, as the layout of Set 1 changes (due to addition and removal of xfb buffers), and all
//   subsequent sets need to be rebound (due to Vulkan pipeline layout validation rules), we would
//   have needed to invalidateGraphicsDriverUniforms().
// - Set 1 contains uniform blocks created to encompass default uniforms.  1 binding is used per
//   pipeline stage.  Additionally, transform feedback buffers are bound from binding 2 and up.
// - Set 2 contains all textures (including texture buffers).
// - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers, images and image buffers.

// Enumerates the descriptor sets in the order they are bound, per the assignment described above.
enum class DescriptorSetIndex : uint32_t
{
    Internal,        // ANGLE driver uniforms or internal shaders
    UniformsAndXfb,  // Uniforms set index
    Texture,         // Textures set index
    ShaderResource,  // Other shader resources set index

    InvalidEnum,
    EnumCount = InvalidEnum,
};
49
50 namespace vk
51 {
52 class DynamicDescriptorPool;
53 class ImageHelper;
54 enum class ImageLayout;
55
// Ref-counted wrappers around cacheable Vulkan objects, shared between the caches below and
// their users.
using RefCountedDescriptorSetLayout = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout = RefCounted<PipelineLayout>;
using RefCountedSamplerYcbcrConversion = RefCounted<SamplerYcbcrConversion>;
59
// Helper macro that casts |rhs| to the (typically narrower) type of the bitfield |lhs|, then
// verifies with an ASSERT that casting the result back reproduces |rhs|, i.e. that no
// significant bits were dropped.  Used when packing values into the bitfield structs below.
#define SetBitField(lhs, rhs)                                                         \
    do                                                                                \
    {                                                                                 \
        auto ANGLE_LOCAL_VAR = rhs;                                                   \
        lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
        ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR);       \
    } while (0)
68
// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits.  Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits.  For example, boolean values could be represented by a single bit instead
// of a uint8_t.  However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations.  This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space.  This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
87 ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
88
// Tracks how a resource or attachment is used.  The enumerator order matters: a later
// value denotes a stronger access than an earlier one (see UpdateAccess below).
enum ResourceAccess
{
    Unused,
    ReadOnly,
    Write,
};

// Promote |*oldAccess| to |newAccess| when the latter is stronger; weaker or equal
// accesses leave the current value untouched.
inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
{
    if (*oldAccess < newAccess)
    {
        *oldAccess = newAccess;
    }
}
103
// Packed versions of VkAttachmentLoadOp/VkAttachmentStoreOp, small enough to fit the 2-bit
// fields of PackedAttachmentOpsDesc.  Note that the |None| enumerators do not alias the
// Vulkan VK_ATTACHMENT_*_OP_NONE_* values; presumably they are translated to the extension
// values where the render pass is created -- TODO confirm against vk_cache_utils.cpp.
enum class RenderPassLoadOp
{
    Load     = VK_ATTACHMENT_LOAD_OP_LOAD,
    Clear    = VK_ATTACHMENT_LOAD_OP_CLEAR,
    DontCare = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    None,
};
enum class RenderPassStoreOp
{
    Store    = VK_ATTACHMENT_STORE_OP_STORE,
    DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
    None,
};
117
// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus one
// depth/stencil attachment and one depth/stencil resolve attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 2;
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
using FramebufferAttachmentMask = angle::BitSet<kMaxFramebufferAttachments>;

// Maximum number of non-resolve attachments: every draw buffer plus the depth/stencil attachment.
constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
using FramebufferNonResolveAttachmentMask = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;
131
// Packed description of a render pass, used as a key in the RenderPass cache.  The layout is
// tightly packed (see the static_assert on kRenderPassDescSize below); be careful when adding
// or reordering members.
class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment.  Used when optimizing blit through resolve attachment to
    // temporarily pack a resolve attachment and then remove it.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthStencilResolveAttachment();
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthStencilUnresolveAttachment(bool unresolveDepth, bool unresolveStencil);
    void removeDepthStencilUnresolveAttachment();

    void setWriteControlMode(gl::SrgbWriteControlMode mode);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const;
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }
    bool hasDepthStencilResolveAttachment() const { return mResolveDepthStencil; }
    bool hasDepthStencilUnresolveAttachment() const { return mUnresolveDepth || mUnresolveStencil; }
    bool hasDepthUnresolveAttachment() const { return mUnresolveDepth; }
    bool hasStencilUnresolveAttachment() const { return mUnresolveStencil; }
    gl::SrgbWriteControlMode getSRGBWriteControlMode() const
    {
        return static_cast<gl::SrgbWriteControlMode>(mSrgbWriteControl);
    }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples) { mSamples = static_cast<uint8_t>(samples); }
    uint8_t samples() const { return mSamples; }

    void setViewCount(GLsizei viewCount) { mViewCount = static_cast<uint8_t>(viewCount); }
    uint8_t viewCount() const { return mViewCount; }

    void setFramebufferFetchMode(bool hasFramebufferFetch)
    {
        mHasFramebufferFetch = hasFramebufferFetch;
    }
    bool getFramebufferFetchMode() const { return mHasFramebufferFetch; }

    void updateRenderToTexture(bool isRenderToTexture) { mIsRenderToTexture = isRenderToTexture; }
    bool isRenderToTexture() const { return mIsRenderToTexture; }

    // Returns the packed format of the attachment at |index|.  Valid for GL color attachment
    // indices as well as the depth/stencil index (see depthStencilAttachmentIndex()).
    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange;

    // Multiview
    uint8_t mViewCount;

    // sRGB
    uint8_t mSrgbWriteControl : 1;

    // Framebuffer fetch
    uint8_t mHasFramebufferFetch : 1;

    // Multisampled render to texture
    uint8_t mIsRenderToTexture : 1;
    uint8_t mResolveDepthStencil : 1;
    uint8_t mUnresolveDepth : 1;
    uint8_t mUnresolveStencil : 1;

    // Available space for expansion.
    uint8_t mPadding1 : 2;
    uint8_t mPadding2;

    // Whether each color attachment has a corresponding resolve attachment.  Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass.  This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets.  This operation is referred to as "unresolve".
    //
    // Unused when VK_EXT_multisampled_render_to_single_sampled is available.
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments.  They use the same
    // formats, so they are not specified in this array.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 16, "Size check failed");
284
// Packed load/store ops and image layouts for a single render pass attachment.
struct PackedAttachmentOpsDesc final
{
    // RenderPassLoadOp is in range [0, 3], and RenderPassStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not.  This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether.  Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules.
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    uint16_t padding1 : 6;

    // 4-bits to force pad the structure to exactly 2 bytes.  Note that we currently don't support
    // any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 4;
    uint16_t finalLayout : 4;
    uint16_t padding2 : 8;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");

class PackedAttachmentIndex;
312
// Load/store ops and layouts for all attachments of a render pass, indexed by packed (Vulkan)
// attachment index.  Used alongside RenderPassDesc as part of the render pass cache key.
class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const;
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    void setOps(PackedAttachmentIndex index, RenderPassLoadOp loadOp, RenderPassStoreOp storeOp);
    void setStencilOps(PackedAttachmentIndex index,
                       RenderPassLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");
349
// Packed description of a single vertex attribute (format, divisor, offset and stride).
struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // Desktop drivers support
    uint16_t offset : kAttributeOffsetMaxBits;

    uint16_t compressed : 1;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful.  Note that stride limits are
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

// Vertex input state for all attribute slots.
struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");
376
// Boolean and small-enum rasterization state, packed into a single 32-bit word.
struct RasterizationStateBits final
{
    // Note: Currently only 2 subpasses possible, so there are 5 bits in subpass that can be
    // repurposed.
    uint32_t subpass : 6;
    uint32_t depthClampEnable : 1;
    uint32_t rasterizationDiscardEnable : 1;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

// Rasterization and multisample state for the packed pipeline description.
struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there's no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");
413
// Stencil operations for one face, 4 bits per VkStencilOp/VkCompareOp value.
struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

// Per-face stencil state: ops plus compare and write masks.
struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");
434
// Depth/stencil enable bits (plus the depth clip control bit), packed into one byte.
struct DepthStencilEnableFlags final
{
    uint8_t viewportNegativeOneToOne : 1;

    uint8_t depthTest : 1;
    uint8_t depthWrite : 2;  // these only need one bit each.  the extra is used as padding.
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

// We are borrowing three bits here for surface rotation, even though it has nothing to do with
// depth stencil.
struct DepthCompareOpAndSurfaceRotation final
{
    uint8_t depthCompareOp : 4;
    uint8_t surfaceRotation : 3;
    uint8_t padding : 1;
};
constexpr size_t kDepthCompareOpAndSurfaceRotationSize = sizeof(DepthCompareOpAndSurfaceRotation);
static_assert(kDepthCompareOpAndSurfaceRotationSize == 1, "Size check failed");
458
// Complete depth/stencil state for the packed pipeline description.
struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    DepthCompareOpAndSurfaceRotation depthCompareOpAndSurfaceRotation;

    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
// SurfaceRotation must fit in the 3-bit surfaceRotation field above.
static_assert(static_cast<int>(SurfaceRotation::EnumCount) <= 8, "Size check failed");
475
// Logic op enable bit and the VkLogicOp value, packed into one byte.
struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

// Blend factors and equations for a single color attachment, packed into 32 bits.
struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");
497
// Input assembly state: topology, patch size and primitive restart.
struct PrimitiveState final
{
    uint16_t topology : 9;
    uint16_t patchVertices : 6;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");

// Input assembly and blend state for the packed pipeline description.  Color write masks are
// stored as two 4-bit masks per byte.
struct PackedInputAssemblyAndColorBlendStateInfo final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

// Drawable dimensions packed into 16 bits each.
struct PackedExtent final
{
    uint16_t width;
    uint16_t height;
};

// Dither-emulation control bits, 2 bits per draw buffer.
struct PackedDither final
{
    static_assert(gl::IMPLEMENTATION_MAX_DRAW_BUFFERS <= 8,
                  "2 bits per draw buffer is needed for dither emulation");
    uint16_t emulatedDitherControl;
    uint16_t unused;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
535
// Total size of the packed pipeline description; must match sizeof(GraphicsPipelineDesc)
// (asserted below), and determines the number of transition dirty bits.
constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize +
    sizeof(PackedExtent) + sizeof(PackedDither);

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits.  Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
549
550 // State changes are applied through the update methods. Each update method can also have a
551 // sibling method that applies the update without marking a state transition. The non-transition
552 // update methods are used for internal shader pipelines. Not every non-transition update method
553 // is implemented yet as not every state is used in internal shaders.
554 class GraphicsPipelineDesc final
555 {
556 public:
557 // Use aligned allocation and free so we can use the alignas keyword.
558 void *operator new(std::size_t size);
559 void operator delete(void *ptr);
560
561 GraphicsPipelineDesc();
562 ~GraphicsPipelineDesc();
563 GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
564 GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);
565
566 size_t hash() const;
567 bool operator==(const GraphicsPipelineDesc &other) const;
568
569 void initDefaults(const ContextVk *contextVk);
570
571 // For custom comparisons.
572 template <typename T>
getPtr()573 const T *getPtr() const
574 {
575 return reinterpret_cast<const T *>(this);
576 }
577
578 angle::Result initializePipeline(ContextVk *contextVk,
579 const PipelineCache &pipelineCacheVk,
580 const RenderPass &compatibleRenderPass,
581 const PipelineLayout &pipelineLayout,
582 const gl::AttributesMask &activeAttribLocationsMask,
583 const gl::ComponentTypeMask &programAttribsTypeMask,
584 const gl::DrawBufferMask &missingOutputsMask,
585 const ShaderAndSerialMap &shaders,
586 const SpecializationConstants &specConsts,
587 Pipeline *pipelineOut) const;
588
589 // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
590 void updateVertexInput(GraphicsPipelineTransitionBits *transition,
591 uint32_t attribIndex,
592 GLuint stride,
593 GLuint divisor,
594 angle::FormatID format,
595 bool compressed,
596 GLuint relativeOffset);
597
598 // Input assembly info
599 void setTopology(gl::PrimitiveMode drawMode);
600 void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
601 void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
602 bool primitiveRestartEnabled);
603
604 // Viewport states
605 void updateDepthClipControl(GraphicsPipelineTransitionBits *transition, bool negativeOneToOne);
606
607 // Raster states
608 void setCullMode(VkCullModeFlagBits cullMode);
609 void updateCullMode(GraphicsPipelineTransitionBits *transition,
610 const gl::RasterizerState &rasterState);
611 void updateFrontFace(GraphicsPipelineTransitionBits *transition,
612 const gl::RasterizerState &rasterState,
613 bool invertFrontFace);
614 void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
615 void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
616 bool rasterizerDiscardEnabled);
617
618 // Multisample states
619 uint32_t getRasterizationSamples() const;
620 void setRasterizationSamples(uint32_t rasterizationSamples);
621 void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
622 uint32_t rasterizationSamples);
623 void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
624 void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
625 void updateSampleMask(GraphicsPipelineTransitionBits *transition,
626 uint32_t maskNumber,
627 uint32_t mask);
628
629 void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);
630
631 // RenderPass description.
getRenderPassDesc()632 const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }
633
634 void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
635 void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
636 const RenderPassDesc &renderPassDesc);
637 void setRenderPassSampleCount(GLint samples);
638 void setRenderPassColorAttachmentFormat(size_t colorIndexGL, angle::FormatID formatID);
639
640 // Blend states
641 void setSingleBlend(uint32_t colorIndexGL,
642 bool enabled,
643 VkBlendOp op,
644 VkBlendFactor srcFactor,
645 VkBlendFactor dstFactor);
646 void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
647 gl::DrawBufferMask blendEnabledMask);
648 void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
649 void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
650 const gl::BlendStateExt &blendStateExt,
651 gl::DrawBufferMask attachmentMask);
652 void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
653 const gl::BlendStateExt &blendStateExt,
654 gl::DrawBufferMask attachmentMask);
655 void resetBlendFuncsAndEquations(GraphicsPipelineTransitionBits *transition,
656 const gl::BlendStateExt &blendStateExt,
657 gl::DrawBufferMask previousAttachmentsMask,
658 gl::DrawBufferMask newAttachmentsMask);
659 void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
660 const gl::DrawBufferMask &alphaMask,
661 const gl::DrawBufferMask &enabledDrawBuffers);
662 void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
663 void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
664 gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
665 const gl::DrawBufferMask &alphaMask,
666 const gl::DrawBufferMask &enabledDrawBuffers);
667
668 // Depth/stencil states.
669 void setDepthTestEnabled(bool enabled);
670 void setDepthWriteEnabled(bool enabled);
671 void setDepthFunc(VkCompareOp op);
672 void setDepthClampEnabled(bool enabled);
673 void setStencilTestEnabled(bool enabled);
674 void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
675 void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
676 void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
677 void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
678 void setStencilFrontWriteMask(uint8_t mask);
679 void setStencilBackWriteMask(uint8_t mask);
680 void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
681 const gl::DepthStencilState &depthStencilState,
682 const gl::Framebuffer *drawFramebuffer);
683 void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
684 const gl::DepthStencilState &depthStencilState);
685 void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
686 const gl::DepthStencilState &depthStencilState,
687 const gl::Framebuffer *drawFramebuffer);
688 void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
689 const gl::DepthStencilState &depthStencilState,
690 const gl::Framebuffer *drawFramebuffer);
691 void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
692 GLint ref,
693 const gl::DepthStencilState &depthStencilState);
694 void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
695 GLint ref,
696 const gl::DepthStencilState &depthStencilState);
697 void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
698 const gl::DepthStencilState &depthStencilState);
699 void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
700 const gl::DepthStencilState &depthStencilState);
701 void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
702 const gl::DepthStencilState &depthStencilState,
703 const gl::Framebuffer *drawFramebuffer);
704 void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
705 const gl::DepthStencilState &depthStencilState,
706 const gl::Framebuffer *drawFramebuffer);
707
708 // Depth offset.
709 void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
710 void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
711 const gl::RasterizerState &rasterState);
712
713 // Tessellation
714 void updatePatchVertices(GraphicsPipelineTransitionBits *transition, GLuint value);
715
716 // Subpass
717 void resetSubpass(GraphicsPipelineTransitionBits *transition);
718 void nextSubpass(GraphicsPipelineTransitionBits *transition);
719 void setSubpass(uint32_t subpass);
720 uint32_t getSubpass() const;
721
722 void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
723 const SurfaceRotation surfaceRotation);
getSurfaceRotation()724 SurfaceRotation getSurfaceRotation() const
725 {
726 return static_cast<SurfaceRotation>(
727 mDepthStencilStateInfo.depthCompareOpAndSurfaceRotation.surfaceRotation);
728 }
729
730 void updateDrawableSize(GraphicsPipelineTransitionBits *transition,
731 uint32_t width,
732 uint32_t height);
getDrawableSize()733 const PackedExtent &getDrawableSize() const { return mDrawableSize; }
734
735 void updateEmulatedDitherControl(GraphicsPipelineTransitionBits *transition, uint16_t value);
getEmulatedDitherControl()736 uint32_t getEmulatedDitherControl() const { return mDither.emulatedDitherControl; }
737
738 private:
739 void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);
740
741 VertexInputAttributes mVertexInputAttribs;
742 RenderPassDesc mRenderPassDesc;
743 PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
744 PackedDepthStencilStateInfo mDepthStencilStateInfo;
745 PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
746 PackedExtent mDrawableSize;
747 PackedDither mDither;
748 };
749
// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

// Enough bindings to cover the worst case of either all textures or all uniform buffers being
// bound in a single descriptor set layout.
constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

// Fixed-capacity storage for the unpacked VkDescriptorSetLayoutBinding list; sized so that
// unpacking a layout never needs a heap allocation.
using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;
763
// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    // Hash/equality so this desc can be used as a key in the descriptor set layout cache.
    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    // Records the binding at |bindingIndex| into the packed layout.
    void update(uint32_t bindingIndex,
                VkDescriptorType descriptorType,
                uint32_t count,
                VkShaderStageFlags stages,
                const Sampler *immutableSampler);

    // Expands the packed representation back into the Vulkan structures needed to create the
    // actual VkDescriptorSetLayout.
    void unpackBindings(DescriptorSetLayoutBindingVector *bindings,
                        std::vector<VkSampler> *immutableSamplers) const;

  private:
    // There is a small risk of an issue if the sampler cache is evicted but not the descriptor
    // cache: we would have an invalid handle here. Thus propose follow-up work:
    // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
        uint32_t pad;
        VkSampler immutableSampler;
    };

    // 4x 32bit
    static_assert(sizeof(PackedDescriptorSetBinding) == 16, "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
807
// The following are for caching descriptor set layouts. Limited to max four descriptor set layouts.
// This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

// Packed analogue of VkPushConstantRange, small enough to be embedded in a pipeline layout
// cache key.
struct PackedPushConstantRange
{
    uint8_t offset;
    uint8_t size;
    uint16_t stageMask;
};

static_assert(sizeof(PackedPushConstantRange) == sizeof(uint32_t), "Unexpected Size");

// Fixed array with one entry per DescriptorSetIndex.
template <typename T>
using DescriptorSetArray = angle::PackedEnumMap<DescriptorSetIndex, T>;
using DescriptorSetLayoutPointerArray = DescriptorSetArray<BindingPointer<DescriptorSetLayout>>;
824
// Packed description of a pipeline layout: one DescriptorSetLayoutDesc per set plus a single
// push constant range. Used as a key for the pipeline layout cache.
class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    // Overwrites the layout desc for the given set index.
    void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
                                   const DescriptorSetLayoutDesc &desc);
    // Records the (single) push constant range used by the layout.
    void updatePushConstantRange(VkShaderStageFlags stageMask, uint32_t offset, uint32_t size);

    const PackedPushConstantRange &getPushConstantRange() const { return mPushConstantRange; }

  private:
    DescriptorSetArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PackedPushConstantRange mPushConstantRange;
    // Explicit padding keeps the structure free of compiler-inserted (uninitialized) bytes,
    // which matters because the desc is hashed/compared as raw memory.
    ANGLE_MAYBE_UNUSED uint32_t mPadding;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) == sizeof(DescriptorSetArray<DescriptorSetLayoutDesc>) +
                                                sizeof(PackedPushConstantRange) + sizeof(uint32_t),
              "Unexpected Size");
857
// Packed description of a VkSamplerYcbcrConversion, used to key the conversion cache.
struct YcbcrConversionDesc final
{
    YcbcrConversionDesc();
    ~YcbcrConversionDesc();
    YcbcrConversionDesc(const YcbcrConversionDesc &other);
    YcbcrConversionDesc &operator=(const YcbcrConversionDesc &other);

    size_t hash() const;
    bool operator==(const YcbcrConversionDesc &other) const;

    // A non-zero format indicates a conversion is required (see mExternalOrVkFormat below).
    bool valid() const { return mExternalOrVkFormat != 0; }
    void reset();
    void update(RendererVk *rendererVk,
                uint64_t externalFormat,
                VkSamplerYcbcrModelConversion conversionModel,
                VkSamplerYcbcrRange colorRange,
                VkChromaLocation xChromaOffset,
                VkChromaLocation yChromaOffset,
                VkFilter chromaFilter,
                VkComponentMapping components,
                angle::FormatID intendedFormatID);

    // If the sampler needs to convert the image content (e.g. from YUV to RGB) then
    // mExternalOrVkFormat will be non-zero. The value is either the external format
    // as returned by vkGetAndroidHardwareBufferPropertiesANDROID or a YUV VkFormat.
    // For VkSamplerYcbcrConversion, mExternalOrVkFormat along with mIsExternalFormat,
    // mConversionModel and mColorRange works as a Serial() used elsewhere in ANGLE.
    uint64_t mExternalOrVkFormat;
    // 1 bit to identify if external format is used
    uint32_t mIsExternalFormat : 1;
    // 3 bits to identify conversion model
    uint32_t mConversionModel : 3;
    // 1 bit to identify color component range
    uint32_t mColorRange : 1;
    // 1 bit to identify x chroma location
    uint32_t mXChromaOffset : 1;
    // 1 bit to identify y chroma location
    uint32_t mYChromaOffset : 1;
    // 1 bit to identify chroma filtering
    uint32_t mChromaFilter : 1;
    // 3 bit to identify R component swizzle
    uint32_t mRSwizzle : 3;
    // 3 bit to identify G component swizzle
    uint32_t mGSwizzle : 3;
    // 3 bit to identify B component swizzle
    uint32_t mBSwizzle : 3;
    // 3 bit to identify A component swizzle
    uint32_t mASwizzle : 3;
    // Explicit padding so the hashed/compared memory has no uninitialized bits.
    uint32_t mPadding : 12;
    uint32_t mReserved;
};

static_assert(sizeof(YcbcrConversionDesc) == 16, "Unexpected YcbcrConversionDesc size");
911
// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    SamplerDesc(ContextVk *contextVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    // Repacks this desc from the given GL sampler state.
    void update(ContextVk *contextVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    void reset();
    // Creates the Vulkan sampler described by this desc.
    angle::Result init(ContextVk *contextVk, Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // 16*8 bits to uniquely identify a YCbCr conversion sampler.
    YcbcrConversionDesc mYcbcrConversionDesc;

    // 16 bits for modes + states.
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op. (8 possible values)
    uint16_t mCompareOp : 3;

    // Values from angle::ColorGeneric::Type. Float is 0 and others are 1.
    uint16_t mBorderColorType : 1;

    // Explicit padding so the hashed/compared memory has no uninitialized bits.
    uint16_t mPadding : 15;

    // 16*8 bits for BorderColor
    angle::ColorF mBorderColor;

    // 32 bits reserved for future use.
    uint32_t mReserved;
};

static_assert(sizeof(SamplerDesc) == 56, "Unexpected SamplerDesc size");
979
// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

// A recorded transition out of a pipeline: the dirty bits that changed, the destination
// description, and the pipeline that was built for it.
struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}
1009
GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,GraphicsPipelineTransitionBits bitsB,const GraphicsPipelineDesc & descA,const GraphicsPipelineDesc & descB)1010 ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
1011 GraphicsPipelineTransitionBits bitsB,
1012 const GraphicsPipelineDesc &descA,
1013 const GraphicsPipelineDesc &descB)
1014 {
1015 if (bitsA != bitsB)
1016 return false;
1017
1018 // We currently mask over 4 bytes of the pipeline description with each dirty bit.
1019 // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
1020 // of the code faster. The for loop below would scan over twice as many bits per iteration.
1021 // But there may be more collisions between the same dirty bit masks leading to different
1022 // transitions. Thus there may be additional cost when applications use many transitions.
1023 // We should revisit this in the future and investigate using different bit widths.
1024 static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
1025
1026 const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
1027 const uint32_t *rawPtrB = descB.getPtr<uint32_t>();
1028
1029 for (size_t dirtyBit : bitsA)
1030 {
1031 if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
1032 return false;
1033 }
1034
1035 return true;
1036 }
1037
// Owns a pipeline object along with the list of known transitions out of it.
class PipelineHelper final : public Resource
{
  public:
    PipelineHelper();
    ~PipelineHelper() override;
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    bool valid() const { return mPipeline.valid(); }
    Pipeline &getPipeline() { return mPipeline; }

    // Looks up a previously recorded transition matching |bits|/|desc|. Returns false on miss.
    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Pipeline mPipeline;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}
1077
// Packed description of which mip levels/layers of an image a view addresses, plus sRGB state.
// Packs into exactly 32 bits so it can be embedded in descriptor set keys.
struct ImageSubresourceRange
{
    // GL max is 1000 (fits in 10 bits).
    uint32_t level : 10;
    // Max 31 levels (2 ** 5 - 1). Can store levelCount-1 if we need to save another bit.
    uint32_t levelCount : 5;
    // Implementation max is 2048 (11 bits).
    uint32_t layer : 12;
    // One of vk::LayerMode values. If 0, it means all layers. Otherwise it's the count of layers
    // which is usually 1, except for multiview in which case it can be up to
    // gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS.
    uint32_t layerMode : 3;
    // Values from vk::SrgbDecodeMode. Unused with draw views.
    uint32_t srgbDecodeMode : 1;
    // For read views: Values from gl::SrgbOverride, either Default or SRGB.
    // For draw views: Values from gl::SrgbWriteControlMode.
    uint32_t srgbMode : 1;

    static_assert(gl::IMPLEMENTATION_MAX_TEXTURE_LEVELS < (1 << 5),
                  "Not enough bits for level count");
    static_assert(gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS <= (1 << 12),
                  "Not enough bits for layer index");
    static_assert(gl::IMPLEMENTATION_ANGLE_MULTIVIEW_MAX_VIEWS <= (1 << 3),
                  "Not enough bits for layer count");
};

static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t), "Size mismatch");
1105
1106 inline bool operator==(const ImageSubresourceRange &a, const ImageSubresourceRange &b)
1107 {
1108 return a.level == b.level && a.levelCount == b.levelCount && a.layer == b.layer &&
1109 a.layerMode == b.layerMode && a.srgbDecodeMode == b.srgbDecodeMode &&
1110 a.srgbMode == b.srgbMode;
1111 }
1112
1113 constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {0, 0, 0, 0, 0, 0};
1114
// Identifies a specific subresource view: the view's serial plus the packed subresource range.
// Used in descriptor set and framebuffer cache keys.
struct ImageOrBufferViewSubresourceSerial
{
    ImageOrBufferViewSerial viewSerial;
    ImageSubresourceRange subresource;
};

inline bool operator==(const ImageOrBufferViewSubresourceSerial &a,
                       const ImageOrBufferViewSubresourceSerial &b)
{
    return a.viewSerial == b.viewSerial && a.subresource == b.subresource;
}

constexpr ImageOrBufferViewSubresourceSerial kInvalidImageOrBufferViewSubresourceSerial = {
    kInvalidImageOrBufferViewSerial, kInvalidImageSubresourceRange};
1129
1130 // Generic description of a descriptor set. Used as a key when indexing descriptor set caches. The
1131 // key storage is an angle:FixedVector. Beyond a certain fixed size we'll end up using heap memory
1132 // to store keys. Currently we specialize the structure for three use cases: uniforms, textures,
1133 // and other shader resources. Because of the way the specialization works we can't currently cache
1134 // programs that use some types of resources.
1135 class DescriptorSetDesc
1136 {
1137 public:
1138 DescriptorSetDesc() = default;
1139 ~DescriptorSetDesc() = default;
1140
DescriptorSetDesc(const DescriptorSetDesc & other)1141 DescriptorSetDesc(const DescriptorSetDesc &other) : mPayload(other.mPayload) {}
1142
1143 DescriptorSetDesc &operator=(const DescriptorSetDesc &other)
1144 {
1145 mPayload = other.mPayload;
1146 return *this;
1147 }
1148
1149 size_t hash() const;
1150
reset()1151 void reset() { mPayload.clear(); }
1152
getKeySizeBytes()1153 size_t getKeySizeBytes() const { return mPayload.size() * sizeof(uint32_t); }
1154
1155 bool operator==(const DescriptorSetDesc &other) const
1156 {
1157 return (mPayload.size() == other.mPayload.size()) &&
1158 (memcmp(mPayload.data(), other.mPayload.data(),
1159 mPayload.size() * sizeof(mPayload[0])) == 0);
1160 }
1161
1162 // Specific helpers for uniforms/xfb descriptors.
1163 static constexpr size_t kDefaultUniformBufferWordOffset = 0;
1164 static constexpr size_t kXfbBufferSerialWordOffset = 1;
1165 static constexpr size_t kXfbBufferOffsetWordOffset = 2;
1166 static constexpr size_t kXfbWordStride = 2;
1167
updateDefaultUniformBuffer(BufferSerial bufferSerial)1168 void updateDefaultUniformBuffer(BufferSerial bufferSerial)
1169 {
1170 setBufferSerial(kDefaultUniformBufferWordOffset, 1, 0, bufferSerial);
1171 }
1172
updateTransformFeedbackBuffer(size_t xfbIndex,BufferSerial bufferSerial,VkDeviceSize bufferOffset)1173 void updateTransformFeedbackBuffer(size_t xfbIndex,
1174 BufferSerial bufferSerial,
1175 VkDeviceSize bufferOffset)
1176 {
1177 setBufferSerial(kXfbBufferSerialWordOffset, kXfbWordStride, xfbIndex, bufferSerial);
1178 setClamped64BitValue(kXfbBufferOffsetWordOffset, kXfbWordStride, xfbIndex, bufferOffset);
1179 }
1180
1181 // Specific helpers for texture descriptors.
1182 static constexpr size_t kImageOrBufferViewWordOffset = 0;
1183 static constexpr size_t kImageSubresourceRangeWordOffset = 1;
1184 static constexpr size_t kSamplerSerialWordOffset = 2;
1185 static constexpr size_t kTextureWordStride = 3;
updateTexture(size_t textureUnit,ImageOrBufferViewSubresourceSerial imageOrBufferViewSubresource,SamplerSerial samplerSerial)1186 void updateTexture(size_t textureUnit,
1187 ImageOrBufferViewSubresourceSerial imageOrBufferViewSubresource,
1188 SamplerSerial samplerSerial)
1189 {
1190 setImageOrBufferViewSerial(kImageOrBufferViewWordOffset, kTextureWordStride, textureUnit,
1191 imageOrBufferViewSubresource.viewSerial);
1192 setImageSubresourceRange(kImageSubresourceRangeWordOffset, kTextureWordStride, textureUnit,
1193 imageOrBufferViewSubresource.subresource);
1194 setSamplerSerial(kSamplerSerialWordOffset, kTextureWordStride, textureUnit, samplerSerial);
1195 }
1196
1197 // Specific helpers for the shader resources descriptors.
appendBufferSerial(BufferSerial bufferSerial)1198 void appendBufferSerial(BufferSerial bufferSerial)
1199 {
1200 append32BitValue(bufferSerial.getValue());
1201 }
1202
append32BitValue(uint32_t value)1203 void append32BitValue(uint32_t value) { mPayload.push_back(value); }
1204
appendClamped64BitValue(uint64_t value)1205 void appendClamped64BitValue(uint64_t value)
1206 {
1207 ASSERT(value <= static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
1208 append32BitValue(static_cast<uint32_t>(value));
1209 }
1210
1211 private:
setBufferSerial(size_t wordOffset,size_t wordStride,size_t elementIndex,BufferSerial bufferSerial)1212 void setBufferSerial(size_t wordOffset,
1213 size_t wordStride,
1214 size_t elementIndex,
1215 BufferSerial bufferSerial)
1216 {
1217 set32BitValue(wordOffset, wordStride, elementIndex, bufferSerial.getValue());
1218 }
1219
setImageOrBufferViewSerial(size_t wordOffset,size_t wordStride,size_t elementIndex,ImageOrBufferViewSerial imageOrBufferViewSerial)1220 void setImageOrBufferViewSerial(size_t wordOffset,
1221 size_t wordStride,
1222 size_t elementIndex,
1223 ImageOrBufferViewSerial imageOrBufferViewSerial)
1224 {
1225 set32BitValue(wordOffset, wordStride, elementIndex, imageOrBufferViewSerial.getValue());
1226 }
1227
setImageSubresourceRange(size_t wordOffset,size_t wordStride,size_t elementIndex,ImageSubresourceRange subresourceRange)1228 void setImageSubresourceRange(size_t wordOffset,
1229 size_t wordStride,
1230 size_t elementIndex,
1231 ImageSubresourceRange subresourceRange)
1232 {
1233 static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t));
1234
1235 uint32_t value32bits;
1236 memcpy(&value32bits, &subresourceRange, sizeof(uint32_t));
1237 set32BitValue(wordOffset, wordStride, elementIndex, value32bits);
1238 }
1239
setSamplerSerial(size_t wordOffset,size_t wordStride,size_t elementIndex,SamplerSerial samplerSerial)1240 void setSamplerSerial(size_t wordOffset,
1241 size_t wordStride,
1242 size_t elementIndex,
1243 SamplerSerial samplerSerial)
1244 {
1245 set32BitValue(wordOffset, wordStride, elementIndex, samplerSerial.getValue());
1246 }
1247
set32BitValue(size_t wordOffset,size_t wordStride,size_t elementIndex,uint32_t value)1248 void set32BitValue(size_t wordOffset, size_t wordStride, size_t elementIndex, uint32_t value)
1249 {
1250 size_t wordIndex = wordOffset + wordStride * elementIndex;
1251 ensureCapacity(wordIndex + 1);
1252 mPayload[wordIndex] = value;
1253 }
1254
setClamped64BitValue(size_t wordOffset,size_t wordStride,size_t elementIndex,uint64_t value)1255 void setClamped64BitValue(size_t wordOffset,
1256 size_t wordStride,
1257 size_t elementIndex,
1258 uint64_t value)
1259 {
1260 ASSERT(value <= static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
1261 set32BitValue(wordOffset, wordStride, elementIndex, static_cast<uint32_t>(value));
1262 }
1263
ensureCapacity(size_t capacity)1264 void ensureCapacity(size_t capacity)
1265 {
1266 if (mPayload.size() < capacity)
1267 {
1268 mPayload.resize(capacity, 0);
1269 }
1270 }
1271
1272 // After a preliminary minimum size, use heap memory.
1273 static constexpr size_t kFastBufferWordLimit = 32;
1274 angle::FastVector<uint32_t, kFastBufferWordLimit> mPayload;
1275 };
1276
// In the FramebufferDesc object:
// - Depth/stencil serial is at index 0
// - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
// - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
// - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
// gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
// These offsets index into FramebufferDesc::mSerials (see getColorImageViewSerial below).
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
constexpr size_t kFramebufferDescColorIndexOffset = kFramebufferDescDepthStencilIndex + 1;
constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
    kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
constexpr size_t kFramebufferDescColorResolveIndexOffset =
    kFramebufferDescDepthStencilResolveIndexOffset + 1;

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
1292
// Packed description of a framebuffer: attachment view serials plus layer/multiview/sRGB state.
// Used as a key for the framebuffer cache; the serial array layout follows the index constants
// defined above.
class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void updateColor(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateColorResolve(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
    void updateDepthStencil(ImageOrBufferViewSubresourceSerial serial);
    void updateDepthStencilResolve(ImageOrBufferViewSubresourceSerial serial);
    ANGLE_INLINE void setWriteControlMode(gl::SrgbWriteControlMode mode)
    {
        mSrgbWriteControlMode = static_cast<uint16_t>(mode);
    }
    void updateIsMultiview(bool isMultiview) { mIsMultiview = isMultiview; }
    size_t hash() const;

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

    ImageOrBufferViewSubresourceSerial getColorImageViewSerial(uint32_t index)
    {
        ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
        return mSerials[kFramebufferDescColorIndexOffset + index];
    }

    FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;
    ANGLE_INLINE gl::SrgbWriteControlMode getWriteControlMode() const
    {
        return (mSrgbWriteControlMode == 1) ? gl::SrgbWriteControlMode::Linear
                                            : gl::SrgbWriteControlMode::Default;
    }

    void updateLayerCount(uint32_t layerCount);
    uint32_t getLayerCount() const { return mLayerCount; }
    void updateFramebufferFetchMode(bool hasFramebufferFetch);
    bool hasFramebufferFetch() const { return mHasFramebufferFetch; }

    bool isMultiview() const { return mIsMultiview; }

    void updateRenderToTexture(bool isRenderToTexture);

  private:
    void reset();
    void update(uint32_t index, ImageOrBufferViewSubresourceSerial serial);

    // Note: this is an exclusive index. If there is one index it will be "1".
    // Maximum value is 18
    uint16_t mMaxIndex : 5;
    uint16_t mHasFramebufferFetch : 1;
    static_assert(gl::IMPLEMENTATION_MAX_FRAMEBUFFER_LAYERS < (1 << 9) - 1,
                  "Not enough bits for mLayerCount");

    uint16_t mLayerCount : 9;

    uint16_t mSrgbWriteControlMode : 1;

    // If the render pass contains an initial subpass to unresolve a number of attachments, the
    // subpass description is derived from the following mask, specifying which attachments need
    // to be unresolved. Includes both color and depth/stencil attachments.
    uint16_t mUnresolveAttachmentMask : kMaxFramebufferNonResolveAttachments;

    // Whether this is a multisampled-render-to-single-sampled framebuffer. Only used when using
    // VK_EXT_multisampled_render_to_single_sampled. Only one bit is used and the rest is padding.
    uint16_t mIsRenderToTexture : 15 - kMaxFramebufferNonResolveAttachments;

    uint16_t mIsMultiview : 1;

    FramebufferAttachmentArray<ImageOrBufferViewSubresourceSerial> mSerials;
};

constexpr size_t kFramebufferDescSize = sizeof(FramebufferDesc);
static_assert(kFramebufferDescSize == 148, "Size check failed");
1374
// The SamplerHelper allows a Sampler to be coupled with a serial.
// Must be included before we declare SamplerCache.
class SamplerHelper final : angle::NonCopyable
{
  public:
    SamplerHelper(ContextVk *contextVk);
    ~SamplerHelper();

    explicit SamplerHelper(SamplerHelper &&samplerHelper);
    SamplerHelper &operator=(SamplerHelper &&rhs);

    bool valid() const { return mSampler.valid(); }
    const Sampler &get() const { return mSampler; }
    Sampler &get() { return mSampler; }
    SamplerSerial getSamplerSerial() const { return mSamplerSerial; }

  private:
    Sampler mSampler;
    // Serial identifying this sampler in descriptor set keys.
    SamplerSerial mSamplerSerial;
};

using RefCountedSampler = RefCounted<SamplerHelper>;
using SamplerBinding = BindingPointer<SamplerHelper>;
1398
// Couples a RenderPass with the performance counters gathered for it.
class RenderPassHelper final : angle::NonCopyable
{
  public:
    RenderPassHelper();
    ~RenderPassHelper();

    RenderPassHelper(RenderPassHelper &&other);
    RenderPassHelper &operator=(RenderPassHelper &&other);

    void destroy(VkDevice device);

    const RenderPass &getRenderPass() const;
    RenderPass &getRenderPass();

    const RenderPassPerfCounters &getPerfCounters() const;
    RenderPassPerfCounters &getPerfCounters();

  private:
    RenderPass mRenderPass;
    RenderPassPerfCounters mPerfCounters;
};
1420 } // namespace vk
1421 } // namespace rx
1422
1423 // Introduce std::hash for the above classes.
1424 namespace std
1425 {
// Each desc type delegates hashing to its own hash() member.
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};
1455
1456 template <>
1457 struct hash<rx::vk::ImageSubresourceRange>
1458 {
1459 size_t operator()(const rx::vk::ImageSubresourceRange &key) const
1460 {
1461 return *reinterpret_cast<const uint32_t *>(&key);
1462 }
1463 };
1464
template <>
struct hash<rx::vk::DescriptorSetDesc>
{
    size_t operator()(const rx::vk::DescriptorSetDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::YcbcrConversionDesc>
{
    size_t operator()(const rx::vk::YcbcrConversionDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};

// See Resource Serial types defined in vk_utils.h. Each serial's monotonically increasing value
// doubles as its hash.
#define ANGLE_HASH_VK_SERIAL(Type)                                                          \
    template <>                                                                             \
    struct hash<rx::vk::Type##Serial>                                                       \
    {                                                                                       \
        size_t operator()(const rx::vk::Type##Serial &key) const { return key.getValue(); } \
    };

ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)
1498
1499 } // namespace std
1500
1501 namespace rx
1502 {
// Cache types for various Vulkan objects
enum class VulkanCacheType
{
    CompatibleRenderPass,
    RenderPassWithOps,
    GraphicsPipeline,
    PipelineLayout,
    Sampler,
    SamplerYcbcrConversion,
    DescriptorSetLayout,
    DriverUniformsDescriptors,
    TextureDescriptors,
    UniformsAndXfbDescriptors,
    ShaderBuffersDescriptors,
    Framebuffer,
    // Sentinel; must remain the last entry.
    EnumCount
};
1520
1521 // Base class for all caches. Provides cache hit and miss counters.
1522 class CacheStats final : angle::NonCopyable
1523 {
1524 public:
1525 CacheStats() { reset(); }
1526 ~CacheStats() {}
1527
1528 CacheStats(const CacheStats &rhs)
1529 : mHitCount(rhs.mHitCount), mMissCount(rhs.mMissCount), mSize(rhs.mSize)
1530 {}
1531
1532 CacheStats &operator=(const CacheStats &rhs)
1533 {
1534 mHitCount = rhs.mHitCount;
1535 mMissCount = rhs.mMissCount;
1536 mSize = rhs.mSize;
1537 return *this;
1538 }
1539
1540 ANGLE_INLINE void hit() { mHitCount++; }
1541 ANGLE_INLINE void miss() { mMissCount++; }
1542 ANGLE_INLINE void accumulate(const CacheStats &stats)
1543 {
1544 mHitCount += stats.mHitCount;
1545 mMissCount += stats.mMissCount;
1546 mSize = stats.mSize;
1547 }
1548
1549 uint64_t getHitCount() const { return mHitCount; }
1550 uint64_t getMissCount() const { return mMissCount; }
1551
1552 ANGLE_INLINE double getHitRatio() const
1553 {
1554 if (mHitCount + mMissCount == 0)
1555 {
1556 return 0;
1557 }
1558 else
1559 {
1560 return static_cast<double>(mHitCount) / (mHitCount + mMissCount);
1561 }
1562 }
1563
1564 ANGLE_INLINE void incrementSize() { ++mSize; }
1565
1566 ANGLE_INLINE uint64_t getSize() const { return mSize; }
1567
1568 void reset()
1569 {
1570 mHitCount = 0;
1571 mMissCount = 0;
1572 mSize = 0;
1573 }
1574
1575 void resetHitAndMissCount()
1576 {
1577 mHitCount = 0;
1578 mMissCount = 0;
1579 }
1580
1581 private:
1582 uint64_t mHitCount;
1583 uint64_t mMissCount;
1584 uint64_t mSize;
1585 };
1586
// Mixin that gives a cache a CacheStats member and a way to drain it into an aggregator.
template <VulkanCacheType CacheType>
class HasCacheStats : angle::NonCopyable
{
  public:
    // Adds this cache's counters into |accum| and resets the local counters.
    template <typename Accumulator>
    void accumulateCacheStats(Accumulator *accum)
    {
        accum->accumulateCacheStats(CacheType, mCacheStats);
        mCacheStats.reset();
    }

  protected:
    HasCacheStats() = default;
    virtual ~HasCacheStats() = default;

    CacheStats mCacheStats;
};
1604
1605 // TODO(jmadill): Add cache trimming/eviction.
1606 class RenderPassCache final : angle::NonCopyable
1607 {
1608 public:
1609 RenderPassCache();
1610 ~RenderPassCache();
1611
1612 void destroy(RendererVk *rendererVk);
1613
1614 ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
1615 const vk::RenderPassDesc &desc,
1616 vk::RenderPass **renderPassOut)
1617 {
1618 auto outerIt = mPayload.find(desc);
1619 if (outerIt != mPayload.end())
1620 {
1621 InnerCache &innerCache = outerIt->second;
1622 ASSERT(!innerCache.empty());
1623
1624 // Find the first element and return it.
1625 *renderPassOut = &innerCache.begin()->second.getRenderPass();
1626 mCompatibleRenderPassCacheStats.hit();
1627 return angle::Result::Continue;
1628 }
1629
1630 mCompatibleRenderPassCacheStats.miss();
1631 return addRenderPass(contextVk, desc, renderPassOut);
1632 }
1633
1634 angle::Result getRenderPassWithOps(ContextVk *contextVk,
1635 const vk::RenderPassDesc &desc,
1636 const vk::AttachmentOpsArray &attachmentOps,
1637 vk::RenderPass **renderPassOut);
1638
1639 private:
1640 angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
1641 const vk::RenderPassDesc &desc,
1642 const vk::AttachmentOpsArray &attachmentOps,
1643 bool updatePerfCounters,
1644 vk::RenderPass **renderPassOut);
1645
1646 angle::Result addRenderPass(ContextVk *contextVk,
1647 const vk::RenderPassDesc &desc,
1648 vk::RenderPass **renderPassOut);
1649
1650 // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
1651 // The second layer caches the attachment load/store ops and initial/final layout.
1652 // Switch to `std::unordered_map` to retain pointer stability.
1653 using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassHelper>;
1654 using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;
1655
1656 OuterCache mPayload;
1657 CacheStats mCompatibleRenderPassCacheStats;
1658 CacheStats mRenderPassWithOpsCacheStats;
1659 };
1660
1661 // TODO(jmadill): Add cache trimming/eviction.
1662 class GraphicsPipelineCache final : public HasCacheStats<VulkanCacheType::GraphicsPipeline>
1663 {
1664 public:
1665 GraphicsPipelineCache();
1666 ~GraphicsPipelineCache() override;
1667
1668 void destroy(RendererVk *rendererVk);
1669 void release(ContextVk *context);
1670
1671 void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);
1672
1673 ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
1674 const vk::PipelineCache &pipelineCacheVk,
1675 const vk::RenderPass &compatibleRenderPass,
1676 const vk::PipelineLayout &pipelineLayout,
1677 const gl::AttributesMask &activeAttribLocationsMask,
1678 const gl::ComponentTypeMask &programAttribsTypeMask,
1679 const gl::DrawBufferMask &missingOutputsMask,
1680 const vk::ShaderAndSerialMap &shaders,
1681 const vk::SpecializationConstants &specConsts,
1682 const vk::GraphicsPipelineDesc &desc,
1683 const vk::GraphicsPipelineDesc **descPtrOut,
1684 vk::PipelineHelper **pipelineOut)
1685 {
1686 auto item = mPayload.find(desc);
1687 if (item != mPayload.end())
1688 {
1689 *descPtrOut = &item->first;
1690 *pipelineOut = &item->second;
1691 mCacheStats.hit();
1692 return angle::Result::Continue;
1693 }
1694
1695 mCacheStats.miss();
1696 return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
1697 activeAttribLocationsMask, programAttribsTypeMask, missingOutputsMask,
1698 shaders, specConsts, desc, descPtrOut, pipelineOut);
1699 }
1700
1701 private:
1702 angle::Result insertPipeline(ContextVk *contextVk,
1703 const vk::PipelineCache &pipelineCacheVk,
1704 const vk::RenderPass &compatibleRenderPass,
1705 const vk::PipelineLayout &pipelineLayout,
1706 const gl::AttributesMask &activeAttribLocationsMask,
1707 const gl::ComponentTypeMask &programAttribsTypeMask,
1708 const gl::DrawBufferMask &missingOutputsMask,
1709 const vk::ShaderAndSerialMap &shaders,
1710 const vk::SpecializationConstants &specConsts,
1711 const vk::GraphicsPipelineDesc &desc,
1712 const vk::GraphicsPipelineDesc **descPtrOut,
1713 vk::PipelineHelper **pipelineOut);
1714
1715 std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
1716 };
1717
// Caches descriptor set layouts keyed by their packed description, so identical layouts are
// created once and shared via ref-counting thereafter.
class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    // Releases all cached layouts.
    void destroy(RendererVk *rendererVk);

    // Returns the cached layout matching |desc| (creating one on a miss) through a ref-counted
    // binding pointer.
    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
    CacheStats mCacheStats;
};
1735
// Caches pipeline layouts keyed by their packed description; entries are ref-counted and shared
// across programs with identical layouts.
class PipelineLayoutCache final : public HasCacheStats<VulkanCacheType::PipelineLayout>
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache() override;

    // Releases all cached pipeline layouts.
    void destroy(RendererVk *rendererVk);

    // Returns the cached layout matching |desc| (creating one from |descriptorSetLayouts| on a
    // miss) through a ref-counted binding pointer.
    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};
1752
// Caches VkSampler objects keyed by their packed description; entries are ref-counted so
// textures with identical sampler state share one sampler.
class SamplerCache final : public HasCacheStats<VulkanCacheType::Sampler>
{
  public:
    SamplerCache();
    ~SamplerCache() override;

    // Releases all cached samplers.
    void destroy(RendererVk *rendererVk);

    // Returns the cached sampler matching |desc|, creating one on a miss.
    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::SamplerBinding *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
};
1768
// YuvConversion Cache
// Caches VkSamplerYcbcrConversion objects keyed by their packed description.
class SamplerYcbcrConversionCache final
    : public HasCacheStats<VulkanCacheType::SamplerYcbcrConversion>
{
  public:
    SamplerYcbcrConversionCache();
    ~SamplerYcbcrConversionCache() override;

    // Releases all cached conversions.
    void destroy(RendererVk *rendererVk);

    // Returns the cached conversion matching |ycbcrConversionDesc|, creating one on a miss.
    angle::Result getSamplerYcbcrConversion(vk::Context *context,
                                            const vk::YcbcrConversionDesc &ycbcrConversionDesc,
                                            VkSamplerYcbcrConversion *vkSamplerYcbcrConversionOut);

  private:
    using SamplerYcbcrConversionMap =
        std::unordered_map<vk::YcbcrConversionDesc, vk::SamplerYcbcrConversion>;
    // Two payloads are kept; the names suggest one is for external (Android/AHB-style) formats
    // and one for regular Vulkan formats — the keying difference is implemented in the .cpp.
    // TODO(review): confirm against the implementation file.
    SamplerYcbcrConversionMap mExternalFormatPayload;
    SamplerYcbcrConversionMap mVkFormatPayload;
};
1789
1790 // DescriptorSet Cache
1791 class DriverUniformsDescriptorSetCache final
1792 : public HasCacheStats<VulkanCacheType::DriverUniformsDescriptors>
1793 {
1794 public:
1795 DriverUniformsDescriptorSetCache() = default;
1796 ~DriverUniformsDescriptorSetCache() override { ASSERT(mPayload.empty()); }
1797
1798 void destroy(RendererVk *rendererVk);
1799
1800 ANGLE_INLINE bool get(uint32_t serial, VkDescriptorSet *descriptorSet)
1801 {
1802 if (mPayload.get(serial, descriptorSet))
1803 {
1804 mCacheStats.hit();
1805 return true;
1806 }
1807 mCacheStats.miss();
1808 return false;
1809 }
1810
1811 ANGLE_INLINE void insert(uint32_t serial, VkDescriptorSet descriptorSet)
1812 {
1813 mPayload.insert(serial, descriptorSet);
1814 }
1815
1816 ANGLE_INLINE void clear() { mPayload.clear(); }
1817
1818 size_t getSize() const { return mPayload.size(); }
1819
1820 private:
1821 static constexpr uint32_t kFlatMapSize = 16;
1822 angle::FlatUnorderedMap<uint32_t, VkDescriptorSet, kFlatMapSize> mPayload;
1823 };
1824
1825 // Templated Descriptors Cache
1826 class DescriptorSetCache final : angle::NonCopyable
1827 {
1828 public:
1829 DescriptorSetCache() = default;
1830 ~DescriptorSetCache() { ASSERT(mPayload.empty()); }
1831
1832 ANGLE_INLINE void clear() { mPayload.clear(); }
1833
1834 ANGLE_INLINE bool get(const vk::DescriptorSetDesc &desc,
1835 VkDescriptorSet *descriptorSet,
1836 CacheStats *cacheStats)
1837 {
1838 auto iter = mPayload.find(desc);
1839 if (iter != mPayload.end())
1840 {
1841 *descriptorSet = iter->second;
1842 cacheStats->hit();
1843 return true;
1844 }
1845 cacheStats->miss();
1846 return false;
1847 }
1848
1849 ANGLE_INLINE void insert(const vk::DescriptorSetDesc &desc,
1850 VkDescriptorSet descriptorSet,
1851 CacheStats *cacheStats)
1852 {
1853 mPayload.emplace(desc, descriptorSet);
1854 cacheStats->incrementSize();
1855 }
1856
1857 size_t getTotalCacheKeySizeBytes() const
1858 {
1859 size_t totalSize = 0;
1860 for (const auto &iter : mPayload)
1861 {
1862 const vk::DescriptorSetDesc &desc = iter.first;
1863 totalSize += desc.getKeySizeBytes();
1864 }
1865 return totalSize;
1866 }
1867
1868 private:
1869 angle::HashMap<vk::DescriptorSetDesc, VkDescriptorSet> mPayload;
1870 };
1871
// Reserved binding counts for the descriptor set layouts described at the top of this file.
// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage. Currently, a maximum of three stages are
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
// Total default uniform bindings reserved: 1 binding x 3 stages.
constexpr uint32_t kReservedDefaultUniformBindingCount = 3;
1878 } // namespace rx
1879
1880 #endif // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
1881