//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include "common/Color.h"
#include "common/FixedVector.h"
#include "common/WorkerThread.h"
#include "libANGLE/Uniform.h"
#include "libANGLE/renderer/vulkan/ResourceVk.h"
#include "libANGLE/renderer/vulkan/ShaderInterfaceVariableInfoMap.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace gl
{
class ProgramExecutable;
}  // namespace gl

namespace rx
{
class ShaderInterfaceVariableInfoMap;
class UpdateDescriptorSetsBuilder;

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains uniform blocks created to encompass default uniforms. 1 binding is used per
//   pipeline stage. Additionally, transform feedback buffers are bound from binding 2 and up.
//   For internal shaders, set 0 is used for all the needed resources.
// - Set 1 contains all textures (including texture buffers).
// - Set 2 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers, images and image buffers.

enum class DescriptorSetIndex : uint32_t
{
    Internal       = 0,         // Internal shaders
    UniformsAndXfb = Internal,  // Uniforms set index
    Texture        = 1,         // Textures set index
    ShaderResource = 2,         // Other shader resources set index

    InvalidEnum = 3,
    EnumCount   = InvalidEnum,
};
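
// For example (illustrative only): a texture sampler in a generated shader lives in set 1,
// matching DescriptorSetIndex::Texture:
//
//     layout(set = 1, binding = 0) uniform sampler2D s;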

namespace vk
{
class BufferHelper;
class DynamicDescriptorPool;
class SamplerHelper;
enum class ImageLayout;
class PipelineCacheAccess;
class RenderPassCommandBufferHelper;

using RefCountedDescriptorSetLayout    = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout         = RefCounted<PipelineLayout>;
using RefCountedSamplerYcbcrConversion = RefCounted<SamplerYcbcrConversion>;

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

enum class ResourceAccess
{
    Unused    = 0x0,
    ReadOnly  = 0x1,
    WriteOnly = 0x2,
    ReadWrite = ReadOnly | WriteOnly,
};

inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
{
    *oldAccess = static_cast<ResourceAccess>(ToUnderlying(newAccess) | ToUnderlying(*oldAccess));
}
inline bool HasResourceWriteAccess(ResourceAccess access)
{
    return (ToUnderlying(access) & ToUnderlying(ResourceAccess::WriteOnly)) != 0;
}
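
// Example (illustrative): accesses accumulate, so combining a read with a write yields
// ReadWrite:
//
//     ResourceAccess access = ResourceAccess::ReadOnly;
//     UpdateAccess(&access, ResourceAccess::WriteOnly);
//     // access == ResourceAccess::ReadWrite
//     ASSERT(HasResourceWriteAccess(access));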

enum class RenderPassLoadOp
{
    Load     = VK_ATTACHMENT_LOAD_OP_LOAD,
    Clear    = VK_ATTACHMENT_LOAD_OP_CLEAR,
    DontCare = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    None,
};
enum class RenderPassStoreOp
{
    Store    = VK_ATTACHMENT_STORE_OP_STORE,
    DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
    None,
};

// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus one
// depth/stencil attachment and one depth/stencil resolve attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 2;
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
using FramebufferAttachmentMask    = angle::BitSet<kMaxFramebufferAttachments>;

constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
using FramebufferNonResolveAttachmentMask  = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;
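
// Worked example: assuming gl::IMPLEMENTATION_MAX_DRAW_BUFFERS is 8 (a static_assert further
// below requires it to be at most 8), kMaxFramebufferAttachments is 8 * 2 + 2 = 18 (8 color +
// 8 color resolve + depth/stencil + depth/stencil resolve), and
// kMaxFramebufferNonResolveAttachments is 8 + 1 = 9, which fits in the BitSet16 used for
// FramebufferNonResolveAttachmentMask.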

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment. Used when optimizing blit through resolve attachment to
    // temporarily pack a resolve attachment and then remove it.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthStencilResolveAttachment();
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthStencilUnresolveAttachment(bool unresolveDepth, bool unresolveStencil);
    void removeDepthStencilUnresolveAttachment();

    void setWriteControlMode(gl::SrgbWriteControlMode mode);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const;
    gl::DrawBufferMask getColorResolveAttachmentMask() const { return mColorResolveAttachmentMask; }
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }
    bool hasDepthStencilResolveAttachment() const { return mResolveDepthStencil; }
    bool hasDepthStencilUnresolveAttachment() const { return mUnresolveDepth || mUnresolveStencil; }
    bool hasDepthUnresolveAttachment() const { return mUnresolveDepth; }
    bool hasStencilUnresolveAttachment() const { return mUnresolveStencil; }
    gl::SrgbWriteControlMode getSRGBWriteControlMode() const
    {
        return static_cast<gl::SrgbWriteControlMode>(mSrgbWriteControl);
    }

    bool isLegacyDitherEnabled() const { return mLegacyDitherEnabled; }

    void setLegacyDither(bool enabled);

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples) { mSamples = static_cast<uint8_t>(samples); }
    uint8_t samples() const { return mSamples; }

    void setViewCount(GLsizei viewCount) { mViewCount = static_cast<uint8_t>(viewCount); }
    uint8_t viewCount() const { return mViewCount; }

    void setFramebufferFetchMode(bool hasFramebufferFetch)
    {
        mHasFramebufferFetch = hasFramebufferFetch;
    }
    bool hasFramebufferFetch() const { return mHasFramebufferFetch; }

    void updateRenderToTexture(bool isRenderToTexture) { mIsRenderToTexture = isRenderToTexture; }
    bool isRenderToTexture() const { return mIsRenderToTexture; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange;

    // Multiview
    uint8_t mViewCount;

    // sRGB
    uint8_t mSrgbWriteControl : 1;

    // Framebuffer fetch
    uint8_t mHasFramebufferFetch : 1;

    // Multisampled render to texture
    uint8_t mIsRenderToTexture : 1;
    uint8_t mResolveDepthStencil : 1;
    uint8_t mUnresolveDepth : 1;
    uint8_t mUnresolveStencil : 1;

    // Dithering state when using VK_EXT_legacy_dithering
    uint8_t mLegacyDitherEnabled : 1;

    // Available space for expansion.
    uint8_t mPadding1 : 1;
    uint8_t mPadding2;

    // Whether each color attachment has a corresponding resolve attachment. Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass. This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets. This operation is referred to as "unresolve".
    //
    // Unused when VK_EXT_multisampled_render_to_single_sampled is available.
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices. The depth/stencil
    // attachment formats follow the last enabled color attachment. When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices. These indices are marked as such, e.g. colorIndexGL. The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk. The subpass attachment references create the
    // link between the two index spaces. The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs). The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments. They use the same
    // formats, so they are not specified in this array.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 16, "Size check failed");
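
// Example (illustrative sketch; the format IDs are arbitrary): packing a description for a
// framebuffer with color attachments 0 and 2 plus depth/stencil. The depth/stencil attachment
// must be packed last:
//
//     RenderPassDesc desc;
//     desc.setSamples(1);
//     desc.packColorAttachment(0, angle::FormatID::R8G8B8A8_UNORM);
//     desc.packColorAttachmentGap(1);
//     desc.packColorAttachment(2, angle::FormatID::R8G8B8A8_UNORM);
//     desc.packDepthStencilAttachment(angle::FormatID::D24_UNORM_S8_UINT);
//
// colorAttachmentRange() and depthStencilAttachmentIndex() are then both 3.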

enum class GraphicsPipelineSubset
{
    Complete,  // Including all subsets
    VertexInput,
    Shaders,
    FragmentOutput,
};

enum class CacheLookUpFeedback
{
    None,
    Hit,
    Miss,
    LinkedDrawHit,
    LinkedDrawMiss,
    WarmUpHit,
    WarmUpMiss,
    UtilsHit,
    UtilsMiss,
};

struct PackedAttachmentOpsDesc final
{
    // RenderPassLoadOp is in range [0, 3], and RenderPassStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not. This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether. Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules.
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    uint16_t padding1 : 6;

    // Layouts take values from ImageLayout, so they are small. Layouts that are possible here are
    // placed at the beginning of that enum.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
    uint16_t padding2 : 6;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");

class PackedAttachmentIndex;

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const;
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    void setOps(PackedAttachmentIndex index, RenderPassLoadOp loadOp, RenderPassStoreOp storeOp);
    void setStencilOps(PackedAttachmentIndex index,
                       RenderPassLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");
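
// Example (illustrative sketch; |initialLayout| and |finalLayout| stand in for whichever
// ImageLayout values the attachment actually uses): ops for a single color attachment that is
// cleared at render pass start and stored at the end:
//
//     AttachmentOpsArray ops;
//     PackedAttachmentIndex colorIndexVk(0);
//     ops.initWithLoadStore(colorIndexVk, initialLayout, finalLayout);
//     ops.setClearOp(colorIndexVk);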

struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // Desktop drivers support
    uint16_t offset : kAttributeOffsetMaxBits;

    uint16_t compressed : 1;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 4, "Size mismatch");

struct PackedVertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];

    // Component type of the corresponding input in the program. Used to adjust the format if
    // necessary. Takes values from gl::ComponentType.
    uint32_t shaderAttribComponentType;

    // Although technically stride can be any value in ES 2.0, in practice supporting a stride
    // greater than MAX_USHORT is not particularly helpful. Note that stride limits are
    // introduced in ES 3.1.
    // Dynamic in VK_EXT_extended_dynamic_state
    uint16_t strides[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kPackedVertexInputAttributesSize = sizeof(PackedVertexInputAttributes);
static_assert(kPackedVertexInputAttributesSize == 100, "Size mismatch");

struct PackedInputAssemblyState final
{
    struct
    {
        uint32_t topology : 4;

        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t primitiveRestartEnable : 1;  // ds2

        // Whether dynamic state for vertex stride from VK_EXT_extended_dynamic_state can be used.
        // Used by GraphicsPipelineDesc::hash() to exclude |vertexStrides| from the hash.
        uint32_t useVertexInputBindingStrideDynamicState : 1;

        // Whether the pipeline is robust (vertex input copy)
        uint32_t isRobustContext : 1;
        // Whether the pipeline needs access to protected content (vertex input copy)
        uint32_t isProtectedContext : 1;

        // Which attributes are actually active in the program and should affect the pipeline.
        uint32_t programActiveAttributeLocations : gl::MAX_VERTEX_ATTRIBS;

        uint32_t padding : 24 - gl::MAX_VERTEX_ATTRIBS;
    } bits;
};

constexpr size_t kPackedInputAssemblyStateSize = sizeof(PackedInputAssemblyState);
static_assert(kPackedInputAssemblyStateSize == 4, "Size mismatch");

struct PackedStencilOpState final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 2, "Size check failed");

struct PackedPreRasterizationAndFragmentStates final
{
    struct
    {
        // Affecting VkPipelineViewportStateCreateInfo
        uint32_t viewportNegativeOneToOne : 1;

        // Affecting VkPipelineRasterizationStateCreateInfo
        uint32_t depthClampEnable : 1;
        uint32_t polygonMode : 2;
        // Dynamic in VK_EXT_extended_dynamic_state
        uint32_t cullMode : 4;
        uint32_t frontFace : 4;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t rasterizerDiscardEnable : 1;
        uint32_t depthBiasEnable : 1;

        // Affecting VkPipelineTessellationStateCreateInfo
        uint32_t patchVertices : 6;

        // Affecting VkPipelineDepthStencilStateCreateInfo
        uint32_t depthBoundsTest : 1;
        // Dynamic in VK_EXT_extended_dynamic_state
        uint32_t depthTest : 1;
        uint32_t depthWrite : 1;
        uint32_t stencilTest : 1;
        uint32_t nonZeroStencilWriteMaskWorkaround : 1;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t depthCompareOp : 4;

        // Affecting specialization constants
        uint32_t surfaceRotation : 1;

        // Whether the pipeline is robust (shader stages copy)
        uint32_t isRobustContext : 1;
        // Whether the pipeline needs access to protected content (shader stages copy)
        uint32_t isProtectedContext : 1;
    } bits;

    // Affecting specialization constants
    static_assert(gl::IMPLEMENTATION_MAX_DRAW_BUFFERS <= 8,
                  "2 bits per draw buffer is needed for dither emulation");
    uint16_t emulatedDitherControl;
    uint16_t padding;

    // Affecting VkPipelineDepthStencilStateCreateInfo
    // Dynamic in VK_EXT_extended_dynamic_state
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedPreRasterizationAndFragmentStatesSize =
    sizeof(PackedPreRasterizationAndFragmentStates);
static_assert(kPackedPreRasterizationAndFragmentStatesSize == 12, "Size check failed");

struct PackedMultisampleAndSubpassState final
{
    struct
    {
        // Affecting VkPipelineMultisampleStateCreateInfo
        // Note: Only up to 16xMSAA is supported in the Vulkan backend.
        uint16_t sampleMask;
        // Stored as minus one so sample count 16 can fit in 4 bits.
        uint16_t rasterizationSamplesMinusOne : 4;
        uint16_t sampleShadingEnable : 1;
        uint16_t alphaToCoverageEnable : 1;
        uint16_t alphaToOneEnable : 1;
        // The subpass index affects both the shader stages and the fragment output similarly to
        // multisampled state, so they are grouped together.
        // Note: Currently only 2 subpasses possible.
        uint16_t subpass : 1;
        // 8-bit normalized instead of float to align the struct.
        uint16_t minSampleShading : 8;
    } bits;
};

constexpr size_t kPackedMultisampleAndSubpassStateSize = sizeof(PackedMultisampleAndSubpassState);
static_assert(kPackedMultisampleAndSubpassStateSize == 4, "Size check failed");
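
// Worked example: a 4xMSAA pipeline stores rasterizationSamplesMinusOne = 3, and the maximum
// supported count of 16 encodes as 15, which fits the 4-bit field.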

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PackedColorBlendState final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
};

constexpr size_t kPackedColorBlendStateSize = sizeof(PackedColorBlendState);
static_assert(kPackedColorBlendStateSize == 36, "Size check failed");

struct PackedBlendMaskAndLogicOpState final
{
    struct
    {
        uint32_t blendEnableMask : 8;
        uint32_t logicOpEnable : 1;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t logicOp : 4;

        // Whether the pipeline needs access to protected content (fragment output copy)
        uint32_t isProtectedContext : 1;

        // Outputs that are present in the framebuffer but are never written to in the shader.
        // Used by GL_ANGLE_robust_fragment_shader_output, which defines the behavior in this case
        // (namely, to mask these outputs).
        uint32_t missingOutputsMask : gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;

        uint32_t padding : 18 - gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
    } bits;
};

constexpr size_t kPackedBlendMaskAndLogicOpStateSize = sizeof(PackedBlendMaskAndLogicOpState);
static_assert(kPackedBlendMaskAndLogicOpStateSize == 4, "Size check failed");

// The vertex input subset of the pipeline.
struct PipelineVertexInputState final
{
    PackedInputAssemblyState inputAssembly;
    PackedVertexInputAttributes vertex;
};

// The pre-rasterization and fragment shader subsets of the pipeline. This is excluding
// multisampled and render pass states which are shared with fragment output.
struct PipelineShadersState final
{
    PackedPreRasterizationAndFragmentStates shaders;
};

// Multisampled and render pass states.
struct PipelineSharedNonVertexInputState final
{
    PackedMultisampleAndSubpassState multisample;
    RenderPassDesc renderPass;
};

// The fragment output subset of the pipeline. This is excluding multisampled and render pass
// states which are shared with the shader subsets.
struct PipelineFragmentOutputState final
{
    PackedColorBlendState blend;
    PackedBlendMaskAndLogicOpState blendMaskAndLogic;
};

constexpr size_t kGraphicsPipelineVertexInputStateSize =
    kPackedVertexInputAttributesSize + kPackedInputAssemblyStateSize;
constexpr size_t kGraphicsPipelineShadersStateSize = kPackedPreRasterizationAndFragmentStatesSize;
constexpr size_t kGraphicsPipelineSharedNonVertexInputStateSize =
    kPackedMultisampleAndSubpassStateSize + kRenderPassDescSize;
constexpr size_t kGraphicsPipelineFragmentOutputStateSize =
    kPackedColorBlendStateSize + kPackedBlendMaskAndLogicOpStateSize;

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kGraphicsPipelineVertexInputStateSize + kGraphicsPipelineShadersStateSize +
    kGraphicsPipelineSharedNonVertexInputStateSize + kGraphicsPipelineFragmentOutputStateSize;

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
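
// Worked example, using the sizes asserted above: the vertex input state is 100 + 4 = 104
// bytes, the shaders state is 12 bytes, the shared non-vertex-input state is 4 + 16 = 20 bytes
// and the fragment output state is 36 + 4 = 40 bytes, for a total of 176 bytes. At 4 bytes per
// dirty bit, that yields 176 / 4 = 44 dirty bits, under the 64-bit limit asserted above.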

GraphicsPipelineTransitionBits GetGraphicsPipelineTransitionBitsMask(GraphicsPipelineSubset subset);

// Disable padding warnings for a few helper structs that aggregate Vulkan state objects. These are
// not used as hash keys, they just simplify passing them around to functions.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

struct GraphicsPipelineVertexInputVulkanStructs
{
    VkPipelineVertexInputStateCreateInfo vertexInputState       = {};
    VkPipelineInputAssemblyStateCreateInfo inputAssemblyState   = {};
    VkPipelineVertexInputDivisorStateCreateInfoEXT divisorState = {};

    // Support storage
    gl::AttribArray<VkVertexInputBindingDescription> bindingDescs;
    gl::AttribArray<VkVertexInputAttributeDescription> attributeDescs;
    gl::AttribArray<VkVertexInputBindingDivisorDescriptionEXT> divisorDesc;
};

struct GraphicsPipelineShadersVulkanStructs
{
    VkPipelineViewportStateCreateInfo viewportState                               = {};
    VkPipelineRasterizationStateCreateInfo rasterState                            = {};
    VkPipelineDepthStencilStateCreateInfo depthStencilState                       = {};
    VkPipelineTessellationStateCreateInfo tessellationState                       = {};
    VkPipelineTessellationDomainOriginStateCreateInfo domainOriginState           = {};
    VkPipelineViewportDepthClipControlCreateInfoEXT depthClipControl              = {};
    VkPipelineRasterizationLineStateCreateInfoEXT rasterLineState                 = {};
    VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provokingVertexState = {};
    VkPipelineRasterizationStateStreamCreateInfoEXT rasterStreamState             = {};
    VkSpecializationInfo specializationInfo                                       = {};

    // Support storage
    angle::FixedVector<VkPipelineShaderStageCreateInfo, 5> shaderStages;
    SpecializationConstantMap<VkSpecializationMapEntry> specializationEntries;
};

struct GraphicsPipelineSharedNonVertexInputVulkanStructs
{
    VkPipelineMultisampleStateCreateInfo multisampleState = {};

    // Support storage
    uint32_t sampleMask;
};

struct GraphicsPipelineFragmentOutputVulkanStructs
{
    VkPipelineColorBlendStateCreateInfo blendState = {};

    // Support storage
    gl::DrawBuffersArray<VkPipelineColorBlendAttachmentState> blendAttachmentState;
};

ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

using GraphicsPipelineDynamicStateList = angle::FixedVector<VkDynamicState, 22>;

// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash(GraphicsPipelineSubset subset) const;
    bool keyEqual(const GraphicsPipelineDesc &other, GraphicsPipelineSubset subset) const;

    void initDefaults(const ContextVk *contextVk, GraphicsPipelineSubset subset);

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    VkResult initializePipeline(Context *context,
                                PipelineCacheAccess *pipelineCache,
                                GraphicsPipelineSubset subset,
                                const RenderPass &compatibleRenderPass,
                                const PipelineLayout &pipelineLayout,
                                const ShaderModuleMap &shaders,
                                const SpecializationConstants &specConsts,
                                Pipeline *pipelineOut,
                                CacheLookUpFeedback *feedbackOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(ContextVk *contextVk,
                           GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           bool compressed,
                           GLuint relativeOffset);
    void setVertexShaderComponentTypes(gl::AttributesMask activeAttribLocations,
                                       gl::ComponentTypeMask componentTypeMask);
    void updateVertexShaderComponentTypes(GraphicsPipelineTransitionBits *transition,
                                          gl::AttributesMask activeAttribLocations,
                                          gl::ComponentTypeMask componentTypeMask);

    // Input assembly info
    void setTopology(gl::PrimitiveMode drawMode);
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Viewport states
    void updateDepthClipControl(GraphicsPipelineTransitionBits *transition, bool negativeOneToOne);

    // Raster states
    void updatePolygonMode(GraphicsPipelineTransitionBits *transition, gl::PolygonMode polygonMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mSharedNonVertexInput.renderPass; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);
    void setRenderPassSampleCount(GLint samples);
    void setRenderPassFramebufferFetchMode(bool hasFramebufferFetch);
    bool getRenderPassFramebufferFetchMode() const
    {
        return mSharedNonVertexInput.renderPass.hasFramebufferFetch();
    }

    void setRenderPassColorAttachmentFormat(size_t colorIndexGL, angle::FormatID formatID);

    // Blend states
    void setSingleBlend(uint32_t colorIndexGL,
                        bool enabled,
                        VkBlendOp op,
                        VkBlendFactor srcFactor,
                        VkBlendFactor dstFactor);
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
                            gl::DrawBufferMask blendEnabledMask);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendStateExt &blendStateExt,
                          gl::DrawBufferMask attachmentMask);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendStateExt &blendStateExt,
                              gl::DrawBufferMask attachmentMask);
    void resetBlendFuncsAndEquations(GraphicsPipelineTransitionBits *transition,
                                     const gl::BlendStateExt &blendStateExt,
                                     gl::DrawBufferMask previousAttachmentsMask,
                                     gl::DrawBufferMask newAttachmentsMask);
    void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                            const gl::DrawBufferMask &alphaMask,
                            const gl::DrawBufferMask &enabledDrawBuffers);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
                               gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                               const gl::DrawBufferMask &alphaMask,
                               const gl::DrawBufferMask &enabledDrawBuffers);
    void updateMissingOutputsMask(GraphicsPipelineTransitionBits *transition,
                                  gl::DrawBufferMask missingOutputsMask);

    // Logic op
    void updateLogicOpEnabled(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateLogicOp(GraphicsPipelineTransitionBits *transition, VkLogicOp logicOp);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setDepthClampEnabled(bool enabled);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(VkCompareOp compareOp);
    void setStencilBackFuncs(VkCompareOp compareOp);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthClampEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);

    // Depth offset.
    void updatePolygonOffsetEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);

    // Tessellation
    void updatePatchVertices(GraphicsPipelineTransitionBits *transition, GLuint value);

    // Subpass
    void resetSubpass(GraphicsPipelineTransitionBits *transition);
    void nextSubpass(GraphicsPipelineTransitionBits *transition);
    void setSubpass(uint32_t subpass);
    uint32_t getSubpass() const;

    void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
                               bool isRotatedAspectRatio);
    bool getSurfaceRotation() const { return mShaders.shaders.bits.surfaceRotation; }

    void updateEmulatedDitherControl(GraphicsPipelineTransitionBits *transition, uint16_t value);
    uint32_t getEmulatedDitherControl() const { return mShaders.shaders.emulatedDitherControl; }

    bool isLegacyDitherEnabled() const
    {
        return mSharedNonVertexInput.renderPass.isLegacyDitherEnabled();
    }

    void updateNonZeroStencilWriteMaskWorkaround(GraphicsPipelineTransitionBits *transition,
                                                 bool enabled);

    void setSupportsDynamicStateForTest(bool supports)
    {
        mVertexInput.inputAssembly.bits.useVertexInputBindingStrideDynamicState = supports;
        mShaders.shaders.bits.nonZeroStencilWriteMaskWorkaround = false;
    }

    // Helpers to dump the state
    const PipelineVertexInputState &getVertexInputStateForLog() const { return mVertexInput; }
    const PipelineShadersState &getShadersStateForLog() const { return mShaders; }
    const PipelineSharedNonVertexInputState &getSharedNonVertexInputStateForLog() const
    {
        return mSharedNonVertexInput;
    }
    const PipelineFragmentOutputState &getFragmentOutputStateForLog() const
    {
        return mFragmentOutput;
    }

  private:
    void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);

    const void *getPipelineSubsetMemory(GraphicsPipelineSubset subset, size_t *sizeOut) const;

    void initializePipelineVertexInputState(
        Context *context,
        GraphicsPipelineVertexInputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineShadersState(
        Context *context,
        const ShaderModuleMap &shaders,
        const SpecializationConstants &specConsts,
        GraphicsPipelineShadersVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineSharedNonVertexInputState(
        Context *context,
        GraphicsPipelineSharedNonVertexInputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineFragmentOutputState(
        Context *context,
        GraphicsPipelineFragmentOutputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    PipelineShadersState mShaders;
    PipelineSharedNonVertexInputState mSharedNonVertexInput;
    PipelineFragmentOutputState mFragmentOutput;
    PipelineVertexInputState mVertexInput;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;

// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void update(uint32_t bindingIndex,
                VkDescriptorType descriptorType,
                uint32_t count,
                VkShaderStageFlags stages,
                const Sampler *immutableSampler);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings,
                        std::vector<VkSampler> *immutableSamplers) const;

    bool empty() const { return *this == DescriptorSetLayoutDesc(); }

  private:
    // There is a small risk of an issue if the sampler cache is evicted but not the descriptor
    // cache: we would then have an invalid handle here. Thus we propose follow-up work:
    // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
        uint32_t pad;
        VkSampler immutableSampler;
    };

    // 4x 32bit
    static_assert(sizeof(PackedDescriptorSetBinding) == 16, "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
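
// Example (illustrative sketch): describing a layout with a single uniform buffer at binding 0,
// visible to the vertex and fragment stages:
//
//     DescriptorSetLayoutDesc desc;
//     desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1,
//                 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, nullptr);
//
// The desc can then be hashed and compared cheaply when looking up or creating the
// corresponding VkDescriptorSetLayout.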

// The following are for caching descriptor set layouts. Limited to max three descriptor set
// layouts. This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 3;

struct PackedPushConstantRange
{
    uint8_t offset;
    uint8_t size;
    uint16_t stageMask;
};

static_assert(sizeof(PackedPushConstantRange) == sizeof(uint32_t), "Unexpected Size");

template <typename T>
using DescriptorSetArray              = angle::PackedEnumMap<DescriptorSetIndex, T>;
using DescriptorSetLayoutPointerArray = DescriptorSetArray<BindingPointer<DescriptorSetLayout>>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
                                   const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(VkShaderStageFlags stageMask, uint32_t offset, uint32_t size);

    const PackedPushConstantRange &getPushConstantRange() const { return mPushConstantRange; }

  private:
    DescriptorSetArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PackedPushConstantRange mPushConstantRange;
    ANGLE_MAYBE_UNUSED_PRIVATE_FIELD uint32_t mPadding;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) == sizeof(DescriptorSetArray<DescriptorSetLayoutDesc>) +
                                                sizeof(PackedPushConstantRange) + sizeof(uint32_t),
              "Unexpected Size");
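
// Example (illustrative sketch, reusing |desc| from the DescriptorSetLayoutDesc example above):
// building a pipeline layout key from per-set layout descriptions plus a push constant range
// for the vertex stage:
//
//     PipelineLayoutDesc layoutDesc;
//     layoutDesc.updateDescriptorSetLayout(DescriptorSetIndex::UniformsAndXfb, desc);
//     layoutDesc.updatePushConstantRange(VK_SHADER_STAGE_VERTEX_BIT, 0, 16);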

class YcbcrConversionDesc final
{
  public:
    YcbcrConversionDesc();
    ~YcbcrConversionDesc();
    YcbcrConversionDesc(const YcbcrConversionDesc &other);
    YcbcrConversionDesc &operator=(const YcbcrConversionDesc &other);

    size_t hash() const;
    bool operator==(const YcbcrConversionDesc &other) const;

    bool valid() const { return mExternalOrVkFormat != 0; }
    void reset();
    void update(RendererVk *rendererVk,
                uint64_t externalFormat,
                VkSamplerYcbcrModelConversion conversionModel,
                VkSamplerYcbcrRange colorRange,
                VkChromaLocation xChromaOffset,
                VkChromaLocation yChromaOffset,
                VkFilter chromaFilter,
                VkComponentMapping components,
                angle::FormatID intendedFormatID);
    VkFilter getChromaFilter() const { return static_cast<VkFilter>(mChromaFilter); }
    bool updateChromaFilter(RendererVk *rendererVk, VkFilter filter);
    void updateConversionModel(VkSamplerYcbcrModelConversion conversionModel);
    uint64_t getExternalFormat() const { return mIsExternalFormat ? mExternalOrVkFormat : 0; }

    angle::Result init(Context *context, SamplerYcbcrConversion *conversionOut) const;

  private:
    // If the sampler needs to convert the image content (e.g. from YUV to RGB) then
    // mExternalOrVkFormat will be non-zero. The value is either the external format
    // as returned by vkGetAndroidHardwareBufferPropertiesANDROID or a YUV VkFormat.
    // For VkSamplerYcbcrConversion, mExternalOrVkFormat along with mIsExternalFormat,
    // mConversionModel and mColorRange work like a Serial() used elsewhere in ANGLE.
    uint64_t mExternalOrVkFormat;
    // 1 bit to identify if an external format is used
    uint32_t mIsExternalFormat : 1;
    // 3 bits to identify the conversion model
    uint32_t mConversionModel : 3;
    // 1 bit to identify the color component range
    uint32_t mColorRange : 1;
    // 1 bit to identify the x chroma location
    uint32_t mXChromaOffset : 1;
    // 1 bit to identify the y chroma location
    uint32_t mYChromaOffset : 1;
    // 1 bit to identify the chroma filtering
    uint32_t mChromaFilter : 1;
    // 3 bits to identify the R component swizzle
    uint32_t mRSwizzle : 3;
    // 3 bits to identify the G component swizzle
    uint32_t mGSwizzle : 3;
    // 3 bits to identify the B component swizzle
    uint32_t mBSwizzle : 3;
    // 3 bits to identify the A component swizzle
    uint32_t mASwizzle : 3;
    uint32_t mPadding : 12;
    uint32_t mReserved;
};

static_assert(sizeof(YcbcrConversionDesc) == 16, "Unexpected YcbcrConversionDesc size");

// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    SamplerDesc(ContextVk *contextVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    void update(ContextVk *contextVk,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    void reset();
    angle::Result init(ContextVk *contextVk, Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // 16*8 bits to uniquely identify a YCbCr conversion sampler.
    YcbcrConversionDesc mYcbcrConversionDesc;

    // 32 bits for modes + states (17 bits used, 15 bits of padding).
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op. (8 possible values)
    uint16_t mCompareOp : 3;

    // Values from angle::ColorGeneric::Type. Float is 0 and others are 1.
    uint16_t mBorderColorType : 1;

    uint16_t mPadding : 15;

    // 16*8 bits for BorderColor
    angle::ColorF mBorderColor;

    // 32 bits reserved for future use.
    uint32_t mReserved;
};

static_assert(sizeof(SamplerDesc) == 56, "Unexpected SamplerDesc size");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
    // of the code faster. The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions. Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}

// A class that encapsulates the vk::PipelineCache and associated mutex. The mutex may be nullptr
// if synchronization is not necessary.
class PipelineCacheAccess
{
  public:
    PipelineCacheAccess()  = default;
    ~PipelineCacheAccess() = default;

    void init(const vk::PipelineCache *pipelineCache, std::mutex *mutex)
    {
        mPipelineCache = pipelineCache;
        mMutex         = mutex;
    }

    VkResult createGraphicsPipeline(vk::Context *context,
                                    const VkGraphicsPipelineCreateInfo &createInfo,
                                    vk::Pipeline *pipelineOut);
    VkResult createComputePipeline(vk::Context *context,
                                   const VkComputePipelineCreateInfo &createInfo,
                                   vk::Pipeline *pipelineOut);

    void merge(RendererVk *renderer, const vk::PipelineCache &pipelineCache);

    bool isThreadSafe() const { return mMutex != nullptr; }

  private:
    std::unique_lock<std::mutex> getLock();

    const vk::PipelineCache *mPipelineCache = nullptr;
    std::mutex *mMutex;
};

// Monolithic pipeline creation tasks are created as soon as a pipeline is created out of libraries.
// However, they are not immediately posted to the worker queue to allow pacing. On each use of a
// pipeline, an attempt is made to post the task.
class CreateMonolithicPipelineTask : public Context, public angle::Closure
{
  public:
    CreateMonolithicPipelineTask(RendererVk *renderer,
                                 const PipelineCacheAccess &pipelineCache,
                                 const PipelineLayout &pipelineLayout,
                                 const ShaderModuleMap &shaders,
                                 const SpecializationConstants &specConsts,
                                 const GraphicsPipelineDesc &desc);

    // The compatible render pass is set only when the task is ready to run. This is because the
    // render pass cache may have been cleared since the task was created (e.g. to accommodate
1270 // framebuffer fetch). Such render pass cache clears ensure there are no active tasks, so it's
1271 // safe to hold on to this pointer for the brief period between task post and completion.
getRenderPassDesc()1272 const RenderPassDesc &getRenderPassDesc() const { return mDesc.getRenderPassDesc(); }
1273 void setCompatibleRenderPass(const RenderPass *compatibleRenderPass);
1274
1275 void operator()() override;
1276
getResult()1277 VkResult getResult() const { return mResult; }
getPipeline()1278 Pipeline &getPipeline() { return mPipeline; }
getFeedback()1279 CacheLookUpFeedback getFeedback() const { return mFeedback; }
1280
1281 void handleError(VkResult result,
1282 const char *file,
1283 const char *function,
1284 unsigned int line) override;
1285
1286 private:
1287 // Input to pipeline creation
1288 PipelineCacheAccess mPipelineCache;
1289 const RenderPass *mCompatibleRenderPass;
1290 const PipelineLayout &mPipelineLayout;
1291 const ShaderModuleMap &mShaders;
1292 SpecializationConstants mSpecConsts;
1293 GraphicsPipelineDesc mDesc;
1294
1295 // Results
1296 VkResult mResult;
1297 Pipeline mPipeline;
1298 CacheLookUpFeedback mFeedback;
1299 };
1300
1301 class WaitableMonolithicPipelineCreationTask
1302 {
1303 public:
1304 ~WaitableMonolithicPipelineCreationTask();
1305
setTask(std::shared_ptr<CreateMonolithicPipelineTask> && task)1306 void setTask(std::shared_ptr<CreateMonolithicPipelineTask> &&task) { mTask = std::move(task); }
setRenderPass(const RenderPass * compatibleRenderPass)1307 void setRenderPass(const RenderPass *compatibleRenderPass)
1308 {
1309 mTask->setCompatibleRenderPass(compatibleRenderPass);
1310 }
onSchedule(const std::shared_ptr<angle::WaitableEvent> & waitableEvent)1311 void onSchedule(const std::shared_ptr<angle::WaitableEvent> &waitableEvent)
1312 {
1313 mWaitableEvent = waitableEvent;
1314 }
reset()1315 void reset()
1316 {
1317 mWaitableEvent.reset();
1318 mTask.reset();
1319 }
1320
isValid()1321 bool isValid() const { return mTask.get() != nullptr; }
isPosted()1322 bool isPosted() const { return mWaitableEvent.get() != nullptr; }
isReady()1323 bool isReady() { return mWaitableEvent->isReady(); }
wait()1324 void wait() { return mWaitableEvent->wait(); }
1325
getTask()1326 std::shared_ptr<CreateMonolithicPipelineTask> getTask() const { return mTask; }
1327
1328 private:
1329 std::shared_ptr<angle::WaitableEvent> mWaitableEvent;
1330 std::shared_ptr<CreateMonolithicPipelineTask> mTask;
1331 };

class PipelineHelper final : public Resource
{
  public:
    PipelineHelper();
    ~PipelineHelper() override;
    inline explicit PipelineHelper(Pipeline &&pipeline, CacheLookUpFeedback feedback);

    void destroy(VkDevice device);
    void release(ContextVk *contextVk);

    bool valid() const { return mPipeline.valid(); }
    const Pipeline &getPipeline() const { return mPipeline; }

    // Get the pipeline. If a monolithic pipeline creation task is pending, attempt to schedule
    // it. If that task has finished, replace the pipeline with its result and release the old
    // pipeline.
    angle::Result getPreferredPipeline(ContextVk *contextVk, const Pipeline **pipelineOut);

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

    const std::vector<GraphicsPipelineTransition> &getTransitions() const { return mTransitions; }

    void setComputePipeline(Pipeline &&pipeline, CacheLookUpFeedback feedback)
    {
        ASSERT(!mPipeline.valid());
        mPipeline = std::move(pipeline);

        ASSERT(mCacheLookUpFeedback == CacheLookUpFeedback::None);
        mCacheLookUpFeedback = feedback;
    }
    CacheLookUpFeedback getCacheLookUpFeedback() const { return mCacheLookUpFeedback; }

    void setLinkedLibraryReferences(vk::PipelineHelper *shadersPipeline);

    void retainInRenderPass(RenderPassCommandBufferHelper *renderPassCommands);

    void setMonolithicPipelineCreationTask(std::shared_ptr<CreateMonolithicPipelineTask> &&task)
    {
        mMonolithicPipelineCreationTask.setTask(std::move(task));
    }

  private:
    void reset();

    std::vector<GraphicsPipelineTransition> mTransitions;
    Pipeline mPipeline;
    CacheLookUpFeedback mCacheLookUpFeedback = CacheLookUpFeedback::None;
    CacheLookUpFeedback mMonolithicCacheLookUpFeedback = CacheLookUpFeedback::None;

    // The list of pipeline helpers that were referenced when creating a linked pipeline. These
    // pipelines must be kept alive, so their serial is updated at the same time as this object.
    // Not necessary for vertex input and fragment output as they stay alive until context's
    // destruction.
    PipelineHelper *mLinkedShaders = nullptr;

    // If pipeline libraries are used and monolithic pipelines are created in parallel, this is
    // the temporary library created (previously in |mPipeline|) that is now replaced by the
    // monolithic one. It is not immediately garbage collected when replaced, because there is
    // currently a bug with that. http://anglebug.com/7862
    Pipeline mLinkedPipelineToRelease;

    // An async task to create a monolithic pipeline. Only used if the pipeline was originally
    // created as a linked library. The |getPreferredPipeline()| call will attempt to schedule
    // this task through the share group, which manages and paces these tasks. Once the task
    // results are ready, |mPipeline| is released and replaced by the result of this task.
    WaitableMonolithicPipelineCreationTask mMonolithicPipelineCreationTask;
};
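
// A sketch of how the transition links above are typically used; |dirtyBits|, |newDesc| and
// |current| are illustrative:
//
//     vk::PipelineHelper *target = nullptr;
//     if (current->findTransition(dirtyBits, newDesc, &target))
//     {
//         // Hit: reuse the previously recorded target pipeline.
//     }
//     else
//     {
//         // Miss: look up or create the pipeline in the cache, then record the edge so the
//         // same transition is found directly next time:
//         current->addTransition(dirtyBits, descPtr, target);
//     }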

class FramebufferHelper : public Resource
{
  public:
    FramebufferHelper();
    ~FramebufferHelper() override;

    FramebufferHelper(FramebufferHelper &&other);
    FramebufferHelper &operator=(FramebufferHelper &&other);

    angle::Result init(ContextVk *contextVk, const VkFramebufferCreateInfo &createInfo);
    void destroy(RendererVk *rendererVk);
    void release(ContextVk *contextVk);

    bool valid() const { return mFramebuffer.valid(); }

    const Framebuffer &getFramebuffer() const
    {
        ASSERT(mFramebuffer.valid());
        return mFramebuffer;
    }

    Framebuffer &getFramebuffer()
    {
        ASSERT(mFramebuffer.valid());
        return mFramebuffer;
    }

  private:
    // Vulkan object.
    Framebuffer mFramebuffer;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline, CacheLookUpFeedback feedback)
    : mPipeline(std::move(pipeline)), mCacheLookUpFeedback(feedback)
{}

struct ImageSubresourceRange
{
    // GL max is 1000 (fits in 10 bits).
    uint32_t level : 10;
    // Max 31 levels (2 ** 5 - 1). Can store levelCount-1 if we need to save another bit.
    uint32_t levelCount : 5;
    // Implementation max is 2048 (fits in 12 bits).
    uint32_t layer : 12;
    // One of vk::LayerMode values. If 0, it means all layers. Otherwise it's the count of
    // layers, which is usually 1, except for multiview in which case it can be up to
    // gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS.
    uint32_t layerMode : 3;
    // Values from vk::SrgbDecodeMode. Unused with draw views.
    uint32_t srgbDecodeMode : 1;
    // For read views: Values from gl::SrgbOverride, either Default or SRGB.
    // For draw views: Values from gl::SrgbWriteControlMode.
    uint32_t srgbMode : 1;

    static_assert(gl::IMPLEMENTATION_MAX_TEXTURE_LEVELS < (1 << 5),
                  "Not enough bits for level count");
    static_assert(gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS <= (1 << 12),
                  "Not enough bits for layer index");
    static_assert(gl::IMPLEMENTATION_ANGLE_MULTIVIEW_MAX_VIEWS <= (1 << 3),
                  "Not enough bits for layer count");
};

static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t), "Size mismatch");

inline bool operator==(const ImageSubresourceRange &a, const ImageSubresourceRange &b)
{
    return a.level == b.level && a.levelCount == b.levelCount && a.layer == b.layer &&
           a.layerMode == b.layerMode && a.srgbDecodeMode == b.srgbDecodeMode &&
           a.srgbMode == b.srgbMode;
}

constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {0, 0, 0, 0, 0, 0};
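
// For illustration, a range covering mip levels [2, 6) of a single layer (layer index 1) could
// be packed as follows; per the field comments above, a non-zero |layerMode| is a layer count:
//
//     ImageSubresourceRange range = kInvalidImageSubresourceRange;
//     range.level      = 2;
//     range.levelCount = 4;
//     range.layer      = 1;
//     range.layerMode  = 1;
//
// Since the struct occupies exactly 32 bits, the std::hash specialization at the end of this
// file can hash it by reinterpreting it as a uint32_t.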

struct ImageOrBufferViewSubresourceSerial
{
    ImageOrBufferViewSerial viewSerial;
    ImageSubresourceRange subresource;
};

inline bool operator==(const ImageOrBufferViewSubresourceSerial &a,
                       const ImageOrBufferViewSubresourceSerial &b)
{
    return a.viewSerial == b.viewSerial && a.subresource == b.subresource;
}

constexpr ImageOrBufferViewSubresourceSerial kInvalidImageOrBufferViewSubresourceSerial = {
    kInvalidImageOrBufferViewSerial, kInvalidImageSubresourceRange};

// Always starts with array element zero, with descriptorCount descriptors.
struct WriteDescriptorDesc
{
    uint8_t binding;              // Redundant: determined by the containing WriteDesc array.
    uint8_t descriptorCount;      // Number of array elements in this descriptor write.
    uint8_t descriptorType;       // Packed VkDescriptorType.
    uint8_t descriptorInfoIndex;  // Base index into an array of DescriptorInfoDescs.
};

static_assert(sizeof(WriteDescriptorDesc) == 4, "Size mismatch");

struct DescriptorInfoDesc
{
    uint32_t samplerOrBufferSerial;
    uint32_t imageViewSerialOrOffset;
    uint32_t imageLayoutOrRange;  // Packed VkImageLayout
    uint32_t imageSubresourceRange;
    uint32_t binding;  // TODO(anglebug.com/7974): Could be made implicit?
};

static_assert(sizeof(DescriptorInfoDesc) == 20, "Size mismatch");

// Generic description of a descriptor set. Used as a key when indexing descriptor set caches.
// The key storage is an angle::FastVector. Beyond a certain fixed size we'll end up using heap
// memory to store keys. Currently we specialize the structure for three use cases: uniforms,
// textures, and other shader resources. Because of the way the specialization works we can't
// currently cache programs that use some types of resources.
static constexpr size_t kFastDescriptorSetDescLimit = 8;

struct DescriptorDescHandles
{
    VkBuffer buffer;
    VkSampler sampler;
    VkImageView imageView;
    VkBufferView bufferView;
};

class WriteDescriptorDescs
{
  public:
    void reset()
    {
        mDescs.clear();
        mDynamicDescriptorSetCount = 0;
        mCurrentInfoIndex = 0;
    }

    void updateShaderBuffers(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                             const std::vector<gl::InterfaceBlock> &blocks,
                             VkDescriptorType descriptorType);

    void updateAtomicCounters(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers);

    void updateImages(const gl::ProgramExecutable &executable,
                      const ShaderInterfaceVariableInfoMap &variableInfoMap);

    void updateInputAttachments(const gl::ProgramExecutable &executable,
                                const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                FramebufferVk *framebufferVk);

    void updateExecutableActiveTextures(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                        const gl::ProgramExecutable &executable);

    void updateDefaultUniform(gl::ShaderBitSet shaderTypes,
                              const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const gl::ProgramExecutable &executable);

    void updateTransformFeedbackWrite(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                      const gl::ProgramExecutable &executable);

    void updateDynamicDescriptorsCount();

    size_t size() const { return mDescs.size(); }
    bool empty() const { return mDescs.size() == 0; }

    const WriteDescriptorDesc &operator[](uint32_t bindingIndex) const
    {
        return mDescs[bindingIndex];
    }

    size_t getTotalDescriptorCount() const { return mCurrentInfoIndex; }
    size_t getDynamicDescriptorSetCount() const { return mDynamicDescriptorSetCount; }

    void streamOut(std::ostream &os) const;

  private:
    bool hasWriteDescAtIndex(uint32_t bindingIndex) const
    {
        return bindingIndex < mDescs.size() && mDescs[bindingIndex].descriptorCount > 0;
    }

    void incrementDescriptorCount(uint32_t bindingIndex, uint32_t count)
    {
        // A write description must already exist at this binding before its count can grow.
        ASSERT(hasWriteDescAtIndex(bindingIndex));
        mDescs[bindingIndex].descriptorCount += count;
    }

    void updateWriteDesc(uint32_t bindingIndex,
                         VkDescriptorType descriptorType,
                         uint32_t descriptorCount);

    // After a preliminary minimum size, use heap memory.
    angle::FastMap<WriteDescriptorDesc, kFastDescriptorSetDescLimit> mDescs;
    size_t mDynamicDescriptorSetCount = 0;
    uint32_t mCurrentInfoIndex = 0;
};
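
// A sketch of how these write descriptions are typically accumulated before descriptor sets
// are written. |variableInfoMap| and |executable| are assumed inputs; the getters used on
// |executable| are illustrative:
//
//     WriteDescriptorDescs writeDescs;
//     writeDescs.updateShaderBuffers(variableInfoMap, executable.getUniformBlocks(),
//                                    VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
//     writeDescs.updateImages(executable, variableInfoMap);
//     writeDescs.updateDynamicDescriptorsCount();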

class DescriptorSetDesc
{
  public:
    DescriptorSetDesc() = default;
    ~DescriptorSetDesc() = default;

    DescriptorSetDesc(const DescriptorSetDesc &other) : mDescriptorInfos(other.mDescriptorInfos) {}

    DescriptorSetDesc &operator=(const DescriptorSetDesc &other)
    {
        mDescriptorInfos = other.mDescriptorInfos;
        return *this;
    }

    size_t hash() const;

    void resize(size_t count) { mDescriptorInfos.resize(count); }

    size_t getKeySizeBytes() const { return mDescriptorInfos.size() * sizeof(DescriptorInfoDesc); }

    bool operator==(const DescriptorSetDesc &other) const
    {
        return mDescriptorInfos.size() == other.mDescriptorInfos.size() &&
               memcmp(mDescriptorInfos.data(), other.mDescriptorInfos.data(),
                      mDescriptorInfos.size() * sizeof(DescriptorInfoDesc)) == 0;
    }

    DescriptorInfoDesc &getInfoDesc(uint32_t infoDescIndex)
    {
        return mDescriptorInfos[infoDescIndex];
    }

    void updateDescriptorSet(Context *context,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             UpdateDescriptorSetsBuilder *updateBuilder,
                             const DescriptorDescHandles *handles,
                             VkDescriptorSet descriptorSet) const;

    void streamOut(std::ostream &os) const;

  private:
    // After a preliminary minimum size, use heap memory.
    angle::FastVector<DescriptorInfoDesc, kFastDescriptorSetDescLimit> mDescriptorInfos;
};

class DescriptorPoolHelper;
using RefCountedDescriptorPoolHelper = RefCounted<DescriptorPoolHelper>;

// SharedDescriptorSetCacheKey.
// Because a descriptor set must be associated with a pool, we need a structure that wraps both.
struct DescriptorSetDescAndPool
{
    DescriptorSetDesc mDesc;
    DynamicDescriptorPool *mPool;
};
using DescriptorSetAndPoolPointer = std::unique_ptr<DescriptorSetDescAndPool>;
using SharedDescriptorSetCacheKey = std::shared_ptr<DescriptorSetAndPoolPointer>;
ANGLE_INLINE const SharedDescriptorSetCacheKey
CreateSharedDescriptorSetCacheKey(const DescriptorSetDesc &desc, DynamicDescriptorPool *pool)
{
    DescriptorSetAndPoolPointer descriptorAndPoolPointer =
        std::make_unique<DescriptorSetDescAndPool>(DescriptorSetDescAndPool{desc, pool});
    return std::make_shared<DescriptorSetAndPoolPointer>(std::move(descriptorAndPoolPointer));
}
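
// Sketch: the returned key is shared between a descriptor set cache and the objects whose
// destruction should invalidate the entry; both sides hold the same shared_ptr (see
// SharedCacheKeyManager below):
//
//     SharedDescriptorSetCacheKey sharedKey = CreateSharedDescriptorSetCacheKey(desc, &pool);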

constexpr VkDescriptorType kStorageBufferDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

// Manages a descriptor set desc with a few helper routines and also stores object handles.
class DescriptorSetDescBuilder final
{
  public:
    DescriptorSetDescBuilder();
    DescriptorSetDescBuilder(size_t descriptorCount);
    ~DescriptorSetDescBuilder();

    DescriptorSetDescBuilder(const DescriptorSetDescBuilder &other);
    DescriptorSetDescBuilder &operator=(const DescriptorSetDescBuilder &other);

    const DescriptorSetDesc &getDesc() const { return mDesc; }

    void resize(size_t descriptorCount)
    {
        mDesc.resize(descriptorCount);
        mHandles.resize(descriptorCount);
        mDynamicOffsets.resize(descriptorCount);
    }

    // Specific helpers for uniforms/xfb descriptors.
    void updateUniformBuffer(uint32_t shaderIndex,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             const BufferHelper &bufferHelper,
                             VkDeviceSize bufferRange);

    void updateTransformFeedbackBuffer(const Context *context,
                                       const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                       const WriteDescriptorDescs &writeDescriptorDescs,
                                       uint32_t xfbBufferIndex,
                                       const BufferHelper &bufferHelper,
                                       VkDeviceSize bufferOffset,
                                       VkDeviceSize bufferRange);

    void updateUniformsAndXfb(Context *context,
                              const gl::ProgramExecutable &executable,
                              const ProgramExecutableVk &executableVk,
                              const WriteDescriptorDescs &writeDescriptorDescs,
                              const BufferHelper *currentUniformBuffer,
                              const BufferHelper &emptyBuffer,
                              bool activeUnpaused,
                              TransformFeedbackVk *transformFeedbackVk);

    // Specific helpers for shader resource descriptors.
    template <typename CommandBufferT>
    void updateOneShaderBuffer(ContextVk *contextVk,
                               CommandBufferT *commandBufferHelper,
                               const ShaderInterfaceVariableInfoMap &variableInfoMap,
                               const gl::BufferVector &buffers,
                               const std::vector<gl::InterfaceBlock> &blocks,
                               uint32_t blockIndex,
                               VkDescriptorType descriptorType,
                               VkDeviceSize maxBoundBufferRange,
                               const BufferHelper &emptyBuffer,
                               const WriteDescriptorDescs &writeDescriptorDescs);
    template <typename CommandBufferT>
    void updateShaderBuffers(ContextVk *contextVk,
                             CommandBufferT *commandBufferHelper,
                             const ShaderInterfaceVariableInfoMap &variableInfoMap,
                             const gl::BufferVector &buffers,
                             const std::vector<gl::InterfaceBlock> &blocks,
                             VkDescriptorType descriptorType,
                             VkDeviceSize maxBoundBufferRange,
                             const BufferHelper &emptyBuffer,
                             const WriteDescriptorDescs &writeDescriptorDescs);
    template <typename CommandBufferT>
    void updateAtomicCounters(ContextVk *contextVk,
                              CommandBufferT *commandBufferHelper,
                              const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const gl::BufferVector &buffers,
                              const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers,
                              const VkDeviceSize requiredOffsetAlignment,
                              const BufferHelper &emptyBuffer,
                              const WriteDescriptorDescs &writeDescriptorDescs);
    angle::Result updateImages(Context *context,
                               const gl::ProgramExecutable &executable,
                               const ShaderInterfaceVariableInfoMap &variableInfoMap,
                               const gl::ActiveTextureArray<TextureVk *> &activeImages,
                               const std::vector<gl::ImageUnit> &imageUnits,
                               const WriteDescriptorDescs &writeDescriptorDescs);
    angle::Result updateInputAttachments(vk::Context *context,
                                         const gl::ProgramExecutable &executable,
                                         const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                         FramebufferVk *framebufferVk,
                                         const WriteDescriptorDescs &writeDescriptorDescs);

    // Specific helpers for image descriptors.
    void updatePreCacheActiveTextures(const gl::ActiveTextureMask &activeTextures,
                                      const gl::ActiveTextureArray<TextureVk *> &textures,
                                      const gl::SamplerBindingVector &samplers);

    angle::Result updateFullActiveTextures(Context *context,
                                           const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                           const WriteDescriptorDescs &writeDescriptorDescs,
                                           const gl::ProgramExecutable &executable,
                                           const gl::ActiveTextureArray<TextureVk *> &textures,
                                           const gl::SamplerBindingVector &samplers,
                                           bool emulateSeamfulCubeMapSampling,
                                           PipelineType pipelineType,
                                           const SharedDescriptorSetCacheKey &sharedCacheKey);

    void updateDescriptorSet(Context *context,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             UpdateDescriptorSetsBuilder *updateBuilder,
                             VkDescriptorSet descriptorSet) const;

    const uint32_t *getDynamicOffsets() const { return mDynamicOffsets.data(); }
    size_t getDynamicOffsetsSize() const { return mDynamicOffsets.size(); }

  private:
    void setEmptyBuffer(uint32_t infoDescIndex,
                        VkDescriptorType descriptorType,
                        const BufferHelper &emptyBuffer);

    DescriptorSetDesc mDesc;
    angle::FastVector<DescriptorDescHandles, kFastDescriptorSetDescLimit> mHandles;
    angle::FastVector<uint32_t, kFastDescriptorSetDescLimit> mDynamicOffsets;
};
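
// A typical flow (sketch; |writeDescs|, |updateBuilder| and the other names are illustrative):
// size the builder from the write descriptions, fill in per-binding info, then emit the Vulkan
// writes in one call:
//
//     DescriptorSetDescBuilder builder(writeDescs.getTotalDescriptorCount());
//     builder.updateUniformBuffer(shaderIndex, writeDescs, uniformBuffer, bufferRange);
//     builder.updateDescriptorSet(context, writeDescs, updateBuilder, descriptorSet);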

// Specialized update for textures.
void UpdatePreCacheActiveTextures(const gl::ProgramExecutable &executable,
                                  const ProgramExecutableVk &executableVk,
                                  const std::vector<gl::SamplerBinding> &samplerBindings,
                                  const gl::ActiveTextureMask &activeTextures,
                                  const gl::ActiveTextureArray<TextureVk *> &textures,
                                  const gl::SamplerBindingVector &samplers,
                                  DescriptorSetDesc *desc);

// In the FramebufferDesc object:
// - Depth/stencil serial is at index 0
// - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
// - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
// - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
//   gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
constexpr size_t kFramebufferDescColorIndexOffset = kFramebufferDescDepthStencilIndex + 1;
constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
    kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
constexpr size_t kFramebufferDescColorResolveIndexOffset =
    kFramebufferDescDepthStencilResolveIndexOffset + 1;
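
// For example, with gl::IMPLEMENTATION_MAX_DRAW_BUFFERS equal to 8, color attachment 3 lands
// at serial index 1 + 3 == 4, and its resolve attachment at (1 + 8 + 1) + 3 == 13:
//
//     constexpr size_t kColorIndex3        = kFramebufferDescColorIndexOffset + 3;
//     constexpr size_t kColorResolveIndex3 = kFramebufferDescColorResolveIndexOffset + 3;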

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void updateColor(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateColorResolve(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
    void updateDepthStencil(ImageOrBufferViewSubresourceSerial serial);
    void updateDepthStencilResolve(ImageOrBufferViewSubresourceSerial serial);
    ANGLE_INLINE void setWriteControlMode(gl::SrgbWriteControlMode mode)
    {
        mSrgbWriteControlMode = static_cast<uint16_t>(mode);
    }
    void updateIsMultiview(bool isMultiview) { mIsMultiview = isMultiview; }
    size_t hash() const;

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

    ImageOrBufferViewSubresourceSerial getColorImageViewSerial(uint32_t index)
    {
        ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
        return mSerials[kFramebufferDescColorIndexOffset + index];
    }

    FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;
    ANGLE_INLINE gl::SrgbWriteControlMode getWriteControlMode() const
    {
        return (mSrgbWriteControlMode == 1) ? gl::SrgbWriteControlMode::Linear
                                            : gl::SrgbWriteControlMode::Default;
    }

    void updateLayerCount(uint32_t layerCount);
    uint32_t getLayerCount() const { return mLayerCount; }
    void setFramebufferFetchMode(bool hasFramebufferFetch);
    bool hasFramebufferFetch() const { return mHasFramebufferFetch; }

    bool isMultiview() const { return mIsMultiview; }

    void updateRenderToTexture(bool isRenderToTexture);

  private:
    void reset();
    void update(uint32_t index, ImageOrBufferViewSubresourceSerial serial);

    // Note: this is an exclusive index; if there is one attachment, the value is 1. The maximum
    // value is 18.
    uint16_t mMaxIndex : 5;
    uint16_t mHasFramebufferFetch : 1;
    static_assert(gl::IMPLEMENTATION_MAX_FRAMEBUFFER_LAYERS < (1 << 9) - 1,
                  "Not enough bits for mLayerCount");

    uint16_t mLayerCount : 9;

    uint16_t mSrgbWriteControlMode : 1;

    // If the render pass contains an initial subpass to unresolve a number of attachments, the
    // subpass description is derived from the following mask, specifying which attachments need
    // to be unresolved. Includes both color and depth/stencil attachments.
    uint16_t mUnresolveAttachmentMask : kMaxFramebufferNonResolveAttachments;

    // Whether this is a multisampled-render-to-single-sampled framebuffer. Only used when using
    // VK_EXT_multisampled_render_to_single_sampled. Only one bit is used and the rest is padding.
    uint16_t mIsRenderToTexture : 15 - kMaxFramebufferNonResolveAttachments;

    uint16_t mIsMultiview : 1;

    FramebufferAttachmentArray<ImageOrBufferViewSubresourceSerial> mSerials;
};

constexpr size_t kFramebufferDescSize = sizeof(FramebufferDesc);
static_assert(kFramebufferDescSize == 148, "Size check failed");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

// SharedFramebufferCacheKey
using FramebufferDescPointer = std::unique_ptr<FramebufferDesc>;
using SharedFramebufferCacheKey = std::shared_ptr<FramebufferDescPointer>;
ANGLE_INLINE const SharedFramebufferCacheKey
CreateSharedFramebufferCacheKey(const FramebufferDesc &desc)
{
    FramebufferDescPointer framebufferDescPointer = std::make_unique<FramebufferDesc>(desc);
    return std::make_shared<FramebufferDescPointer>(std::move(framebufferDescPointer));
}

// The SamplerHelper allows a Sampler to be coupled with a serial.
// Must be declared before SamplerCache.
class SamplerHelper final : angle::NonCopyable
{
  public:
    SamplerHelper(ContextVk *contextVk);
    ~SamplerHelper();

    explicit SamplerHelper(SamplerHelper &&samplerHelper);
    SamplerHelper &operator=(SamplerHelper &&rhs);

    bool valid() const { return mSampler.valid(); }
    const Sampler &get() const { return mSampler; }
    Sampler &get() { return mSampler; }
    SamplerSerial getSamplerSerial() const { return mSamplerSerial; }

  private:
    Sampler mSampler;
    SamplerSerial mSamplerSerial;
};

using RefCountedSampler = RefCounted<SamplerHelper>;
using SamplerBinding = BindingPointer<SamplerHelper>;

class RenderPassHelper final : angle::NonCopyable
{
  public:
    RenderPassHelper();
    ~RenderPassHelper();

    RenderPassHelper(RenderPassHelper &&other);
    RenderPassHelper &operator=(RenderPassHelper &&other);

    void destroy(VkDevice device);
    void release(ContextVk *contextVk);

    const RenderPass &getRenderPass() const;
    RenderPass &getRenderPass();

    const RenderPassPerfCounters &getPerfCounters() const;
    RenderPassPerfCounters &getPerfCounters();

  private:
    RenderPass mRenderPass;
    RenderPassPerfCounters mPerfCounters;
};

// Helper class that manages the lifetime of various cache objects so that a cache entry can be
// destroyed when one of its components becomes invalid.
template <class SharedCacheKeyT>
class SharedCacheKeyManager
{
  public:
    SharedCacheKeyManager() = default;
    ~SharedCacheKeyManager() { ASSERT(empty()); }
    // Stores the pointer to the cache key and retains it.
    void addKey(const SharedCacheKeyT &key);
    // Iterates over the cache key array and releases each key and its cache entry.
    void releaseKeys(ContextVk *contextVk);
    // Iterates over the cache key array and destroys each key and its cache entry.
    void destroyKeys(RendererVk *renderer);
    void clear();

    // The following APIs are expected to be used for assertions only.
    bool containsKey(const SharedCacheKeyT &key) const;
    bool empty() const { return mSharedCacheKeys.empty(); }
    void assertAllEntriesDestroyed();

  private:
    // Tracks an array of cache keys with refcounting. Note this owns one refcount of each
    // SharedCacheKeyT object.
    std::vector<SharedCacheKeyT> mSharedCacheKeys;
};

using FramebufferCacheManager = SharedCacheKeyManager<SharedFramebufferCacheKey>;
using DescriptorSetCacheManager = SharedCacheKeyManager<SharedDescriptorSetCacheKey>;
}  // namespace vk
}  // namespace rx

// Introduce std::hash for the above classes.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::ImageSubresourceRange>
{
    size_t operator()(const rx::vk::ImageSubresourceRange &key) const
    {
        return *reinterpret_cast<const uint32_t *>(&key);
    }
};

template <>
struct hash<rx::vk::DescriptorSetDesc>
{
    size_t operator()(const rx::vk::DescriptorSetDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::YcbcrConversionDesc>
{
    size_t operator()(const rx::vk::YcbcrConversionDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};

// See Resource Serial types defined in vk_utils.h.
#define ANGLE_HASH_VK_SERIAL(Type)                               \
    template <>                                                  \
    struct hash<rx::vk::Type##Serial>                            \
    {                                                            \
        size_t operator()(const rx::vk::Type##Serial &key) const \
        {                                                        \
            return key.getValue();                               \
        }                                                        \
    };

ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)

}  // namespace std

namespace rx
{
// Cache types for various Vulkan objects
enum class VulkanCacheType
{
    CompatibleRenderPass,
    RenderPassWithOps,
    GraphicsPipeline,
    PipelineLayout,
    Sampler,
    SamplerYcbcrConversion,
    DescriptorSetLayout,
    DriverUniformsDescriptors,
    TextureDescriptors,
    UniformsAndXfbDescriptors,
    ShaderResourcesDescriptors,
    Framebuffer,
    DescriptorMetaCache,
    EnumCount
};

// Hit and miss counters plus a size, tracked for each of the caches below.
class CacheStats final : angle::NonCopyable
{
  public:
    CacheStats() { reset(); }
    ~CacheStats() {}

    CacheStats(const CacheStats &rhs)
        : mHitCount(rhs.mHitCount), mMissCount(rhs.mMissCount), mSize(rhs.mSize)
    {}

    CacheStats &operator=(const CacheStats &rhs)
    {
        mHitCount = rhs.mHitCount;
        mMissCount = rhs.mMissCount;
        mSize = rhs.mSize;
        return *this;
    }

    ANGLE_INLINE void hit() { mHitCount++; }
    ANGLE_INLINE void miss() { mMissCount++; }
    ANGLE_INLINE void incrementSize() { mSize++; }
    ANGLE_INLINE void decrementSize() { mSize--; }
    ANGLE_INLINE void missAndIncrementSize()
    {
        mMissCount++;
        mSize++;
    }
    ANGLE_INLINE void accumulate(const CacheStats &stats)
    {
        mHitCount += stats.mHitCount;
        mMissCount += stats.mMissCount;
        mSize += stats.mSize;
    }

    uint32_t getHitCount() const { return mHitCount; }
    uint32_t getMissCount() const { return mMissCount; }

    ANGLE_INLINE double getHitRatio() const
    {
        if (mHitCount + mMissCount == 0)
        {
            return 0;
        }
        else
        {
            return static_cast<double>(mHitCount) / (mHitCount + mMissCount);
        }
    }

    ANGLE_INLINE uint32_t getSize() const { return mSize; }
    ANGLE_INLINE void setSize(uint32_t size) { mSize = size; }

    void reset()
    {
        mHitCount = 0;
        mMissCount = 0;
        mSize = 0;
    }

    void resetHitAndMissCount()
    {
        mHitCount = 0;
        mMissCount = 0;
    }

    void accumulateCacheStats(VulkanCacheType cacheType, const CacheStats &cacheStats)
    {
        mHitCount += cacheStats.getHitCount();
        mMissCount += cacheStats.getMissCount();
    }

  private:
    uint32_t mHitCount;
    uint32_t mMissCount;
    uint32_t mSize;
};
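
// Sketch of the intended bookkeeping at a lookup site (|map|, |key| and |stats| illustrative):
//
//     auto it = map.find(key);
//     if (it != map.end())
//     {
//         stats.hit();
//     }
//     else
//     {
//         // A miss generally implies a new entry is about to be inserted:
//         stats.missAndIncrementSize();
//     }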

template <VulkanCacheType CacheType>
class HasCacheStats : angle::NonCopyable
{
  public:
    template <typename Accumulator>
    void accumulateCacheStats(Accumulator *accum)
    {
        accum->accumulateCacheStats(CacheType, mCacheStats);
        mCacheStats.reset();
    }

    void getCacheStats(CacheStats *accum) const { accum->accumulate(mCacheStats); }

  protected:
    HasCacheStats() = default;
    virtual ~HasCacheStats() = default;

    CacheStats mCacheStats;
};

using VulkanCacheStats = angle::PackedEnumMap<VulkanCacheType, CacheStats>;

// FramebufferVk Cache
class FramebufferCache final : angle::NonCopyable
{
  public:
    FramebufferCache() = default;
    ~FramebufferCache() { ASSERT(mPayload.empty()); }

    void destroy(RendererVk *rendererVk);

    bool get(ContextVk *contextVk, const vk::FramebufferDesc &desc, vk::Framebuffer &framebuffer);
    void insert(ContextVk *contextVk,
                const vk::FramebufferDesc &desc,
                vk::FramebufferHelper &&framebufferHelper);
    void erase(ContextVk *contextVk, const vk::FramebufferDesc &desc);

    size_t getSize() const { return mPayload.size(); }
    bool empty() const { return mPayload.empty(); }

  private:
    angle::HashMap<vk::FramebufferDesc, vk::FramebufferHelper> mPayload;
    CacheStats mCacheStats;
};

// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(ContextVk *contextVk);
    void clear(ContextVk *contextVk);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       const vk::RenderPassDesc &desc,
                                                       const vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            *renderPassOut = &innerCache.begin()->second.getRenderPass();
            mCompatibleRenderPassCacheStats.hit();
            return angle::Result::Continue;
        }

        mCompatibleRenderPassCacheStats.missAndIncrementSize();
        return addCompatibleRenderPass(contextVk, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(ContextVk *contextVk,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       const vk::RenderPass **renderPassOut);

  private:
    angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
                                           const vk::RenderPassDesc &desc,
                                           const vk::AttachmentOpsArray &attachmentOps,
                                           bool updatePerfCounters,
                                           const vk::RenderPass **renderPassOut);

    angle::Result addCompatibleRenderPass(ContextVk *contextVk,
                                          const vk::RenderPassDesc &desc,
                                          const vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass
    // elements. The second layer caches the attachment load/store ops and initial/final layout.
    // std::unordered_map is used so that pointers into the caches remain stable.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassHelper>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
    CacheStats mCompatibleRenderPassCacheStats;
    CacheStats mRenderPassWithOpsCacheStats;
};
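
// Sketch of the two-level structure: an outer lookup by RenderPassDesc finds the family of
// compatible render passes, and an inner lookup by AttachmentOpsArray finds the exact one
// (names illustrative):
//
//     OuterCache::iterator outer = mPayload.find(desc);
//     InnerCache &inner = outer->second;
//     auto match = inner.find(attachmentOps);
//
// Because both levels are std::unordered_map, pointers handed out to render passes remain
// stable across later insertions.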

enum class PipelineSource
{
    // Pipeline created when warming up the program's pipeline cache
    WarmUp,
    // Monolithic pipeline created at draw time
    Draw,
    // Pipeline created at draw time by linking partial pipeline libraries
    DrawLinked,
    // Pipeline created for UtilsVk
    Utils,
};

struct GraphicsPipelineDescCompleteHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::Complete);
    }
};
struct GraphicsPipelineDescVertexInputHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::VertexInput);
    }
};
struct GraphicsPipelineDescShadersHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::Shaders);
    }
};
struct GraphicsPipelineDescFragmentOutputHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::FragmentOutput);
    }
};

struct GraphicsPipelineDescCompleteKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::Complete);
    }
};
struct GraphicsPipelineDescVertexInputKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::VertexInput);
    }
};
struct GraphicsPipelineDescShadersKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::Shaders);
    }
};
struct GraphicsPipelineDescFragmentOutputKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::FragmentOutput);
    }
};

// Derive the KeyEqual and GraphicsPipelineSubset enum from the Hash struct.
template <typename Hash>
struct GraphicsPipelineCacheTypeHelper
{
    using KeyEqual = GraphicsPipelineDescCompleteKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::Complete;
};

template <>
struct GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescVertexInputHash>
{
    using KeyEqual = GraphicsPipelineDescVertexInputKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::VertexInput;
};
template <>
struct GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescShadersHash>
{
    using KeyEqual = GraphicsPipelineDescShadersKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::Shaders;
};
template <>
struct GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescFragmentOutputHash>
{
    using KeyEqual = GraphicsPipelineDescFragmentOutputKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset =
        vk::GraphicsPipelineSubset::FragmentOutput;
};

// TODO(jmadill): Add cache trimming/eviction.
template <typename Hash>
class GraphicsPipelineCache final : public HasCacheStats<VulkanCacheType::GraphicsPipeline>
{
  public:
    GraphicsPipelineCache() = default;
    ~GraphicsPipelineCache() override { ASSERT(mPayload.empty()); }

    void destroy(ContextVk *contextVk);
    void release(ContextVk *contextVk);

    void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);

    // Get a pipeline from the cache, if it exists.
    ANGLE_INLINE bool getPipeline(const vk::GraphicsPipelineDesc &desc,
                                  const vk::GraphicsPipelineDesc **descPtrOut,
                                  vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item == mPayload.end())
        {
            return false;
        }

        *descPtrOut = &item->first;
        *pipelineOut = &item->second;

        mCacheStats.hit();

        return true;
    }

    angle::Result createPipeline(ContextVk *contextVk,
                                 vk::PipelineCacheAccess *pipelineCache,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const vk::ShaderModuleMap &shaders,
                                 const vk::SpecializationConstants &specConsts,
                                 PipelineSource source,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    angle::Result linkLibraries(ContextVk *contextVk,
                                vk::PipelineCacheAccess *pipelineCache,
                                const vk::GraphicsPipelineDesc &desc,
                                const vk::PipelineLayout &pipelineLayout,
                                vk::PipelineHelper *vertexInputPipeline,
                                vk::PipelineHelper *shadersPipeline,
                                vk::PipelineHelper *fragmentOutputPipeline,
                                const vk::GraphicsPipelineDesc **descPtrOut,
                                vk::PipelineHelper **pipelineOut);

    // Helper for VulkanPipelineCachePerf that clears the cache map without destroying the
    // pipelines it holds.
    void reset() { mPayload.clear(); }

  private:
    void addToCache(PipelineSource source,
                    const vk::GraphicsPipelineDesc &desc,
                    vk::Pipeline &&pipeline,
                    vk::CacheLookUpFeedback feedback,
                    const vk::GraphicsPipelineDesc **descPtrOut,
                    vk::PipelineHelper **pipelineOut);

    using KeyEqual = typename GraphicsPipelineCacheTypeHelper<Hash>::KeyEqual;
    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper, Hash, KeyEqual> mPayload;
};

using CompleteGraphicsPipelineCache = GraphicsPipelineCache<GraphicsPipelineDescCompleteHash>;
using VertexInputGraphicsPipelineCache = GraphicsPipelineCache<GraphicsPipelineDescVertexInputHash>;
using ShadersGraphicsPipelineCache = GraphicsPipelineCache<GraphicsPipelineDescShadersHash>;
using FragmentOutputGraphicsPipelineCache =
    GraphicsPipelineCache<GraphicsPipelineDescFragmentOutputHash>;
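
// Sketch of the expected lookup-then-create pattern against one of these caches; the names
// here are illustrative and error handling is elided:
//
//     const vk::GraphicsPipelineDesc *descPtr = nullptr;
//     vk::PipelineHelper *pipeline = nullptr;
//     if (!cache.getPipeline(desc, &descPtr, &pipeline))
//     {
//         ANGLE_TRY(cache.createPipeline(contextVk, &pipelineCacheAccess, renderPass, layout,
//                                        shaders, specConsts, PipelineSource::Draw, desc,
//                                        &descPtr, &pipeline));
//     }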

class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(RendererVk *rendererVk);

    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
    CacheStats mCacheStats;
};

class PipelineLayoutCache final : public HasCacheStats<VulkanCacheType::PipelineLayout>
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache() override;

    void destroy(RendererVk *rendererVk);

    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};

class SamplerCache final : public HasCacheStats<VulkanCacheType::Sampler>
{
  public:
    SamplerCache();
    ~SamplerCache() override;

    void destroy(RendererVk *rendererVk);

    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::SamplerBinding *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
};

// YuvConversion Cache
class SamplerYcbcrConversionCache final
    : public HasCacheStats<VulkanCacheType::SamplerYcbcrConversion>
{
  public:
    SamplerYcbcrConversionCache();
    ~SamplerYcbcrConversionCache() override;

    void destroy(RendererVk *rendererVk);

    angle::Result getSamplerYcbcrConversion(vk::Context *context,
                                            const vk::YcbcrConversionDesc &ycbcrConversionDesc,
                                            VkSamplerYcbcrConversion *vkSamplerYcbcrConversionOut);

  private:
    using SamplerYcbcrConversionMap =
        std::unordered_map<vk::YcbcrConversionDesc, vk::SamplerYcbcrConversion>;
    SamplerYcbcrConversionMap mExternalFormatPayload;
    SamplerYcbcrConversionMap mVkFormatPayload;
};

// Descriptor Set Cache
class DescriptorSetCache final : angle::NonCopyable
{
  public:
    DescriptorSetCache() = default;
    ~DescriptorSetCache() { ASSERT(mPayload.empty()); }

    DescriptorSetCache(DescriptorSetCache &&other) : DescriptorSetCache()
    {
        *this = std::move(other);
    }

    DescriptorSetCache &operator=(DescriptorSetCache &&other)
    {
        std::swap(mPayload, other.mPayload);
        return *this;
    }

    void resetCache() { mPayload.clear(); }

    ANGLE_INLINE bool getDescriptorSet(const vk::DescriptorSetDesc &desc,
                                       VkDescriptorSet *descriptorSetOut,
                                       vk::RefCountedDescriptorPoolHelper **poolOut)
    {
        auto iter = mPayload.find(desc);
        if (iter != mPayload.end())
        {
            *descriptorSetOut = iter->second->getDescriptorSet();
            *poolOut = iter->second->getPool();
            return true;
        }
        return false;
    }

    ANGLE_INLINE void insertDescriptorSet(const vk::DescriptorSetDesc &desc,
                                          VkDescriptorSet descriptorSet,
                                          vk::RefCountedDescriptorPoolHelper *pool)
    {
        mPayload.emplace(desc, std::make_unique<dsCacheEntry>(descriptorSet, pool));
    }

    ANGLE_INLINE void eraseDescriptorSet(const vk::DescriptorSetDesc &desc)
    {
        mPayload.erase(desc);
    }

    ANGLE_INLINE size_t getTotalCacheSize() const { return mPayload.size(); }

    size_t getTotalCacheKeySizeBytes() const
    {
        size_t totalSize = 0;
        for (const auto &iter : mPayload)
        {
            const vk::DescriptorSetDesc &desc = iter.first;
            totalSize += desc.getKeySizeBytes();
        }
        return totalSize;
    }

    bool empty() const { return mPayload.empty(); }

  private:
    class dsCacheEntry
    {
      public:
        dsCacheEntry(VkDescriptorSet descriptorSet, vk::RefCountedDescriptorPoolHelper *pool)
            : mDescriptorSet(descriptorSet), mPool(pool)
        {}
        VkDescriptorSet getDescriptorSet() const { return mDescriptorSet; }
        vk::RefCountedDescriptorPoolHelper *getPool() const { return mPool; }

      private:
        VkDescriptorSet mDescriptorSet;
        // Weak pointer to the pool this descriptor set was allocated from. The refcount tracks
        // whether this pool is bound as the current pool in any ProgramExecutableVk, so the
        // cache must not add a refcount of its own.
        vk::RefCountedDescriptorPoolHelper *mPool;
    };
    angle::HashMap<vk::DescriptorSetDesc, std::unique_ptr<dsCacheEntry>> mPayload;
};
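
// Sketch of the get-or-insert pattern this cache supports (names illustrative; allocation of
// the descriptor set from a DynamicDescriptorPool is elided):
//
//     VkDescriptorSet descriptorSet;
//     vk::RefCountedDescriptorPoolHelper *pool = nullptr;
//     if (!cache.getDescriptorSet(desc, &descriptorSet, &pool))
//     {
//         // Allocate |descriptorSet| from a pool, then publish it:
//         cache.insertDescriptorSet(desc, descriptorSet, allocatedPool);
//     }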

// There is 1 default uniform binding used per stage.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
}  // namespace rx

#endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_