//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include "common/Color.h"
#include "common/FixedVector.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace rx
{

namespace vk
{
class ImageHelper;
enum class ImageLayout;

using RenderPassAndSerial = ObjectAndSerial<RenderPass>;
using PipelineAndSerial   = ObjectAndSerial<Pipeline>;

using RefCountedDescriptorSetLayout = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout      = RefCounted<PipelineLayout>;
using RefCountedSampler             = RefCounted<Sampler>;

// Helper macro that casts to a bitfield type then verifies no bits were dropped.
#define SetBitField(lhs, rhs)                                         \
    lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs); \
    ASSERT(static_cast<decltype(rhs)>(lhs) == (rhs))
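
// Illustrative usage (a sketch, not part of this header's API): SetBitField lets a wide
// enum value be stored in a narrow bitfield while asserting nothing was lost:
//
//   PackedAttachmentOpsDesc ops = {};
//   SetBitField(ops.loadOp, VK_ATTACHMENT_LOAD_OP_CLEAR);    // value 1 fits in the 2-bit field
//   SetBitField(ops.storeOp, VK_ATTACHMENT_STORE_OP_STORE);  // value 0 fits in the 1-bit field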

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const { return mHasDepthStencilAttachment; }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples);

    uint8_t samples() const { return mSamples; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange : 7;
    uint8_t mHasDepthStencilAttachment : 1;
    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    gl::AttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 12, "Size check failed");
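
// Illustrative usage (a sketch; the format IDs are placeholders): describe a framebuffer
// with color attachments at GL indices 0 and 2 plus a depth/stencil attachment:
//
//   RenderPassDesc desc;
//   desc.setSamples(1);
//   desc.packColorAttachment(0, angle::FormatID::R8G8B8A8_UNORM);
//   desc.packColorAttachmentGap(1);
//   desc.packColorAttachment(2, angle::FormatID::R8G8B8A8_UNORM);
//   desc.packDepthStencilAttachment(angle::FormatID::D24_UNORM_S8_UINT);
//   // colorAttachmentRange() == 3; attachmentCount() == 3 (two colors + depth/stencil)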

struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 1].
    uint16_t loadOp : 2;
    uint16_t storeOp : 1;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 1;

    // 5 bits to force-pad the structure to exactly 2 bytes.  Note that we currently don't support
    // any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 2, "Size check failed");

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](size_t index) const;
    PackedAttachmentOpsDesc &operator[](size_t index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(size_t index, ImageLayout initialLayout, ImageLayout finalLayout);

    void setLayouts(size_t index, ImageLayout initialLayout, ImageLayout finalLayout);
    void setOps(size_t index, VkAttachmentLoadOp loadOp, VkAttachmentStoreOp storeOp);
    void setStencilOps(size_t index, VkAttachmentLoadOp loadOp, VkAttachmentStoreOp storeOp);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 20, "Size check failed");

struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // Can only take 11 bits on NV.
    uint16_t offset;

    // Although stride can technically be any value in ES 2.0, in practice supporting strides
    // greater than MAX_USHORT is unlikely to be useful. Note that stride limits were introduced
    // in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");

struct RasterizationStateBits final
{
    uint32_t depthClampEnable : 4;
    uint32_t rasterizationDiscardEnable : 4;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there are no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");

struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");

struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // These only need one bit each; the extra bit is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    uint8_t depthCompareOp;  // Only needs 4 bits; the extra bits are used as padding.
    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");

struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PrimitiveState final
{
    uint16_t topology : 15;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");

struct PackedInputAssemblyAndColorBlendStateInfo final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize + sizeof(VkViewport) +
    sizeof(VkRect2D);

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes bytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
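
// Illustrative mapping (a sketch, not normative): dirty bit N covers bytes
// [N * kGraphicsPipelineDirtyBitBytes, (N + 1) * kGraphicsPipelineDirtyBitBytes) of the
// description.  For example, a field at byte offset 100 of GraphicsPipelineDesc maps to
// dirty bit 100 / 4 == 25.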

// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults();

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    angle::Result initializePipeline(ContextVk *contextVk,
                                     const vk::PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     const ShaderModule *geometryModule,
                                     vk::SpecializationConstantBitSet specConsts,
                                     Pipeline *pipelineOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendState &blendState);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendState &blendState);
    void setColorWriteMask(VkColorComponentFlags colorComponentFlags,
                           const gl::DrawBufferMask &alphaMask);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
                              VkColorComponentFlags colorComponentFlags,
                              const gl::DrawBufferMask &alphaMask);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Viewport and scissor.
    void setViewport(const VkViewport &viewport);
    void updateViewport(GraphicsPipelineTransitionBits *transition, const VkViewport &viewport);
    void updateDepthRange(GraphicsPipelineTransitionBits *transition,
                          float nearPlane,
                          float farPlane);
    void setScissor(const VkRect2D &scissor);
    void updateScissor(GraphicsPipelineTransitionBits *transition, const VkRect2D &scissor);

  private:
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    VkViewport mViewport;
    VkRect2D mScissor;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");
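
// Illustrative usage (a sketch; |desc| and |glState| are hypothetical caller state):
//
//   GraphicsPipelineTransitionBits transition;
//   desc.updateCullMode(&transition, glState.getRasterizerState());  // records changed dwords
//   desc.setCullMode(VK_CULL_MODE_NONE);  // internal pipelines: no transition is recorded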

constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;

// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void update(uint32_t bindingIndex,
                VkDescriptorType type,
                uint32_t count,
                VkShaderStageFlags stages);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

  private:
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
    };

    static_assert(sizeof(PackedDescriptorSetBinding) == sizeof(uint32_t), "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
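
// Illustrative usage (a sketch): a layout with one uniform buffer visible to the vertex
// stage and one combined image sampler visible to the fragment stage:
//
//   DescriptorSetLayoutDesc desc;
//   desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
//   desc.update(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
//
//   DescriptorSetLayoutBindingVector bindings;
//   desc.unpackBindings(&bindings);  // yields two VkDescriptorSetLayoutBinding entries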

// The following are for caching descriptor set layouts. Limited to a maximum of four descriptor
// set layouts. This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

struct PackedPushConstantRange
{
    uint32_t offset;
    uint32_t size;
};

template <typename T>
using DescriptorSetLayoutArray = std::array<T, kMaxDescriptorSetLayouts>;
using DescriptorSetLayoutPointerArray =
    DescriptorSetLayoutArray<BindingPointer<DescriptorSetLayout>>;
template <typename T>
using PushConstantRangeArray = gl::ShaderMap<T>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(uint32_t setIndex, const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);

    const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;

  private:
    DescriptorSetLayoutArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
    static_assert(sizeof(decltype(mPushConstantRanges)) ==
                      (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) ==
                  (sizeof(DescriptorSetLayoutArray<DescriptorSetLayoutDesc>) +
                   sizeof(gl::ShaderMap<PackedPushConstantRange>)),
              "Unexpected Size");

// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    explicit SamplerDesc(const gl::SamplerState &samplerState, bool stencilMode);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    void update(const gl::SamplerState &samplerState, bool stencilMode);
    void reset();
    angle::Result init(ContextVk *contextVk, vk::Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // 16 bits for modes + states.
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op. (8 possible values)
    uint16_t mCompareOp : 3;

    // Border color and unnormalized coordinates are implicitly set to constants.

    // 16 extra bits reserved for future use.
    uint16_t mReserved;
};

// Total size: 160 bits == 20 bytes.
static_assert(sizeof(SamplerDesc) == 20, "Unexpected SamplerDesc size");
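
// Illustrative usage (a sketch; |contextVk| and |samplerState| are assumed caller state):
//
//   SamplerDesc desc(samplerState, /*stencilMode=*/false);
//   Sampler sampler;
//   ANGLE_TRY(desc.init(contextVk, &sampler));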

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
    // of the code faster. The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions. Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}

class PipelineHelper final : angle::NonCopyable
{
  public:
    PipelineHelper();
    ~PipelineHelper();
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    void updateSerial(Serial serial) { mSerial = serial; }
    bool valid() const { return mPipeline.valid(); }
    Serial getSerial() const { return mSerial; }
    Pipeline &getPipeline() { return mPipeline; }

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Serial mSerial;
    Pipeline mPipeline;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}

class TextureDescriptorDesc
{
  public:
    TextureDescriptorDesc();
    ~TextureDescriptorDesc();

    TextureDescriptorDesc(const TextureDescriptorDesc &other);
    TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);

    void update(size_t index, Serial textureSerial, Serial samplerSerial);
    size_t hash() const;
    void reset();

    bool operator==(const TextureDescriptorDesc &other) const;

    // Note: this returns an exclusive index. If there is one valid index, it returns 1.
    uint32_t getMaxIndex() const { return mMaxIndex; }

  private:
    uint32_t mMaxIndex;
    struct TexUnitSerials
    {
        uint32_t texture;
        uint32_t sampler;
    };
    gl::ActiveTextureArray<TexUnitSerials> mSerials;
};

// This is gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1, to account for the depth/stencil attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_FRAMEBUFFER_ATTACHMENTS;
// Color serials are at indices [0, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS - 1].
// The depth/stencil index is at gl::IMPLEMENTATION_MAX_DRAW_BUFFERS.
constexpr size_t kFramebufferDescDepthStencilIndex = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
// Struct for AttachmentSerial cache signatures. Includes the level/layer of the image view as
// well as a unique Serial value for the underlying image.
struct AttachmentSerial
{
    uint16_t level;
    uint16_t layer;
    uint32_t imageSerial;
};
constexpr AttachmentSerial kZeroAttachmentSerial = {0, 0, 0};
class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void update(uint32_t index, AttachmentSerial serial);
    size_t hash() const;
    void reset();

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

  private:
    gl::AttachmentArray<AttachmentSerial> mSerials;
};
}  // namespace vk
}  // namespace rx

// Introduce std::hash specializations for the packed description types so they can be used
// as keys in the unordered_map caches below.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::TextureDescriptorDesc>
{
    size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};
}  // namespace std

namespace rx
{
// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(VkDevice device);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       Serial serial,
                                                       const vk::RenderPassDesc &desc,
                                                       vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            innerCache.begin()->second.updateSerial(serial);
            *renderPassOut = &innerCache.begin()->second.get();
            return angle::Result::Continue;
        }

        return addRenderPass(contextVk, serial, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(vk::Context *context,
                                       Serial serial,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       vk::RenderPass **renderPassOut);

  private:
    angle::Result addRenderPass(ContextVk *contextVk,
                                Serial serial,
                                const vk::RenderPassDesc &desc,
                                vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassAndSerial>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
};

// TODO(jmadill): Add cache trimming/eviction.
class GraphicsPipelineCache final : angle::NonCopyable
{
  public:
    GraphicsPipelineCache();
    ~GraphicsPipelineCache();

    void destroy(VkDevice device);
    void release(ContextVk *context);

    void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);

    ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
                                           const vk::PipelineCache &pipelineCacheVk,
                                           const vk::RenderPass &compatibleRenderPass,
                                           const vk::PipelineLayout &pipelineLayout,
                                           const gl::AttributesMask &activeAttribLocationsMask,
                                           const gl::ComponentTypeMask &programAttribsTypeMask,
                                           const vk::ShaderModule *vertexModule,
                                           const vk::ShaderModule *fragmentModule,
                                           const vk::ShaderModule *geometryModule,
                                           vk::SpecializationConstantBitSet specConsts,
                                           const vk::GraphicsPipelineDesc &desc,
                                           const vk::GraphicsPipelineDesc **descPtrOut,
                                           vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item != mPayload.end())
        {
            *descPtrOut  = &item->first;
            *pipelineOut = &item->second;
            return angle::Result::Continue;
        }

        return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
                              activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
                              fragmentModule, geometryModule, specConsts, desc, descPtrOut,
                              pipelineOut);
    }

  private:
    angle::Result insertPipeline(ContextVk *contextVk,
                                 const vk::PipelineCache &pipelineCacheVk,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const gl::AttributesMask &activeAttribLocationsMask,
                                 const gl::ComponentTypeMask &programAttribsTypeMask,
                                 const vk::ShaderModule *vertexModule,
                                 const vk::ShaderModule *fragmentModule,
                                 const vk::ShaderModule *geometryModule,
                                 vk::SpecializationConstantBitSet specConsts,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};

class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(VkDevice device);

    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
};

class PipelineLayoutCache final : angle::NonCopyable
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache();

    void destroy(VkDevice device);

    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};

class SamplerCache final : angle::NonCopyable
{
  public:
    SamplerCache();
    ~SamplerCache();

    void destroy(RendererVk *renderer);

    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::BindingPointer<vk::Sampler> *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
};

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains the ANGLE driver uniforms at binding 0.  Note that driver uniforms are updated
//   only under rare circumstances, such as viewport or depth range change.  However, there is only
//   one binding in this set.  This set is placed before Set 1 containing transform feedback
//   buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this set.
//   Otherwise, as the layout of Set 1 changes (due to addition and removal of xfb buffers) and all
//   subsequent sets need to be rebound (due to Vulkan pipeline layout validation rules), we would
//   need to invalidateGraphicsDriverUniforms().
// - Set 1 contains uniform blocks created to encompass default uniforms.  1 binding is used per
//   pipeline stage.  Additionally, transform feedback buffers are bound from binding 2 and up.
// - Set 2 contains all textures.
// - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers and images.

// ANGLE driver uniforms set index (binding is always 0):
constexpr uint32_t kDriverUniformsDescriptorSetIndex = 0;
// Uniforms set index:
constexpr uint32_t kUniformsAndXfbDescriptorSetIndex = 1;
// Textures set index:
constexpr uint32_t kTextureDescriptorSetIndex = 2;
// Other shader resources set index:
constexpr uint32_t kShaderResourceDescriptorSetIndex = 3;

// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage.  Currently, a maximum of three stages are
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
constexpr uint32_t kReservedDefaultUniformBindingCount         = 3;
}  // namespace rx

#endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_