• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2018 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // vk_cache_utils.h:
7 //    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
8 //    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
9 //
10 
11 #ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
12 #define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
13 
14 #include "common/Color.h"
15 #include "common/FixedVector.h"
16 #include "libANGLE/renderer/vulkan/vk_utils.h"
17 
namespace rx
{

namespace vk
{
class ImageHelper;
enum class ImageLayout;

using RenderPassAndSerial = ObjectAndSerial<RenderPass>;
using PipelineAndSerial   = ObjectAndSerial<Pipeline>;

using RefCountedDescriptorSetLayout = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout      = RefCounted<PipelineLayout>;

// Helper macro that casts to a bitfield type then verifies no bits were dropped.
// The value is first narrowed into |lhs|'s (decayed) type, then cast back to |rhs|'s type
// and compared against the original to assert the narrowing was lossless.
#define SetBitField(lhs, rhs)                                         \
    lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs); \
    ASSERT(static_cast<decltype(rhs)>(lhs) == (rhs))

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
56 
// Packed description of a render pass used as a cache key: sample count, the range of
// enabled color attachments and the per-attachment formats.  Kept tightly packed
// (alignas(4), bitfields, size locked by a static_assert below) since it is hashed and
// compared for equality by the RenderPass cache.
class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);

    // Hash of the packed bytes; used as the RenderPass cache key.
    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    // The depth/stencil format, if present, is stored immediately after the color range.
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const { return mHasDepthStencilAttachment; }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples);

    uint8_t samples() const { return mSamples; }

    // Returns the packed format of a given attachment; valid indices cover the color
    // attachments plus one extra slot for depth/stencil (hence the +1 bound).
    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange : 7;
    uint8_t mHasDepthStencilAttachment : 1;
    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    gl::AttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 12, "Size check failed");
127 
// Packed load/store operations and image layouts for a single render pass attachment.
struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 1].
    uint16_t loadOp : 2;
    uint16_t storeOp : 1;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 1;

    // 5-bits to force pad the structure to exactly 2 bytes.  Note that we currently don't support
    // any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 2, "Size check failed");
143 
// Fixed-size array of packed attachment ops, one entry per possible framebuffer attachment.
class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](size_t index) const;
    PackedAttachmentOpsDesc &operator[](size_t index);

    // Initializes an attachment op with whatever values. Used for compatible RenderPass checks.
    void initDummyOp(size_t index, ImageLayout initialLayout, ImageLayout finalLayout);
    // Initialize an attachment op with store operations.
    void initWithStore(size_t index,
                       VkAttachmentLoadOp loadOp,
                       ImageLayout initialLayout,
                       ImageLayout finalLayout);

    // Hash of the packed ops array; used together with RenderPassDesc for cache lookups.
    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 20, "Size check failed");
172 
// Packed description of one vertex attribute: format, divisor, offset and stride.
struct PackedAttribDesc final
{
    uint8_t format;  // Stores a packed angle::FormatID.
    uint8_t divisor;

    // Can only take 11 bits on NV.
    uint16_t offset;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful. Note that stride limits are
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

// Vertex input state for all attributes; part of the packed pipeline description.
struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");
197 
// Bitfield-packed rasterization and multisample toggles; fits exactly in 32 bits.
struct RasterizationStateBits final
{
    uint32_t depthClampEnable : 4;
    uint32_t rasterizationDiscardEnable : 4;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

// Rasterization, depth-bias and multisample state of the packed pipeline description.
struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there's no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");
231 
// Packed VkStencilOp values (4 bits each) for one stencil face.
struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

// Per-face stencil state: ops plus compare and write masks.
struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");
252 
// Depth/stencil enable flags, packed two bits per flag (one data bit plus padding).
struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // these only need one bit each. the extra is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

// Depth/stencil portion of the packed pipeline description.
struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    uint8_t depthCompareOp;  // only needs 4 bits. extra used as padding.
    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
278 
// Logic op enable flag plus the packed logic op itself.
struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

// Packed blend factors/ops for one color attachment; fits exactly in 32 bits.
struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

// Packed primitive topology and primitive-restart enable.
struct PrimitiveState final
{
    uint16_t topology : 15;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");
309 
// Input assembly and color blend portion of the packed pipeline description.
struct PackedInputAssemblyAndColorBlendStateInfo final
{
    // One 4-bit color write mask per draw buffer, packed two per byte.
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");

// Total byte size of the packed pipeline description; used below to derive the dirty bit count.
constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize + sizeof(VkViewport) +
    sizeof(VkRect2D);

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
337 
// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
//
// The class's raw bytes are the pipeline cache key: they are hashed, memcmp-compared, and
// scanned word-by-word through getPtr() for transition matching, so the member layout (and
// the size assert below) must not change casually.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults();

    // For custom comparisons.  Reinterprets the packed description as an array of T.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    // Creates the Vulkan pipeline described by this object.
    angle::Result initializePipeline(ContextVk *contextVk,
                                     const vk::PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     const ShaderModule *geometryModule,
                                     vk::SpecializationConstantBitSet specConsts,
                                     Pipeline *pipelineOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendState &blendState);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendState &blendState);
    void setColorWriteMask(VkColorComponentFlags colorComponentFlags,
                           const gl::DrawBufferMask &alphaMask);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
                              VkColorComponentFlags colorComponentFlags,
                              const gl::DrawBufferMask &alphaMask);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Viewport and scissor.
    void setViewport(const VkViewport &viewport);
    void updateViewport(GraphicsPipelineTransitionBits *transition, const VkViewport &viewport);
    void updateDepthRange(GraphicsPipelineTransitionBits *transition,
                          float nearPlane,
                          float farPlane);
    void setScissor(const VkRect2D &scissor);
    void updateScissor(GraphicsPipelineTransitionBits *transition, const VkRect2D &scissor);

  private:
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    VkViewport mViewport;
    VkRect2D mScissor;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");
503 
constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;

// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    // Records a binding's type, descriptor count and shader stages at |bindingIndex|.
    void update(uint32_t bindingIndex,
                VkDescriptorType type,
                uint32_t count,
                VkShaderStageFlags stages);

    // Expands the packed representation back into Vulkan binding structs.
    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

  private:
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
    };

    static_assert(sizeof(PackedDescriptorSetBinding) == sizeof(uint32_t), "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
546 
// The following are for caching descriptor set layouts. Limited to max four descriptor set layouts.
// This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

// Packed push constant range (offset and size in bytes).
struct PackedPushConstantRange
{
    uint32_t offset;
    uint32_t size;
};

template <typename T>
using DescriptorSetLayoutArray = std::array<T, kMaxDescriptorSetLayouts>;
using DescriptorSetLayoutPointerArray =
    DescriptorSetLayoutArray<BindingPointer<DescriptorSetLayout>>;
template <typename T>
using PushConstantRangeArray = gl::ShaderMap<T>;

// Packed description of a pipeline layout: one DescriptorSetLayoutDesc per set index plus
// one push constant range per shader type.  Hashed and compared as a cache key.
class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(uint32_t setIndex, const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);

    const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;

  private:
    DescriptorSetLayoutArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
    static_assert(sizeof(decltype(mPushConstantRanges)) ==
                      (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) ==
                  (sizeof(DescriptorSetLayoutArray<DescriptorSetLayoutDesc>) +
                   sizeof(gl::ShaderMap<PackedPushConstantRange>)),
              "Unexpected Size");
598 
// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

// One recorded pipeline state transition: the set of changed dirty bits, the description
// they were applied to, and the cached pipeline that resulted.
struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}
628 
GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,GraphicsPipelineTransitionBits bitsB,const GraphicsPipelineDesc & descA,const GraphicsPipelineDesc & descB)629 ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
630                                                   GraphicsPipelineTransitionBits bitsB,
631                                                   const GraphicsPipelineDesc &descA,
632                                                   const GraphicsPipelineDesc &descB)
633 {
634     if (bitsA != bitsB)
635         return false;
636 
637     // We currently mask over 4 bytes of the pipeline description with each dirty bit.
638     // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
639     // of the code faster. The for loop below would scan over twice as many bits per iteration.
640     // But there may be more collisions between the same dirty bit masks leading to different
641     // transitions. Thus there may be additional cost when applications use many transitions.
642     // We should revisit this in the future and investigate using different bit widths.
643     static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
644 
645     const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
646     const uint32_t *rawPtrB = descB.getPtr<uint32_t>();
647 
648     for (size_t dirtyBit : bitsA)
649     {
650         if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
651             return false;
652     }
653 
654     return true;
655 }
656 
657 class PipelineHelper final : angle::NonCopyable
658 {
659   public:
660     PipelineHelper();
661     ~PipelineHelper();
662     inline explicit PipelineHelper(Pipeline &&pipeline);
663 
664     void destroy(VkDevice device);
665 
updateSerial(Serial serial)666     void updateSerial(Serial serial) { mSerial = serial; }
valid()667     bool valid() const { return mPipeline.valid(); }
getSerial()668     Serial getSerial() const { return mSerial; }
getPipeline()669     Pipeline &getPipeline() { return mPipeline; }
670 
findTransition(GraphicsPipelineTransitionBits bits,const GraphicsPipelineDesc & desc,PipelineHelper ** pipelineOut)671     ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
672                                      const GraphicsPipelineDesc &desc,
673                                      PipelineHelper **pipelineOut) const
674     {
675         // Search could be improved using sorting or hashing.
676         for (const GraphicsPipelineTransition &transition : mTransitions)
677         {
678             if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
679             {
680                 *pipelineOut = transition.target;
681                 return true;
682             }
683         }
684 
685         return false;
686     }
687 
688     void addTransition(GraphicsPipelineTransitionBits bits,
689                        const GraphicsPipelineDesc *desc,
690                        PipelineHelper *pipeline);
691 
692   private:
693     std::vector<GraphicsPipelineTransition> mTransitions;
694     Serial mSerial;
695     Pipeline mPipeline;
696 };
697 
// Inline constructor: adopts ownership of an already-created pipeline object.
ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}
699 
// Packed description of the texture/sampler serials bound per texture unit.
// Hashable and equality-comparable, so it can serve as a cache key (see the
// std::hash specialization below).
class TextureDescriptorDesc
{
  public:
    TextureDescriptorDesc();
    ~TextureDescriptorDesc();

    TextureDescriptorDesc(const TextureDescriptorDesc &other);
    TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);

    // Records the texture/sampler serial pair for texture unit |index|.
    void update(size_t index, Serial textureSerial, Serial samplerSerial);
    size_t hash() const;
    void reset();

    bool operator==(const TextureDescriptorDesc &other) const;

    // Note: this is an exclusive index. If there is one index it will return "1".
    uint32_t getMaxIndex() const { return mMaxIndex; }

  private:
    // Exclusive upper bound of the texture units recorded via update().
    uint32_t mMaxIndex;
    // Serial pair stored per texture unit (stored as 32-bit values).
    struct TexUnitSerials
    {
        uint32_t texture;
        uint32_t sampler;
    };
    gl::ActiveTextureArray<TexUnitSerials> mSerials;
};
727 
// This is IMPLEMENTATION_MAX_DRAW_BUFFERS + 1 for DS attachment
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_FRAMEBUFFER_ATTACHMENTS;
// Color serials are at index [0:gl::IMPLEMENTATION_MAX_DRAW_BUFFERS-1]
// Depth/stencil index is at gl::IMPLEMENTATION_MAX_DRAW_BUFFERS
constexpr size_t kFramebufferDescDepthStencilIndex = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
// Struct for AttachmentSerial cache signatures. Includes level/layer for imageView as
// well as a unique Serial value for the underlying image
struct AttachmentSerial
{
    uint16_t level;        // mip level the image view points at
    uint16_t layer;        // array layer the image view points at
    uint32_t imageSerial;  // serial identifying the underlying image
};
// Sentinel value representing "no attachment" in a FramebufferDesc slot.
constexpr AttachmentSerial kZeroAttachmentSerial = {0, 0, 0};
// Packed description of the attachments of a framebuffer, keyed by the
// AttachmentSerial of each attachment.  Hashable and equality-comparable so it
// can be used as a framebuffer cache key.
class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    // Records |serial| for attachment slot |index| (color slots first, then
    // depth/stencil at kFramebufferDescDepthStencilIndex).
    void update(uint32_t index, AttachmentSerial serial);
    size_t hash() const;
    void reset();

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

  private:
    gl::AttachmentArray<AttachmentSerial> mSerials;
};
762 }  // namespace vk
763 }  // namespace rx
764 
// Introduce a std::hash for a RenderPassDesc
namespace std
{
// Each specialization forwards to the type's own hash() member so the packed
// descriptions above can be used as keys in unordered containers.
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::TextureDescriptorDesc>
{
    size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};
}  // namespace std
810 
811 namespace rx
812 {
813 // TODO(jmadill): Add cache trimming/eviction.
814 class RenderPassCache final : angle::NonCopyable
815 {
816   public:
817     RenderPassCache();
818     ~RenderPassCache();
819 
820     void destroy(VkDevice device);
821 
822     ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
823                                                        Serial serial,
824                                                        const vk::RenderPassDesc &desc,
825                                                        vk::RenderPass **renderPassOut)
826     {
827         auto outerIt = mPayload.find(desc);
828         if (outerIt != mPayload.end())
829         {
830             InnerCache &innerCache = outerIt->second;
831             ASSERT(!innerCache.empty());
832 
833             // Find the first element and return it.
834             innerCache.begin()->second.updateSerial(serial);
835             *renderPassOut = &innerCache.begin()->second.get();
836             return angle::Result::Continue;
837         }
838 
839         return addRenderPass(contextVk, serial, desc, renderPassOut);
840     }
841 
842     angle::Result getRenderPassWithOps(vk::Context *context,
843                                        Serial serial,
844                                        const vk::RenderPassDesc &desc,
845                                        const vk::AttachmentOpsArray &attachmentOps,
846                                        vk::RenderPass **renderPassOut);
847 
848   private:
849     angle::Result addRenderPass(ContextVk *contextVk,
850                                 Serial serial,
851                                 const vk::RenderPassDesc &desc,
852                                 vk::RenderPass **renderPassOut);
853 
854     // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
855     // The second layer caches the attachment load/store ops and initial/final layout.
856     using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassAndSerial>;
857     using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;
858 
859     OuterCache mPayload;
860 };
861 
862 // TODO(jmadill): Add cache trimming/eviction.
863 class GraphicsPipelineCache final : angle::NonCopyable
864 {
865   public:
866     GraphicsPipelineCache();
867     ~GraphicsPipelineCache();
868 
869     void destroy(VkDevice device);
870     void release(ContextVk *context);
871 
872     void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);
873 
874     ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
875                                            const vk::PipelineCache &pipelineCacheVk,
876                                            const vk::RenderPass &compatibleRenderPass,
877                                            const vk::PipelineLayout &pipelineLayout,
878                                            const gl::AttributesMask &activeAttribLocationsMask,
879                                            const gl::ComponentTypeMask &programAttribsTypeMask,
880                                            const vk::ShaderModule *vertexModule,
881                                            const vk::ShaderModule *fragmentModule,
882                                            const vk::ShaderModule *geometryModule,
883                                            vk::SpecializationConstantBitSet specConsts,
884                                            const vk::GraphicsPipelineDesc &desc,
885                                            const vk::GraphicsPipelineDesc **descPtrOut,
886                                            vk::PipelineHelper **pipelineOut)
887     {
888         auto item = mPayload.find(desc);
889         if (item != mPayload.end())
890         {
891             *descPtrOut  = &item->first;
892             *pipelineOut = &item->second;
893             return angle::Result::Continue;
894         }
895 
896         return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
897                               activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
898                               fragmentModule, geometryModule, specConsts, desc, descPtrOut,
899                               pipelineOut);
900     }
901 
902   private:
903     angle::Result insertPipeline(ContextVk *contextVk,
904                                  const vk::PipelineCache &pipelineCacheVk,
905                                  const vk::RenderPass &compatibleRenderPass,
906                                  const vk::PipelineLayout &pipelineLayout,
907                                  const gl::AttributesMask &activeAttribLocationsMask,
908                                  const gl::ComponentTypeMask &programAttribsTypeMask,
909                                  const vk::ShaderModule *vertexModule,
910                                  const vk::ShaderModule *fragmentModule,
911                                  const vk::ShaderModule *geometryModule,
912                                  vk::SpecializationConstantBitSet specConsts,
913                                  const vk::GraphicsPipelineDesc &desc,
914                                  const vk::GraphicsPipelineDesc **descPtrOut,
915                                  vk::PipelineHelper **pipelineOut);
916 
917     std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
918 };
919 
// Cache of ref-counted DescriptorSetLayout objects keyed by their packed
// description, so identical layouts are shared.
class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(VkDevice device);

    // Returns a binding pointer to the cached layout for |desc|, creating and
    // inserting it on a miss.
    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
};
936 
// Cache of ref-counted PipelineLayout objects keyed by their packed
// description, so identical layouts are shared.
class PipelineLayoutCache final : angle::NonCopyable
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache();

    void destroy(VkDevice device);

    // Returns a binding pointer to the cached layout for |desc|, creating one
    // from |descriptorSetLayouts| on a miss.
    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};
953 
// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as following:
//
// - Set 0 contains the ANGLE driver uniforms at binding 0.  Note that driver uniforms are updated
//   only under rare circumstances, such as viewport or depth range change.  However, there is only
//   one binding in this set.  This set is placed before Set 1 containing transform feedback
//   buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this set.
//   Otherwise, as the layout of Set 1 changes (due to addition and removal of xfb buffers), and all
//   subsequent sets need to be rebound (due to Vulkan pipeline layout validation rules), we would
//   have needed to invalidateGraphicsDriverUniforms().
// - Set 1 contains uniform blocks created to encompass default uniforms.  1 binding is used per
//   pipeline stage.  Additionally, transform feedback buffers are bound from binding 2 and up.
// - Set 2 contains all textures.
// - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers and images.

// ANGLE driver uniforms set index (binding is always 0):
constexpr uint32_t kDriverUniformsDescriptorSetIndex = 0;
// Uniforms set index:
constexpr uint32_t kUniformsAndXfbDescriptorSetIndex = 1;
// Textures set index:
constexpr uint32_t kTextureDescriptorSetIndex = 2;
// Other shader resources set index:
constexpr uint32_t kShaderResourceDescriptorSetIndex = 3;

// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage.  Currently, a maximum of three stages are
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
constexpr uint32_t kReservedDefaultUniformBindingCount         = 3;
986 }  // namespace rx
987 
988 #endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
989