• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2018 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // vk_cache_utils.h:
7 //    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
8 //    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
9 //
10 
11 #ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
12 #define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
13 
14 #include "common/Color.h"
15 #include "common/FixedVector.h"
16 #include "libANGLE/renderer/vulkan/vk_utils.h"
17 
18 namespace rx
19 {
20 
21 namespace vk
22 {
23 class ImageHelper;
24 
25 using RenderPassAndSerial = ObjectAndSerial<RenderPass>;
26 using PipelineAndSerial   = ObjectAndSerial<Pipeline>;
27 
28 using RefCountedDescriptorSetLayout = RefCounted<DescriptorSetLayout>;
29 using RefCountedPipelineLayout      = RefCounted<PipelineLayout>;
30 
31 // Packed Vk resource descriptions.
32 // Most Vk types use many more bits than required to represent the underlying data.
33 // Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
34 // hashing (and also needs to check equality) we can optimize these operations by
35 // using fewer bits. Hence the packed types.
36 //
37 // One implementation note: these types could potentially be improved by using even
38 // fewer bits. For example, boolean values could be represented by a single bit instead
39 // of a uint8_t. However at the current time there are concerns about the portability
40 // of bitfield operators, and complexity issues with using bit mask operations. This is
41 // something likely we will want to investigate as the Vulkan implementation progresses.
42 //
43 // Second implementation note: the struct packing is also a bit fragile, and some of the
44 // packing requirements depend on using alignas and field ordering to get the result of
45 // packing nicely into the desired space. This is something we could also potentially fix
46 // with a redesign to use bitfields or bit mask operations.
47 
48 // Enable struct padding warnings for the code below since it is used in caches.
49 ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
50 
// Packed description of a render pass: sample count, enabled color attachment formats (with
// gaps), and an optional depth/stencil attachment format.  alignas(4) keeps the size a multiple
// of 4 bytes so the desc can be hashed/compared as 32-bit words.
class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last, which is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const { return mHasDepthStencilAttachment; }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples);

    uint8_t samples() const { return mSamples; }

    // Returns the packed format at |index|.  The +1 in the assert accounts for the depth/stencil
    // slot that follows the color attachments.
    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    // Sample count of the attachments.
    uint8_t mSamples;
    // Exclusive upper bound of GL color attachment indices in use; shares a byte with the
    // depth/stencil flag via bitfields.
    uint8_t mColorAttachmentRange : 7;
    uint8_t mHasDepthStencilAttachment : 1;
    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    gl::AttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 12, "Size check failed");
121 
// Load/store ops and initial/final layouts for one render pass attachment, packed into 16 bits.
struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 1].
    uint16_t loadOp : 2;
    uint16_t storeOp : 1;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 1;

    // 5-bits to force pad the structure to exactly 2 bytes.  Note that we currently don't support
    // any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 2, "Size check failed");
137 
// Fixed-size array of packed attachment ops, one entry per possible render pass attachment.
// Hashable so it can participate in render pass cache keys.
class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](size_t index) const;
    PackedAttachmentOpsDesc &operator[](size_t index);

    // Initializes an attachment op with whatever values. Used for compatible RenderPass checks.
    void initDummyOp(size_t index, VkImageLayout initialLayout, VkImageLayout finalLayout);
    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(size_t index, VkImageLayout initialLayout, VkImageLayout finalLayout);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 20, "Size check failed");
163 
// Packed description of a single vertex attribute: format, instancing divisor, offset and stride.
struct PackedAttribDesc final
{
    // Stores a packed angle::FormatID.
    uint8_t format;

    // TODO(http://anglebug.com/2672): Emulate divisors greater than UBYTE_MAX.
    uint8_t divisor;

    // Can only take 11 bits on NV.
    uint16_t offset;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful. Note that stride limits are
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");
182 
// One packed attribute description per supported vertex attribute slot.
struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");
190 
// Rasterization and multisample enables/modes packed into a single 32-bit word.
struct RasterizationStateBits final
{
    uint32_t depthClampEnable : 4;
    uint32_t rasterizationDiscardEnable : 4;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    // Single-bit enables; together with the fields above and the 8-bit sample count these total
    // exactly 32 bits.
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");
207 
// Full rasterization + multisample pipeline state: the packed bits above plus the float-valued
// parameters that cannot be compressed.
struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there's no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");
224 
// Stencil fail/pass/depth-fail ops plus compare function, 4 bits each.  The allocation unit is
// uint8_t, so the four fields occupy two bytes (see the static_assert below).
struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");
235 
// Per-face stencil state: ops plus compare and write masks (one byte each).
struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");
245 
// Depth/stencil test enables packed into one byte.
struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // these only need one bit each. the extra is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");
256 
// Complete packed depth/stencil pipeline state: enables, references, compare op, depth bounds
// and per-face stencil state.
struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    uint8_t depthCompareOp;  // only needs 4 bits. extra used as padding.
    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");
271 
// Logic op enable bit plus the op itself, packed into one byte.
struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");
280 
// Blend factors and equations for a single color attachment packed into 32 bits
// (5+5+6 bits for color, 5+5+6 bits for alpha).
struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");
293 
// Input assembly state: primitive topology and the primitive-restart enable bit.
struct PrimitiveState final
{
    uint16_t topology : 15;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");
302 
// Input assembly plus color blend pipeline state for all draw buffers.
struct PackedInputAssemblyAndColorBlendStateInfo final
{
    // Color write masks; sized MAX_DRAW_BUFFERS / 2, presumably two 4-bit masks per byte —
    // confirm against the implementation.
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    // One bit per draw buffer indicating whether blending is enabled for it.
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
316 
// Sum of the sizes of all packed sub-structures that make up GraphicsPipelineDesc; used below to
// verify the desc has no padding.
constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kPackedInputAssemblyAndColorBlendStateSize +
    kPackedRasterizationAndMultisampleStateSize + kPackedDepthStencilStateSize +
    kRenderPassDescSize + sizeof(VkViewport) + sizeof(VkRect2D);

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
// NOTE(review): presumably angle::BitSet is limited to a 64-bit backing word — confirm.
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
330 
331 // State changes are applied through the update methods. Each update method can also have a
332 // sibling method that applies the update without marking a state transition. The non-transition
333 // update methods are used for internal shader pipelines. Not every non-transition update method
334 // is implemented yet as not every state is used in internal shaders.
// Packed description of an entire graphics pipeline, used as the key of the pipeline cache.
// The members are tightly packed (no padding, verified below) so hash() and operator== can treat
// the object as raw memory via getPtr().
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults();

    // For custom comparisons.  Reinterprets the packed desc as an array of T; relies on the
    // no-padding guarantee verified by the static_assert below the class.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    // Creates the Vulkan pipeline object described by this desc.
    angle::Result initializePipeline(vk::Context *context,
                                     const vk::PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     Pipeline *pipelineOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendState &blendState);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendState &blendState);
    void setColorWriteMask(VkColorComponentFlags colorComponentFlags,
                           const gl::DrawBufferMask &alphaMask);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
                              VkColorComponentFlags colorComponentFlags,
                              const gl::DrawBufferMask &alphaMask);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Viewport and scissor.
    void setViewport(const VkViewport &viewport);
    void updateViewport(GraphicsPipelineTransitionBits *transition, const VkViewport &viewport);
    void updateDepthRange(GraphicsPipelineTransitionBits *transition,
                          float nearPlane,
                          float farPlane);
    void setScissor(const VkRect2D &scissor);
    void updateScissor(GraphicsPipelineTransitionBits *transition, const VkRect2D &scissor);

  private:
    // Member order matters: the size-check below requires these to pack with no gaps.
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    VkViewport mViewport;
    VkRect2D mScissor;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");
493 
// Upper bound on bindings in any one descriptor set layout: large enough for either all active
// textures or all uniform buffer bindings.
constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;
500 
501 // A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
502 // GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
503 // usage. In the future we could generalize this.
// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    // Records the type, count and shader stages for the binding at |bindingIndex|.
    void update(uint32_t bindingIndex,
                VkDescriptorType type,
                uint32_t count,
                VkShaderStageFlags stages);

    // Expands the packed representation back into Vulkan binding structs.
    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

  private:
    // One packed binding; each wide Vulkan enum/count is narrowed to fit in 4 bytes total.
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
    };

    static_assert(sizeof(PackedDescriptorSetBinding) == sizeof(uint32_t), "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
536 
// The following are for caching descriptor set layouts. Limited to max four descriptor set layouts.
// This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

// Offset/size pair describing one push constant range (stage is implied by the array slot).
struct PackedPushConstantRange
{
    uint32_t offset;
    uint32_t size;
};

template <typename T>
using DescriptorSetLayoutArray = std::array<T, kMaxDescriptorSetLayouts>;
using DescriptorSetLayoutPointerArray =
    DescriptorSetLayoutArray<BindingPointer<DescriptorSetLayout>>;
// One entry per shader stage.
template <typename T>
using PushConstantRangeArray = gl::ShaderMap<T>;
553 
// Packed description of a pipeline layout: up to kMaxDescriptorSetLayouts set layouts plus
// per-stage push constant ranges.  Used as a cache key, hence the packing checks below.
class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(uint32_t setIndex, const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);

    const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;

  private:
    DescriptorSetLayoutArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
    static_assert(sizeof(decltype(mPushConstantRanges)) ==
                      (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) ==
                  (sizeof(DescriptorSetLayoutArray<DescriptorSetLayoutDesc>) +
                   sizeof(gl::ShaderMap<PackedPushConstantRange>)),
              "Unexpected Size");
588 
589 // Disable warnings about struct padding.
590 ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
591 
592 class PipelineHelper;
593 
// A cached transition from one pipeline to another: the dirty bits that differ, the destination
// description (non-owning pointer), and the destination pipeline.
struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};
606 
// Trivial constructors, defined out-of-line to keep the struct declaration compact.
ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}
618 
GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,GraphicsPipelineTransitionBits bitsB,const GraphicsPipelineDesc & descA,const GraphicsPipelineDesc & descB)619 ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
620                                                   GraphicsPipelineTransitionBits bitsB,
621                                                   const GraphicsPipelineDesc &descA,
622                                                   const GraphicsPipelineDesc &descB)
623 {
624     if (bitsA != bitsB)
625         return false;
626 
627     // We currently mask over 4 bytes of the pipeline description with each dirty bit.
628     // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
629     // of the code faster. The for loop below would scan over twice as many bits per iteration.
630     // But there may be more collisions between the same dirty bit masks leading to different
631     // transitions. Thus there may be additional cost when applications use many transitions.
632     // We should revisit this in the future and investigate using different bit widths.
633     static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");
634 
635     const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
636     const uint32_t *rawPtrB = descB.getPtr<uint32_t>();
637 
638     for (size_t dirtyBit : bitsA)
639     {
640         if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
641             return false;
642     }
643 
644     return true;
645 }
646 
// Owns a Vulkan pipeline object along with its last-use serial and the list of cached
// transitions from this pipeline to others.
class PipelineHelper final : angle::NonCopyable
{
  public:
    PipelineHelper();
    ~PipelineHelper();
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    void updateSerial(Serial serial) { mSerial = serial; }
    bool valid() const { return mPipeline.valid(); }
    Serial getSerial() const { return mSerial; }
    Pipeline &getPipeline() { return mPipeline; }

    // Linearly scans the cached transitions for one matching |bits|/|desc|; returns true and the
    // destination pipeline through |pipelineOut| on a hit.
    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Serial mSerial;
    Pipeline mPipeline;
};
687 
PipelineHelper(Pipeline && pipeline)688 ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}
689 
// Packed description of the textures/samplers bound to a program, used as a key
// when caching texture descriptor sets.
class TextureDescriptorDesc
{
  public:
    TextureDescriptorDesc();
    ~TextureDescriptorDesc();

    TextureDescriptorDesc(const TextureDescriptorDesc &other);
    TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);

    // Records the texture and sampler serials bound at texture unit |index|.
    void update(size_t index, Serial textureSerial, Serial samplerSerial);
    size_t hash() const;
    void reset();

    bool operator==(const TextureDescriptorDesc &other) const;

    // Note: this is an exclusive index. If there is one index it will return "1".
    uint32_t getMaxIndex() const { return mMaxIndex; }

  private:
    // Highest bound texture unit + 1; only the first mMaxIndex entries of
    // mSerials are meaningful.
    uint32_t mMaxIndex;
    struct TexUnitSerials
    {
        // NOTE(review): serials are stored as 32-bit values here even if Serial
        // is wider — presumably to keep the key compact; verify truncation is
        // acceptable in update().
        uint32_t texture;
        uint32_t sampler;
    };
    gl::ActiveTextureArray<TexUnitSerials> mSerials;
};
717 }  // namespace vk
718 }  // namespace rx
719 
// Introduce std::hash specializations for the packed Vk description types so
// they can be used directly as keys in the std::unordered_map-based caches
// below.  Each specialization simply forwards to the type's own hash() method.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::TextureDescriptorDesc>
{
    size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
};
}  // namespace std
759 
760 namespace rx
761 {
762 // TODO(jmadill): Add cache trimming/eviction.
763 class RenderPassCache final : angle::NonCopyable
764 {
765   public:
766     RenderPassCache();
767     ~RenderPassCache();
768 
769     void destroy(VkDevice device);
770 
771     ANGLE_INLINE angle::Result getCompatibleRenderPass(vk::Context *context,
772                                                        Serial serial,
773                                                        const vk::RenderPassDesc &desc,
774                                                        vk::RenderPass **renderPassOut)
775     {
776         auto outerIt = mPayload.find(desc);
777         if (outerIt != mPayload.end())
778         {
779             InnerCache &innerCache = outerIt->second;
780             ASSERT(!innerCache.empty());
781 
782             // Find the first element and return it.
783             innerCache.begin()->second.updateSerial(serial);
784             *renderPassOut = &innerCache.begin()->second.get();
785             return angle::Result::Continue;
786         }
787 
788         return addRenderPass(context, serial, desc, renderPassOut);
789     }
790 
791     angle::Result getRenderPassWithOps(vk::Context *context,
792                                        Serial serial,
793                                        const vk::RenderPassDesc &desc,
794                                        const vk::AttachmentOpsArray &attachmentOps,
795                                        vk::RenderPass **renderPassOut);
796 
797   private:
798     angle::Result addRenderPass(vk::Context *context,
799                                 Serial serial,
800                                 const vk::RenderPassDesc &desc,
801                                 vk::RenderPass **renderPassOut);
802 
803     // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
804     // The second layer caches the attachment load/store ops and initial/final layout.
805     using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassAndSerial>;
806     using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;
807 
808     OuterCache mPayload;
809 };
810 
811 // TODO(jmadill): Add cache trimming/eviction.
812 class GraphicsPipelineCache final : angle::NonCopyable
813 {
814   public:
815     GraphicsPipelineCache();
816     ~GraphicsPipelineCache();
817 
818     void destroy(VkDevice device);
819     void release(ContextVk *context);
820 
821     void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);
822 
823     ANGLE_INLINE angle::Result getPipeline(vk::Context *context,
824                                            const vk::PipelineCache &pipelineCacheVk,
825                                            const vk::RenderPass &compatibleRenderPass,
826                                            const vk::PipelineLayout &pipelineLayout,
827                                            const gl::AttributesMask &activeAttribLocationsMask,
828                                            const gl::ComponentTypeMask &programAttribsTypeMask,
829                                            const vk::ShaderModule *vertexModule,
830                                            const vk::ShaderModule *fragmentModule,
831                                            const vk::GraphicsPipelineDesc &desc,
832                                            const vk::GraphicsPipelineDesc **descPtrOut,
833                                            vk::PipelineHelper **pipelineOut)
834     {
835         auto item = mPayload.find(desc);
836         if (item != mPayload.end())
837         {
838             *descPtrOut  = &item->first;
839             *pipelineOut = &item->second;
840             return angle::Result::Continue;
841         }
842 
843         return insertPipeline(context, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
844                               activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
845                               fragmentModule, desc, descPtrOut, pipelineOut);
846     }
847 
848   private:
849     angle::Result insertPipeline(vk::Context *context,
850                                  const vk::PipelineCache &pipelineCacheVk,
851                                  const vk::RenderPass &compatibleRenderPass,
852                                  const vk::PipelineLayout &pipelineLayout,
853                                  const gl::AttributesMask &activeAttribLocationsMask,
854                                  const gl::ComponentTypeMask &programAttribsTypeMask,
855                                  const vk::ShaderModule *vertexModule,
856                                  const vk::ShaderModule *fragmentModule,
857                                  const vk::GraphicsPipelineDesc &desc,
858                                  const vk::GraphicsPipelineDesc **descPtrOut,
859                                  vk::PipelineHelper **pipelineOut);
860 
861     std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
862 };
863 
// Caches ref-counted VkDescriptorSetLayout objects keyed on their packed
// description, so identical layouts are shared across programs.
class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(VkDevice device);

    // Returns a binding to the cached layout matching |desc|, creating it on a
    // cache miss.
    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
};
880 
// Caches ref-counted VkPipelineLayout objects keyed on their packed
// description, so identical layouts are shared across programs.
class PipelineLayoutCache final : angle::NonCopyable
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache();

    void destroy(VkDevice device);

    // Returns a binding to the cached layout matching |desc|, creating it from
    // |descriptorSetLayouts| on a cache miss.
    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};
897 
// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as following:
//
// - Set 0 contains uniform blocks created to encompass default uniforms.  1 binding is used per
//   pipeline stage.  Additionally, transform feedback buffers are bound from binding 2 and up.
// - Set 1 contains all textures.
// - Set 2 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers and images.
// - Set 3 contains the ANGLE driver uniforms at binding 0.  Note that driver uniforms are updated
//   only under rare circumstances, such as viewport or depth range change.  However, there is only
//   one binding in this set.

// Uniforms set index:
constexpr uint32_t kUniformsAndXfbDescriptorSetIndex = 0;
// Textures set index:
constexpr uint32_t kTextureDescriptorSetIndex = 1;
// Other shader resources set index:
constexpr uint32_t kShaderResourceDescriptorSetIndex = 2;
// ANGLE driver uniforms set index (always set 3; its single binding is 0):
constexpr uint32_t kDriverUniformsDescriptorSetIndex = 3;

// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage.  Currently, a maximum of two stages are
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
constexpr uint32_t kReservedDefaultUniformBindingCount         = 2;
// Binding index start for transform feedback buffers (bindings 0..1 hold the per-stage default
// uniform blocks):
constexpr uint32_t kXfbBindingIndexStart = kReservedDefaultUniformBindingCount;
928 }  // namespace rx
929 
930 #endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
931