//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include "common/Color.h"
#include "common/FixedVector.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace rx
{

namespace vk
{
class ImageHelper;
enum class ImageLayout;

using RenderPassAndSerial = ObjectAndSerial<RenderPass>;
using PipelineAndSerial   = ObjectAndSerial<Pipeline>;

using RefCountedDescriptorSetLayout = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout      = RefCounted<PipelineLayout>;

// Helper macro that casts to a bitfield type then verifies no bits were dropped.
#define SetBitField(lhs, rhs)                                          \
    lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs);  \
    ASSERT(static_cast<decltype(rhs)>(lhs) == (rhs))
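// Example usage (a sketch, not part of the cache API): packing a VkAttachmentLoadOp into the
// 2-bit |loadOp| field of PackedAttachmentOpsDesc below. The ASSERT fires if the value does
// not round-trip through the destination bitfield, i.e. if bits would be dropped.
//
//   SetBitField(ops[0].loadOp, VK_ATTACHMENT_LOAD_OP_CLEAR);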

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However, at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set the format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last; it is packed right after the color
    // attachments (including gaps), i.e. at index |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const { return mHasDepthStencilAttachment; }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples);

    uint8_t samples() const { return mSamples; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange : 7;
    uint8_t mHasDepthStencilAttachment : 1;
    // Color attachment formats are stored with their GL attachment indices. The depth/stencil
    // attachment format follows the last enabled color attachment. When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices. These indices are marked as such, e.g. colorIndexGL. The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk. The subpass attachment references create the
    // link between the two index spaces. The subpass declares attachment references with GL
    // indices (which correspond to the location decoration of shader outputs). The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    gl::AttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 12, "Size check failed");
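// Example (a sketch): a 4x multisampled pass using GL color attachments 0 and 3 plus
// depth/stencil, matching the index-mapping example above. |format0|, |format3| and
// |depthStencilFormat| stand in for angle::FormatID values:
//
//   RenderPassDesc desc;
//   desc.setSamples(4);
//   desc.packColorAttachment(0, format0);
//   desc.packColorAttachmentGap(1);
//   desc.packColorAttachmentGap(2);
//   desc.packColorAttachment(3, format3);
//   desc.packDepthStencilAttachment(depthStencilFormat);
//
// attachmentCount() would then return 3: two enabled color attachments plus depth/stencil.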

struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 1].
    uint16_t loadOp : 2;
    uint16_t storeOp : 1;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 1;

    // 5 bits each to force-pad the structure to exactly 2 bytes. Note that we currently don't
    // support any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 2, "Size check failed");

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](size_t index) const;
    PackedAttachmentOpsDesc &operator[](size_t index);

    // Initialize an attachment op with placeholder values. Used for compatible RenderPass checks.
    void initDummyOp(size_t index, ImageLayout initialLayout, ImageLayout finalLayout);
    // Initialize an attachment op with store operations.
    void initWithStore(size_t index,
                       VkAttachmentLoadOp loadOp,
                       ImageLayout initialLayout,
                       ImageLayout finalLayout);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 20, "Size check failed");
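// Example (a sketch; |initialLayout| and |finalLayout| stand in for ImageLayout values):
//
//   AttachmentOpsArray ops;
//   ops.initWithStore(0, VK_ATTACHMENT_LOAD_OP_CLEAR, initialLayout, finalLayout);
//
// This describes attachment 0 as cleared on load and stored at the end of the render pass,
// and can be used as the second-level key of the RenderPass cache below.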

struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // Can only take 11 bits on NV.
    uint16_t offset;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful. Note that stride limits are
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");

struct RasterizationStateBits final
{
    uint32_t depthClampEnable : 4;
    uint32_t rasterizationDiscardEnable : 4;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there are no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");

struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");

struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // These only need one bit each; the extra bit is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    uint8_t depthCompareOp;  // Only needs 4 bits; the extra bits are used as padding.
    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");

struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PrimitiveState final
{
    uint16_t topology : 15;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");

struct PackedInputAssemblyAndColorBlendStateInfo final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize + sizeof(VkViewport) +
    sizeof(VkRect2D);

// Number of dirty bits in the dirty bit set. Each dirty bit covers
// kGraphicsPipelineDirtyBitBytes bytes of the description.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");
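
// A worked example under the current constants (a sketch; the Vulkan struct sizes come from
// the standard headers): sizeof(VkViewport) == 24 and sizeof(VkRect2D) == 16, so
// kGraphicsPipelineDescSumOfSizes == 96 + 12 + 32 + 20 + 56 + 24 + 16 == 256 bytes,
// giving 256 / 4 == 64 dirty bits, exactly the limit checked above.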

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;

// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults();

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    angle::Result initializePipeline(ContextVk *contextVk,
                                     const vk::PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     const ShaderModule *geometryModule,
                                     vk::SpecializationConstantBitSet specConsts,
                                     Pipeline *pipelineOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendState &blendState);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendState &blendState);
    void setColorWriteMask(VkColorComponentFlags colorComponentFlags,
                           const gl::DrawBufferMask &alphaMask);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
                              VkColorComponentFlags colorComponentFlags,
                              const gl::DrawBufferMask &alphaMask);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Viewport and scissor.
    void setViewport(const VkViewport &viewport);
    void updateViewport(GraphicsPipelineTransitionBits *transition, const VkViewport &viewport);
    void updateDepthRange(GraphicsPipelineTransitionBits *transition,
                          float nearPlane,
                          float farPlane);
    void setScissor(const VkRect2D &scissor);
    void updateScissor(GraphicsPipelineTransitionBits *transition, const VkRect2D &scissor);

  private:
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    VkViewport mViewport;
    VkRect2D mScissor;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;

// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void update(uint32_t bindingIndex,
                VkDescriptorType type,
                uint32_t count,
                VkShaderStageFlags stages);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

  private:
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
    };

    static_assert(sizeof(PackedDescriptorSetBinding) == sizeof(uint32_t), "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
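
// Example (a sketch): describing a layout with one uniform buffer at binding 0, visible to the
// vertex and fragment stages:
//
//   DescriptorSetLayoutDesc desc;
//   desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1,
//               VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
//
// Two descs built this way hash and compare equal, so DescriptorSetLayoutCache below can return
// the same VkDescriptorSetLayout for both.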

// The following are for caching descriptor set layouts. Limited to a maximum of four descriptor
// set layouts. This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

struct PackedPushConstantRange
{
    uint32_t offset;
    uint32_t size;
};

template <typename T>
using DescriptorSetLayoutArray = std::array<T, kMaxDescriptorSetLayouts>;
using DescriptorSetLayoutPointerArray =
    DescriptorSetLayoutArray<BindingPointer<DescriptorSetLayout>>;
template <typename T>
using PushConstantRangeArray = gl::ShaderMap<T>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(uint32_t setIndex, const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);

    const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;

  private:
    DescriptorSetLayoutArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
    static_assert(sizeof(decltype(mPushConstantRanges)) ==
                      (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) ==
                  (sizeof(DescriptorSetLayoutArray<DescriptorSetLayoutDesc>) +
                   sizeof(gl::ShaderMap<PackedPushConstantRange>)),
              "Unexpected Size");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
    // of the code faster. The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions. Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}
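
// For example (a sketch): if only the line width changed between two pipeline descriptions,
// the transition bits contain the single dirty bit covering the 4-byte lineWidth field, and
// the loop above compares just that one uint32_t word instead of the whole description.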

class PipelineHelper final : angle::NonCopyable
{
  public:
    PipelineHelper();
    ~PipelineHelper();
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    void updateSerial(Serial serial) { mSerial = serial; }
    bool valid() const { return mPipeline.valid(); }
    Serial getSerial() const { return mSerial; }
    Pipeline &getPipeline() { return mPipeline; }

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Serial mSerial;
    Pipeline mPipeline;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}

class TextureDescriptorDesc
{
  public:
    TextureDescriptorDesc();
    ~TextureDescriptorDesc();

    TextureDescriptorDesc(const TextureDescriptorDesc &other);
    TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);

    void update(size_t index, Serial textureSerial, Serial samplerSerial);
    size_t hash() const;
    void reset();

    bool operator==(const TextureDescriptorDesc &other) const;

    // Note: this is an exclusive index; if one texture unit is used, it returns 1.
    uint32_t getMaxIndex() const { return mMaxIndex; }

  private:
    uint32_t mMaxIndex;
    struct TexUnitSerials
    {
        uint32_t texture;
        uint32_t sampler;
    };
    gl::ActiveTextureArray<TexUnitSerials> mSerials;
};

// This is IMPLEMENTATION_MAX_DRAW_BUFFERS + 1 for the depth/stencil attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_FRAMEBUFFER_ATTACHMENTS;
// Color serials are at indices [0, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS - 1].
// The depth/stencil index is at gl::IMPLEMENTATION_MAX_DRAW_BUFFERS.
constexpr size_t kFramebufferDescDepthStencilIndex = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;

// Struct for AttachmentSerial cache signatures. Includes the level/layer of the imageView as
// well as a unique Serial value for the underlying image.
struct AttachmentSerial
{
    uint16_t level;
    uint16_t layer;
    uint32_t imageSerial;
};
constexpr AttachmentSerial kZeroAttachmentSerial = {0, 0, 0};

class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void update(uint32_t index, AttachmentSerial serial);
    size_t hash() const;
    void reset();

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

  private:
    gl::AttachmentArray<AttachmentSerial> mSerials;
};
}  // namespace vk
}  // namespace rx

// Introduce std::hash specializations for the packed description types.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::TextureDescriptorDesc>
{
    size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};
}  // namespace std

namespace rx
{
// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(VkDevice device);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       Serial serial,
                                                       const vk::RenderPassDesc &desc,
                                                       vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            innerCache.begin()->second.updateSerial(serial);
            *renderPassOut = &innerCache.begin()->second.get();
            return angle::Result::Continue;
        }

        return addRenderPass(contextVk, serial, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(vk::Context *context,
                                       Serial serial,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       vk::RenderPass **renderPassOut);

  private:
    angle::Result addRenderPass(ContextVk *contextVk,
                                Serial serial,
                                const vk::RenderPassDesc &desc,
                                vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassAndSerial>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
};

// TODO(jmadill): Add cache trimming/eviction.
class GraphicsPipelineCache final : angle::NonCopyable
{
  public:
    GraphicsPipelineCache();
    ~GraphicsPipelineCache();

    void destroy(VkDevice device);
    void release(ContextVk *context);

    void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);

    ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
                                           const vk::PipelineCache &pipelineCacheVk,
                                           const vk::RenderPass &compatibleRenderPass,
                                           const vk::PipelineLayout &pipelineLayout,
                                           const gl::AttributesMask &activeAttribLocationsMask,
                                           const gl::ComponentTypeMask &programAttribsTypeMask,
                                           const vk::ShaderModule *vertexModule,
                                           const vk::ShaderModule *fragmentModule,
                                           const vk::ShaderModule *geometryModule,
                                           vk::SpecializationConstantBitSet specConsts,
                                           const vk::GraphicsPipelineDesc &desc,
                                           const vk::GraphicsPipelineDesc **descPtrOut,
                                           vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item != mPayload.end())
        {
            *descPtrOut  = &item->first;
            *pipelineOut = &item->second;
            return angle::Result::Continue;
        }

        return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
                              activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
                              fragmentModule, geometryModule, specConsts, desc, descPtrOut,
                              pipelineOut);
    }

  private:
    angle::Result insertPipeline(ContextVk *contextVk,
                                 const vk::PipelineCache &pipelineCacheVk,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const gl::AttributesMask &activeAttribLocationsMask,
                                 const gl::ComponentTypeMask &programAttribsTypeMask,
                                 const vk::ShaderModule *vertexModule,
                                 const vk::ShaderModule *fragmentModule,
                                 const vk::ShaderModule *geometryModule,
                                 vk::SpecializationConstantBitSet specConsts,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};

class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(VkDevice device);

    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
};

class PipelineLayoutCache final : angle::NonCopyable
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache();

    void destroy(VkDevice device);

    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains the ANGLE driver uniforms at binding 0. Note that driver uniforms are updated
//   only under rare circumstances, such as viewport or depth range change. However, there is only
//   one binding in this set. This set is placed before Set 1 containing transform feedback
//   buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this
//   set. Otherwise, as the layout of Set 1 changes (due to addition and removal of xfb buffers),
//   all subsequent sets would need to be rebound (due to Vulkan pipeline layout validation
//   rules), and we would have needed to invalidateGraphicsDriverUniforms().
// - Set 1 contains uniform blocks created to encompass default uniforms. 1 binding is used per
//   pipeline stage. Additionally, transform feedback buffers are bound from binding 2 and up.
// - Set 2 contains all textures.
// - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers and images.

// ANGLE driver uniforms set index (binding is always 0):
constexpr uint32_t kDriverUniformsDescriptorSetIndex = 0;
// Uniforms set index:
constexpr uint32_t kUniformsAndXfbDescriptorSetIndex = 1;
// Textures set index:
constexpr uint32_t kTextureDescriptorSetIndex = 2;
// Other shader resources set index:
constexpr uint32_t kShaderResourceDescriptorSetIndex = 3;

// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage. Currently, a maximum of three stages are
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
constexpr uint32_t kReservedDefaultUniformBindingCount = 3;
}  // namespace rx

#endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_