//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
// Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
// Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include "common/Color.h"
#include "common/FixedVector.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace rx
{

namespace vk
{
class ImageHelper;

using RenderPassAndSerial = ObjectAndSerial<RenderPass>;
using PipelineAndSerial   = ObjectAndSerial<Pipeline>;

using RefCountedDescriptorSetLayout = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout      = RefCounted<PipelineLayout>;

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality), we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However, at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// likely something we will want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.
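//
// Because the packed types contain no padding, equality and hashing can operate directly on the
// underlying bytes. A minimal sketch of the idea (PackedDesc is a hypothetical fully-packed
// struct, not a type in this file):
//
//     bool operator==(const PackedDesc &lhs, const PackedDesc &rhs)
//     {
//         return memcmp(&lhs, &rhs, sizeof(PackedDesc)) == 0;
//     }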

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set the format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last; it is packed right after the color
    // attachments (including gaps), i.e. at an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const { return mHasDepthStencilAttachment; }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples);

    uint8_t samples() const { return mSamples; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange : 7;
    uint8_t mHasDepthStencilAttachment : 1;
    // Color attachment formats are stored with their GL attachment indices. The depth/stencil
    // attachment format follows the last enabled color attachment. When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices. These indices are marked as such, e.g. colorIndexGL. The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk. The subpass attachment references create the
    // link between the two index spaces. The subpass declares attachment references with GL
    // indices (which correspond to the location decoration of shader outputs). The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    gl::AttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 12, "Size check failed");
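
// For example, the GL attachment layout described above (color attachments 0 and 3 plus a
// depth/stencil attachment) would be packed roughly as follows. This is an illustrative sketch
// only; the format IDs are placeholders:
//
//     RenderPassDesc desc;
//     desc.setSamples(1);
//     desc.packColorAttachment(0, angle::FormatID::R8G8B8A8_UNORM);
//     desc.packColorAttachmentGap(1);
//     desc.packColorAttachmentGap(2);
//     desc.packColorAttachment(3, angle::FormatID::R8G8B8A8_UNORM);
//     desc.packDepthStencilAttachment(angle::FormatID::D24_UNORM_S8_UINT);
//
//     // colorAttachmentRange() is now 4; attachmentCount() is 3 (two colors + depth/stencil).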

struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 1].
    uint16_t loadOp : 2;
    uint16_t storeOp : 1;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 1;

    // 5 bits each, which also pads the structure to exactly 2 bytes. Note that we currently
    // don't support any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 2, "Size check failed");

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](size_t index) const;
    PackedAttachmentOpsDesc &operator[](size_t index);

    // Initializes an attachment op with placeholder values. Used for compatible RenderPass checks.
    void initDummyOp(size_t index, VkImageLayout initialLayout, VkImageLayout finalLayout);
    // Initializes an attachment op with all load and store operations.
    void initWithLoadStore(size_t index, VkImageLayout initialLayout, VkImageLayout finalLayout);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 20, "Size check failed");
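
// A sketch of typical use (illustrative only): describing two attachments that are both loaded
// and stored, staying in color-attachment layout for the duration of the render pass:
//
//     AttachmentOpsArray ops;
//     ops.initWithLoadStore(0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
//                           VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
//     ops.initWithLoadStore(1, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
//                           VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);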

struct PackedAttribDesc final
{
    uint8_t format;

    // TODO(http://anglebug.com/2672): Emulate divisors greater than UBYTE_MAX.
    uint8_t divisor;

    // Can only take 11 bits on NV.
    uint16_t offset;

    // Although technically stride can be any value in ES 2.0, in practice supporting a stride
    // greater than MAX_USHORT should not be that helpful. Note that stride limits were
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");

struct RasterizationStateBits final
{
    uint32_t depthClampEnable : 4;
    uint32_t rasterizationDiscardEnable : 4;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there are no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");

struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");

struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // These only need one bit each; the extra bit is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    uint8_t depthCompareOp;  // Only needs 4 bits; the extra bits are used as padding.
    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");

struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PrimitiveState final
{
    uint16_t topology : 15;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");

struct PackedInputAssemblyAndColorBlendStateInfo final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kPackedInputAssemblyAndColorBlendStateSize +
    kPackedRasterizationAndMultisampleStateSize + kPackedDepthStencilStateSize +
    kRenderPassDescSize + sizeof(VkViewport) + sizeof(VkRect2D);

// Each dirty bit in the transition bit set covers kGraphicsPipelineDirtyBitBytes bytes of the
// packed description, which determines the number of dirty bits below.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
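
// As a worked check of the math above (sizes in bytes, assuming the usual 24-byte VkViewport and
// 16-byte VkRect2D): 96 (vertex input) + 56 (input assembly and color blend) + 32 (rasterization
// and multisample) + 20 (depth/stencil) + 12 (render pass desc) + 24 + 16 = 256 bytes total,
// which yields 256 / 4 = 64 dirty bits, exactly the BitSet capacity asserted above.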

// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults();

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    angle::Result initializePipeline(vk::Context *context,
                                     const vk::PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     Pipeline *pipelineOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendState &blendState);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendState &blendState);
    void setColorWriteMask(VkColorComponentFlags colorComponentFlags,
                           const gl::DrawBufferMask &alphaMask);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
                              VkColorComponentFlags colorComponentFlags,
                              const gl::DrawBufferMask &alphaMask);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Viewport and scissor.
    void setViewport(const VkViewport &viewport);
    void updateViewport(GraphicsPipelineTransitionBits *transition, const VkViewport &viewport);
    void updateDepthRange(GraphicsPipelineTransitionBits *transition,
                          float nearPlane,
                          float farPlane);
    void setScissor(const VkRect2D &scissor);
    void updateScissor(GraphicsPipelineTransitionBits *transition, const VkRect2D &scissor);

  private:
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    VkViewport mViewport;
    VkRect2D mScissor;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");
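
// A sketch of how the update methods and transition bits work together (illustrative only; the
// surrounding context setup is omitted):
//
//     GraphicsPipelineDesc desc;
//     desc.initDefaults();
//
//     GraphicsPipelineTransitionBits transition;
//     desc.updateLineWidth(&transition, 2.0f);
//     desc.updateBlendColor(&transition, gl::ColorF(0, 0, 0, 1));
//
//     // |transition| now has bits set for the 4-byte chunks of |desc| that changed, which a
//     // cached PipelineHelper can use to find an already-created pipeline (see findTransition).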

constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;

// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void update(uint32_t bindingIndex,
                VkDescriptorType type,
                uint32_t count,
                VkShaderStageFlags stages);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

  private:
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
    };

    static_assert(sizeof(PackedDescriptorSetBinding) == sizeof(uint32_t), "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
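
// A sketch of building a layout description (illustrative only): one uniform buffer visible to
// the vertex and fragment stages at binding 0, and one combined image sampler at binding 1:
//
//     DescriptorSetLayoutDesc desc;
//     desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1,
//                 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
//     desc.update(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);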

// The following are for caching descriptor set layouts, limited to a maximum of four descriptor
// set layouts. This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

struct PackedPushConstantRange
{
    uint32_t offset;
    uint32_t size;
};

template <typename T>
using DescriptorSetLayoutArray = std::array<T, kMaxDescriptorSetLayouts>;
using DescriptorSetLayoutPointerArray =
    DescriptorSetLayoutArray<BindingPointer<DescriptorSetLayout>>;
template <typename T>
using PushConstantRangeArray = gl::ShaderMap<T>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(uint32_t setIndex, const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);

    const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;

  private:
    DescriptorSetLayoutArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
    static_assert(sizeof(decltype(mPushConstantRanges)) ==
                      (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) ==
                  (sizeof(DescriptorSetLayoutArray<DescriptorSetLayoutDesc>) +
                   sizeof(gl::ShaderMap<PackedPushConstantRange>)),
              "Unexpected Size");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
    // of the code faster. The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions. Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}
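
// To illustrate the chunk indexing (a hypothetical helper, not part of this file): the dirty bit
// covering a given field is simply its byte offset within the packed description divided by the
// chunk size:
//
//     constexpr size_t TransitionBitForOffset(size_t byteOffset)
//     {
//         return byteOffset / kGraphicsPipelineDirtyBitBytes;
//     }
//
// A field at byte offset 100, for instance, is covered by dirty bit 25.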

class PipelineHelper final : angle::NonCopyable
{
  public:
    PipelineHelper();
    ~PipelineHelper();
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    void updateSerial(Serial serial) { mSerial = serial; }
    bool valid() const { return mPipeline.valid(); }
    Serial getSerial() const { return mSerial; }
    Pipeline &getPipeline() { return mPipeline; }

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Serial mSerial;
    Pipeline mPipeline;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}

class TextureDescriptorDesc
{
  public:
    TextureDescriptorDesc();
    ~TextureDescriptorDesc();

    TextureDescriptorDesc(const TextureDescriptorDesc &other);
    TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);

    void update(size_t index, Serial textureSerial, Serial samplerSerial);
    size_t hash() const;
    void reset();

    bool operator==(const TextureDescriptorDesc &other) const;

    // Note: this is an exclusive index; if one texture unit is used, this returns 1.
    uint32_t getMaxIndex() const { return mMaxIndex; }

  private:
    uint32_t mMaxIndex;
    struct TexUnitSerials
    {
        uint32_t texture;
        uint32_t sampler;
    };
    gl::ActiveTextureArray<TexUnitSerials> mSerials;
};
}  // namespace vk
}  // namespace rx

// Introduce std::hash specializations for the packed descriptions so they can be used as
// unordered_map keys.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::TextureDescriptorDesc>
{
    size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
};
}  // namespace std

namespace rx
{
// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(VkDevice device);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(vk::Context *context,
                                                       Serial serial,
                                                       const vk::RenderPassDesc &desc,
                                                       vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            innerCache.begin()->second.updateSerial(serial);
            *renderPassOut = &innerCache.begin()->second.get();
            return angle::Result::Continue;
        }

        return addRenderPass(context, serial, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(vk::Context *context,
                                       Serial serial,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       vk::RenderPass **renderPassOut);

  private:
    angle::Result addRenderPass(vk::Context *context,
                                Serial serial,
                                const vk::RenderPassDesc &desc,
                                vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassAndSerial>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
};
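
// A sketch of how the two lookups differ (illustrative only; |cache|, |contextVk|, |serial|,
// |desc|, and |ops| are assumed to exist in the caller):
//
//     vk::RenderPass *renderPass = nullptr;
//     // Any compatible render pass will do, e.g. for pipeline creation:
//     ANGLE_TRY(cache.getCompatibleRenderPass(contextVk, serial, desc, &renderPass));
//     // When actually beginning a render pass, the exact load/store ops matter:
//     ANGLE_TRY(cache.getRenderPassWithOps(contextVk, serial, desc, ops, &renderPass));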

// TODO(jmadill): Add cache trimming/eviction.
class GraphicsPipelineCache final : angle::NonCopyable
{
  public:
    GraphicsPipelineCache();
    ~GraphicsPipelineCache();

    void destroy(VkDevice device);
    void release(ContextVk *context);

    void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);

    ANGLE_INLINE angle::Result getPipeline(vk::Context *context,
                                           const vk::PipelineCache &pipelineCacheVk,
                                           const vk::RenderPass &compatibleRenderPass,
                                           const vk::PipelineLayout &pipelineLayout,
                                           const gl::AttributesMask &activeAttribLocationsMask,
                                           const gl::ComponentTypeMask &programAttribsTypeMask,
                                           const vk::ShaderModule *vertexModule,
                                           const vk::ShaderModule *fragmentModule,
                                           const vk::GraphicsPipelineDesc &desc,
                                           const vk::GraphicsPipelineDesc **descPtrOut,
                                           vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item != mPayload.end())
        {
            *descPtrOut  = &item->first;
            *pipelineOut = &item->second;
            return angle::Result::Continue;
        }

        return insertPipeline(context, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
                              activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
                              fragmentModule, desc, descPtrOut, pipelineOut);
    }

  private:
    angle::Result insertPipeline(vk::Context *context,
                                 const vk::PipelineCache &pipelineCacheVk,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const gl::AttributesMask &activeAttribLocationsMask,
                                 const gl::ComponentTypeMask &programAttribsTypeMask,
                                 const vk::ShaderModule *vertexModule,
                                 const vk::ShaderModule *fragmentModule,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};

class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(VkDevice device);

    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
};

class PipelineLayoutCache final : angle::NonCopyable
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache();

    void destroy(VkDevice device);

    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains uniform blocks created to encompass default uniforms. 1 binding is used per
//   pipeline stage. Additionally, transform feedback buffers are bound from binding 2 and up.
// - Set 1 contains all textures.
// - Set 2 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers and images.
// - Set 3 contains the ANGLE driver uniforms at binding 0. Note that driver uniforms are updated
//   only under rare circumstances, such as a viewport or depth range change, and this set has
//   only one binding.

// Uniforms set index:
constexpr uint32_t kUniformsAndXfbDescriptorSetIndex = 0;
// Textures set index:
constexpr uint32_t kTextureDescriptorSetIndex = 1;
// Other shader resources set index:
constexpr uint32_t kShaderResourceDescriptorSetIndex = 2;
// ANGLE driver uniforms set index (always set 3, with a single binding at 0):
constexpr uint32_t kDriverUniformsDescriptorSetIndex = 3;

// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage. Currently, a maximum of two stages are
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
constexpr uint32_t kReservedDefaultUniformBindingCount = 2;
// Binding index start for transform feedback buffers:
constexpr uint32_t kXfbBindingIndexStart = kReservedDefaultUniformBindingCount;
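
// As a worked example of the above, for a program with vertex and fragment stages, set 0 would be
// laid out as follows (binding indices only; the actual buffers are bound at draw time):
//
//     binding 0:  vertex-stage default uniform block
//     binding 1:  fragment-stage default uniform block
//     binding 2+: transform feedback buffers (kXfbBindingIndexStart and up)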
}  // namespace rx

#endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_