//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include "common/Color.h"
#include "common/FixedVector.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace rx
{

namespace vk
{
class ImageHelper;
enum class ImageLayout;

using RenderPassAndSerial = ObjectAndSerial<RenderPass>;
using PipelineAndSerial   = ObjectAndSerial<Pipeline>;

using RefCountedDescriptorSetLayout = RefCounted<DescriptorSetLayout>;
using RefCountedPipelineLayout      = RefCounted<PipelineLayout>;
using RefCountedSampler             = RefCounted<Sampler>;

// Helper macro that casts to a bitfield type then verifies no bits were dropped.
#define SetBitField(lhs, rhs)                                              \
    lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs);     \
    ASSERT(static_cast<decltype(rhs)>(lhs) == (rhs))
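
// For illustration, a typical SetBitField use packs a wide enum value into a narrow bitfield
// member and asserts that the value survived the narrowing.  A minimal sketch with a
// hypothetical 5-bit member:
//
//   uint16_t packedLayout : 5;
//   ...
//   SetBitField(packedLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);  // value 7 fits in 5 bits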

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits.  Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits.  For example, boolean values could be represented by a single bit instead
// of a uint8_t.  However, at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations.  This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result to
// pack nicely into the desired space.  This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last; it lands right after the color
    // attachments (including gaps), i.e. at index |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasDepthStencilAttachment() const { return mHasDepthStencilAttachment; }

    // Get the number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples);

    uint8_t samples() const { return mSamples; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange : 7;
    uint8_t mHasDepthStencilAttachment : 1;
    // Color attachment formats are stored with their GL attachment indices.  The depth/stencil
    // attachment formats follow the last enabled color attachment.  When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices.  These indices are marked as such, e.g. colorIndexGL.  The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk.  The subpass attachment references create the
    // link between the two index spaces.  The subpass declares attachment references with GL
    // indices (which correspond to the location decoration of shader outputs).  The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    gl::AttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 12, "Size check failed");
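
// For illustration only: a caller with GL color attachments 0 and 3 enabled would pack the
// description as below.  This is a sketch; the format IDs are assumptions picked for the
// example, not required values.
//
//   vk::RenderPassDesc desc;
//   desc.setSamples(1);
//   desc.packColorAttachment(0, angle::FormatID::R8G8B8A8_UNORM);
//   desc.packColorAttachmentGap(1);
//   desc.packColorAttachmentGap(2);
//   desc.packColorAttachment(3, angle::FormatID::R8G8B8A8_UNORM);
//   desc.packDepthStencilAttachment(angle::FormatID::D24_UNORM_S8_UINT);
//   // colorAttachmentRange() == 4, attachmentCount() == 3 (two colors + depth/stencil).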

struct PackedAttachmentOpsDesc final
{
    // VkAttachmentLoadOp is in range [0, 2], and VkAttachmentStoreOp is in range [0, 1].
    uint16_t loadOp : 2;
    uint16_t storeOp : 1;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 1;

    // These use 5 bits each, which also pads the structure to exactly 2 bytes.  Note that we
    // currently don't support any of the extension layouts, whose values start at 1'000'000'000.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 2, "Size check failed");

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](size_t index) const;
    PackedAttachmentOpsDesc &operator[](size_t index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(size_t index, ImageLayout initialLayout, ImageLayout finalLayout);

    void setLayouts(size_t index, ImageLayout initialLayout, ImageLayout finalLayout);
    void setOps(size_t index, VkAttachmentLoadOp loadOp, VkAttachmentStoreOp storeOp);
    void setStencilOps(size_t index, VkAttachmentLoadOp loadOp, VkAttachmentStoreOp storeOp);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 20, "Size check failed");
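
// For illustration only: render pass ops for one attachment might be initialized as below.
// A sketch, assuming ImageLayout::ColorAttachment is a valid enumerator of the ImageLayout
// enum declared at the top of this file:
//
//   vk::AttachmentOpsArray ops;
//   ops.initWithLoadStore(0, vk::ImageLayout::ColorAttachment, vk::ImageLayout::ColorAttachment);
//   ops.setOps(0, VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE);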

struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;

    // Can only take 11 bits on NV.
    uint16_t offset;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT is unlikely to be helpful.  Note that stride limits were
    // introduced in ES 3.1.
    uint16_t stride;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 6, "Size mismatch");

struct VertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");

struct RasterizationStateBits final
{
    uint32_t depthClampEnable : 4;
    uint32_t rasterizationDiscardEnable : 4;
    uint32_t polygonMode : 4;
    uint32_t cullMode : 4;
    uint32_t frontFace : 4;
    uint32_t depthBiasEnable : 1;
    uint32_t sampleShadingEnable : 1;
    uint32_t alphaToCoverageEnable : 1;
    uint32_t alphaToOneEnable : 1;
    uint32_t rasterizationSamples : 8;
};

constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits);
static_assert(kRasterizationStateBitsSize == 4, "Size check failed");

struct PackedRasterizationAndMultisampleStateInfo final
{
    RasterizationStateBits bits;
    // Padded to ensure there are no gaps in this structure or those that use it.
    float minSampleShading;
    uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS];
    // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness.
    float depthBiasClamp;
    float depthBiasConstantFactor;
    float depthBiasSlopeFactor;
    float lineWidth;
};

constexpr size_t kPackedRasterizationAndMultisampleStateSize =
    sizeof(PackedRasterizationAndMultisampleStateInfo);
static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed");

struct StencilOps final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kStencilOpsSize = sizeof(StencilOps);
static_assert(kStencilOpsSize == 2, "Size check failed");

struct PackedStencilOpState final
{
    StencilOps ops;
    uint8_t compareMask;
    uint8_t writeMask;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 4, "Size check failed");

struct DepthStencilEnableFlags final
{
    uint8_t depthTest : 2;  // These only need one bit each; the extra bit is used as padding.
    uint8_t depthWrite : 2;
    uint8_t depthBoundsTest : 2;
    uint8_t stencilTest : 2;
};

constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags);
static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed");

struct PackedDepthStencilStateInfo final
{
    DepthStencilEnableFlags enable;
    uint8_t frontStencilReference;
    uint8_t backStencilReference;
    uint8_t depthCompareOp;  // Only needs 4 bits; the extra bits are used as padding.
    float minDepthBounds;
    float maxDepthBounds;
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo);
static_assert(kPackedDepthStencilStateSize == 20, "Size check failed");

struct LogicOpState final
{
    uint8_t opEnable : 1;
    uint8_t op : 7;
};

constexpr size_t kLogicOpStateSize = sizeof(LogicOpState);
static_assert(kLogicOpStateSize == 1, "Size check failed");

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PrimitiveState final
{
    uint16_t topology : 15;
    uint16_t restartEnable : 1;
};

constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState);
static_assert(kPrimitiveStateSize == 2, "Size check failed");

struct PackedInputAssemblyAndColorBlendStateInfo final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
    float blendConstants[4];
    LogicOpState logic;
    uint8_t blendEnableMask;
    PrimitiveState primitive;
};

constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
    sizeof(PackedInputAssemblyAndColorBlendStateInfo);
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
    kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize + sizeof(VkViewport) +
    sizeof(VkRect2D);

// Each dirty bit covers this many bytes of the packed pipeline description.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
// Number of dirty bits in the dirty bit set.
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits.  Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
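
// As a worked example of the arithmetic above, using the sizes asserted in this file:
// 96 + 12 + 32 + 20 + 56 = 216 bytes of packed state, plus sizeof(VkViewport) (24 bytes) and
// sizeof(VkRect2D) (16 bytes), gives 256 bytes total.  256 / 4 = 64 dirty bits, exactly filling
// the 64-bit transition bit set checked by the static_assert above.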

// State changes are applied through the update methods.  Each update method can also have a
// sibling method that applies the update without marking a state transition.  The non-transition
// update methods are used for internal shader pipelines.  Not every non-transition update method
// is implemented yet, as not every state is used in internal shaders.
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash() const;
    bool operator==(const GraphicsPipelineDesc &other) const;

    void initDefaults();

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    angle::Result initializePipeline(ContextVk *contextVk,
                                     const vk::PipelineCache &pipelineCacheVk,
                                     const RenderPass &compatibleRenderPass,
                                     const PipelineLayout &pipelineLayout,
                                     const gl::AttributesMask &activeAttribLocationsMask,
                                     const gl::ComponentTypeMask &programAttribsTypeMask,
                                     const ShaderModule *vertexModule,
                                     const ShaderModule *fragmentModule,
                                     const ShaderModule *geometryModule,
                                     vk::SpecializationConstantBitSet specConsts,
                                     Pipeline *pipelineOut) const;

    // Vertex input state.  For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           GLuint relativeOffset);

    // Input assembly info
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Raster states
    void setCullMode(VkCullModeFlagBits cullMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const RenderPassDesc &renderPassDesc);

    // Blend states
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled);
    void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendState &blendState);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendState &blendState);
    void setColorWriteMask(VkColorComponentFlags colorComponentFlags,
                           const gl::DrawBufferMask &alphaMask);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMask(GraphicsPipelineTransitionBits *transition,
                              VkColorComponentFlags colorComponentFlags,
                              const gl::DrawBufferMask &alphaMask);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilBackFuncs(uint8_t reference, VkCompareOp compareOp, uint8_t compareMask);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 GLint ref,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                GLint ref,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition,
                                     const gl::DepthStencilState &depthStencilState,
                                     const gl::Framebuffer *drawFramebuffer);
    void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition,
                                    const gl::DepthStencilState &depthStencilState,
                                    const gl::Framebuffer *drawFramebuffer);

    // Depth offset.
    void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updatePolygonOffset(GraphicsPipelineTransitionBits *transition,
                             const gl::RasterizerState &rasterState);

    // Viewport and scissor.
    void setViewport(const VkViewport &viewport);
    void updateViewport(GraphicsPipelineTransitionBits *transition, const VkViewport &viewport);
    void updateDepthRange(GraphicsPipelineTransitionBits *transition,
                          float nearPlane,
                          float farPlane);
    void setScissor(const VkRect2D &scissor);
    void updateScissor(GraphicsPipelineTransitionBits *transition, const VkRect2D &scissor);

  private:
    VertexInputAttributes mVertexInputAttribs;
    RenderPassDesc mRenderPassDesc;
    PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
    PackedDepthStencilStateInfo mDepthStencilStateInfo;
    PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo;
    VkViewport mViewport;
    VkRect2D mScissor;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");
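
// For illustration only: a caller typically accumulates the changed parts of the description in
// the transition bits, then looks for an already-built pipeline before creating a new one.  A
// minimal sketch with hypothetical locals |desc| and |rasterState|:
//
//   vk::GraphicsPipelineTransitionBits transition;
//   desc.updateCullMode(&transition, rasterState);
//   desc.updateLineWidth(&transition, 2.0f);
//   // |transition| now marks the 4-byte chunks of |desc| that may differ from the previous
//   // description, and can be passed to PipelineHelper::findTransition() further below.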

constexpr uint32_t kMaxDescriptorSetLayoutBindings =
    std::max(gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
             gl::IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS);

using DescriptorSetLayoutBindingVector =
    angle::FixedVector<VkDescriptorSetLayoutBinding, kMaxDescriptorSetLayoutBindings>;

// A packed description of a descriptor set layout.  Use similarly to RenderPassDesc and
// GraphicsPipelineDesc.  Currently we only need to differentiate layouts based on sampler and
// ubo usage.  In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void update(uint32_t bindingIndex,
                VkDescriptorType type,
                uint32_t count,
                VkShaderStageFlags stages);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

  private:
    struct PackedDescriptorSetBinding
    {
        uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
        uint8_t stages;  // Stores a packed VkShaderStageFlags.
        uint16_t count;  // Stores a packed uint32_t descriptorCount.
    };

    static_assert(sizeof(PackedDescriptorSetBinding) == sizeof(uint32_t), "Unexpected size");

    // This is a compact representation of a descriptor set layout.
    std::array<PackedDescriptorSetBinding, kMaxDescriptorSetLayoutBindings>
        mPackedDescriptorSetLayout;
};
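
// For illustration only: a layout with one uniform buffer and one sampled image might be
// described and unpacked as below (a sketch):
//
//   vk::DescriptorSetLayoutDesc desc;
//   desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
//   desc.update(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
//
//   vk::DescriptorSetLayoutBindingVector bindings;
//   desc.unpackBindings(&bindings);  // Recovers the VkDescriptorSetLayoutBinding entries.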

// The following are used for caching descriptor set layouts, limited to a maximum of four
// descriptor set layouts.  This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = 4;

struct PackedPushConstantRange
{
    uint32_t offset;
    uint32_t size;
};

template <typename T>
using DescriptorSetLayoutArray = std::array<T, kMaxDescriptorSetLayouts>;
using DescriptorSetLayoutPointerArray =
    DescriptorSetLayoutArray<BindingPointer<DescriptorSetLayout>>;
template <typename T>
using PushConstantRangeArray = gl::ShaderMap<T>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(uint32_t setIndex, const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(gl::ShaderType shaderType, uint32_t offset, uint32_t size);

    const PushConstantRangeArray<PackedPushConstantRange> &getPushConstantRanges() const;

  private:
    DescriptorSetLayoutArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PushConstantRangeArray<PackedPushConstantRange> mPushConstantRanges;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
    static_assert(sizeof(decltype(mPushConstantRanges)) ==
                      (sizeof(PackedPushConstantRange) * angle::EnumSize<gl::ShaderType>()),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) ==
                  (sizeof(DescriptorSetLayoutArray<DescriptorSetLayoutDesc>) +
                   sizeof(gl::ShaderMap<PackedPushConstantRange>)),
              "Unexpected Size");

// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    explicit SamplerDesc(const gl::SamplerState &samplerState, bool stencilMode);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    void update(const gl::SamplerState &samplerState, bool stencilMode);
    void reset();
    angle::Result init(ContextVk *contextVk, vk::Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // 16 bits for modes + states.
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op (8 possible values)
    uint16_t mCompareOp : 3;

    // Border color and unnormalized coordinates are implicitly set to constants.

    // 16 extra bits reserved for future use.
    uint16_t mReserved;
};

// Total size: 160 bits == 20 bytes.
static_assert(sizeof(SamplerDesc) == 20, "Unexpected SamplerDesc size");
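
// For illustration, SamplerDesc is designed to serve as a compact hash key (see the std::hash
// specializations further below).  A sketch of how a cache lookup might use it, assuming a
// gl::SamplerState |samplerState| in hand:
//
//   vk::SamplerDesc desc(samplerState, /*stencilMode=*/false);
//   // An unordered_map<vk::SamplerDesc, vk::RefCountedSampler>::find(desc) then hashes and
//   // compares the packed 20-byte key rather than the full gl::SamplerState.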

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits.  This would make some parts
    // of the code faster.  The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions.  Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}

class PipelineHelper final : angle::NonCopyable
{
  public:
    PipelineHelper();
    ~PipelineHelper();
    inline explicit PipelineHelper(Pipeline &&pipeline);

    void destroy(VkDevice device);

    void updateSerial(Serial serial) { mSerial = serial; }
    bool valid() const { return mPipeline.valid(); }
    Serial getSerial() const { return mSerial; }
    Pipeline &getPipeline() { return mPipeline; }

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

  private:
    std::vector<GraphicsPipelineTransition> mTransitions;
    Serial mSerial;
    Pipeline mPipeline;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {}

class TextureDescriptorDesc
{
  public:
    TextureDescriptorDesc();
    ~TextureDescriptorDesc();

    TextureDescriptorDesc(const TextureDescriptorDesc &other);
    TextureDescriptorDesc &operator=(const TextureDescriptorDesc &other);

    void update(size_t index, Serial textureSerial, Serial samplerSerial);
    size_t hash() const;
    void reset();

    bool operator==(const TextureDescriptorDesc &other) const;

    // Note: this is an exclusive index.  If there is one index, this returns 1.
    uint32_t getMaxIndex() const { return mMaxIndex; }

  private:
    uint32_t mMaxIndex;
    struct TexUnitSerials
    {
        uint32_t texture;
        uint32_t sampler;
    };
    gl::ActiveTextureArray<TexUnitSerials> mSerials;
};

// This is gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1 for the depth/stencil attachment.
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_FRAMEBUFFER_ATTACHMENTS;
// Color attachment serials are at indices [0, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS - 1].
// The depth/stencil attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS.
constexpr size_t kFramebufferDescDepthStencilIndex = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
// Struct for AttachmentSerial cache signatures.  Includes level/layer for the imageView as
// well as a unique Serial value for the underlying image.
struct AttachmentSerial
{
    uint16_t level;
    uint16_t layer;
    uint32_t imageSerial;
};
constexpr AttachmentSerial kZeroAttachmentSerial = {0, 0, 0};
class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void update(uint32_t index, AttachmentSerial serial);
    size_t hash() const;
    void reset();

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

  private:
    gl::AttachmentArray<AttachmentSerial> mSerials;
};
}  // namespace vk
}  // namespace rx

// Introduce std::hash specializations for the packed description types so they can be used as
// keys in std::unordered_map caches.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::GraphicsPipelineDesc>
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::TextureDescriptorDesc>
{
    size_t operator()(const rx::vk::TextureDescriptorDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};
}  // namespace std

namespace rx
{
// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(VkDevice device);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       Serial serial,
                                                       const vk::RenderPassDesc &desc,
                                                       vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            innerCache.begin()->second.updateSerial(serial);
            *renderPassOut = &innerCache.begin()->second.get();
            return angle::Result::Continue;
        }

        return addRenderPass(contextVk, serial, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(vk::Context *context,
                                       Serial serial,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       vk::RenderPass **renderPassOut);

  private:
    angle::Result addRenderPass(ContextVk *contextVk,
                                Serial serial,
                                const vk::RenderPassDesc &desc,
                                vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme.  The top level matches the "compatible" RenderPass
    // elements.  The second layer caches the attachment load/store ops and initial/final layout.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassAndSerial>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
};
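
// For illustration only: with the two-layer scheme above, a query that only needs render pass
// compatibility can return any entry of the inner cache, while a query with specific load/store
// ops keys into the inner layer.  A sketch (ANGLE_TRY is the project's error-propagation macro;
// |serial|, |desc| and |ops| are hypothetical locals):
//
//   vk::RenderPass *renderPass = nullptr;
//   ANGLE_TRY(renderPassCache.getCompatibleRenderPass(contextVk, serial, desc, &renderPass));
//   // ...or, when the exact attachment ops and layouts matter:
//   ANGLE_TRY(renderPassCache.getRenderPassWithOps(contextVk, serial, desc, ops, &renderPass));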

// TODO(jmadill): Add cache trimming/eviction.
class GraphicsPipelineCache final : angle::NonCopyable
{
  public:
    GraphicsPipelineCache();
    ~GraphicsPipelineCache();

    void destroy(VkDevice device);
    void release(ContextVk *context);

    void populate(const vk::GraphicsPipelineDesc &desc, vk::Pipeline &&pipeline);

    ANGLE_INLINE angle::Result getPipeline(ContextVk *contextVk,
                                           const vk::PipelineCache &pipelineCacheVk,
                                           const vk::RenderPass &compatibleRenderPass,
                                           const vk::PipelineLayout &pipelineLayout,
                                           const gl::AttributesMask &activeAttribLocationsMask,
                                           const gl::ComponentTypeMask &programAttribsTypeMask,
                                           const vk::ShaderModule *vertexModule,
                                           const vk::ShaderModule *fragmentModule,
                                           const vk::ShaderModule *geometryModule,
                                           vk::SpecializationConstantBitSet specConsts,
                                           const vk::GraphicsPipelineDesc &desc,
                                           const vk::GraphicsPipelineDesc **descPtrOut,
                                           vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item != mPayload.end())
        {
            *descPtrOut  = &item->first;
            *pipelineOut = &item->second;
            return angle::Result::Continue;
        }

        return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
                              activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
                              fragmentModule, geometryModule, specConsts, desc, descPtrOut,
                              pipelineOut);
    }

  private:
    angle::Result insertPipeline(ContextVk *contextVk,
                                 const vk::PipelineCache &pipelineCacheVk,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const gl::AttributesMask &activeAttribLocationsMask,
                                 const gl::ComponentTypeMask &programAttribsTypeMask,
                                 const vk::ShaderModule *vertexModule,
                                 const vk::ShaderModule *fragmentModule,
                                 const vk::ShaderModule *geometryModule,
                                 vk::SpecializationConstantBitSet specConsts,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
};

class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(VkDevice device);

    angle::Result getDescriptorSetLayout(
        vk::Context *context,
        const vk::DescriptorSetLayoutDesc &desc,
        vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut);

  private:
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
};

class PipelineLayoutCache final : angle::NonCopyable
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache();

    void destroy(VkDevice device);

    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut);

  private:
    std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
};

class SamplerCache final : angle::NonCopyable
{
  public:
    SamplerCache();
    ~SamplerCache();

    void destroy(RendererVk *renderer);

    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::BindingPointer<vk::Sampler> *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
};

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains the ANGLE driver uniforms at binding 0.  Note that driver uniforms are updated
//   only under rare circumstances, such as viewport or depth range change.  However, there is
//   only one binding in this set.  This set is placed before Set 1 containing transform feedback
//   buffers, so that switching between xfb and non-xfb programs doesn't require rebinding this
//   set.  Otherwise, every time the layout of Set 1 changed (due to addition and removal of xfb
//   buffers), all subsequent sets would need to be rebound (due to Vulkan pipeline layout
//   validation rules), which would have required a call to invalidateGraphicsDriverUniforms().
// - Set 1 contains uniform blocks created to encompass default uniforms.  1 binding is used per
//   pipeline stage.  Additionally, transform feedback buffers are bound from binding 2 and up.
// - Set 2 contains all textures.
// - Set 3 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers and images.

// ANGLE driver uniforms set index (binding is always 0):
constexpr uint32_t kDriverUniformsDescriptorSetIndex = 0;
// Uniforms set index:
constexpr uint32_t kUniformsAndXfbDescriptorSetIndex = 1;
// Textures set index:
constexpr uint32_t kTextureDescriptorSetIndex = 2;
// Other shader resources set index:
constexpr uint32_t kShaderResourceDescriptorSetIndex = 3;

// Only 1 driver uniform binding is used.
constexpr uint32_t kReservedDriverUniformBindingCount = 1;
// There is 1 default uniform binding used per stage.  Currently, a maximum of three stages are
// supported.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
constexpr uint32_t kReservedDefaultUniformBindingCount = 3;
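
// For illustration only: these set indices feed the DescriptorSetLayoutArray when building a
// pipeline layout description.  A sketch, where |driverUniformsDesc|, |uniformsXfbDesc| and
// |texturesDesc| are hypothetical DescriptorSetLayoutDesc locals:
//
//   vk::PipelineLayoutDesc layoutDesc;
//   layoutDesc.updateDescriptorSetLayout(kDriverUniformsDescriptorSetIndex, driverUniformsDesc);
//   layoutDesc.updateDescriptorSetLayout(kUniformsAndXfbDescriptorSetIndex, uniformsXfbDesc);
//   layoutDesc.updateDescriptorSetLayout(kTextureDescriptorSetIndex, texturesDesc);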
}  // namespace rx

#endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_