/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/DrawPass.h"

#include "include/gpu/graphite/GraphiteTypes.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/private/base/SkAlign.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/ContextUtils.h"
#include "src/gpu/graphite/DrawContext.h"
#include "src/gpu/graphite/DrawList.h"
#include "src/gpu/graphite/DrawWriter.h"
#include "src/gpu/graphite/GlobalCache.h"
#include "src/gpu/graphite/GraphicsPipeline.h"
#include "src/gpu/graphite/GraphicsPipelineDesc.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/PaintParamsKey.h"
#include "src/gpu/graphite/PipelineData.h"
#include "src/gpu/graphite/PipelineDataCache.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/Renderer.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/Sampler.h"
#include "src/gpu/graphite/Texture.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/geom/BoundsManager.h"

#include "src/base/SkMathPriv.h"
#include "src/base/SkTBlockList.h"

#include <algorithm>
#include <unordered_map>

using namespace skia_private;

namespace skgpu::graphite {

namespace {

// Helper to manage packed fields within a uint64_t
template <uint64_t Bits, uint64_t Offset>
struct Bitfield {
    static constexpr uint64_t kMask   = ((uint64_t) 1 << Bits) - 1;
    static constexpr uint64_t kOffset = Offset;
    static constexpr uint64_t kBits   = Bits;

    static uint32_t get(uint64_t v) { return static_cast<uint32_t>((v >> kOffset) & kMask); }
    static uint64_t set(uint32_t v) { return (v & kMask) << kOffset; }
};
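
// Illustrative example (not part of the original file): packing a 16-bit value into the top of a
// 64-bit key and reading it back:
//     using TopField = Bitfield<16, 48>;
//     uint64_t key = TopField::set(0x1234);    // key == 0x1234'0000'0000'0000
//     SkASSERT(TopField::get(key) == 0x1234);  // round-trips exactly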

// This class maps objects to a dense index which can then be used to look them up later
template <typename T, typename V = T, typename C = V>
class DenseBiMap {
public:
    using Index = uint32_t;

    // See note below in GeometryUniformField. This value can be round-tripped within the SortKey
    // packing for all fields but will not be produced when recording actual draw data.
    static constexpr Index kInvalidIndex{1 << SkNextLog2_portable(DrawList::kMaxRenderSteps)};

    bool empty() const { return fIndexToData.empty(); }
    size_t size() const { return fIndexToData.size(); }

    Index insert(const T& data) {
        Index* index = fDataToIndex.find(data);
        if (!index) {
            SkASSERT(SkToU32(fIndexToData.size()) < kInvalidIndex);
            index = fDataToIndex.set(data, (Index) fIndexToData.size());
            fIndexToData.push_back(C{data});
        }
        return *index;
    }

    const V& lookup(Index index) {
        SkASSERT(index < kInvalidIndex);
        return fIndexToData[index];
    }

    SkSpan<V> data() { return {fIndexToData.data(), fIndexToData.size()}; }

    TArray<V>&& detach() { return std::move(fIndexToData); }

private:
    THashMap<T, Index> fDataToIndex;
    TArray<V> fIndexToData;
};
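
// Illustrative usage (not in the original file): inserting the same value twice yields the same
// dense index, so indices can be packed into sort keys and later resolved with lookup():
//     DenseBiMap<int> map;
//     DenseBiMap<int>::Index a = map.insert(42);  // 0
//     DenseBiMap<int>::Index b = map.insert(7);   // 1
//     SkASSERT(map.insert(42) == a && map.lookup(b) == 7);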

// Tracks uniform data on the CPU and then its transition to storage in a GPU buffer (ubo or ssbo).
struct CpuOrGpuData {
    union {
        const UniformDataBlock* fCpuData;
        BindUniformBufferInfo fGpuData;
    };

    // Can only start from CPU data
    CpuOrGpuData(const UniformDataBlock* cpuData) : fCpuData(cpuData) {}
};
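
// The union starts out as fCpuData and is overwritten with fGpuData once
// UniformTracker::writeUniforms() below has streamed the CPU bytes into a mapped buffer; after
// that point only fGpuData may be read.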

// Tracks the combination of textures from the paint and from the RenderStep to describe the full
// binding that needs to be in the command list.
struct TextureBinding {
    const TextureDataBlock* fPaintTextures;
    const TextureDataBlock* fStepTextures;

    bool operator==(const TextureBinding& other) const {
        return fPaintTextures == other.fPaintTextures &&
               fStepTextures == other.fStepTextures;
    }
    bool operator!=(const TextureBinding& other) const { return !(*this == other); }

    int numTextures() const {
        return (fPaintTextures ? fPaintTextures->numTextures() : 0) +
               (fStepTextures ? fStepTextures->numTextures() : 0);
    }
};

using UniformCache = DenseBiMap<const UniformDataBlock*, CpuOrGpuData>;
using TextureBindingCache = DenseBiMap<TextureBinding>;
using GraphicsPipelineCache = DenseBiMap<GraphicsPipelineDesc>;

// Automatically merges and manages texture bindings and uniform bindings sourced from either the
// paint or the RenderStep. Tracks the bound state based on the last-provided unique index and
// writes Bind commands to a CommandList when necessary.
class TextureBindingTracker {
public:
    TextureBindingCache::Index trackTextures(const TextureDataBlock* paintTextures,
                                             const TextureDataBlock* stepTextures) {
        if (!paintTextures && !stepTextures) {
            return TextureBindingCache::kInvalidIndex;
        }
        return fBindingCache.insert({paintTextures, stepTextures});
    }

    bool setCurrentTextureBindings(TextureBindingCache::Index bindingIndex) {
        if (bindingIndex < TextureBindingCache::kInvalidIndex && fLastIndex != bindingIndex) {
            fLastIndex = bindingIndex;
            return true;
        }
        // No binding change
        return false;
    }

    void bindTextures(DrawPassCommands::List* commandList) {
        SkASSERT(fLastIndex < TextureBindingCache::kInvalidIndex);
        const TextureBinding& binding = fBindingCache.lookup(fLastIndex);

        auto [texIndices, samplerIndices] =
                commandList->bindDeferredTexturesAndSamplers(binding.numTextures());

        if (binding.fPaintTextures) {
            for (int i = 0; i < binding.fPaintTextures->numTextures(); ++i) {
                auto [tex, sampler] = binding.fPaintTextures->texture(i);
                *texIndices++ = fProxyCache.insert(tex.get());
                *samplerIndices++ = fSamplerCache.insert(sampler);
            }
        }
        if (binding.fStepTextures) {
            for (int i = 0; i < binding.fStepTextures->numTextures(); ++i) {
                auto [tex, sampler] = binding.fStepTextures->texture(i);
                *texIndices++ = fProxyCache.insert(tex.get());
                *samplerIndices++ = fSamplerCache.insert(sampler);
            }
        }
    }

    TArray<sk_sp<TextureProxy>>&& detachTextures() { return fProxyCache.detach(); }
    TArray<SamplerDesc>&& detachSamplers() { return fSamplerCache.detach(); }

private:
    struct ProxyRef {
        const TextureProxy* fProxy;
        operator sk_sp<TextureProxy>() const { return sk_ref_sp(fProxy); }
    };
    using TextureProxyCache = DenseBiMap<const TextureProxy*, sk_sp<TextureProxy>, ProxyRef>;
    using SamplerDescCache = DenseBiMap<SamplerDesc>;

    TextureBindingCache fBindingCache;

    TextureProxyCache fProxyCache;
    SamplerDescCache fSamplerCache;

    TextureBindingCache::Index fLastIndex = TextureBindingCache::kInvalidIndex;
};
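
// Typical flow (as used by DrawPass::Make below): trackTextures() is called while building sort
// keys, setCurrentTextureBindings() while iterating the sorted keys, and bindTextures() only when
// the latter reports that the binding actually changed.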

// Collects and writes uniform data either to uniform buffers or to shared storage buffers, and
// tracks when bindings need to change between draws.
class UniformTracker {
public:
    UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {}

    // Maps a given {pipeline index, uniform data cache index} pair to a buffer index within the
    // pipeline's accumulated array of uniforms.
    UniformCache::Index trackUniforms(GraphicsPipelineCache::Index pipelineIndex,
                                      const UniformDataBlock* cpuData) {
        if (!cpuData) {
            return UniformCache::kInvalidIndex;
        }

        if (pipelineIndex >= SkToU32(fPerPipelineCaches.size())) {
            fPerPipelineCaches.resize(pipelineIndex + 1);
        }

        return fPerPipelineCaches[pipelineIndex].insert(cpuData);
    }

    // Writes all tracked uniform data into buffers, tracking the bindings for the written buffers
    // by GraphicsPipelineCache::Index and possibly the UniformCache::Index (when not using SSBOs).
    // When using SSBOs, the buffer is the same for all UniformCache::Indices that share the same
    // pipeline (and is stored in index 0).
    bool writeUniforms(DrawBufferManager* bufferMgr) {
        for (UniformCache& cache : fPerPipelineCaches) {
            if (cache.empty()) {
                continue;
            }
            // All data blocks for the same pipeline have the same size, so peek the first
            // to determine the total buffer size
            size_t udbSize = cache.lookup(0).fCpuData->size();
            size_t udbDataSize = udbSize;
            if (!fUseStorageBuffers) {
                udbSize = bufferMgr->alignUniformBlockSize(udbSize);
            }
            auto [writer, bufferInfo] =
                    fUseStorageBuffers ? bufferMgr->getSsboWriter(udbSize * cache.size())
                                       : bufferMgr->getUniformWriter(udbSize * cache.size());
            if (!writer) {
                return false; // Early out if buffer mapping failed
            }

            uint32_t bindingSize;
            if (fUseStorageBuffers) {
                // For a storage buffer we always bind all the blocks at once.
                bindingSize = static_cast<uint32_t>(udbSize * cache.size());
            } else {
                // For a uniform buffer we bind one block at a time.
                bindingSize = static_cast<uint32_t>(udbSize);
            }

            for (CpuOrGpuData& dataBlock : cache.data()) {
                SkASSERT(dataBlock.fCpuData->size() == udbDataSize);
                writer.write(dataBlock.fCpuData->data(), udbDataSize);
                // Swap from tracking the CPU data to the location of the GPU data
                dataBlock.fGpuData.fBuffer = bufferInfo.fBuffer;
                dataBlock.fGpuData.fOffset = bufferInfo.fOffset;
                dataBlock.fGpuData.fBindingSize = bindingSize;

                if (!fUseStorageBuffers) {
                    bufferInfo.fOffset += bindingSize;
                    writer.skipBytes(bindingSize - udbDataSize);
                } // else keep bufferInfo pointing to the start of the array
            }
        }

        return true;
    }

    // Updates the currently tracked pipeline and uniform index and returns whether bindUniforms()
    // needs to be called, which depends on whether 'fUseStorageBuffers' is true.
    bool setCurrentUniforms(GraphicsPipelineCache::Index pipelineIndex,
                            UniformCache::Index uniformIndex) {
        if (uniformIndex >= UniformCache::kInvalidIndex) {
            return false;
        }
        SkASSERT(pipelineIndex < SkToU32(fPerPipelineCaches.size()) &&
                 uniformIndex < fPerPipelineCaches[pipelineIndex].size());

        if (fUseStorageBuffers) {
            uniformIndex = 0; // The specific index has no effect on binding
        }
        if (fLastPipeline != pipelineIndex || fLastIndex != uniformIndex) {
            fLastPipeline = pipelineIndex;
            fLastIndex = uniformIndex;
            return true;
        } else {
            return false;
        }
    }

    // Binds a new uniform or storage buffer, based on the most recently provided pipeline and
    // uniform data cache indices.
    void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) {
        SkASSERT(fLastPipeline < GraphicsPipelineCache::kInvalidIndex &&
                 fLastIndex < UniformCache::kInvalidIndex);
        SkASSERT(!fUseStorageBuffers || fLastIndex == 0);
        const BindUniformBufferInfo& binding =
                fPerPipelineCaches[fLastPipeline].lookup(fLastIndex).fGpuData;
        commandList->bindUniformBuffer(binding, slot);
    }

private:
    // Access first by pipeline index. The final UniformCache::Index is either used to select the
    // BindBufferInfo for a draw using UBOs, or it's the real index into a packed array of uniforms
    // in a storage buffer object (whose binding is stored in index 0).
    TArray<UniformCache> fPerPipelineCaches;

    const bool fUseStorageBuffers;

    GraphicsPipelineCache::Index fLastPipeline = GraphicsPipelineCache::kInvalidIndex;
    UniformCache::Index fLastIndex = UniformCache::kInvalidIndex;
};
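
// Worked example (illustrative, assuming a 256-byte UBO alignment): with three 64-byte uniform
// blocks for one pipeline, the UBO path writes 3*256 bytes and binds one 256-byte range per draw
// at offsets 0/256/512, while the SSBO path writes 3*64 bytes and binds the whole 192-byte array
// once at offset 0, letting each draw select its block with an index instead of a rebind.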

class GradientBufferTracker {
public:
    bool writeData(SkSpan<const float> gradData, DrawBufferManager* bufferMgr) {
        if (gradData.empty()) {
            return true;
        }

        auto [writer, bufferInfo] = bufferMgr->getSsboWriter(gradData.size_bytes());

        if (!writer) {
            return false;
        }

        writer.write(gradData.data(), gradData.size_bytes());

        fBufferInfo.fBuffer = bufferInfo.fBuffer;
        fBufferInfo.fOffset = bufferInfo.fOffset;
        fBufferInfo.fBindingSize = gradData.size_bytes();
        fHasData = true;

        return true;
    }

    void bindIfNeeded(DrawPassCommands::List* commandList) const {
        if (fHasData) {
            commandList->bindUniformBuffer(fBufferInfo, UniformSlot::kGradient);
        }
    }

private:
    BindUniformBufferInfo fBufferInfo;
    bool fHasData = false;
};
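
// All large gradients in the pass share this one buffer, so writeData() is called once with the
// gatherer's accumulated float data and bindIfNeeded() is issued a single time before replaying
// the sorted draws (see DrawPass::Make below).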

} // namespace

///////////////////////////////////////////////////////////////////////////////////////////////////

/**
 * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
 * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
 *
 * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds
 * within a pipeline, while still respecting the overall painter's order. This decreases the number
 * of low-level draw commands in a command buffer and increases the size of each, allowing the GPU
 * to operate more efficiently and have fewer bubbles within its own instruction stream.
 *
 * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant bits
 * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
 * description is encoded in two steps:
 *   1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
 *      ordered correctly.
 *   2. An index into a cache of pipeline descriptions is used to encode the identity of the
 *      pipeline (SortKeys that differ in the bits from #1 necessarily would have different
 *      descriptions, but then the specific ordering of the RenderSteps isn't enforced).
 * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
 * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not
 * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and
 * the pipeline description index are packed into indices and not actual pointers, a given SortKey
 * is only valid for a specific DrawList->DrawPass conversion.
 */
class DrawPass::SortKey {
public:
    SortKey(const DrawList::Draw* draw,
            int renderStep,
            GraphicsPipelineCache::Index pipelineIndex,
            UniformCache::Index geomUniformIndex,
            UniformCache::Index shadingUniformIndex,
            TextureBindingCache::Index textureBindingIndex)
        : fPipelineKey(ColorDepthOrderField::set(draw->fDrawParams.order().paintOrder().bits()) |
                       StencilIndexField::set(draw->fDrawParams.order().stencilIndex().bits()) |
                       RenderStepField::set(static_cast<uint32_t>(renderStep)) |
                       PipelineField::set(pipelineIndex))
        , fUniformKey(GeometryUniformField::set(geomUniformIndex) |
                      ShadingUniformField::set(shadingUniformIndex) |
                      TextureBindingsField::set(textureBindingIndex))
        , fDraw(draw) {
        SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex);
        SkASSERT(renderStep < draw->fRenderer->numRenderSteps());
    }

    bool operator<(const SortKey& k) const {
        return fPipelineKey < k.fPipelineKey ||
               (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey);
    }

    const RenderStep& renderStep() const {
        return fDraw->fRenderer->step(RenderStepField::get(fPipelineKey));
    }

    const DrawList::Draw& draw() const { return *fDraw; }

    GraphicsPipelineCache::Index pipelineIndex() const {
        return PipelineField::get(fPipelineKey);
    }
    UniformCache::Index geometryUniformIndex() const {
        return GeometryUniformField::get(fUniformKey);
    }
    UniformCache::Index shadingUniformIndex() const {
        return ShadingUniformField::get(fUniformKey);
    }
    TextureBindingCache::Index textureBindingIndex() const {
        return TextureBindingsField::get(fUniformKey);
    }

private:
    // Fields are ordered from most-significant to least when sorting by 128-bit value.
    // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we
    // need to sort consistently.
    using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder)
    using StencilIndexField    = Bitfield<16, 32>; // sizeof(DisjointStencilIndex)
    using RenderStepField      = Bitfield<2,  30>; // bits >= log2(Renderer::kMaxRenderSteps)
    using PipelineField        = Bitfield<30, 0>;  // bits >= log2(max total steps in draw list)
    uint64_t fPipelineKey;

    // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are
    // greater than or equal to 2^(bits-1) represent "no-data", while values between
    // [0, 2^(bits-1)-1] can access data arrays without extra logic.
    using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps)
    using ShadingUniformField  = Bitfield<17, 30>; // bits >= 1+log2(max total steps)
    using TextureBindingsField = Bitfield<30, 0>;  // bits >= 1+log2(max total steps)
    uint64_t fUniformKey;

    // Backpointer to the draw that produced the sort key
    const DrawList::Draw* fDraw;

    static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder));
    static_assert(StencilIndexField::kBits    >= sizeof(DisjointStencilIndex));
    static_assert(RenderStepField::kBits      >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
    static_assert(PipelineField::kBits        >= SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(ShadingUniformField::kBits  >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
};
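
// Key layout (most significant to least), derived from the Bitfield offsets above:
//   fPipelineKey: [63..48] painter's order | [47..32] stencil index | [31..30] render step
//                 | [29..0] pipeline index
//   fUniformKey:  [63..47] geometry uniforms | [46..30] shading uniforms | [29..0] textures
// Two steps of one draw therefore stay adjacent and correctly ordered, while unrelated draws that
// share a pipeline and bindings cluster together.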

///////////////////////////////////////////////////////////////////////////////////////////////////

DrawPass::DrawPass(sk_sp<TextureProxy> target,
                   std::pair<LoadOp, StoreOp> ops,
                   std::array<float, 4> clearColor)
        : fTarget(std::move(target))
        , fBounds(SkIRect::MakeEmpty())
        , fOps(ops)
        , fClearColor(clearColor) {}

DrawPass::~DrawPass() = default;

std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
                                         std::unique_ptr<DrawList> draws,
                                         sk_sp<TextureProxy> target,
                                         const SkImageInfo& targetInfo,
                                         std::pair<LoadOp, StoreOp> ops,
                                         std::array<float, 4> clearColor,
                                         sk_sp<TextureProxy> dstCopy,
                                         SkIPoint dstCopyOffset) {
    // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
    // its size should be done with care and good reason. The performance of sorting the keys is
    // heavily tied to the total size.
    //
    // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
    // 16 bytes. There are several ways this could be done if necessary:
    //  - Restricting the max draw count to 16k (14 bits) and only using a single index to refer to
    //    the uniform data => 8 bytes of key, 8 bytes of pointer.
    //  - Restricting the max draw count to 32k (15 bits), using a single uniform index, and
    //    stealing the 4 low bits from the Draw* pointer since it's 16-byte aligned.
    //  - Compacting the Draw* to an index into the original collection, although that has extra
    //    indirection and does not work as well with SkTBlockList.
    // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
    // than an 8-byte key and unmodified pointer.
    static_assert(sizeof(DrawPass::SortKey) ==
                  SkAlignTo(16 + sizeof(void*), alignof(DrawPass::SortKey)));

    TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", draws->fDraws.count());

    // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
    // is returned from Make(), it is considered immutable.
    std::unique_ptr<DrawPass> drawPass(new DrawPass(target, ops, clearColor));

    Rect passBounds = Rect::InfiniteInverted();

    // We don't expect the uniforms from the renderSteps to reappear multiple times across a
    // recorder's lifetime so we only de-dupe them within a given DrawPass.
    UniformDataCache geometryUniformDataCache;
    TextureDataCache* textureDataCache = recorder->priv().textureDataCache();
    DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
    if (bufferMgr->hasMappingFailed()) {
        SKGPU_LOG_W("Buffer mapping has already failed; dropping draw pass!");
        return nullptr;
    }
    // Ensure there's a destination copy if required
    if (!draws->dstCopyBounds().isEmptyNegativeOrNaN() && !dstCopy) {
        SKGPU_LOG_W("Failed to copy destination for reading. Dropping draw pass!");
        return nullptr;
    }

    GraphicsPipelineCache pipelineCache;

    const bool useStorageBuffers = recorder->priv().caps()->storageBufferPreferred();
    const ResourceBindingRequirements& bindingReqs =
            recorder->priv().caps()->resourceBindingRequirements();
    Layout uniformLayout =
            useStorageBuffers ? bindingReqs.fStorageBufferLayout : bindingReqs.fUniformBufferLayout;

    UniformTracker geometryUniformTracker(useStorageBuffers);
    UniformTracker shadingUniformTracker(useStorageBuffers);
    TextureBindingTracker textureBindingTracker;
    GradientBufferTracker gradientBufferTracker;

    ShaderCodeDictionary* dict = recorder->priv().shaderCodeDictionary();
    PaintParamsKeyBuilder builder(dict);

    // The initial layout we pass here is not important as it will be re-assigned when writing
    // shading and geometry uniforms below.
    PipelineDataGatherer gatherer(recorder->priv().caps(), uniformLayout);

    std::vector<SortKey> keys;
    keys.reserve(draws->renderStepCount());

    for (const DrawList::Draw& draw : draws->fDraws.items()) {
        // If we have two different descriptors, such that the uniforms from the PaintParams can be
        // bound independently of those used by the rest of the RenderStep, then we can upload now
        // and remember the location for re-use on any RenderStep that does shading.
        UniquePaintParamsID shaderID;
        const UniformDataBlock* shadingUniforms = nullptr;
        const TextureDataBlock* paintTextures = nullptr;

        if (draw.fPaintParams.has_value()) {
            sk_sp<TextureProxy> curDst =
                    draw.fPaintParams->dstReadRequirement() == DstReadRequirement::kTextureCopy
                            ? dstCopy
                            : nullptr;
            std::tie(shaderID, shadingUniforms, paintTextures) =
                    ExtractPaintData(recorder,
                                     &gatherer,
                                     &builder,
                                     uniformLayout,
                                     draw.fDrawParams.transform(),
                                     draw.fPaintParams.value(),
                                     draw.fDrawParams.geometry(),
                                     curDst,
                                     dstCopyOffset,
                                     targetInfo.colorInfo());
        } // else depth-only

        for (int stepIndex = 0; stepIndex < draw.fRenderer->numRenderSteps(); ++stepIndex) {
            const RenderStep* const step = draw.fRenderer->steps()[stepIndex];
            const bool performsShading = draw.fPaintParams.has_value() && step->performsShading();

            GraphicsPipelineCache::Index pipelineIndex = pipelineCache.insert(
                    {step, performsShading ? shaderID : UniquePaintParamsID::InvalidID()});
            auto [geometryUniforms, stepTextures] = ExtractRenderStepData(&geometryUniformDataCache,
                                                                          textureDataCache,
                                                                          &gatherer,
                                                                          uniformLayout,
                                                                          step,
                                                                          draw.fDrawParams);

            UniformCache::Index geomUniformIndex = geometryUniformTracker.trackUniforms(
                    pipelineIndex, geometryUniforms);
            UniformCache::Index shadingUniformIndex = shadingUniformTracker.trackUniforms(
                    pipelineIndex, performsShading ? shadingUniforms : nullptr);
            TextureBindingCache::Index textureIndex = textureBindingTracker.trackTextures(
                    performsShading ? paintTextures : nullptr, stepTextures);

            keys.push_back({&draw, stepIndex, pipelineIndex,
                            geomUniformIndex, shadingUniformIndex, textureIndex});
        }

        passBounds.join(draw.fDrawParams.clip().drawBounds());
        drawPass->fDepthStencilFlags |= draw.fRenderer->depthStencilFlags();
        drawPass->fRequiresMSAA |= draw.fRenderer->requiresMSAA();
    }

    if (!geometryUniformTracker.writeUniforms(bufferMgr) ||
        !shadingUniformTracker.writeUniforms(bufferMgr) ||
        !gradientBufferTracker.writeData(gatherer.gradientBufferData(), bufferMgr)) {
        // The necessary uniform data couldn't be written to the GPU, so the DrawPass is invalid.
        // Early out now since the next Recording snap will fail.
        return nullptr;
    }

    // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
    // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
    // explore a radix sort that is always O(n). Brief testing suggested std::sort was faster than
    // std::stable_sort and SkTQSort on a Windows desktop. Also worth considering in-place
    // vs. algorithms that require an extra O(n) storage.
    // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
    // bugs in the DrawOrder determination code?
    std::sort(keys.begin(), keys.end());

    // Used to record vertex/instance data, buffer binds, and draw calls
    DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
    GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
    SkIRect lastScissor = SkIRect::MakeSize(targetInfo.dimensions());

    SkASSERT(drawPass->fTarget->isFullyLazy() ||
             SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
    drawPass->fCommandList.setScissor(lastScissor);

    // All large gradients pack their data into a single buffer throughout the draw pass,
    // therefore the gradient buffer only needs to be bound once.
    gradientBufferTracker.bindIfNeeded(&drawPass->fCommandList);

    for (const SortKey& key : keys) {
        const DrawList::Draw& draw = key.draw();
        const RenderStep& renderStep = key.renderStep();

        const bool pipelineChange = key.pipelineIndex() != lastPipeline;

        const bool geomBindingChange = geometryUniformTracker.setCurrentUniforms(
                key.pipelineIndex(), key.geometryUniformIndex());
        const bool shadingBindingChange = shadingUniformTracker.setCurrentUniforms(
                key.pipelineIndex(), key.shadingUniformIndex());
        const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
                key.textureBindingIndex());
        const SkIRect* newScissor = draw.fDrawParams.clip().scissor() != lastScissor
                ? &draw.fDrawParams.clip().scissor()
                : nullptr;

        const bool stateChange = geomBindingChange ||
                                 shadingBindingChange ||
                                 textureBindingsChange ||
                                 SkToBool(newScissor);

        // Update DrawWriter *before* we actually change any state so that accumulated draws from
        // the previous state use the proper state.
        if (pipelineChange) {
            drawWriter.newPipelineState(renderStep.primitiveType(),
                                        renderStep.vertexStride(),
                                        renderStep.instanceStride());
        } else if (stateChange) {
            drawWriter.newDynamicState();
        }

        // Make state changes before accumulating new draw data
        if (pipelineChange) {
            drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
            lastPipeline = key.pipelineIndex();
        }
        if (stateChange) {
            if (geomBindingChange) {
                geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep,
                                                    &drawPass->fCommandList);
            }
            if (shadingBindingChange) {
                shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList);
            }
            if (textureBindingsChange) {
                textureBindingTracker.bindTextures(&drawPass->fCommandList);
            }
            if (newScissor) {
                drawPass->fCommandList.setScissor(*newScissor);
                lastScissor = *newScissor;
            }
        }

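        // The geometry and shading SSBO indices are passed to the RenderStep so draws that share
        // a storage buffer can select their own uniform block; an invalid ("no data") index is
        // mapped to 0, which is safe because the corresponding buffer is never read in that case.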
        UniformCache::Index geometrySsboIndex =
                (key.geometryUniformIndex() == UniformCache::kInvalidIndex)
                        ? 0
                        : key.geometryUniformIndex();
        UniformCache::Index shadingSsboIndex =
                (key.shadingUniformIndex() == UniformCache::kInvalidIndex)
                        ? 0
                        : key.shadingUniformIndex();
        skvx::ushort2 ssboIndices = {SkToU16(geometrySsboIndex), SkToU16(shadingSsboIndex)};
        renderStep.writeVertices(&drawWriter, draw.fDrawParams, ssboIndices);

        if (bufferMgr->hasMappingFailed()) {
            SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!");
            return nullptr;
        }
    }
    // Finish recording draw calls for any collected data at the end of the loop
    drawWriter.flush();

    drawPass->fBounds = passBounds.roundOut().asSkIRect();

    drawPass->fPipelineDescs = pipelineCache.detach();
    drawPass->fSamplerDescs = textureBindingTracker.detachSamplers();
    drawPass->fSampledTextures = textureBindingTracker.detachTextures();

    TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size());
    TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size());
    TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count());

    return drawPass;
}

bool DrawPass::prepareResources(ResourceProvider* resourceProvider,
                                const RuntimeEffectDictionary* runtimeDict,
                                const RenderPassDesc& renderPassDesc) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);

    fFullPipelines.reserve(fFullPipelines.size() + fPipelineDescs.size());
    for (const GraphicsPipelineDesc& pipelineDesc : fPipelineDescs) {
        auto pipeline = resourceProvider->findOrCreateGraphicsPipeline(runtimeDict,
                                                                       pipelineDesc,
                                                                       renderPassDesc);
        if (!pipeline) {
            SKGPU_LOG_W("Failed to create GraphicsPipeline for draw in RenderPass. Dropping pass!");
            return false;
        }
        fFullPipelines.push_back(std::move(pipeline));
    }
    // The DrawPass may be long-lived on a Recording, and we no longer need the
    // GraphicsPipelineDescs once we've created pipelines, so we drop the storage for them here.
    fPipelineDescs.clear();

#if defined(SK_DEBUG)
    for (int i = 0; i < fSampledTextures.size(); ++i) {
        // It should not have been possible to draw an Image that has an invalid texture info
        SkASSERT(fSampledTextures[i]->textureInfo().isValid());
        // Tasks should have been ordered to instantiate any scratch textures already, or any
        // client-owned image will have been instantiated at creation.
        SkASSERTF(fSampledTextures[i]->isInstantiated() ||
                  fSampledTextures[i]->isLazy(),
                  "proxy label = %s", fSampledTextures[i]->label());
    }
#endif

    fSamplers.reserve(fSamplers.size() + fSamplerDescs.size());
    for (int i = 0; i < fSamplerDescs.size(); ++i) {
        sk_sp<Sampler> sampler = resourceProvider->findOrCreateCompatibleSampler(fSamplerDescs[i]);
        if (!sampler) {
            SKGPU_LOG_W("Failed to create sampler. Will not create renderpass!");
            return false;
        }
        fSamplers.push_back(std::move(sampler));
    }
    // The DrawPass may be long-lived on a Recording, and we no longer need the SamplerDescs
    // once we've created Samplers, so we drop the storage for them here.
    fSamplerDescs.clear();

    return true;
}

void DrawPass::addResourceRefs(CommandBuffer* commandBuffer) const {
    for (int i = 0; i < fFullPipelines.size(); ++i) {
        commandBuffer->trackResource(fFullPipelines[i]);
    }
    for (int i = 0; i < fSampledTextures.size(); ++i) {
        commandBuffer->trackCommandBufferResource(fSampledTextures[i]->refTexture());
    }
    for (int i = 0; i < fSamplers.size(); ++i) {
        commandBuffer->trackResource(fSamplers[i]);
    }
}

const Texture* DrawPass::getTexture(size_t index) const {
    SkASSERT(index < SkToSizeT(fSampledTextures.size()));
    SkASSERT(fSampledTextures[index]);
    SkASSERT(fSampledTextures[index]->texture());
    return fSampledTextures[index]->texture();
}
const Sampler* DrawPass::getSampler(size_t index) const {
    SkASSERT(index < SkToSizeT(fSamplers.size()));
    SkASSERT(fSamplers[index]);
    return fSamplers[index].get();
}

} // namespace skgpu::graphite