/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/DrawPass.h"

#include "include/gpu/graphite/GraphiteTypes.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/private/base/SkAlign.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/ContextUtils.h"
#include "src/gpu/graphite/DrawContext.h"
#include "src/gpu/graphite/DrawList.h"
#include "src/gpu/graphite/DrawWriter.h"
#include "src/gpu/graphite/GlobalCache.h"
#include "src/gpu/graphite/GraphicsPipeline.h"
#include "src/gpu/graphite/GraphicsPipelineDesc.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/PaintParamsKey.h"
#include "src/gpu/graphite/PipelineData.h"
#include "src/gpu/graphite/PipelineDataCache.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/Renderer.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/Sampler.h"
#include "src/gpu/graphite/Texture.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/geom/BoundsManager.h"

#include "src/base/SkMathPriv.h"
#include "src/base/SkTBlockList.h"

#include <algorithm>
#include <unordered_map>

using namespace skia_private;

namespace skgpu::graphite {

namespace {

// Helper to manage packed fields within a uint64_t
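// e.g. Bitfield<16, 48>::set(x) masks 'x' to its low 16 bits and shifts it into bits [48, 64) of
// the key, while get() reverses the shift and mask. Because each field occupies a disjoint bit
// range, set() results can be ORed together and compared as a single 64-bit value.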
template <uint64_t Bits, uint64_t Offset>
struct Bitfield {
    static constexpr uint64_t kMask = ((uint64_t) 1 << Bits) - 1;
    static constexpr uint64_t kOffset = Offset;
    static constexpr uint64_t kBits = Bits;

    static uint32_t get(uint64_t v) { return static_cast<uint32_t>((v >> kOffset) & kMask); }
    static uint64_t set(uint32_t v) { return (v & kMask) << kOffset; }
};

// This class maps objects to a dense index which can then be used to look them up later
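// The first insert() of a value assigns the next dense index; later inserts of an equal value
// return that same index. lookup() and data() resolve indices back to values, and detach()
// releases the packed array once every index has been recorded.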
template <typename T, typename V = T, typename C = V>
class DenseBiMap {
public:
    using Index = uint32_t;

    // See note below in GeometryUniformField. This value can be round-tripped within the SortKey
    // packing for all fields but will not be produced when recording actual draw data.
    static constexpr Index kInvalidIndex{1 << SkNextLog2_portable(DrawList::kMaxRenderSteps)};

    bool empty() const { return fIndexToData.empty(); }
    size_t size() const { return fIndexToData.size(); }

    Index insert(const T& data) {
        Index* index = fDataToIndex.find(data);
        if (!index) {
            SkASSERT(SkToU32(fIndexToData.size()) < kInvalidIndex);
            index = fDataToIndex.set(data, (Index) fIndexToData.size());
            fIndexToData.push_back(C{data});
        }
        return *index;
    }

    const V& lookup(Index index) {
        SkASSERT(index < kInvalidIndex);
        return fIndexToData[index];
    }

    SkSpan<V> data() { return {fIndexToData.data(), fIndexToData.size()}; }

    TArray<V>&& detach() { return std::move(fIndexToData); }

private:
    THashMap<T, Index> fDataToIndex;
    TArray<V> fIndexToData;
};

// Tracks uniform data on the CPU and then its transition to storage in a GPU buffer (ubo or ssbo).
struct CpuOrGpuData {
    union {
        const UniformDataBlock* fCpuData;
        BindUniformBufferInfo fGpuData;
    };

    // Can only start from CPU data
    CpuOrGpuData(const UniformDataBlock* cpuData) : fCpuData(cpuData) {}
};

// Tracks the combination of textures from the paint and from the RenderStep to describe the full
// binding that needs to be in the command list.
struct TextureBinding {
    const TextureDataBlock* fPaintTextures;
    const TextureDataBlock* fStepTextures;

    bool operator==(const TextureBinding& other) const {
        return fPaintTextures == other.fPaintTextures &&
               fStepTextures == other.fStepTextures;
    }
    bool operator!=(const TextureBinding& other) const { return !(*this == other); }

    int numTextures() const {
        return (fPaintTextures ? fPaintTextures->numTextures() : 0) +
               (fStepTextures ? fStepTextures->numTextures() : 0);
    }
};

using UniformCache = DenseBiMap<const UniformDataBlock*, CpuOrGpuData>;
using TextureBindingCache = DenseBiMap<TextureBinding>;
using GraphicsPipelineCache = DenseBiMap<GraphicsPipelineDesc>;

// Automatically merges and manages texture bindings and uniform bindings sourced from either the
// paint or the RenderStep. Tracks the bound state based on last-provided unique index to write
// Bind commands to a CommandList when necessary.
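// trackTextures() is called while SortKeys are built to dedupe each paint/step texture pairing;
// setCurrentTextureBindings() is called during command emission and reports whether the binding
// actually changed, at which point bindTextures() records the deduplicated texture and sampler
// indices for the command.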
class TextureBindingTracker {
public:
    TextureBindingCache::Index trackTextures(const TextureDataBlock* paintTextures,
                                             const TextureDataBlock* stepTextures) {
        if (!paintTextures && !stepTextures) {
            return TextureBindingCache::kInvalidIndex;
        }
        return fBindingCache.insert({paintTextures, stepTextures});
    }

    bool setCurrentTextureBindings(TextureBindingCache::Index bindingIndex) {
        if (bindingIndex < TextureBindingCache::kInvalidIndex && fLastIndex != bindingIndex) {
            fLastIndex = bindingIndex;
            return true;
        }
        // No binding change
        return false;
    }

    void bindTextures(DrawPassCommands::List* commandList) {
        SkASSERT(fLastIndex < TextureBindingCache::kInvalidIndex);
        const TextureBinding& binding = fBindingCache.lookup(fLastIndex);

        auto [texIndices, samplerIndices] =
                commandList->bindDeferredTexturesAndSamplers(binding.numTextures());

        if (binding.fPaintTextures) {
            for (int i = 0; i < binding.fPaintTextures->numTextures(); ++i) {
                auto [tex, sampler] = binding.fPaintTextures->texture(i);
                *texIndices++     = fProxyCache.insert(tex.get());
                *samplerIndices++ = fSamplerCache.insert(sampler);
            }
        }
        if (binding.fStepTextures) {
            for (int i = 0; i < binding.fStepTextures->numTextures(); ++i) {
                auto [tex, sampler] = binding.fStepTextures->texture(i);
                *texIndices++     = fProxyCache.insert(tex.get());
                *samplerIndices++ = fSamplerCache.insert(sampler);
            }
        }
    }

    TArray<sk_sp<TextureProxy>>&& detachTextures() { return fProxyCache.detach(); }
    TArray<SamplerDesc>&& detachSamplers() { return fSamplerCache.detach(); }

private:
    struct ProxyRef {
        const TextureProxy* fProxy;
        operator sk_sp<TextureProxy>() const { return sk_ref_sp(fProxy); }
    };
    using TextureProxyCache = DenseBiMap<const TextureProxy*, sk_sp<TextureProxy>, ProxyRef>;
    using SamplerDescCache = DenseBiMap<SamplerDesc>;

    TextureBindingCache fBindingCache;

    TextureProxyCache fProxyCache;
    SamplerDescCache fSamplerCache;

    TextureBindingCache::Index fLastIndex = TextureBindingCache::kInvalidIndex;
};

// Collects and writes uniform data either to uniform buffers or to shared storage buffers, and
// tracks when bindings need to change between draws.
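// With storage buffers, every data block for a pipeline is written into one ssbo and bound once
// (each draw then indexes into it); with ubos, each block gets its own aligned binding range and
// setCurrentUniforms()/bindUniforms() rebind as the active block changes.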
class UniformTracker {
public:
    UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {}

    // Maps a given {pipeline index, uniform data cache index} pair to a buffer index within the
    // pipeline's accumulated array of uniforms.
    UniformCache::Index trackUniforms(GraphicsPipelineCache::Index pipelineIndex,
                                      const UniformDataBlock* cpuData) {
        if (!cpuData) {
            return UniformCache::kInvalidIndex;
        }

        if (pipelineIndex >= SkToU32(fPerPipelineCaches.size())) {
            fPerPipelineCaches.resize(pipelineIndex + 1);
        }

        return fPerPipelineCaches[pipelineIndex].insert(cpuData);
    }

    // Writes all tracked uniform data into buffers, tracking the bindings for the written buffers
    // by GraphicsPipelineCache::Index and possibly the UniformCache::Index (when not using SSBOs).
    // When using SSBOs, the buffer is the same for all UniformCache::Indices that share the same
    // pipeline (and is stored in index 0).
    bool writeUniforms(DrawBufferManager* bufferMgr) {
        for (UniformCache& cache : fPerPipelineCaches) {
            if (cache.empty()) {
                continue;
            }
            // All data blocks for the same pipeline have the same size, so peek the first
            // to determine the total buffer size
            size_t udbSize = cache.lookup(0).fCpuData->size();
            size_t udbDataSize = udbSize;
            if (!fUseStorageBuffers) {
                udbSize = bufferMgr->alignUniformBlockSize(udbSize);
            }
            auto [writer, bufferInfo] =
                    fUseStorageBuffers ? bufferMgr->getSsboWriter(udbSize * cache.size())
                                       : bufferMgr->getUniformWriter(udbSize * cache.size());
            if (!writer) {
                return false; // Early out if buffer mapping failed
            }

            uint32_t bindingSize;
            if (fUseStorageBuffers) {
                // For storage buffer we will always bind all the blocks.
                bindingSize = static_cast<uint32_t>(udbSize * cache.size());
            }
            else {
                // For uniform buffer we will bind one block at a time.
                bindingSize = static_cast<uint32_t>(udbSize);
            }

            for (CpuOrGpuData& dataBlock : cache.data()) {
                SkASSERT(dataBlock.fCpuData->size() == udbDataSize);
                writer.write(dataBlock.fCpuData->data(), udbDataSize);
                // Swap from tracking the CPU data to the location of the GPU data
                dataBlock.fGpuData.fBuffer = bufferInfo.fBuffer;
                dataBlock.fGpuData.fOffset = bufferInfo.fOffset;
                dataBlock.fGpuData.fBindingSize = bindingSize;

                if (!fUseStorageBuffers) {
                    bufferInfo.fOffset += bindingSize;
                    writer.skipBytes(bindingSize - udbDataSize);
                } // else keep bufferInfo pointing to the start of the array
            }
        }

        return true;
    }

    // Updates the current tracked pipeline and uniform index and returns whether or not
    // bindUniforms() needs to be called, depending on if 'fUseStorageBuffers' is true or not.
    bool setCurrentUniforms(GraphicsPipelineCache::Index pipelineIndex,
                            UniformCache::Index uniformIndex) {
        if (uniformIndex >= UniformCache::kInvalidIndex) {
            return false;
        }
        SkASSERT(pipelineIndex < SkToU32(fPerPipelineCaches.size()) &&
                 uniformIndex < fPerPipelineCaches[pipelineIndex].size());

        if (fUseStorageBuffers) {
            uniformIndex = 0; // The specific index has no effect on binding
        }
        if (fLastPipeline != pipelineIndex || fLastIndex != uniformIndex) {
            fLastPipeline = pipelineIndex;
            fLastIndex = uniformIndex;
            return true;
        } else {
            return false;
        }
    }

    // Binds a new uniform or storage buffer, based on most recently provided batch key and uniform
    // data cache index.
    void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) {
        SkASSERT(fLastPipeline < GraphicsPipelineCache::kInvalidIndex &&
                 fLastIndex < UniformCache::kInvalidIndex);
        SkASSERT(!fUseStorageBuffers || fLastIndex == 0);
        const BindUniformBufferInfo& binding =
                fPerPipelineCaches[fLastPipeline].lookup(fLastIndex).fGpuData;
        commandList->bindUniformBuffer(binding, slot);
    }

private:
    // Access first by pipeline index. The final UniformCache::Index is either used to select the
    // BindBufferInfo for a draw using UBOs, or it's the real index into a packed array of uniforms
    // in a storage buffer object (whose binding is stored in index 0).
    TArray<UniformCache> fPerPipelineCaches;

    const bool fUseStorageBuffers;

    GraphicsPipelineCache::Index fLastPipeline = GraphicsPipelineCache::kInvalidIndex;
    UniformCache::Index fLastIndex = UniformCache::kInvalidIndex;
};

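// Collects the gradient data for every draw in the pass into a single storage buffer. Since all
// gradients share one buffer, bindIfNeeded() only has to emit one bind for the whole pass.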
class GradientBufferTracker {
public:
    bool writeData(SkSpan<const float> gradData, DrawBufferManager* bufferMgr) {
        if (gradData.empty()) {
            return true;
        }

        auto [writer, bufferInfo] = bufferMgr->getSsboWriter(gradData.size_bytes());

        if (!writer) {
            return false;
        }

        writer.write(gradData.data(), gradData.size_bytes());

        fBufferInfo.fBuffer = bufferInfo.fBuffer;
        fBufferInfo.fOffset = bufferInfo.fOffset;
        fBufferInfo.fBindingSize = gradData.size_bytes();
        fHasData = true;

        return true;
    }

    void bindIfNeeded(DrawPassCommands::List* commandList) const {
        if (fHasData) {
            commandList->bindUniformBuffer(fBufferInfo, UniformSlot::kGradient);
        }
    }

private:
    BindUniformBufferInfo fBufferInfo;
    bool fHasData = false;
};

} // namespace

///////////////////////////////////////////////////////////////////////////////////////////////////

/**
 * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
 * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
 *
 * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds
 * within a pipeline, while still respecting the overall painter's order. This decreases the number
 * of low-level draw commands in a command buffer and increases the size of those, allowing the GPU
 * to operate more efficiently and have fewer bubbles within its own instruction stream.
 *
 * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant bits
 * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
 * description is encoded in two steps:
 *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
 *     ordered correctly.
 *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
 *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
 *     descriptions, but then the specific ordering of the RenderSteps isn't enforced).
 * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
 * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not
 * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and
 * the pipeline description index are packed into indices and not actual pointers, a given SortKey
 * is only valid for a specific DrawList->DrawPass conversion.
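 *
 * Resulting bit layout (most to least significant):
 *   fPipelineKey: [ColorDepthOrder:16 | StencilIndex:16 | RenderStep:2 | Pipeline:30]
 *   fUniformKey:  [GeometryUniform:17 | ShadingUniform:17 | TextureBindings:30]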
 */
class DrawPass::SortKey {
public:
    SortKey(const DrawList::Draw* draw,
            int renderStep,
            GraphicsPipelineCache::Index pipelineIndex,
            UniformCache::Index geomUniformIndex,
            UniformCache::Index shadingUniformIndex,
            TextureBindingCache::Index textureBindingIndex)
        : fPipelineKey(ColorDepthOrderField::set(draw->fDrawParams.order().paintOrder().bits()) |
                       StencilIndexField::set(draw->fDrawParams.order().stencilIndex().bits())  |
                       RenderStepField::set(static_cast<uint32_t>(renderStep))                  |
                       PipelineField::set(pipelineIndex))
        , fUniformKey(GeometryUniformField::set(geomUniformIndex)   |
                      ShadingUniformField::set(shadingUniformIndex) |
                      TextureBindingsField::set(textureBindingIndex))
        , fDraw(draw) {
        SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex);
        SkASSERT(renderStep <= draw->fRenderer->numRenderSteps());
    }

    bool operator<(const SortKey& k) const {
        return fPipelineKey < k.fPipelineKey ||
               (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey);
    }

    const RenderStep& renderStep() const {
        return fDraw->fRenderer->step(RenderStepField::get(fPipelineKey));
    }

    const DrawList::Draw& draw() const { return *fDraw; }

    GraphicsPipelineCache::Index pipelineIndex() const {
        return PipelineField::get(fPipelineKey);
    }
    UniformCache::Index geometryUniformIndex() const {
        return GeometryUniformField::get(fUniformKey);
    }
    UniformCache::Index shadingUniformIndex() const {
        return ShadingUniformField::get(fUniformKey);
    }
    TextureBindingCache::Index textureBindingIndex() const {
        return TextureBindingsField::get(fUniformKey);
    }

private:
    // Fields are ordered from most-significant to least when sorting by 128-bit value.
    // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we
    // need to sort consistently.
    using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder)
    using StencilIndexField    = Bitfield<16, 32>; // sizeof(DisjointStencilIndex)
    using RenderStepField      = Bitfield<2,  30>; // bits >= log2(Renderer::kMaxRenderSteps)
    using PipelineField        = Bitfield<30, 0>;  // bits >= log2(max total steps in draw list)
    uint64_t fPipelineKey;

    // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are
    // greater than or equal to 2^(bits-1) represent "no-data", while values between
    // [0, 2^(bits-1)-1] can access data arrays without extra logic.
    using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps)
    using ShadingUniformField  = Bitfield<17, 30>; // bits >= 1+log2(max total steps)
    using TextureBindingsField = Bitfield<30, 0>;  // bits >= 1+log2(max total steps)
    uint64_t fUniformKey;

    // Backpointer to the draw that produced the sort key
    const DrawList::Draw* fDraw;

    static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder));
    static_assert(StencilIndexField::kBits    >= sizeof(DisjointStencilIndex));
    static_assert(RenderStepField::kBits      >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
    static_assert(PipelineField::kBits        >= SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(ShadingUniformField::kBits  >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
};

///////////////////////////////////////////////////////////////////////////////////////////////////

DrawPass::DrawPass(sk_sp<TextureProxy> target,
                   std::pair<LoadOp, StoreOp> ops,
                   std::array<float, 4> clearColor)
        : fTarget(std::move(target))
        , fBounds(SkIRect::MakeEmpty())
        , fOps(ops)
        , fClearColor(clearColor) {}

DrawPass::~DrawPass() = default;

std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
                                         std::unique_ptr<DrawList> draws,
                                         sk_sp<TextureProxy> target,
                                         const SkImageInfo& targetInfo,
                                         std::pair<LoadOp, StoreOp> ops,
                                         std::array<float, 4> clearColor,
                                         sk_sp<TextureProxy> dstCopy,
                                         SkIPoint dstCopyOffset) {
    // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
    // its size should be done with care and good reason. The performance of sorting the keys is
    // heavily tied to the total size.
    //
    // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
    // 16 bytes. There are several ways this could be done if necessary:
    //  - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
    //    the uniform data => 8 bytes of key, 8 bytes of pointer.
    //  - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
    //    4 low bits from the Draw* pointer since it's 16 byte aligned.
    //  - Compact the Draw* to an index into the original collection, although that has extra
    //    indirection and does not work as well with SkTBlockList.
    // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
    // than an 8 byte key and unmodified pointer.
    static_assert(sizeof(DrawPass::SortKey) ==
                  SkAlignTo(16 + sizeof(void*), alignof(DrawPass::SortKey)));

    TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", draws->fDraws.count());

    // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
    // is returned from Make(), it is considered immutable.
    std::unique_ptr<DrawPass> drawPass(new DrawPass(target, ops, clearColor));

    Rect passBounds = Rect::InfiniteInverted();

    // We don't expect the uniforms from the renderSteps to reappear multiple times across a
    // recorder's lifetime so we only de-dupe them w/in a given DrawPass.
    UniformDataCache geometryUniformDataCache;
    TextureDataCache* textureDataCache = recorder->priv().textureDataCache();
    DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
    if (bufferMgr->hasMappingFailed()) {
        SKGPU_LOG_W("Buffer mapping has already failed; dropping draw pass!");
        return nullptr;
    }
    // Ensure there's a destination copy if required
    if (!draws->dstCopyBounds().isEmptyNegativeOrNaN() && !dstCopy) {
        SKGPU_LOG_W("Failed to copy destination for reading. Dropping draw pass!");
        return nullptr;
    }

    GraphicsPipelineCache pipelineCache;

    // Geometry uniforms are currently always UBO-backed.
    const bool useStorageBuffers = recorder->priv().caps()->storageBufferPreferred();
    const ResourceBindingRequirements& bindingReqs =
            recorder->priv().caps()->resourceBindingRequirements();
    Layout uniformLayout =
            useStorageBuffers ? bindingReqs.fStorageBufferLayout : bindingReqs.fUniformBufferLayout;

    UniformTracker geometryUniformTracker(useStorageBuffers);
    UniformTracker shadingUniformTracker(useStorageBuffers);
    TextureBindingTracker textureBindingTracker;
    GradientBufferTracker gradientBufferTracker;

    ShaderCodeDictionary* dict = recorder->priv().shaderCodeDictionary();
    PaintParamsKeyBuilder builder(dict);

    // The initial layout we pass here is not important as it will be re-assigned when writing
    // shading and geometry uniforms below.
    PipelineDataGatherer gatherer(recorder->priv().caps(), uniformLayout);

    std::vector<SortKey> keys;
    keys.reserve(draws->renderStepCount());

    for (const DrawList::Draw& draw : draws->fDraws.items()) {
        // If we have two different descriptors, such that the uniforms from the PaintParams can be
        // bound independently of those used by the rest of the RenderStep, then we can upload now
        // and remember the location for re-use on any RenderStep that does shading.
        UniquePaintParamsID shaderID;
        const UniformDataBlock* shadingUniforms = nullptr;
        const TextureDataBlock* paintTextures = nullptr;

        if (draw.fPaintParams.has_value()) {
            sk_sp<TextureProxy> curDst =
                    draw.fPaintParams->dstReadRequirement() == DstReadRequirement::kTextureCopy
                            ? dstCopy
                            : nullptr;
            std::tie(shaderID, shadingUniforms, paintTextures) =
                    ExtractPaintData(recorder,
                                     &gatherer,
                                     &builder,
                                     uniformLayout,
                                     draw.fDrawParams.transform(),
                                     draw.fPaintParams.value(),
                                     draw.fDrawParams.geometry(),
                                     curDst,
                                     dstCopyOffset,
                                     targetInfo.colorInfo());
        } // else depth-only

        for (int stepIndex = 0; stepIndex < draw.fRenderer->numRenderSteps(); ++stepIndex) {
            const RenderStep* const step = draw.fRenderer->steps()[stepIndex];
            const bool performsShading = draw.fPaintParams.has_value() && step->performsShading();

            GraphicsPipelineCache::Index pipelineIndex = pipelineCache.insert(
                    {step, performsShading ? shaderID : UniquePaintParamsID::InvalidID()});
            auto [geometryUniforms, stepTextures] = ExtractRenderStepData(&geometryUniformDataCache,
                                                                          textureDataCache,
                                                                          &gatherer,
                                                                          uniformLayout,
                                                                          step,
                                                                          draw.fDrawParams);

            UniformCache::Index geomUniformIndex = geometryUniformTracker.trackUniforms(
                    pipelineIndex, geometryUniforms);
            UniformCache::Index shadingUniformIndex = shadingUniformTracker.trackUniforms(
                    pipelineIndex, performsShading ? shadingUniforms : nullptr);
            TextureBindingCache::Index textureIndex = textureBindingTracker.trackTextures(
                    performsShading ? paintTextures : nullptr, stepTextures);

            keys.push_back({&draw, stepIndex, pipelineIndex,
                            geomUniformIndex, shadingUniformIndex, textureIndex});
        }

        passBounds.join(draw.fDrawParams.clip().drawBounds());
        drawPass->fDepthStencilFlags |= draw.fRenderer->depthStencilFlags();
        drawPass->fRequiresMSAA |= draw.fRenderer->requiresMSAA();
    }

    if (!geometryUniformTracker.writeUniforms(bufferMgr) ||
        !shadingUniformTracker.writeUniforms(bufferMgr) ||
        !gradientBufferTracker.writeData(gatherer.gradientBufferData(), bufferMgr)) {
        // The necessary uniform data couldn't be written to the GPU, so the DrawPass is invalid.
        // Early out now since the next Recording snap will fail.
        return nullptr;
    }

    // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
    // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
    // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
    // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place
    // vs. algorithms that require an extra O(n) storage.
    // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
    // bugs in the DrawOrder determination code?
    std::sort(keys.begin(), keys.end());
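
    // With the keys in sorted order, each draw step is replayed while the trackers detect when
    // the pipeline, uniform bindings, texture bindings, or scissor actually change, so redundant
    // commands are skipped and consecutive compatible steps accumulate in the DrawWriter.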

    // Used to record vertex/instance data, buffer binds, and draw calls
    DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
    GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
    SkIRect lastScissor = SkIRect::MakeSize(targetInfo.dimensions());

    SkASSERT(drawPass->fTarget->isFullyLazy() ||
             SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
    drawPass->fCommandList.setScissor(lastScissor);

    // All large gradients pack their data into a single buffer throughout the draw pass,
    // therefore the gradient buffer only needs to be bound once.
    gradientBufferTracker.bindIfNeeded(&drawPass->fCommandList);

    for (const SortKey& key : keys) {
        const DrawList::Draw& draw = key.draw();
        const RenderStep& renderStep = key.renderStep();

        const bool pipelineChange = key.pipelineIndex() != lastPipeline;

        const bool geomBindingChange     = geometryUniformTracker.setCurrentUniforms(
                key.pipelineIndex(), key.geometryUniformIndex());
        const bool shadingBindingChange  = shadingUniformTracker.setCurrentUniforms(
                key.pipelineIndex(), key.shadingUniformIndex());
        const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
                key.textureBindingIndex());
        const SkIRect* newScissor        = draw.fDrawParams.clip().scissor() != lastScissor ?
                &draw.fDrawParams.clip().scissor() : nullptr;

        const bool stateChange = geomBindingChange ||
                                 shadingBindingChange ||
                                 textureBindingsChange ||
                                 SkToBool(newScissor);

        // Update DrawWriter *before* we actually change any state so that accumulated draws from
        // the previous state use the proper state.
        if (pipelineChange) {
            drawWriter.newPipelineState(renderStep.primitiveType(),
                                        renderStep.vertexStride(),
                                        renderStep.instanceStride());
        } else if (stateChange) {
            drawWriter.newDynamicState();
        }

        // Make state changes before accumulating new draw data
        if (pipelineChange) {
            drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
            lastPipeline = key.pipelineIndex();
        }
        if (stateChange) {
            if (geomBindingChange) {
                geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep,
                                                    &drawPass->fCommandList);
            }
            if (shadingBindingChange) {
                shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList);
            }
            if (textureBindingsChange) {
                textureBindingTracker.bindTextures(&drawPass->fCommandList);
            }
            if (newScissor) {
                drawPass->fCommandList.setScissor(*newScissor);
                lastScissor = *newScissor;
            }
        }

        UniformCache::Index geometrySsboIndex =
                (key.geometryUniformIndex() == UniformCache::kInvalidIndex)
                        ? 0
                        : key.geometryUniformIndex();
        UniformCache::Index shadingSsboIndex =
                (key.shadingUniformIndex() == UniformCache::kInvalidIndex)
                        ? 0
                        : key.shadingUniformIndex();
        skvx::ushort2 ssboIndices = {SkToU16(geometrySsboIndex), SkToU16(shadingSsboIndex)};
        renderStep.writeVertices(&drawWriter, draw.fDrawParams, ssboIndices);

        if (bufferMgr->hasMappingFailed()) {
            SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!");
            return nullptr;
        }
    }
    // Finish recording draw calls for any collected data at the end of the loop
    drawWriter.flush();

    drawPass->fBounds = passBounds.roundOut().asSkIRect();

    drawPass->fPipelineDescs   = pipelineCache.detach();
    drawPass->fSamplerDescs    = textureBindingTracker.detachSamplers();
    drawPass->fSampledTextures = textureBindingTracker.detachTextures();

    TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size());
    TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size());
    TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count());

    return drawPass;
}

bool DrawPass::prepareResources(ResourceProvider* resourceProvider,
                                const RuntimeEffectDictionary* runtimeDict,
                                const RenderPassDesc& renderPassDesc) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);

    fFullPipelines.reserve(fFullPipelines.size() + fPipelineDescs.size());
    for (const GraphicsPipelineDesc& pipelineDesc : fPipelineDescs) {
        auto pipeline = resourceProvider->findOrCreateGraphicsPipeline(runtimeDict,
                                                                       pipelineDesc,
                                                                       renderPassDesc);
        if (!pipeline) {
            SKGPU_LOG_W("Failed to create GraphicsPipeline for draw in RenderPass. Dropping pass!");
            return false;
        }
        fFullPipelines.push_back(std::move(pipeline));
    }
    // The DrawPass may be long lived on a Recording and we no longer need the
    // GraphicsPipelineDescs once we've created pipelines, so we drop the storage for them here.
    fPipelineDescs.clear();

#if defined(SK_DEBUG)
    for (int i = 0; i < fSampledTextures.size(); ++i) {
        // It should not have been possible to draw an Image that has an invalid texture info
        SkASSERT(fSampledTextures[i]->textureInfo().isValid());
        // Tasks should have been ordered to instantiate any scratch textures already, or any
        // client-owned image will have been instantiated at creation.
        SkASSERTF(fSampledTextures[i]->isInstantiated() ||
                  fSampledTextures[i]->isLazy(),
                  "proxy label = %s", fSampledTextures[i]->label());
    }
#endif

    fSamplers.reserve(fSamplers.size() + fSamplerDescs.size());
    for (int i = 0; i < fSamplerDescs.size(); ++i) {
        sk_sp<Sampler> sampler = resourceProvider->findOrCreateCompatibleSampler(fSamplerDescs[i]);
        if (!sampler) {
            SKGPU_LOG_W("Failed to create sampler. Will not create renderpass!");
            return false;
        }
        fSamplers.push_back(std::move(sampler));
    }
    // The DrawPass may be long lived on a Recording and we no longer need the SamplerDescs
    // once we've created Samplers, so we drop the storage for them here.
    fSamplerDescs.clear();

    return true;
}

void DrawPass::addResourceRefs(CommandBuffer* commandBuffer) const {
    for (int i = 0; i < fFullPipelines.size(); ++i) {
        commandBuffer->trackResource(fFullPipelines[i]);
    }
    for (int i = 0; i < fSampledTextures.size(); ++i) {
        commandBuffer->trackCommandBufferResource(fSampledTextures[i]->refTexture());
    }
    for (int i = 0; i < fSamplers.size(); ++i) {
        commandBuffer->trackResource(fSamplers[i]);
    }
}

const Texture* DrawPass::getTexture(size_t index) const {
    SkASSERT(index < SkToSizeT(fSampledTextures.size()));
    SkASSERT(fSampledTextures[index]);
    SkASSERT(fSampledTextures[index]->texture());
    return fSampledTextures[index]->texture();
}
const Sampler* DrawPass::getSampler(size_t index) const {
    SkASSERT(index < SkToSizeT(fSamplers.size()));
    SkASSERT(fSamplers[index]);
    return fSamplers[index].get();
}

} // namespace skgpu::graphite