• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
#include "src/gpu/graphite/DrawPass.h"

#include "include/gpu/graphite/GraphiteTypes.h"
#include "include/gpu/graphite/Recorder.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/ContextUtils.h"
#include "src/gpu/graphite/DrawContext.h"
#include "src/gpu/graphite/DrawList.h"
#include "src/gpu/graphite/DrawWriter.h"
#include "src/gpu/graphite/GlobalCache.h"
#include "src/gpu/graphite/GraphicsPipeline.h"
#include "src/gpu/graphite/GraphicsPipelineDesc.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/PaintParamsKey.h"
#include "src/gpu/graphite/PipelineData.h"
#include "src/gpu/graphite/PipelineDataCache.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/Renderer.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/Sampler.h"
#include "src/gpu/graphite/Texture.h"
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/geom/BoundsManager.h"

#include "src/base/SkMathPriv.h"
#include "src/base/SkTBlockList.h"

#include <algorithm>
#include <climits>
#include <unordered_map>
41 
42 namespace skgpu::graphite {
43 
44 namespace {
45 
// Packs and unpacks a fixed-width field located at a fixed bit offset within a uint64_t key.
template <uint64_t Bits, uint64_t Offset>
struct Bitfield {
    static constexpr uint64_t kBits = Bits;
    static constexpr uint64_t kOffset = Offset;
    static constexpr uint64_t kMask = (uint64_t(1) << Bits) - 1;

    // Extracts this field's value from a packed key.
    static uint32_t get(uint64_t packed) {
        return static_cast<uint32_t>((packed >> kOffset) & kMask);
    }

    // Produces this field's contribution to a packed key; bits above kBits are dropped.
    static uint64_t set(uint32_t value) {
        return (value & kMask) << kOffset;
    }
};
56 
57 // This class maps objects to a dense index which can then be used to look them up later
58 template <typename T, typename V = T, typename C = V>
59 class DenseBiMap {
60 public:
61     using Index = uint32_t;
62 
63     // See note below in GeometryUniformField. This value can be round-tripped within the SortKey
64     // packing for all fields but will not be produced when recording actual draw data.
65     static constexpr Index kInvalidIndex{1 << SkNextLog2_portable(Renderer::kMaxRenderSteps *
66                                                                   DrawList::kMaxDraws)};
67 
empty() const68     bool empty() const { return fIndexToData.empty(); }
size() const69     size_t size() const { return fIndexToData.size(); }
70 
insert(const T & data)71     Index insert(const T& data) {
72         Index* index = fDataToIndex.find(data);
73         if (!index) {
74             SkASSERT(SkToU32(fIndexToData.size()) < kInvalidIndex - 1);
75             index = fDataToIndex.set(data, (Index) fIndexToData.size());
76             fIndexToData.push_back(C{data});
77         }
78         return *index;
79     }
80 
lookup(Index index)81     const V& lookup(Index index) {
82         SkASSERT(index < kInvalidIndex);
83         return fIndexToData[index];
84     }
85 
data()86     SkSpan<V> data() { return {fIndexToData.data(), fIndexToData.size()}; }
87 
detach()88     SkTArray<V>&& detach() { return std::move(fIndexToData); }
89 
90 private:
91     SkTHashMap<T, Index> fDataToIndex;
92     SkTArray<V> fIndexToData;
93 };
94 
95 // Tracks uniform data on the CPU and then its transition to storage in a GPU buffer (ubo or ssbo).
96 struct CpuOrGpuData {
97     union {
98         const UniformDataBlock* fCpuData;
99         BindBufferInfo fGpuData;
100     };
101 
102     // Can only start from CPU data
CpuOrGpuDataskgpu::graphite::__anon3ddbbdf80111::CpuOrGpuData103     CpuOrGpuData(const UniformDataBlock* cpuData) : fCpuData(cpuData) {}
104 };
105 
106 // Tracks the combination of textures from the paint and from the RenderStep to describe the full
107 // binding that needs to be in the command list.
108 struct TextureBinding {
109     const TextureDataBlock* fPaintTextures;
110     const TextureDataBlock* fStepTextures;
111 
operator ==skgpu::graphite::__anon3ddbbdf80111::TextureBinding112     bool operator==(const TextureBinding& other) const {
113         return fPaintTextures == other.fPaintTextures &&
114                fStepTextures == other.fStepTextures;
115     }
operator !=skgpu::graphite::__anon3ddbbdf80111::TextureBinding116     bool operator!=(const TextureBinding& other) const { return !(*this == other); }
117 
numTexturesskgpu::graphite::__anon3ddbbdf80111::TextureBinding118     int numTextures() const {
119         return (fPaintTextures ? fPaintTextures->numTextures() : 0) +
120                (fStepTextures ? fStepTextures->numTextures() : 0);
121     }
122 };
123 
124 using UniformSsboCache = DenseBiMap<const UniformDataBlock*, CpuOrGpuData>;
125 using TextureBindingCache = DenseBiMap<TextureBinding>;
126 using GraphicsPipelineCache = DenseBiMap<GraphicsPipelineDesc>;
127 
128 // Automatically merges and manages texture bindings and uniform bindings sourced from either the
129 // paint or the RenderStep. Tracks the bound state based on last-provided unique index to write
130 // Bind commands to a CommandList when necessary.
131 class TextureBindingTracker {
132 public:
trackTextures(const TextureDataBlock * paintTextures,const TextureDataBlock * stepTextures)133     TextureBindingCache::Index trackTextures(const TextureDataBlock* paintTextures,
134                                              const TextureDataBlock* stepTextures) {
135         if (!paintTextures && !stepTextures) {
136             return TextureBindingCache::kInvalidIndex;
137         }
138         return fBindingCache.insert({paintTextures, stepTextures});
139     }
140 
setCurrentTextureBindings(TextureBindingCache::Index bindingIndex)141     bool setCurrentTextureBindings(TextureBindingCache::Index bindingIndex) {
142         if (bindingIndex < TextureBindingCache::kInvalidIndex && fLastIndex != bindingIndex) {
143             fLastIndex = bindingIndex;
144             return true;
145         }
146         // No binding change
147         return false;
148     }
149 
bindTextures(DrawPassCommands::List * commandList)150     void bindTextures(DrawPassCommands::List* commandList) {
151         SkASSERT(fLastIndex < TextureBindingCache::kInvalidIndex);
152         const TextureBinding& binding = fBindingCache.lookup(fLastIndex);
153 
154         auto [texIndices, samplerIndices] =
155                 commandList->bindDeferredTexturesAndSamplers(binding.numTextures());
156 
157         if (binding.fPaintTextures) {
158             for (int i = 0; i < binding.fPaintTextures->numTextures(); ++i) {
159                 auto [tex, sampler] = binding.fPaintTextures->texture(i);
160                 *texIndices++     = fProxyCache.insert(tex.get());
161                 *samplerIndices++ = fSamplerCache.insert(sampler);
162             }
163         }
164         if (binding.fStepTextures) {
165             for (int i = 0; i < binding.fStepTextures->numTextures(); ++i) {
166                 auto [tex, sampler] = binding.fStepTextures->texture(i);
167                 *texIndices++     = fProxyCache.insert(tex.get());
168                 *samplerIndices++ = fSamplerCache.insert(sampler);
169             }
170         }
171     }
172 
detachTextures()173     SkTArray<sk_sp<TextureProxy>>&& detachTextures() { return fProxyCache.detach(); }
detachSamplers()174     SkTArray<SamplerDesc>&& detachSamplers() { return fSamplerCache.detach(); }
175 
176 private:
177     struct ProxyRef {
178         const TextureProxy* fProxy;
operator sk_sp<TextureProxy>skgpu::graphite::__anon3ddbbdf80111::TextureBindingTracker::ProxyRef179         operator sk_sp<TextureProxy>() const { return sk_ref_sp(fProxy); }
180     };
181     using TextureProxyCache = DenseBiMap<const TextureProxy*, sk_sp<TextureProxy>, ProxyRef>;
182     using SamplerDescCache = DenseBiMap<SamplerDesc>;
183 
184     TextureBindingCache fBindingCache;
185 
186     TextureProxyCache fProxyCache;
187     SamplerDescCache fSamplerCache;
188 
189     TextureBindingCache::Index fLastIndex = TextureBindingCache::kInvalidIndex;
190 };
191 
192 // Collects and writes uniform data either to uniform buffers or to shared storage buffers, and
193 // tracks when bindings need to change between draws.
194 class UniformSsboTracker {
195 public:
UniformSsboTracker(bool useStorageBuffers)196     UniformSsboTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {}
197 
198     // Maps a given {pipeline index, uniform data cache index} pair to an SSBO index within the
199     // pipeline's accumulated array of uniforms.
trackUniforms(GraphicsPipelineCache::Index pipelineIndex,const UniformDataBlock * cpuData)200     UniformSsboCache::Index trackUniforms(GraphicsPipelineCache::Index pipelineIndex,
201                                           const UniformDataBlock* cpuData) {
202         if (!cpuData) {
203             return UniformSsboCache::kInvalidIndex;
204         }
205 
206         if (pipelineIndex >= SkToU32(fPerPipelineCaches.size())) {
207             fPerPipelineCaches.resize(pipelineIndex + 1);
208         }
209 
210         return fPerPipelineCaches[pipelineIndex].insert(cpuData);
211     }
212 
213     // Writes all tracked uniform data into buffers, tracking the bindings for the written buffers
214     // by GraphicsPipelineCache::Index and possibly the UniformSsboCache::Index (when not using
215     // SSBOs). When using SSBos, the buffer is the same for all UniformSsboCache::Indices that share
216     // the same pipeline (and is stored in index 0).
writeUniforms(DrawBufferManager * bufferMgr)217     void writeUniforms(DrawBufferManager* bufferMgr) {
218         for (UniformSsboCache& cache : fPerPipelineCaches) {
219             if (cache.empty()) {
220                 continue;
221             }
222             // All data blocks for the same pipeline have the same size, so peek the first
223             // to determine the total buffer size
224             size_t udbSize = cache.lookup(0).fCpuData->size();
225             size_t udbDataSize = udbSize;
226             if (!fUseStorageBuffers) {
227                 udbSize = bufferMgr->alignUniformBlockSize(udbSize);
228             }
229             auto [writer, bufferInfo] =
230                     fUseStorageBuffers ? bufferMgr->getSsboWriter(udbSize * cache.size())
231                                        : bufferMgr->getUniformWriter(udbSize * cache.size());
232 
233             for (CpuOrGpuData& dataBlock : cache.data()) {
234                 SkASSERT(dataBlock.fCpuData->size() == udbDataSize);
235                 writer.write(dataBlock.fCpuData->data(), udbDataSize);
236                 // Swap from tracking the CPU data to the location of the GPU data
237                 dataBlock.fGpuData = bufferInfo;
238                 if (!fUseStorageBuffers) {
239                     bufferInfo.fOffset += udbSize;
240                     writer.skipBytes(udbSize - udbDataSize);
241                 } // else keep bufferInfo pointing to the start of the array
242             }
243         }
244     }
245 
246     // Updates the current tracked pipeline and ssbo index and returns whether or not bindBuffers()
247     // needs to be called, depending on if 'fUseStorageBuffers' is true or not.
setCurrentUniforms(GraphicsPipelineCache::Index pipelineIndex,UniformSsboCache::Index ssboIndex)248     bool setCurrentUniforms(GraphicsPipelineCache::Index pipelineIndex,
249                             UniformSsboCache::Index ssboIndex) {
250         if (ssboIndex >= UniformSsboCache::kInvalidIndex) {
251             return false;
252         }
253         SkASSERT(pipelineIndex < SkToU32(fPerPipelineCaches.size()) &&
254                  ssboIndex < fPerPipelineCaches[pipelineIndex].size());
255 
256         if (fUseStorageBuffers) {
257             ssboIndex = 0; // The specific index has no effect on binding
258         }
259         if (fLastPipeline != pipelineIndex || fLastIndex != ssboIndex) {
260             fLastPipeline = pipelineIndex;
261             fLastIndex = ssboIndex;
262             return true;
263         } else {
264             return false;
265         }
266     }
267 
268     // Binds a new uniform or storage buffer, based on most recently provided batch key and uniform
269     // data cache index.
bindUniforms(UniformSlot slot,DrawPassCommands::List * commandList)270     void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) {
271         SkASSERT(fLastPipeline < GraphicsPipelineCache::kInvalidIndex &&
272                  fLastIndex < UniformSsboCache::kInvalidIndex);
273         SkASSERT(!fUseStorageBuffers || fLastIndex == 0);
274         const BindBufferInfo& binding =
275                 fPerPipelineCaches[fLastPipeline].lookup(fLastIndex).fGpuData;
276         commandList->bindUniformBuffer(binding, slot);
277     }
278 
279 private:
280     // Access first by pipeline index. The final UniformSsboCache::Index is either used to select
281     // the BindBufferInfo for a draw using UBOs, or it's the real index into a packed array of
282     // uniforms in a storage buffer object (whose binding is stored in index 0).
283     SkTArray<UniformSsboCache> fPerPipelineCaches;
284 
285     const bool fUseStorageBuffers;
286 
287     GraphicsPipelineCache::Index fLastPipeline = GraphicsPipelineCache::kInvalidIndex;
288     UniformSsboCache::Index fLastIndex = UniformSsboCache::kInvalidIndex;
289 };
290 
291 } // namespace
292 
293 ///////////////////////////////////////////////////////////////////////////////////////////////////
294 
/**
 * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
 * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
 *
 * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds
 * within a pipeline, while still respecting the overall painter's order. This decreases the number
 * of low-level draw commands in a command buffer and increases the size of those, allowing the GPU
 * to operate more efficiently and have fewer bubbles within its own instruction stream.
 *
 * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant bits
 * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
 * description is encoded in two steps:
 *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
 *     ordered correctly.
 *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
 *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
 *     descriptions, but then the specific ordering of the RenderSteps isn't enforced).
 * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
 * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not
 * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and
 * the pipeline description index are packed into indices and not actual pointers, a given SortKey
 * is only valid for a specific DrawList-to-DrawPass conversion.
 */
318 class DrawPass::SortKey {
319 public:
SortKey(const DrawList::Draw * draw,int renderStep,GraphicsPipelineCache::Index pipelineIndex,UniformSsboCache::Index geomSsboIndex,UniformSsboCache::Index shadingSsboIndex,TextureBindingCache::Index textureBindingIndex)320     SortKey(const DrawList::Draw* draw,
321             int renderStep,
322             GraphicsPipelineCache::Index pipelineIndex,
323             UniformSsboCache::Index geomSsboIndex,
324             UniformSsboCache::Index shadingSsboIndex,
325             TextureBindingCache::Index textureBindingIndex)
326         : fPipelineKey(ColorDepthOrderField::set(draw->fDrawParams.order().paintOrder().bits()) |
327                        StencilIndexField::set(draw->fDrawParams.order().stencilIndex().bits())  |
328                        RenderStepField::set(static_cast<uint32_t>(renderStep))                  |
329                        PipelineField::set(pipelineIndex))
330         , fUniformKey(GeometryUniformField::set(geomSsboIndex)   |
331                       ShadingUniformField::set(shadingSsboIndex) |
332                       TextureBindingsField::set(textureBindingIndex))
333         , fDraw(draw) {
334         SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex);
335         SkASSERT(renderStep <= draw->fRenderer->numRenderSteps());
336     }
337 
operator <(const SortKey & k) const338     bool operator<(const SortKey& k) const {
339         return fPipelineKey < k.fPipelineKey ||
340                (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey);
341     }
342 
renderStep() const343     const RenderStep& renderStep() const {
344         return fDraw->fRenderer->step(RenderStepField::get(fPipelineKey));
345     }
346 
draw() const347     const DrawList::Draw& draw() const { return *fDraw; }
348 
pipelineIndex() const349     GraphicsPipelineCache::Index pipelineIndex() const {
350         return PipelineField::get(fPipelineKey);
351     }
geometrySsboIndex() const352     UniformSsboCache::Index geometrySsboIndex() const {
353         return GeometryUniformField::get(fUniformKey);
354     }
shadingSsboIndex() const355     UniformSsboCache::Index shadingSsboIndex() const {
356         return ShadingUniformField::get(fUniformKey);
357     }
textureBindingIndex() const358     TextureBindingCache::Index textureBindingIndex() const {
359         return TextureBindingsField::get(fUniformKey);
360     }
361 
362 private:
363     // Fields are ordered from most-significant to least when sorting by 128-bit value.
364     // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we
365     // need to sort consistently.
366     using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder)
367     using StencilIndexField    = Bitfield<16, 32>; // sizeof(DisjointStencilIndex)
368     using RenderStepField      = Bitfield<2,  30>; // bits >= log2(Renderer::kMaxRenderSteps)
369     using PipelineField        = Bitfield<30, 0>;  // bits >= log2(max steps*DrawList::kMaxDraws)
370     uint64_t fPipelineKey;
371 
372     // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are
373     // greater than or equal to 2^(bits-1) represent "no-data", while values between
374     // [0, 2^(bits-1)-1] can access data arrays without extra logic.
375     using GeometryUniformField = Bitfield<22, 42>; // bits >= 1+log2(max steps * max draw count)
376     using ShadingUniformField  = Bitfield<21, 21>; // bits >= 1+log2(max steps * max draw count)
377     using TextureBindingsField = Bitfield<21, 0>;  // bits >= 1+log2(max steps * max draw count)
378     uint64_t fUniformKey;
379 
380     // Backpointer to the draw that produced the sort key
381     const DrawList::Draw* fDraw;
382 
383     static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder));
384     static_assert(StencilIndexField::kBits    >= sizeof(DisjointStencilIndex));
385     static_assert(RenderStepField::kBits      >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
386     static_assert(PipelineField::kBits        >=
387                           SkNextLog2_portable(Renderer::kMaxRenderSteps * DrawList::kMaxDraws));
388     static_assert(GeometryUniformField::kBits >=
389                           1 + SkNextLog2_portable(Renderer::kMaxRenderSteps * DrawList::kMaxDraws));
390     static_assert(ShadingUniformField::kBits  >=
391                           1 + SkNextLog2_portable(Renderer::kMaxRenderSteps * DrawList::kMaxDraws));
392     static_assert(TextureBindingsField::kBits >=
393                           1 + SkNextLog2_portable(Renderer::kMaxRenderSteps * DrawList::kMaxDraws));
394 };
395 
396 ///////////////////////////////////////////////////////////////////////////////////////////////////
397 
DrawPass(sk_sp<TextureProxy> target,std::pair<LoadOp,StoreOp> ops,std::array<float,4> clearColor)398 DrawPass::DrawPass(sk_sp<TextureProxy> target,
399                    std::pair<LoadOp, StoreOp> ops,
400                    std::array<float, 4> clearColor)
401         : fTarget(std::move(target))
402         , fBounds(SkIRect::MakeEmpty())
403         , fOps(ops)
404         , fClearColor(clearColor) {}
405 
406 DrawPass::~DrawPass() = default;
407 
Make(Recorder * recorder,std::unique_ptr<DrawList> draws,sk_sp<TextureProxy> target,const SkImageInfo & targetInfo,std::pair<LoadOp,StoreOp> ops,std::array<float,4> clearColor)408 std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
409                                          std::unique_ptr<DrawList> draws,
410                                          sk_sp<TextureProxy> target,
411                                          const SkImageInfo& targetInfo,
412                                          std::pair<LoadOp, StoreOp> ops,
413                                          std::array<float, 4> clearColor) {
414     // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
415     // its size should be done with care and good reason. The performance of sorting the keys is
416     // heavily tied to the total size.
417     //
418     // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
419     // 16 bytes. There are several ways this could be done if necessary:
420     //  - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
421     //    the uniform data => 8 bytes of key, 8 bytes of pointer.
422     //  - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
423     //    4 low bits from the Draw* pointer since it's 16 byte aligned.
424     //  - Compact the Draw* to an index into the original collection, although that has extra
425     //    indirection and does not work as well with SkTBlockList.
426     // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
427     // than an 8 byte key and unmodified pointer.
428     static_assert(sizeof(DrawPass::SortKey) == 16 + sizeof(void*));
429 
430     // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
431     // is returned from Make(), it is considered immutable.
432     std::unique_ptr<DrawPass> drawPass(new DrawPass(std::move(target), ops, clearColor));
433 
434     Rect passBounds = Rect::InfiniteInverted();
435 
436     // We don't expect the uniforms from the renderSteps to reappear multiple times across a
437     // recorder's lifetime so we only de-dupe them w/in a given DrawPass.
438     UniformDataCache geometryUniformDataCache;
439     TextureDataCache* textureDataCache = recorder->priv().textureDataCache();
440     DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
441 
442     GraphicsPipelineCache pipelineCache;
443 
444     // Geometry uniforms are currently always UBO-backed.
445     const ResourceBindingRequirements& bindingReqs =
446             recorder->priv().caps()->resourceBindingRequirements();
447     Layout geometryUniformLayout = bindingReqs.fUniformBufferLayout;
448     UniformSsboTracker geometrySsboTracker(/*useStorageBuffers=*/false);
449 
450     bool useStorageBuffers = recorder->priv().caps()->storageBufferPreferred();
451     Layout shadingUniformLayout =
452             useStorageBuffers ? bindingReqs.fStorageBufferLayout : bindingReqs.fUniformBufferLayout;
453     UniformSsboTracker shadingSsboTracker(useStorageBuffers);
454     TextureBindingTracker textureBindingTracker;
455 
456     ShaderCodeDictionary* dict = recorder->priv().shaderCodeDictionary();
457     PaintParamsKeyBuilder builder(dict);
458 
459     // The initial layout we pass here is not important as it will be re-assigned when writing
460     // shading and geometry uniforms below.
461     PipelineDataGatherer gatherer(shadingUniformLayout);
462 
463     std::vector<SortKey> keys;
464     keys.reserve(draws->renderStepCount());
465     for (const DrawList::Draw& draw : draws->fDraws.items()) {
466         // If we have two different descriptors, such that the uniforms from the PaintParams can be
467         // bound independently of those used by the rest of the RenderStep, then we can upload now
468         // and remember the location for re-use on any RenderStep that does shading.
469         UniquePaintParamsID shaderID;
470         const UniformDataBlock* shadingUniforms = nullptr;
471         const TextureDataBlock* paintTextures = nullptr;
472         if (draw.fPaintParams.has_value()) {
473             std::tie(shaderID, shadingUniforms, paintTextures) =
474                     ExtractPaintData(recorder,
475                                      &gatherer,
476                                      &builder,
477                                      shadingUniformLayout,
478                                      draw.fDrawParams.transform(),
479                                      draw.fPaintParams.value(),
480                                      targetInfo.colorInfo());
481         } // else depth-only
482 
483         for (int stepIndex = 0; stepIndex < draw.fRenderer->numRenderSteps(); ++stepIndex) {
484             const RenderStep* const step = draw.fRenderer->steps()[stepIndex];
485             const bool performsShading = draw.fPaintParams.has_value() && step->performsShading();
486 
487             GraphicsPipelineCache::Index pipelineIndex = pipelineCache.insert(
488                     {step, performsShading ? shaderID : UniquePaintParamsID::InvalidID()});
489             auto [geometryUniforms, stepTextures] = ExtractRenderStepData(&geometryUniformDataCache,
490                                                                           textureDataCache,
491                                                                           &gatherer,
492                                                                           geometryUniformLayout,
493                                                                           step,
494                                                                           draw.fDrawParams);
495 
496             UniformSsboCache::Index geomSsboIndex = geometrySsboTracker.trackUniforms(
497                     pipelineIndex, geometryUniforms);
498             UniformSsboCache::Index shadingSsboIndex = shadingSsboTracker.trackUniforms(
499                     pipelineIndex, performsShading ? shadingUniforms : nullptr);
500             TextureBindingCache::Index textureIndex = textureBindingTracker.trackTextures(
501                     performsShading ? paintTextures : nullptr, stepTextures);
502 
503             keys.push_back({&draw, stepIndex, pipelineIndex,
504                             geomSsboIndex, shadingSsboIndex, textureIndex});
505         }
506 
507         passBounds.join(draw.fDrawParams.clip().drawBounds());
508         drawPass->fDepthStencilFlags |= draw.fRenderer->depthStencilFlags();
509         drawPass->fRequiresMSAA |= draw.fRenderer->requiresMSAA();
510     }
511 
512     geometrySsboTracker.writeUniforms(bufferMgr);
513     shadingSsboTracker.writeUniforms(bufferMgr);
514 
515     // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
516     // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
517     // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
518     // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place
519     // vs. algorithms that require an extra O(n) storage.
520     // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
521     // bugs in the DrawOrder determination code?
522     std::sort(keys.begin(), keys.end());
523 
524     // Used to record vertex/instance data, buffer binds, and draw calls
525     DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
526     GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
527     SkIRect lastScissor = SkIRect::MakeSize(targetInfo.dimensions());
528 
529     SkASSERT(!drawPass->fTarget->isInstantiated() ||
530              SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
531     drawPass->fCommandList.setScissor(lastScissor);
532 
533     for (const SortKey& key : keys) {
534         const DrawList::Draw& draw = key.draw();
535         const RenderStep& renderStep = key.renderStep();
536 
537         const bool pipelineChange = key.pipelineIndex() != lastPipeline;
538 
539         const bool geomBindingChange    = geometrySsboTracker.setCurrentUniforms(
540                 key.pipelineIndex(), key.geometrySsboIndex());
541         const bool shadingBindingChange  = shadingSsboTracker.setCurrentUniforms(
542                 key.pipelineIndex(), key.shadingSsboIndex());
543         const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
544                 key.textureBindingIndex());
545         const SkIRect* newScissor        = draw.fDrawParams.clip().scissor() != lastScissor ?
546                 &draw.fDrawParams.clip().scissor() : nullptr;
547 
548         const bool stateChange = geomBindingChange ||
549                                  shadingBindingChange ||
550                                  textureBindingsChange ||
551                                  SkToBool(newScissor);
552 
553         // Update DrawWriter *before* we actually change any state so that accumulated draws from
554         // the previous state use the proper state.
555         if (pipelineChange) {
556             drawWriter.newPipelineState(renderStep.primitiveType(),
557                                         renderStep.vertexStride(),
558                                         renderStep.instanceStride());
559         } else if (stateChange) {
560             drawWriter.newDynamicState();
561         }
562 
563         // Make state changes before accumulating new draw data
564         if (pipelineChange) {
565             drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
566             lastPipeline = key.pipelineIndex();
567         }
568         if (stateChange) {
569             if (geomBindingChange) {
570                 geometrySsboTracker.bindUniforms(UniformSlot::kRenderStep, &drawPass->fCommandList);
571             }
572             if (shadingBindingChange) {
573                 shadingSsboTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList);
574             }
575             if (textureBindingsChange) {
576                 textureBindingTracker.bindTextures(&drawPass->fCommandList);
577             }
578             if (newScissor) {
579                 drawPass->fCommandList.setScissor(*newScissor);
580                 lastScissor = *newScissor;
581             }
582         }
583 
584         renderStep.writeVertices(&drawWriter, draw.fDrawParams, key.shadingSsboIndex());
585     }
586     // Finish recording draw calls for any collected data at the end of the loop
587     drawWriter.flush();
588 
589     drawPass->fBounds = passBounds.roundOut().asSkIRect();
590 
591     drawPass->fPipelineDescs   = pipelineCache.detach();
592     drawPass->fSamplerDescs    = textureBindingTracker.detachSamplers();
593     drawPass->fSampledTextures = textureBindingTracker.detachTextures();
594 
595     return drawPass;
596 }
597 
prepareResources(ResourceProvider * resourceProvider,const RuntimeEffectDictionary * runtimeDict,const RenderPassDesc & renderPassDesc)598 bool DrawPass::prepareResources(ResourceProvider* resourceProvider,
599                                 const RuntimeEffectDictionary* runtimeDict,
600                                 const RenderPassDesc& renderPassDesc) {
601     fFullPipelines.reserve_back(fPipelineDescs.size());
602     for (const GraphicsPipelineDesc& pipelineDesc : fPipelineDescs) {
603         auto pipeline = resourceProvider->findOrCreateGraphicsPipeline(runtimeDict,
604                                                                        pipelineDesc,
605                                                                        renderPassDesc);
606         if (!pipeline) {
607             SKGPU_LOG_W("Failed to create GraphicsPipeline for draw in RenderPass. Dropping pass!");
608             return false;
609         }
610         fFullPipelines.push_back(std::move(pipeline));
611     }
612     // The DrawPass may be long lived on a Recording and we no longer need the GraphicPipelineDescs
613     // once we've created pipelines, so we drop the storage for them here.
614     fPipelineDescs.clear();
615 
616     for (int i = 0; i < fSampledTextures.size(); ++i) {
617         // TODO: We need to remove this check once we are creating valid SkImages from things like
618         // snapshot, save layers, etc. Right now we only support SkImages directly made for graphite
619         // and all others have a TextureProxy with an invalid TextureInfo.
620         if (!fSampledTextures[i]->textureInfo().isValid()) {
621             SKGPU_LOG_W("Failed to validate sampled texture. Will not create renderpass!");
622             return false;
623         }
624         if (!TextureProxy::InstantiateIfNotLazy(resourceProvider, fSampledTextures[i].get())) {
625             SKGPU_LOG_W("Failed to instantiate sampled texture. Will not create renderpass!");
626             return false;
627         }
628     }
629 
630     fSamplers.reserve_back(fSamplerDescs.size());
631     for (int i = 0; i < fSamplerDescs.size(); ++i) {
632         sk_sp<Sampler> sampler = resourceProvider->findOrCreateCompatibleSampler(
633                 fSamplerDescs[i].fSamplingOptions,
634                 fSamplerDescs[i].fTileModes[0],
635                 fSamplerDescs[i].fTileModes[1]);
636         if (!sampler) {
637             SKGPU_LOG_W("Failed to create sampler. Will not create renderpass!");
638             return false;
639         }
640         fSamplers.push_back(std::move(sampler));
641     }
642     // The DrawPass may be long lived on a Recording and we no longer need the SamplerDescs
643     // once we've created Samplers, so we drop the storage for them here.
644     fSamplerDescs.clear();
645 
646     return true;
647 }
648 
addResourceRefs(CommandBuffer * commandBuffer) const649 void DrawPass::addResourceRefs(CommandBuffer* commandBuffer) const {
650     for (int i = 0; i < fFullPipelines.size(); ++i) {
651         commandBuffer->trackResource(fFullPipelines[i]);
652     }
653     for (int i = 0; i < fSampledTextures.size(); ++i) {
654         commandBuffer->trackResource(fSampledTextures[i]->refTexture());
655     }
656     for (int i = 0; i < fSamplers.size(); ++i) {
657         commandBuffer->trackResource(fSamplers[i]);
658     }
659 }
660 
getTexture(size_t index) const661 const Texture* DrawPass::getTexture(size_t index) const {
662     SkASSERT(index < SkToSizeT(fSampledTextures.size()));
663     SkASSERT(fSampledTextures[index]);
664     SkASSERT(fSampledTextures[index]->texture());
665     return fSampledTextures[index]->texture();
666 }
getSampler(size_t index) const667 const Sampler* DrawPass::getSampler(size_t index) const {
668     SkASSERT(index < SkToSizeT(fSamplers.size()));
669     SkASSERT(fSamplers[index]);
670     return fSamplers[index].get();
671 }
672 
673 } // namespace skgpu::graphite
674