/*
 * Copyright 2023 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/compute/DispatchGroup.h"

#include "include/core/SkColorType.h"
#include "include/core/SkTypes.h"
#include "include/gpu/GpuTypes.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/gpu/graphite/TextureInfo.h"
#include "include/private/base/SkSpan_impl.h"
#include "include/private/base/SkTo.h"
#include "src/gpu/BufferWriter.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/CommandBuffer.h"
#include "src/gpu/graphite/ComputePipeline.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/Resource.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/Sampler.h"
#include "src/gpu/graphite/Texture.h" // IWYU pragma: keep
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/task/ClearBuffersTask.h"

#include <utility>

namespace skgpu::graphite {

DispatchGroup::~DispatchGroup() = default;

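// Creates the ComputePipelines and Samplers described by the pending descriptors and instantiates
// any bound texture proxies. Returns false if any resource fails to be created, in which case the
// whole group should be dropped.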
bool DispatchGroup::prepareResources(ResourceProvider* resourceProvider) {
    fPipelines.reserve(fPipelines.size() + fPipelineDescs.size());
    for (const ComputePipelineDesc& desc : fPipelineDescs) {
        auto pipeline = resourceProvider->findOrCreateComputePipeline(desc);
        if (!pipeline) {
            SKGPU_LOG_W("Failed to create ComputePipeline for dispatch group. Dropping group!");
            return false;
        }
        fPipelines.push_back(std::move(pipeline));
    }

    for (int i = 0; i < fTextures.size(); ++i) {
        if (!fTextures[i]->textureInfo().isValid()) {
            SKGPU_LOG_W("Failed to validate bound texture. Dropping dispatch group!");
            return false;
        }
        if (!TextureProxy::InstantiateIfNotLazy(resourceProvider, fTextures[i].get())) {
            SKGPU_LOG_W("Failed to instantiate bound texture. Dropping dispatch group!");
            return false;
        }
    }

    for (const SamplerDesc& desc : fSamplerDescs) {
        sk_sp<Sampler> sampler = resourceProvider->findOrCreateCompatibleSampler(desc);
        if (!sampler) {
            SKGPU_LOG_W("Failed to create sampler. Dropping dispatch group!");
            return false;
        }
        fSamplers.push_back(std::move(sampler));
    }

    // The DispatchGroup may be long lived on a Recording and we no longer need the descriptors
    // once we've created pipelines.
    fPipelineDescs.clear();
    fSamplerDescs.clear();

    return true;
}

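// Registers this group's pipelines and textures with the CommandBuffer so they remain alive for
// the lifetime of the recorded commands.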
void DispatchGroup::addResourceRefs(CommandBuffer* commandBuffer) const {
    for (int i = 0; i < fPipelines.size(); ++i) {
        commandBuffer->trackResource(fPipelines[i]);
    }
    for (int i = 0; i < fTextures.size(); ++i) {
        commandBuffer->trackCommandBufferResource(fTextures[i]->refTexture());
    }
}

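// Returns a task that clears the buffers scheduled for clearing before the dispatches execute, or
// nullptr if no buffer clears were requested.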
sk_sp<Task> DispatchGroup::snapChildTask() {
    if (fClearList.empty()) {
        return nullptr;
    }
    return ClearBuffersTask::Make(std::move(fClearList));
}

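// The accessors below are only valid once prepareResources() has instantiated the texture proxies
// and created the samplers.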
const Texture* DispatchGroup::getTexture(size_t index) const {
    SkASSERT(index < SkToSizeT(fTextures.size()));
    SkASSERT(fTextures[index]);
    SkASSERT(fTextures[index]->texture());
    return fTextures[index]->texture();
}

const Sampler* DispatchGroup::getSampler(size_t index) const {
    SkASSERT(index < SkToSizeT(fSamplers.size()));
    SkASSERT(fSamplers[index]);
    return fSamplers[index].get();
}

using Builder = DispatchGroup::Builder;

Builder::Builder(Recorder* recorder) : fObj(new DispatchGroup()), fRecorder(recorder) {
    SkASSERT(fRecorder);
}

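// Appends a step using either the explicitly provided global dispatch size or, if none is given,
// the size the step computes for itself.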
bool Builder::appendStep(const ComputeStep* step, std::optional<WorkgroupSize> globalSize) {
    return this->appendStepInternal(step,
                                    globalSize ? *globalSize : step->calculateGlobalDispatchSize());
}

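// Appends a step whose global dispatch size is read at execution time from `indirectBuffer`.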
bool Builder::appendStepIndirect(const ComputeStep* step, BindBufferInfo indirectBuffer) {
    return this->appendStepInternal(step, indirectBuffer);
}

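// Shared implementation for direct and indirect dispatches: looks up or allocates the step's
// resources, assigns their binding indices, and records the dispatch.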
bool Builder::appendStepInternal(
        const ComputeStep* step,
        const std::variant<WorkgroupSize, BindBufferInfo>& globalSizeOrIndirect) {
    SkASSERT(fObj);
    SkASSERT(step);

    Dispatch dispatch;

    // Process the step's resources.
    auto resources = step->resources();
    dispatch.fBindings.reserve(resources.size());

    // `nextIndex` matches the declaration order of resources as specified by the ComputeStep.
    int nextIndex = 0;

    // We assign buffer, texture, and sampler indices from separate ranges. This is compatible with
    // how Graphite assigns indices on Metal, as these map directly to the buffer/texture/sampler
    // index ranges. On Dawn/Vulkan buffers and textures/samplers are allocated from separate bind
    // groups/descriptor sets but texture and sampler indices need to not overlap.
    const auto& bindingReqs = fRecorder->priv().caps()->resourceBindingRequirements();
    const bool separateSampler = bindingReqs.fSeparateTextureAndSamplerBinding;
    const bool texturesUseDistinctIdxRanges = bindingReqs.fComputeUsesDistinctIdxRangesForTextures;
    // Some binding index determination logic relies upon the fact that we do not expect to
    // encounter a backend that both uses separate sampler bindings AND requires separate index
    // ranges for textures.
    SkASSERT(!(separateSampler && texturesUseDistinctIdxRanges));

    int bufferOrGlobalIndex = 0;
    int texIndex = 0;
    // NOTE: SkSL Metal codegen always assigns the same binding index to a texture and its sampler.
    // TODO: This could cause sampler indices to not be tightly packed if the sampler2D declaration
    // comes after 1 or more storage texture declarations (which don't have samplers).
    for (const ComputeStep::ResourceDesc& r : resources) {
        SkASSERT(r.fSlot == -1 || (r.fSlot >= 0 && r.fSlot < kMaxComputeDataFlowSlots));
        const int index = nextIndex++;

        DispatchResourceOptional maybeResource;

        using DataFlow = ComputeStep::DataFlow;
        using Type = ComputeStep::ResourceType;
        switch (r.fFlow) {
            case DataFlow::kPrivate:
                // A sampled or fetched-type readonly texture must either get assigned via
                // `assignSharedTexture()` or internally allocated as a storage texture of a
                // preceding step. Such a texture always has a data slot.
                SkASSERT(r.fType != Type::kReadOnlyTexture);
                SkASSERT(r.fType != Type::kSampledTexture);
                maybeResource = this->allocateResource(step, r, index);
                break;
            case DataFlow::kShared: {
                SkASSERT(r.fSlot >= 0);
                // Allocate a new resource only if the shared slot is empty (except for a
                // SampledTexture which needs its sampler to be allocated internally).
                DispatchResourceOptional* slot = &fOutputTable.fSharedSlots[r.fSlot];
                if (std::holds_alternative<std::monostate>(*slot)) {
                    SkASSERT(r.fType != Type::kReadOnlyTexture);
                    SkASSERT(r.fType != Type::kSampledTexture);
                    maybeResource = this->allocateResource(step, r, index);
                    *slot = maybeResource;
                } else {
                    SkASSERT(((r.fType == Type::kUniformBuffer ||
                               r.fType == Type::kStorageBuffer ||
                               r.fType == Type::kReadOnlyStorageBuffer ||
                               r.fType == Type::kIndirectBuffer) &&
                              std::holds_alternative<BindBufferInfo>(*slot)) ||
                             ((r.fType == Type::kReadOnlyTexture ||
                               r.fType == Type::kSampledTexture ||
                               r.fType == Type::kWriteOnlyStorageTexture) &&
                              std::holds_alternative<TextureIndex>(*slot)));
#ifdef SK_DEBUG
                    // Ensure that the texture has the right format if it was assigned via
                    // `assignSharedTexture()`.
                    const TextureIndex* texIdx = std::get_if<TextureIndex>(slot);
                    if (texIdx && r.fType == Type::kWriteOnlyStorageTexture) {
                        const TextureProxy* t = fObj->fTextures[texIdx->fValue].get();
                        SkASSERT(t);
                        auto [_, colorType] = step->calculateTextureParameters(index, r);
                        SkASSERT(t->textureInfo().canBeFulfilledBy(
                                fRecorder->priv().caps()->getDefaultStorageTextureInfo(colorType)));
                    }
#endif // SK_DEBUG

                    maybeResource = *slot;

                    if (r.fType == Type::kSampledTexture) {
                        // The shared slot holds the texture part of the sampled texture but we
                        // still need to allocate the sampler.
                        SkASSERT(std::holds_alternative<TextureIndex>(*slot));
                        auto samplerResource = this->allocateResource(step, r, index);
                        const SamplerIndex* samplerIdx =
                                std::get_if<SamplerIndex>(&samplerResource);
                        SkASSERT(samplerIdx);
                        int bindingIndex = texturesUseDistinctIdxRanges ? texIndex
                                           : separateSampler            ? bufferOrGlobalIndex++
                                                                        : bufferOrGlobalIndex;
                        dispatch.fBindings.push_back(
                                {static_cast<BindingIndex>(bindingIndex), *samplerIdx});
                    }
                }
                break;
            }
        }

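        // Assign the binding index for the resource itself: buffers always consume an index from
        // the buffer/global range, while textures get their own range only when the backend
        // requires distinct index ranges.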
        int bindingIndex = 0;
        DispatchResource dispatchResource;
        if (const BindBufferInfo* buffer = std::get_if<BindBufferInfo>(&maybeResource)) {
            dispatchResource = *buffer;
            bindingIndex = bufferOrGlobalIndex++;
        } else if (const TextureIndex* texIdx = std::get_if<TextureIndex>(&maybeResource)) {
            dispatchResource = *texIdx;
            bindingIndex = texturesUseDistinctIdxRanges ? texIndex++ : bufferOrGlobalIndex++;
        } else {
            SKGPU_LOG_W("Failed to allocate resource for compute dispatch");
            return false;
        }
        dispatch.fBindings.push_back({static_cast<BindingIndex>(bindingIndex), dispatchResource});
    }

    auto wgBufferDescs = step->workgroupBuffers();
    if (!wgBufferDescs.empty()) {
        dispatch.fWorkgroupBuffers.push_back_n(wgBufferDescs.size(), wgBufferDescs.data());
    }

    // We need to switch pipelines if this step uses a different pipeline from the previous step.
    if (fObj->fPipelineDescs.empty() ||
        fObj->fPipelineDescs.back().uniqueID() != step->uniqueID()) {
        fObj->fPipelineDescs.push_back(ComputePipelineDesc(step));
    }

    dispatch.fPipelineIndex = fObj->fPipelineDescs.size() - 1;
    dispatch.fLocalSize = step->localDispatchSize();
    dispatch.fGlobalSizeOrIndirect = globalSizeOrIndirect;

    fObj->fDispatchList.push_back(std::move(dispatch));

    return true;
}

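// Binds a caller-provided buffer to a shared data flow slot, optionally scheduling it to be
// cleared to zero before the dispatches run.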
void Builder::assignSharedBuffer(BindBufferInfo buffer, unsigned int slot, ClearBuffer cleared) {
    SkASSERT(fObj);
    SkASSERT(buffer);
    SkASSERT(buffer.fSize);

    fOutputTable.fSharedSlots[slot] = buffer;
    if (cleared == ClearBuffer::kYes) {
        fObj->fClearList.push_back(buffer);
    }
}

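// Binds a caller-provided texture proxy to a shared data flow slot so that subsequent steps can
// read or write it.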
void Builder::assignSharedTexture(sk_sp<TextureProxy> texture, unsigned int slot) {
    SkASSERT(fObj);
    SkASSERT(texture);

    fObj->fTextures.push_back(std::move(texture));
    fOutputTable.fSharedSlots[slot] = TextureIndex{fObj->fTextures.size() - 1u};
}

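// Transfers ownership of the accumulated DispatchGroup to the caller and resets the shared slot
// table.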
std::unique_ptr<DispatchGroup> Builder::finalize() {
    auto obj = std::move(fObj);
    fOutputTable.reset();
    return obj;
}

#if defined(GPU_TEST_UTILS)
void Builder::reset() {
    fOutputTable.reset();
    fObj.reset(new DispatchGroup);
}
#endif

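// Returns the buffer bound to `slot`, or an empty BindBufferInfo if the slot is unassigned or
// holds a texture.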
BindBufferInfo Builder::getSharedBufferResource(unsigned int slot) const {
    SkASSERT(fObj);

    BindBufferInfo info;
    if (const BindBufferInfo* slotValue =
                std::get_if<BindBufferInfo>(&fOutputTable.fSharedSlots[slot])) {
        info = *slotValue;
    }
    return info;
}

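// Returns the texture proxy bound to `slot`, or nullptr if the slot is unassigned or holds a
// buffer.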
sk_sp<TextureProxy> Builder::getSharedTextureResource(unsigned int slot) const {
    SkASSERT(fObj);

    const TextureIndex* idx = std::get_if<TextureIndex>(&fOutputTable.fSharedSlots[slot]);
    if (!idx) {
        return nullptr;
    }

    SkASSERT(idx->fValue < SkToSizeT(fObj->fTextures.size()));
    return fObj->fTextures[idx->fValue];
}

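// Allocates the backing resource for a single ComputeStep resource declaration: storage, indirect,
// and uniform buffers come from the DrawBufferManager, storage textures are created as new
// TextureProxies, and sampled textures get a SamplerDesc recorded for later sampler creation.
// Returns an empty (monostate) result if allocation fails.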
DispatchResourceOptional Builder::allocateResource(const ComputeStep* step,
                                                   const ComputeStep::ResourceDesc& resource,
                                                   int resourceIdx) {
    SkASSERT(step);
    SkASSERT(fObj);
    using Type = ComputeStep::ResourceType;
    using ResourcePolicy = ComputeStep::ResourcePolicy;

    DrawBufferManager* bufferMgr = fRecorder->priv().drawBufferManager();
    DispatchResourceOptional result;
    switch (resource.fType) {
        case Type::kReadOnlyStorageBuffer:
        case Type::kStorageBuffer: {
            size_t bufferSize = step->calculateBufferSize(resourceIdx, resource);
            SkASSERT(bufferSize);
            if (resource.fPolicy == ResourcePolicy::kMapped) {
                auto [ptr, bufInfo] = bufferMgr->getStoragePointer(bufferSize);
                if (ptr) {
                    step->prepareStorageBuffer(resourceIdx, resource, ptr, bufferSize);
                    result = bufInfo;
                }
            } else {
                auto bufInfo = bufferMgr->getStorage(bufferSize,
                                                     resource.fPolicy == ResourcePolicy::kClear
                                                             ? ClearBuffer::kYes
                                                             : ClearBuffer::kNo);
                if (bufInfo) {
                    result = bufInfo;
                }
            }
            break;
        }
        case Type::kIndirectBuffer: {
            SkASSERT(resource.fPolicy != ResourcePolicy::kMapped);

            size_t bufferSize = step->calculateBufferSize(resourceIdx, resource);
            SkASSERT(bufferSize);
            auto bufInfo = bufferMgr->getIndirectStorage(bufferSize,
                                                         resource.fPolicy == ResourcePolicy::kClear
                                                                 ? ClearBuffer::kYes
                                                                 : ClearBuffer::kNo);
            if (bufInfo) {
                result = bufInfo;
            }
            break;
        }
        case Type::kUniformBuffer: {
            SkASSERT(resource.fPolicy == ResourcePolicy::kMapped);

            const auto& resourceReqs = fRecorder->priv().caps()->resourceBindingRequirements();
            UniformManager uboMgr(resourceReqs.fUniformBufferLayout);
            step->prepareUniformBuffer(resourceIdx, resource, &uboMgr);

            auto dataBlock = uboMgr.finish();
            SkASSERT(!dataBlock.empty());

            auto [writer, bufInfo] = bufferMgr->getUniformWriter(/*count=*/1, dataBlock.size());
            if (bufInfo) {
                writer.write(dataBlock.data(), dataBlock.size());
                result = bufInfo;
            }
            break;
        }
        case Type::kWriteOnlyStorageTexture: {
            auto [size, colorType] = step->calculateTextureParameters(resourceIdx, resource);
            SkASSERT(!size.isEmpty());
            SkASSERT(colorType != kUnknown_SkColorType);

            auto textureInfo = fRecorder->priv().caps()->getDefaultStorageTextureInfo(colorType);
            sk_sp<TextureProxy> texture = TextureProxy::Make(
                    fRecorder->priv().caps(), fRecorder->priv().resourceProvider(),
                    size, textureInfo, "DispatchWriteOnlyStorageTexture", skgpu::Budgeted::kYes);
            if (texture) {
                fObj->fTextures.push_back(std::move(texture));
                result = TextureIndex{fObj->fTextures.size() - 1u};
            }
            break;
        }
        case Type::kReadOnlyTexture:
            // This resource type is meant to be populated externally (e.g. by an upload or a render
            // pass) and only read/sampled by a ComputeStep. It's not meaningful to allocate an
            // internal texture for a DispatchGroup if none of the ComputeSteps will write to it.
            //
            // Instead of using internal allocation, this texture must be assigned explicitly to a
            // slot by calling the Builder::assignSharedTexture() method.
            //
            // Note: A ComputeStep is allowed to read/sample from a storage texture that a previous
            // ComputeStep has written to.
            SK_ABORT("a readonly texture must be externally assigned to a ComputeStep");
            break;
        case Type::kSampledTexture: {
            fObj->fSamplerDescs.push_back(step->calculateSamplerParameters(resourceIdx, resource));
            result = SamplerIndex{fObj->fSamplerDescs.size() - 1u};
            break;
        }
    }
    return result;
}

} // namespace skgpu::graphite