• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2023 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef skgpu_graphite_ComputeStep_DEFINED
9 #define skgpu_graphite_ComputeStep_DEFINED
10 
11 #include "include/core/SkSpan.h"
12 #include "src/core/SkEnumBitMask.h"
13 #include "src/gpu/graphite/ComputeTypes.h"
14 
15 #include <optional>
16 #include <string>
17 #include <string_view>
18 #include <vector>
19 
20 namespace skgpu::graphite {
21 
22 class DrawParams;
23 struct ResourceBindingRequirements;
24 
25 /**
26  * A `ComputeStep` represents a compute pass within a wider draw operation. A `ComputeStep`
27  * implementation describes an invocation of a compute program and its data binding layout.
28  *
29  * A `ComputeStep` can perform arbitrary operations on the GPU over various types of data, including
30  * geometry and image processing. The data processed by a `ComputeStep` can be inputs (textures or
31  * buffers) populated on the CPU, data forwarded to and from other `ComputeStep` invocations (via
32  * "slots"), transient storage buffers/textures that are only used within an individual dispatch,
33  * geometry attribute (vertex/index/instance) and indirect draw parameters of a subsequent raster
34  * pipeline stage, as well as texture outputs.
35  *
36  * The data flow between sequential `ComputeStep` invocations within a DispatchGroup is achieved by
37  * operating over a shared "resource table". `ComputeStep`s can declare a resource with a slot
38  * number. Multiple `ComputeStep`s in a group that declare a resource with the same slot number will
39  * have access to the same backing resource object through that slot:
40  *
41  *      _______________                _______________
42  *     |               |              |               |
43  *     |                ---[Slot 0]---                |
44  *     |               |              |               |
45  *     |                ---[Slot 1]---                |
46  *     | ComputeStep 1 |              | ComputeStep 2 |
47  *     |                ---[Slot 2]   |               |
48  *     |               |              |               |
49  *     |               |   [Slot 3]---                |
50  *     |               |              |               |
51  *      ---------------                ---------------
52  *
53  * In the example above, slots 0 and 1 are accessed by both ComputeSteps, while slots 2 and 3 are
54  * exclusively accessed by ComputeStep 1 and 2 respectively. Alternately, slots 2 and 3 could be
55  * declared as "private" resources which are visible to a single ComputeStep.
56  *
57  * Similarly, raster stage geometry buffers that are specified as the output of a ComputeStep can be
58  * used to assign the draw buffers of a RenderStep.
59  *
60  * It is the responsibility of the owning entity (e.g. a RendererProvider) to ensure that a chain of
61  * ComputeStep and RenderStep invocations have a compatible resource and data-flow layout.
62  */
63 class ComputeStep {
64 public:
65     enum class DataFlow {
66         // A set of writable Buffer bindings that the `ComputeStep` will write vertex and instance
67         // attributes to. If present, these buffers can be used to encode the draw command for a
68         // subsequent `RenderStep`.
69         kVertexOutput,
70         kIndexOutput,
71         kInstanceOutput,
72         kIndirectDrawOutput,
73 
74         // A private binding is a resource that is only visible to a single ComputeStep invocation.
75         kPrivate,
76 
77         // Bindings with a slot number that can be used to forward data between a series of
78         // `ComputeStep`s. This DataFlow type is accompanied with a "slot number" that can be
79         // shared by multiple `ComputeStep`s in a group.
80         kShared,
81     };
82 
83     enum class ResourceType {
84         kUniformBuffer,
85         kStorageBuffer,
86 
87         // TODO(b/238794438): Support sampled and storage texture types.
88     };
89 
90     enum class ResourcePolicy {
91         kNone,
92 
93         // The memory of the resource will be initialized to 0
94         kClear,
95 
96         // The ComputeStep will be asked to initialize the memory on the CPU via
97         // `ComputeStep::prepareBuffer` prior to pipeline execution. This may incur a transfer cost
98         // on platforms that do not allow buffers to be mapped in shared memory.
99         //
100         // If multiple ComputeSteps in a DispatchGroup declare a mapped resource with the same
101         // shared slot number, only the first ComputeStep in the series will receive a call to
102         // `ComputeStep::prepareBuffer`.
103         kMapped,
104     };
105 
106     struct ResourceDesc final {
107         ResourceType fType;
108         DataFlow fFlow;
109         ResourcePolicy fPolicy;
110 
111         // This field only has meaning (and must have a non-negative value) if `fFlow` is
112         // `DataFlow::kShared`.
113         int fSlot = -1;
114 
115         constexpr ResourceDesc(ResourceType type,
116                                DataFlow flow,
117                                ResourcePolicy policy,
118                                int slot = -1)
fTypefinal119                 : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot) {}
120     };
121 
122     virtual ~ComputeStep() = default;
123 
124     // Returns a complete SkSL compute program. The returned SkSL must declare all resoure bindings
125     // starting at `nextBindingIndex` in the order in which they are enumerated by
126     // `ComputeStep::resources()`.
127     virtual std::string computeSkSL(const ResourceBindingRequirements&,
128                                     int nextBindingIndex) const = 0;
129 
130     // This method will be called for entries in the ComputeStep's resource list to determine the
131     // required allocation sizes. The ComputeStep should return the minimum allocation size for the
132     // resource.
133     //
134     // TODO(armansito): The only piece of information that the ComputeStep currently uses to make
135     // this determination is the draw parameters. This approach particularly doesn't address (and
136     // likely needs to be reworked) for intermediate ComputeSteps in a chain of invocations, where
137     // the effective data sizes may not be known on the CPU.
138     //
139     // For now, we assume that there will be a strict data contract between chained ComputeSteps.
140     // The buffer sizes are an estimate based on the DrawParams. This is generic enough to allow
141     // different schemes (such as dynamic allocations and buffer pools) but may not be easily
142     // validated on the CPU.
calculateResourceSize(const DrawParams &,int resourceIndex,const ResourceDesc &)143     virtual size_t calculateResourceSize(const DrawParams&,
144                                          int resourceIndex,
145                                          const ResourceDesc&) const {
146         return 0u;
147     }
148 
149     // Return the global dispatch size (aka "workgroup count") for this step based on the draw
150     // parameters. The default value is a workgroup count of (1, 1, 1)
151     //
152     // TODO(armansito): The only piece of information that the ComputeStep currently gets to make
153     // this determination is the draw parameters. There might be other inputs to this calculation
154     // for intermediate compute stages that may not be known on the CPU. One way to address this is
155     // to drive the workgroup dimensions via an indirect dispatch.
calculateGlobalDispatchSize(const DrawParams &)156     virtual WorkgroupSize calculateGlobalDispatchSize(const DrawParams&) const {
157         return WorkgroupSize();
158     }
159 
160     // Populates a buffer resource which was specified as "mapped". This method will only be called
161     // once for a resource right after its allocation and before pipeline execution. For shared
162     // resources, only the first ComputeStep in a DispatchGroup will be asked to prepare the buffer.
163     //
164     // `resourceIndex` matches the order in which `resource` was enumerated by
165     // `ComputeStep::resources()`.
166     virtual void prepareBuffer(const DrawParams&,
167                                int ssboIndex,
168                                int resourceIndex,
169                                const ResourceDesc& resource,
170                                void* buffer,
171                                size_t bufferSize) const;
172 
resources()173     SkSpan<const ResourceDesc> resources() const { return SkSpan(fResources); }
174 
175     // Identifier that can be used as part of a unique key for a compute pipeline state object
176     // associated with this `ComputeStep`.
uniqueID()177     uint32_t uniqueID() const { return fUniqueID; }
178 
179     // Returns a debug name for the subclass implementation.
name()180     const char* name() const { return fName.c_str(); }
181 
182     // The size of the workgroup for this ComputeStep's entry point function. This value is hardware
183     // dependent. On Metal, this value should be used when invoking the dispatch API call. On all
184     // other backends, this value will be baked into the pipeline.
localDispatchSize()185     WorkgroupSize localDispatchSize() const { return fLocalDispatchSize; }
186 
187     // Data flow behavior queries:
outputsVertices()188     bool outputsVertices() const { return fFlags & Flags::kOutputsVertexBuffer; }
outputsIndices()189     bool outputsIndices() const { return fFlags & Flags::kOutputsIndexBuffer; }
outputsInstances()190     bool outputsInstances() const { return fFlags & Flags::kOutputsInstanceBuffer; }
writesIndirectDraw()191     bool writesIndirectDraw() const { return fFlags & Flags::kOutputsIndirectDrawBuffer; }
192 
193 protected:
194     ComputeStep(std::string_view name,
195                 WorkgroupSize localDispatchSize,
196                 SkSpan<const ResourceDesc> resources);
197 
198 private:
199     enum class Flags : uint8_t {
200         kNone                      = 0b0000,
201         kOutputsVertexBuffer       = 0b0001,
202         kOutputsIndexBuffer        = 0b0010,
203         kOutputsInstanceBuffer     = 0b0100,
204         kOutputsIndirectDrawBuffer = 0b1000,
205     };
206     SK_DECL_BITMASK_OPS_FRIENDS(Flags);
207 
208     // Disallow copy and move
209     ComputeStep(const ComputeStep&) = delete;
210     ComputeStep(ComputeStep&&)      = delete;
211 
212     uint32_t fUniqueID;
213     SkEnumBitMask<Flags> fFlags;
214     std::string fName;
215     std::vector<ResourceDesc> fResources;
216 
217     // TODO(b/240615224): Subclasses should simply specify the workgroup size that they need.
218     // The ComputeStep constructor should check and reduce that number based on the maximum
219     // supported workgroup size stored in Caps. In Metal, we'll pass this number directly to the
220     // dispatch API call. On other backends, we'll use this value to generate the right SkSL
221     // workgroup size declaration to avoid any validation failures.
222     WorkgroupSize fLocalDispatchSize;
223 };
224 SK_MAKE_BITMASK_OPS(ComputeStep::Flags);
225 
226 }  // namespace skgpu::graphite
227 
228 #endif  // skgpu_graphite_ComputeStep_DEFINED
229