1 /* 2 * Copyright 2023 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef skgpu_graphite_ComputeStep_DEFINED 9 #define skgpu_graphite_ComputeStep_DEFINED 10 11 #include "include/core/SkSpan.h" 12 #include "src/core/SkEnumBitMask.h" 13 #include "src/gpu/graphite/ComputeTypes.h" 14 15 #include <optional> 16 #include <string> 17 #include <string_view> 18 #include <vector> 19 20 namespace skgpu::graphite { 21 22 class DrawParams; 23 struct ResourceBindingRequirements; 24 25 /** 26 * A `ComputeStep` represents a compute pass within a wider draw operation. A `ComputeStep` 27 * implementation describes an invocation of a compute program and its data binding layout. 28 * 29 * A `ComputeStep` can perform arbitrary operations on the GPU over various types of data, including 30 * geometry and image processing. The data processed by a `ComputeStep` can be inputs (textures or 31 * buffers) populated on the CPU, data forwarded to and from other `ComputeStep` invocations (via 32 * "slots"), transient storage buffers/textures that are only used within an individual dispatch, 33 * geometry attribute (vertex/index/instance) and indirect draw parameters of a subsequent raster 34 * pipeline stage, as well as texture outputs. 35 * 36 * The data flow between sequential `ComputeStep` invocations within a DispatchGroup is achieved by 37 * operating over a shared "resource table". `ComputeStep`s can declare a resource with a slot 38 * number. Multiple `ComputeStep`s in a group that declare a resource with the same slot number will 39 * have access to the same backing resource object through that slot: 40 * 41 * _______________ _______________ 42 * | | | | 43 * | ---[Slot 0]--- | 44 * | | | | 45 * | ---[Slot 1]--- | 46 * | ComputeStep 1 | | ComputeStep 2 | 47 * | ---[Slot 2] | | 48 * | | | | 49 * | | [Slot 3]--- | 50 * | | | | 51 * --------------- --------------- 52 * 53 * In the example above, slots 0 and 1 are accessed by both ComputeSteps, while slots 2 and 3 are 54 * exclusively accessed by ComputeStep 1 and 2 respectively. Alternately, slots 2 and 3 could be 55 * declared as "private" resources which are visible to a single ComputeStep. 56 * 57 * Similarly, raster stage geometry buffers that are specified as the output of a ComputeStep can be 58 * used to assign the draw buffers of a RenderStep. 59 * 60 * It is the responsibility of the owning entity (e.g. a RendererProvider) to ensure that a chain of 61 * ComputeStep and RenderStep invocations have a compatible resource and data-flow layout. 62 */ 63 class ComputeStep { 64 public: 65 enum class DataFlow { 66 // A set of writable Buffer bindings that the `ComputeStep` will write vertex and instance 67 // attributes to. If present, these buffers can be used to encode the draw command for a 68 // subsequent `RenderStep`. 69 kVertexOutput, 70 kIndexOutput, 71 kInstanceOutput, 72 kIndirectDrawOutput, 73 74 // A private binding is a resource that is only visible to a single ComputeStep invocation. 75 kPrivate, 76 77 // Bindings with a slot number that can be used to forward data between a series of 78 // `ComputeStep`s. This DataFlow type is accompanied with a "slot number" that can be 79 // shared by multiple `ComputeStep`s in a group. 80 kShared, 81 }; 82 83 enum class ResourceType { 84 kUniformBuffer, 85 kStorageBuffer, 86 87 // TODO(b/238794438): Support sampled and storage texture types. 88 }; 89 90 enum class ResourcePolicy { 91 kNone, 92 93 // The memory of the resource will be initialized to 0 94 kClear, 95 96 // The ComputeStep will be asked to initialize the memory on the CPU via 97 // `ComputeStep::prepareBuffer` prior to pipeline execution. This may incur a transfer cost 98 // on platforms that do not allow buffers to be mapped in shared memory. 99 // 100 // If multiple ComputeSteps in a DispatchGroup declare a mapped resource with the same 101 // shared slot number, only the first ComputeStep in the series will receive a call to 102 // `ComputeStep::prepareBuffer`. 103 kMapped, 104 }; 105 106 struct ResourceDesc final { 107 ResourceType fType; 108 DataFlow fFlow; 109 ResourcePolicy fPolicy; 110 111 // This field only has meaning (and must have a non-negative value) if `fFlow` is 112 // `DataFlow::kShared`. 113 int fSlot = -1; 114 115 constexpr ResourceDesc(ResourceType type, 116 DataFlow flow, 117 ResourcePolicy policy, 118 int slot = -1) fTypefinal119 : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot) {} 120 }; 121 122 virtual ~ComputeStep() = default; 123 124 // Returns a complete SkSL compute program. The returned SkSL must declare all resoure bindings 125 // starting at `nextBindingIndex` in the order in which they are enumerated by 126 // `ComputeStep::resources()`. 127 virtual std::string computeSkSL(const ResourceBindingRequirements&, 128 int nextBindingIndex) const = 0; 129 130 // This method will be called for entries in the ComputeStep's resource list to determine the 131 // required allocation sizes. The ComputeStep should return the minimum allocation size for the 132 // resource. 133 // 134 // TODO(armansito): The only piece of information that the ComputeStep currently uses to make 135 // this determination is the draw parameters. This approach particularly doesn't address (and 136 // likely needs to be reworked) for intermediate ComputeSteps in a chain of invocations, where 137 // the effective data sizes may not be known on the CPU. 138 // 139 // For now, we assume that there will be a strict data contract between chained ComputeSteps. 140 // The buffer sizes are an estimate based on the DrawParams. This is generic enough to allow 141 // different schemes (such as dynamic allocations and buffer pools) but may not be easily 142 // validated on the CPU. calculateResourceSize(const DrawParams &,int resourceIndex,const ResourceDesc &)143 virtual size_t calculateResourceSize(const DrawParams&, 144 int resourceIndex, 145 const ResourceDesc&) const { 146 return 0u; 147 } 148 149 // Return the global dispatch size (aka "workgroup count") for this step based on the draw 150 // parameters. The default value is a workgroup count of (1, 1, 1) 151 // 152 // TODO(armansito): The only piece of information that the ComputeStep currently gets to make 153 // this determination is the draw parameters. There might be other inputs to this calculation 154 // for intermediate compute stages that may not be known on the CPU. One way to address this is 155 // to drive the workgroup dimensions via an indirect dispatch. calculateGlobalDispatchSize(const DrawParams &)156 virtual WorkgroupSize calculateGlobalDispatchSize(const DrawParams&) const { 157 return WorkgroupSize(); 158 } 159 160 // Populates a buffer resource which was specified as "mapped". This method will only be called 161 // once for a resource right after its allocation and before pipeline execution. For shared 162 // resources, only the first ComputeStep in a DispatchGroup will be asked to prepare the buffer. 163 // 164 // `resourceIndex` matches the order in which `resource` was enumerated by 165 // `ComputeStep::resources()`. 166 virtual void prepareBuffer(const DrawParams&, 167 int ssboIndex, 168 int resourceIndex, 169 const ResourceDesc& resource, 170 void* buffer, 171 size_t bufferSize) const; 172 resources()173 SkSpan<const ResourceDesc> resources() const { return SkSpan(fResources); } 174 175 // Identifier that can be used as part of a unique key for a compute pipeline state object 176 // associated with this `ComputeStep`. uniqueID()177 uint32_t uniqueID() const { return fUniqueID; } 178 179 // Returns a debug name for the subclass implementation. name()180 const char* name() const { return fName.c_str(); } 181 182 // The size of the workgroup for this ComputeStep's entry point function. This value is hardware 183 // dependent. On Metal, this value should be used when invoking the dispatch API call. On all 184 // other backends, this value will be baked into the pipeline. localDispatchSize()185 WorkgroupSize localDispatchSize() const { return fLocalDispatchSize; } 186 187 // Data flow behavior queries: outputsVertices()188 bool outputsVertices() const { return fFlags & Flags::kOutputsVertexBuffer; } outputsIndices()189 bool outputsIndices() const { return fFlags & Flags::kOutputsIndexBuffer; } outputsInstances()190 bool outputsInstances() const { return fFlags & Flags::kOutputsInstanceBuffer; } writesIndirectDraw()191 bool writesIndirectDraw() const { return fFlags & Flags::kOutputsIndirectDrawBuffer; } 192 193 protected: 194 ComputeStep(std::string_view name, 195 WorkgroupSize localDispatchSize, 196 SkSpan<const ResourceDesc> resources); 197 198 private: 199 enum class Flags : uint8_t { 200 kNone = 0b0000, 201 kOutputsVertexBuffer = 0b0001, 202 kOutputsIndexBuffer = 0b0010, 203 kOutputsInstanceBuffer = 0b0100, 204 kOutputsIndirectDrawBuffer = 0b1000, 205 }; 206 SK_DECL_BITMASK_OPS_FRIENDS(Flags); 207 208 // Disallow copy and move 209 ComputeStep(const ComputeStep&) = delete; 210 ComputeStep(ComputeStep&&) = delete; 211 212 uint32_t fUniqueID; 213 SkEnumBitMask<Flags> fFlags; 214 std::string fName; 215 std::vector<ResourceDesc> fResources; 216 217 // TODO(b/240615224): Subclasses should simply specify the workgroup size that they need. 218 // The ComputeStep constructor should check and reduce that number based on the maximum 219 // supported workgroup size stored in Caps. In Metal, we'll pass this number directly to the 220 // dispatch API call. On other backends, we'll use this value to generate the right SkSL 221 // workgroup size declaration to avoid any validation failures. 222 WorkgroupSize fLocalDispatchSize; 223 }; 224 SK_MAKE_BITMASK_OPS(ComputeStep::Flags); 225 226 } // namespace skgpu::graphite 227 228 #endif // skgpu_graphite_ComputeStep_DEFINED 229