1 /*
2 * Copyright 2024 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/gpu/graphite/ComputePathAtlas.h"
9
10 #include "include/gpu/graphite/Recorder.h"
11 #include "src/core/SkTraceEvent.h"
12 #include "src/gpu/graphite/AtlasProvider.h"
13 #include "src/gpu/graphite/Caps.h"
14 #include "src/gpu/graphite/Log.h"
15 #include "src/gpu/graphite/RasterPathUtils.h"
16 #include "src/gpu/graphite/RecorderPriv.h"
17 #include "src/gpu/graphite/RendererProvider.h"
18 #include "src/gpu/graphite/TextureProxy.h"
19 #include "src/gpu/graphite/TextureUtils.h"
20 #include "src/gpu/graphite/geom/Transform.h"
21
22 #ifdef SK_ENABLE_VELLO_SHADERS
23 #include "src/gpu/graphite/compute/DispatchGroup.h"
24 #endif
25
26 namespace skgpu::graphite {
namespace {

// TODO: This is the maximum target dimension that vello can handle today.
constexpr uint16_t kComputeAtlasDim = 4096;

// TODO: Currently we reject shapes whose clipped bounding-box area is larger than this threshold
// (1/32nd of a kComputeAtlasDim x kComputeAtlasDim atlas page) to avoid creating too many flushes
// in a Recording containing many large path draws. These shapes often don't make efficient use of
// the available atlas texture space and the cost of sequential dispatches to render multiple atlas
// pages can be prohibitive.
constexpr size_t kBboxAreaThreshold = 1024 * 512;

// Coordinate size that is too large for vello to handle efficiently. See the discussion on
// https://github.com/linebender/vello/pull/542.
constexpr float kCoordinateThreshold = 1e10f;

}  // namespace
43
ComputePathAtlas(Recorder * recorder)44 ComputePathAtlas::ComputePathAtlas(Recorder* recorder)
45 : PathAtlas(recorder, kComputeAtlasDim, kComputeAtlasDim)
46 , fRectanizer(this->width(), this->height()) {}
47
initializeTextureIfNeeded()48 bool ComputePathAtlas::initializeTextureIfNeeded() {
49 if (!fTexture) {
50 SkColorType targetCT = ComputeShaderCoverageMaskTargetFormat(fRecorder->priv().caps());
51 fTexture = fRecorder->priv().atlasProvider()->getAtlasTexture(fRecorder,
52 this->width(),
53 this->height(),
54 targetCT,
55 /*identifier=*/0,
56 /*requireStorageUsage=*/true);
57 }
58 return fTexture != nullptr;
59 }
60
isSuitableForAtlasing(const Rect & transformedShapeBounds,const Rect & clipBounds) const61 bool ComputePathAtlas::isSuitableForAtlasing(const Rect& transformedShapeBounds,
62 const Rect& clipBounds) const {
63 Rect shapeBounds = transformedShapeBounds.makeRoundOut();
64 Rect maskBounds = shapeBounds.makeIntersect(clipBounds);
65 skvx::float2 maskSize = maskBounds.size();
66 float width = maskSize.x(), height = maskSize.y();
67
68 if (width > this->width() || height > this->height()) {
69 return false;
70 }
71
72 // For now we're allowing paths that are smaller than 1/32nd of the full 4096x4096 atlas size
73 // to prevent the atlas texture from filling up too often. There are several approaches we
74 // should explore to alleviate the cost of atlasing large paths.
75 if (width * height > kBboxAreaThreshold) {
76 return false;
77 }
78
79 // Reject pathological shapes that vello can't handle efficiently yet.
80 skvx::float2 unclippedSize = shapeBounds.size();
81 if (std::fabs(unclippedSize.x()) > kCoordinateThreshold ||
82 std::fabs(unclippedSize.y()) > kCoordinateThreshold) {
83 return false;
84 }
85
86 return true;
87 }
88
addRect(skvx::half2 maskSize,SkIPoint16 * outPos)89 const TextureProxy* ComputePathAtlas::addRect(skvx::half2 maskSize,
90 SkIPoint16* outPos) {
91 if (!this->initializeTextureIfNeeded()) {
92 SKGPU_LOG_E("Failed to instantiate an atlas texture");
93 return nullptr;
94 }
95
96 // An empty mask always fits, so just return the texture.
97 // TODO: This may not be needed if we can handle clipped out bounds with inverse fills
98 // another way. See PathAtlas::addShape().
99 if (!all(maskSize)) {
100 *outPos = {0, 0};
101 return fTexture.get();
102 }
103
104 if (!fRectanizer.addPaddedRect(maskSize.x(), maskSize.y(), kEntryPadding, outPos)) {
105 return nullptr;
106 }
107
108 return fTexture.get();
109 }
110
reset()111 void ComputePathAtlas::reset() {
112 fRectanizer.reset();
113
114 this->onReset();
115 }
116
117 #ifdef SK_ENABLE_VELLO_SHADERS
118
119 /**
120 * ComputePathAtlas that uses a VelloRenderer.
121 */
122 class VelloComputePathAtlas final : public ComputePathAtlas {
123 public:
VelloComputePathAtlas(Recorder * recorder)124 explicit VelloComputePathAtlas(Recorder* recorder)
125 : ComputePathAtlas(recorder)
126 , fCachedAtlasMgr(fWidth, fHeight, recorder->priv().caps()) {}
127 // Record the compute dispatches that will draw the atlas contents.
128 bool recordDispatches(Recorder*, ComputeTask::DispatchGroupList*) const override;
129
130 private:
131 const TextureProxy* onAddShape(const Shape&,
132 const Transform& localToDevice,
133 const SkStrokeRec&,
134 skvx::half2 maskOrigin,
135 skvx::half2 maskSize,
136 SkIVector transformedMaskOffset,
137 skvx::half2* outPos) override;
onReset()138 void onReset() override {
139 fCachedAtlasMgr.onReset();
140
141 fUncachedScene.reset();
142 fUncachedOccupiedArea = { 0, 0 };
143 }
144
145 class VelloAtlasMgr : public PathAtlas::DrawAtlasMgr {
146 public:
VelloAtlasMgr(size_t width,size_t height,const Caps * caps)147 VelloAtlasMgr(size_t width, size_t height, const Caps* caps)
148 : PathAtlas::DrawAtlasMgr(width, height, width, height,
149 DrawAtlas::UseStorageTextures::kYes,
150 /*label=*/"VelloPathAtlas", caps) {}
151
152 bool recordDispatches(Recorder* recorder, ComputeTask::DispatchGroupList* dispatches) const;
153
onReset()154 void onReset() {
155 fDrawAtlas->markUsedPlotsAsFull();
156 for (int i = 0; i < PlotLocator::kMaxMultitexturePages; ++i) {
157 fScenes[i].reset();
158 fOccupiedAreas[i] = {0, 0};
159 }
160 }
161
162 protected:
163 bool onAddToAtlas(const Shape&,
164 const Transform& localToDevice,
165 const SkStrokeRec&,
166 SkIRect shapeBounds,
167 SkIVector transformedMaskOffset,
168 const AtlasLocator&) override;
169
170 private:
171 VelloScene fScenes[PlotLocator::kMaxMultitexturePages];
172 SkISize fOccupiedAreas[PlotLocator::kMaxMultitexturePages] = {
173 {0, 0}, {0, 0}, {0, 0}, {0, 0}
174 };
175 };
176
177 VelloAtlasMgr fCachedAtlasMgr;
178
179 // Contains the encoded scene buffer data that serves as the input to a vello compute pass.
180 // For the uncached atlas.
181 VelloScene fUncachedScene;
182
183 // Occupied bounds of the uncached atlas
184 SkISize fUncachedOccupiedArea = { 0, 0 };
185 };
186
get_vello_aa_config(Recorder * recorder)187 static VelloAaConfig get_vello_aa_config(Recorder* recorder) {
188 // Use the analytic area AA mode unless caps say otherwise.
189 VelloAaConfig config = VelloAaConfig::kAnalyticArea;
190 #if defined(GPU_TEST_UTILS)
191 PathRendererStrategy strategy = recorder->priv().caps()->requestedPathRendererStrategy();
192 if (strategy == PathRendererStrategy::kComputeMSAA16) {
193 config = VelloAaConfig::kMSAA16;
194 } else if (strategy == PathRendererStrategy::kComputeMSAA8) {
195 config = VelloAaConfig::kMSAA8;
196 }
197 #endif
198
199 return config;
200 }
201
render_vello_scene(Recorder * recorder,sk_sp<TextureProxy> texture,const VelloScene & scene,SkISize occupiedArea,VelloAaConfig config)202 static std::unique_ptr<DispatchGroup> render_vello_scene(Recorder* recorder,
203 sk_sp<TextureProxy> texture,
204 const VelloScene& scene,
205 SkISize occupiedArea,
206 VelloAaConfig config) {
207 return recorder->priv().rendererProvider()->velloRenderer()->renderScene(
208 {(uint32_t)occupiedArea.width(),
209 (uint32_t)occupiedArea.height(),
210 SkColors::kBlack,
211 config},
212 scene,
213 std::move(texture),
214 recorder);
215 }
216
add_shape_to_scene(const Shape & shape,const Transform & localToDevice,const SkStrokeRec & style,Rect atlasBounds,SkIVector transformedMaskOffset,VelloScene * scene,SkISize * occupiedArea)217 static void add_shape_to_scene(const Shape& shape,
218 const Transform& localToDevice,
219 const SkStrokeRec& style,
220 Rect atlasBounds,
221 SkIVector transformedMaskOffset,
222 VelloScene* scene,
223 SkISize* occupiedArea) {
224 occupiedArea->fWidth = std::max(occupiedArea->fWidth,
225 (int)atlasBounds.right() + PathAtlas::kEntryPadding);
226 occupiedArea->fHeight = std::max(occupiedArea->fHeight,
227 (int)atlasBounds.bot() + PathAtlas::kEntryPadding);
228
229 // TODO(b/283876964): Apply clips here. Initially we'll need to encode the clip stack repeatedly
230 // for each shape since the full vello renderer treats clips and their affected draws as a
231 // single shape hierarchy in the same scene coordinate space. For coverage masks we want each
232 // mask to be transformed to its atlas allocation coordinates and for the clip to be applied
233 // with a translation relative to the atlas slot.
234 //
235 // Repeatedly encoding the clip stack should be relatively cheap (depending on how deep the
236 // clips get) however it is wasteful both in terms of time and memory. If this proves to hurt
237 // performance, future work will explore building an atlas-oriented element processing stage
238 // that applies the atlas-relative translation while evaluating the stack monoid on the GPU.
239
240 // Clip the mask to the bounds of the atlas slot, which are already inset by 1px relative to
241 // the bounds that the Rectanizer assigned.
242 SkPath clipRect = SkPath::Rect(atlasBounds.asSkRect());
243 scene->pushClipLayer(clipRect, Transform::Identity());
244
245 // The atlas transform of the shape is `localToDevice` translated by the top-left offset of the
246 // 'atlasBounds' and the inverse of the base mask transform offset.
247 Transform atlasTransform = localToDevice.postTranslate(
248 atlasBounds.x()-transformedMaskOffset.x(), atlasBounds.y()-transformedMaskOffset.y());
249 SkPath devicePath = shape.asPath();
250
251 // For stroke-and-fill, draw two masks into the same atlas slot: one for the stroke and one for
252 // the fill.
253 SkStrokeRec::Style styleType = style.getStyle();
254 if (styleType == SkStrokeRec::kStroke_Style ||
255 styleType == SkStrokeRec::kHairline_Style ||
256 styleType == SkStrokeRec::kStrokeAndFill_Style) {
257 // We need to special-case hairline strokes and strokes with sub-pixel width as Vello
258 // draws these with aliasing and the results are barely visible. Draw the stroke with a
259 // device-space width of 1 pixel and scale down the alpha by the true width to approximate
260 // the sampled area.
261 float width = style.getWidth();
262 float deviceWidth = width * atlasTransform.maxScaleFactor();
263 if (style.isHairlineStyle() || deviceWidth <= 1.0) {
264 // Both strokes get 1/2 weight scaled by the theoretical area (1 for hairlines,
265 // `deviceWidth` otherwise).
266 SkColor4f color = SkColors::kRed;
267 color.fR *= style.isHairlineStyle() ? 1.0 : deviceWidth;
268
269 // Transform the stroke's width to its local coordinate space since it'll get drawn with
270 // `atlasTransform`.
271 float transformedWidth = 1.0f / atlasTransform.maxScaleFactor();
272 SkStrokeRec adjustedStyle(style);
273 adjustedStyle.setStrokeStyle(transformedWidth);
274 scene->solidStroke(devicePath, color, adjustedStyle, atlasTransform);
275 } else {
276 scene->solidStroke(devicePath, SkColors::kRed, style, atlasTransform);
277 }
278 }
279 if (styleType == SkStrokeRec::kFill_Style || styleType == SkStrokeRec::kStrokeAndFill_Style) {
280 scene->solidFill(devicePath, SkColors::kRed, shape.fillType(), atlasTransform);
281 }
282
283 scene->popClipLayer();
284 }
285
recordDispatches(Recorder * recorder,ComputeTask::DispatchGroupList * dispatches) const286 bool VelloComputePathAtlas::recordDispatches(Recorder* recorder,
287 ComputeTask::DispatchGroupList* dispatches) const {
288 bool addedDispatches = fCachedAtlasMgr.recordDispatches(recorder, dispatches);
289
290 if (this->texture() && !fUncachedOccupiedArea.isEmpty()) {
291 SkASSERT(recorder && recorder == fRecorder);
292
293 VelloAaConfig config = get_vello_aa_config(recorder);
294 std::unique_ptr<DispatchGroup> dispatchGroup =
295 render_vello_scene(recorder,
296 sk_ref_sp(this->texture()),
297 fUncachedScene,
298 fUncachedOccupiedArea,
299 config);
300 if (dispatchGroup) {
301 TRACE_EVENT_INSTANT1("skia.gpu", TRACE_FUNC, TRACE_EVENT_SCOPE_THREAD,
302 "# dispatches", dispatchGroup->dispatches().size());
303 dispatches->emplace_back(std::move(dispatchGroup));
304 return true;
305 } else {
306 SKGPU_LOG_E("VelloComputePathAtlas:: Failed to create dispatch group.");
307 }
308 }
309
310 return addedDispatches;
311 }
312
onAddShape(const Shape & shape,const Transform & localToDevice,const SkStrokeRec & style,skvx::half2 maskOrigin,skvx::half2 maskSize,SkIVector transformedMaskOffset,skvx::half2 * outPos)313 const TextureProxy* VelloComputePathAtlas::onAddShape(
314 const Shape& shape,
315 const Transform& localToDevice,
316 const SkStrokeRec& style,
317 skvx::half2 maskOrigin,
318 skvx::half2 maskSize,
319 SkIVector transformedMaskOffset,
320 skvx::half2* outPos) {
321
322 skgpu::UniqueKey maskKey;
323 bool hasKey = shape.hasKey();
324 if (hasKey) {
325 // Try to locate or add to cached DrawAtlas
326 const TextureProxy* proxy = fCachedAtlasMgr.findOrCreateEntry(fRecorder,
327 shape,
328 localToDevice,
329 style,
330 maskOrigin,
331 maskSize,
332 transformedMaskOffset,
333 outPos);
334 if (proxy) {
335 return proxy;
336 }
337 }
338
339 // Try to add to uncached texture
340 SkIPoint16 iPos;
341 const TextureProxy* texProxy = this->addRect(maskSize, &iPos);
342 if (!texProxy) {
343 return nullptr;
344 }
345 *outPos = skvx::half2(iPos.x(), iPos.y());
346 // If the mask is empty, just return.
347 // TODO: This may not be needed if we can handle clipped out bounds with inverse fills
348 // another way. See PathAtlas::addShape().
349 if (!all(maskSize)) {
350 return texProxy;
351 }
352
353 // TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
354 // appropriately transformed in that case.
355 SkASSERT(localToDevice.type() != Transform::Type::kPerspective);
356
357 // Restrict the render to the occupied area of the atlas, including entry padding so that the
358 // padded row/column is cleared when Vello renders.
359 Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x(), iPos.y()), skvx::cast<float>(maskSize));
360
361 add_shape_to_scene(shape, localToDevice, style, atlasBounds, transformedMaskOffset,
362 &fUncachedScene, &fUncachedOccupiedArea);
363
364 return texProxy;
365 }
366
367 /////////////////////////////////////////////////////////////////////////////////////////
368
onAddToAtlas(const Shape & shape,const Transform & localToDevice,const SkStrokeRec & style,SkIRect shapeBounds,SkIVector transformedMaskOffset,const AtlasLocator & locator)369 bool VelloComputePathAtlas::VelloAtlasMgr::onAddToAtlas(const Shape& shape,
370 const Transform& localToDevice,
371 const SkStrokeRec& style,
372 SkIRect shapeBounds,
373 SkIVector transformedMaskOffset,
374 const AtlasLocator& locator) {
375 uint32_t index = locator.pageIndex();
376 const TextureProxy* texProxy = fDrawAtlas->getProxies()[index].get();
377 if (!texProxy) {
378 return false;
379 }
380
381 // TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
382 // appropriately transformed in that case.
383 SkASSERT(localToDevice.type() != Transform::Type::kPerspective);
384
385 // Restrict the render to the occupied area of the atlas, including entry padding so that the
386 // padded row/column is cleared when Vello renders.
387 SkIPoint iPos = locator.topLeft();
388 Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x() + kEntryPadding, iPos.y() + kEntryPadding),
389 skvx::float2(shapeBounds.width(), shapeBounds.height()));
390
391 add_shape_to_scene(shape, localToDevice, style, atlasBounds, transformedMaskOffset,
392 &fScenes[index], &fOccupiedAreas[index]);
393
394 return true;
395 }
396
recordDispatches(Recorder * recorder,ComputeTask::DispatchGroupList * dispatches) const397 bool VelloComputePathAtlas::VelloAtlasMgr::recordDispatches(
398 Recorder* recorder, ComputeTask::DispatchGroupList* dispatches) const {
399 SkASSERT(recorder);
400 VelloAaConfig config = get_vello_aa_config(recorder);
401
402 bool addedDispatches = false;
403 for (int i = 0; i < 4; ++i) {
404 if (!fOccupiedAreas[i].isEmpty()) {
405 std::unique_ptr<DispatchGroup> dispatchGroup =
406 render_vello_scene(recorder,
407 fDrawAtlas->getProxies()[i],
408 fScenes[i],
409 fOccupiedAreas[i],
410 config);
411 if (dispatchGroup) {
412 TRACE_EVENT_INSTANT1("skia.gpu", TRACE_FUNC, TRACE_EVENT_SCOPE_THREAD,
413 "# dispatches", dispatchGroup->dispatches().size());
414 dispatches->emplace_back(std::move(dispatchGroup));
415 addedDispatches = true;
416 } else {
417 SKGPU_LOG_E("VelloComputePathAtlas:: Failed to create dispatch group.");
418 }
419 }
420 }
421 return addedDispatches;
422 }
423
424
425 #endif // SK_ENABLE_VELLO_SHADERS
426
CreateDefault(Recorder * recorder)427 std::unique_ptr<ComputePathAtlas> ComputePathAtlas::CreateDefault(Recorder* recorder) {
428 #ifdef SK_ENABLE_VELLO_SHADERS
429 return std::make_unique<VelloComputePathAtlas>(recorder);
430 #else
431 return nullptr;
432 #endif
433 }
434
435 } // namespace skgpu::graphite
436