/*
 * Copyright 2024 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/ComputePathAtlas.h"

#include "include/gpu/graphite/Recorder.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/graphite/AtlasProvider.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/RasterPathUtils.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/RendererProvider.h"
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/TextureUtils.h"
#include "src/gpu/graphite/geom/Transform.h"

#ifdef SK_ENABLE_VELLO_SHADERS
#include "src/gpu/graphite/compute/DispatchGroup.h"
#endif

namespace skgpu::graphite {
namespace {

// TODO: This is the maximum target dimension that vello can handle today.
constexpr uint16_t kComputeAtlasDim = 4096;
// TODO: Currently we reject shapes that are larger than a fraction of a given atlas page to avoid
// creating too many flushes in a Recording containing many large path draws. These shapes often
// don't make efficient use of the available atlas texture space and the cost of sequential
// dispatches to render multiple atlas pages can be prohibitive.
constexpr size_t kBboxAreaThreshold = 1024 * 512;

// Coordinate size that is too large for vello to handle efficiently. See the discussion on
// https://github.com/linebender/vello/pull/542.
constexpr float kCoordinateThreshold = 1e10;

}  // namespace

ComputePathAtlas::ComputePathAtlas(Recorder* recorder)
        : PathAtlas(recorder, kComputeAtlasDim, kComputeAtlasDim)
        , fRectanizer(this->width(), this->height()) {}

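// Lazily fetch the storage-usage atlas texture from the AtlasProvider. All uncached masks placed
// by the Rectanizer are rendered into this single texture.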
bool ComputePathAtlas::initializeTextureIfNeeded() {
    if (!fTexture) {
        SkColorType targetCT = ComputeShaderCoverageMaskTargetFormat(fRecorder->priv().caps());
        fTexture = fRecorder->priv().atlasProvider()->getAtlasTexture(fRecorder,
                                                                      this->width(),
                                                                      this->height(),
                                                                      targetCT,
                                                                      /*identifier=*/0,
                                                                      /*requireStorageUsage=*/true);
    }
    return fTexture != nullptr;
}

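// A shape is suitable for this atlas only if its clipped, device-space mask fits on a single page,
// its area stays below kBboxAreaThreshold, and its unclipped bounds avoid coordinate sizes that
// vello can't yet handle efficiently.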
bool ComputePathAtlas::isSuitableForAtlasing(const Rect& transformedShapeBounds,
                                             const Rect& clipBounds) const {
    Rect shapeBounds = transformedShapeBounds.makeRoundOut();
    Rect maskBounds = shapeBounds.makeIntersect(clipBounds);
    skvx::float2 maskSize = maskBounds.size();
    float width = maskSize.x(), height = maskSize.y();

    if (width > this->width() || height > this->height()) {
        return false;
    }

    // For now we're allowing paths that are smaller than 1/32nd of the full 4096x4096 atlas size
    // to prevent the atlas texture from filling up too often. There are several approaches we
    // should explore to alleviate the cost of atlasing large paths.
    if (width * height > kBboxAreaThreshold) {
        return false;
    }

    // Reject pathological shapes that vello can't handle efficiently yet.
    skvx::float2 unclippedSize = shapeBounds.size();
    if (std::fabs(unclippedSize.x()) > kCoordinateThreshold ||
        std::fabs(unclippedSize.y()) > kCoordinateThreshold) {
        return false;
    }

    return true;
}

const TextureProxy* ComputePathAtlas::addRect(skvx::half2 maskSize,
                                              SkIPoint16* outPos) {
    if (!this->initializeTextureIfNeeded()) {
        SKGPU_LOG_E("Failed to instantiate an atlas texture");
        return nullptr;
    }

    // An empty mask always fits, so just return the texture.
    // TODO: This may not be needed if we can handle clipped out bounds with inverse fills
    // another way. See PathAtlas::addShape().
    if (!all(maskSize)) {
        *outPos = {0, 0};
        return fTexture.get();
    }

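    // Reserve room for the mask plus a kEntryPadding border via the Rectanizer; fail if the
    // current atlas page can no longer fit the request.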
    if (!fRectanizer.addPaddedRect(maskSize.x(), maskSize.y(), kEntryPadding, outPos)) {
        return nullptr;
    }

    return fTexture.get();
}

void ComputePathAtlas::reset() {
    fRectanizer.reset();

    this->onReset();
}

#ifdef SK_ENABLE_VELLO_SHADERS

/**
 * ComputePathAtlas that uses a VelloRenderer.
 */
class VelloComputePathAtlas final : public ComputePathAtlas {
public:
    explicit VelloComputePathAtlas(Recorder* recorder)
        : ComputePathAtlas(recorder)
        , fCachedAtlasMgr(fWidth, fHeight, recorder->priv().caps()) {}

    // Record the compute dispatches that will draw the atlas contents.
    bool recordDispatches(Recorder*, ComputeTask::DispatchGroupList*) const override;

private:
    const TextureProxy* onAddShape(const Shape&,
                                   const Transform& localToDevice,
                                   const SkStrokeRec&,
                                   skvx::half2 maskOrigin,
                                   skvx::half2 maskSize,
                                   SkIVector transformedMaskOffset,
                                   skvx::half2* outPos) override;
    void onReset() override {
        fCachedAtlasMgr.onReset();

        fUncachedScene.reset();
        fUncachedOccupiedArea = { 0, 0 };
    }

    class VelloAtlasMgr : public PathAtlas::DrawAtlasMgr {
    public:
        VelloAtlasMgr(size_t width, size_t height, const Caps* caps)
            : PathAtlas::DrawAtlasMgr(width, height, width, height,
                                      DrawAtlas::UseStorageTextures::kYes,
                                      /*label=*/"VelloPathAtlas", caps) {}

        bool recordDispatches(Recorder* recorder, ComputeTask::DispatchGroupList* dispatches) const;

        void onReset() {
            fDrawAtlas->markUsedPlotsAsFull();
            for (int i = 0; i < PlotLocator::kMaxMultitexturePages; ++i) {
                fScenes[i].reset();
                fOccupiedAreas[i] = {0, 0};
            }
        }

    protected:
        bool onAddToAtlas(const Shape&,
                          const Transform& localToDevice,
                          const SkStrokeRec&,
                          SkIRect shapeBounds,
                          SkIVector transformedMaskOffset,
                          const AtlasLocator&) override;

    private:
        VelloScene fScenes[PlotLocator::kMaxMultitexturePages];
        SkISize fOccupiedAreas[PlotLocator::kMaxMultitexturePages] = {
            {0, 0}, {0, 0}, {0, 0}, {0, 0}
        };
    };

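    // DrawAtlas-backed manager for masks whose Shapes have unique keys and so can be cached and
    // reused across draws (see onAddShape()).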
    VelloAtlasMgr fCachedAtlasMgr;

    // Contains the encoded scene buffer data that serves as the input to a vello compute pass.
    // For the uncached atlas.
    VelloScene fUncachedScene;

    // Occupied bounds of the uncached atlas.
    SkISize fUncachedOccupiedArea = { 0, 0 };
};

static VelloAaConfig get_vello_aa_config(Recorder* recorder) {
    // Use the analytic area AA mode unless caps say otherwise.
    VelloAaConfig config = VelloAaConfig::kAnalyticArea;
#if defined(GPU_TEST_UTILS)
    PathRendererStrategy strategy = recorder->priv().caps()->requestedPathRendererStrategy();
    if (strategy == PathRendererStrategy::kComputeMSAA16) {
        config = VelloAaConfig::kMSAA16;
    } else if (strategy == PathRendererStrategy::kComputeMSAA8) {
        config = VelloAaConfig::kMSAA8;
    }
#endif

    return config;
}

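// Submit `scene` to the VelloRenderer to be rendered into `texture`; the render is restricted to
// the occupied region of the atlas.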
static std::unique_ptr<DispatchGroup> render_vello_scene(Recorder* recorder,
                                                         sk_sp<TextureProxy> texture,
                                                         const VelloScene& scene,
                                                         SkISize occupiedArea,
                                                         VelloAaConfig config) {
    return recorder->priv().rendererProvider()->velloRenderer()->renderScene(
                {(uint32_t)occupiedArea.width(),
                 (uint32_t)occupiedArea.height(),
                 SkColors::kBlack,
                 config},
                scene,
                std::move(texture),
                recorder);
}

static void add_shape_to_scene(const Shape& shape,
                               const Transform& localToDevice,
                               const SkStrokeRec& style,
                               Rect atlasBounds,
                               SkIVector transformedMaskOffset,
                               VelloScene* scene,
                               SkISize* occupiedArea) {
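    // Grow the page's occupied area to cover this entry plus its padding; only this region gets
    // rendered when the scene's dispatches are recorded.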
    occupiedArea->fWidth = std::max(occupiedArea->fWidth,
                                    (int)atlasBounds.right() + PathAtlas::kEntryPadding);
    occupiedArea->fHeight = std::max(occupiedArea->fHeight,
                                     (int)atlasBounds.bot() + PathAtlas::kEntryPadding);

    // TODO(b/283876964): Apply clips here. Initially we'll need to encode the clip stack repeatedly
    // for each shape since the full vello renderer treats clips and their affected draws as a
    // single shape hierarchy in the same scene coordinate space. For coverage masks we want each
    // mask to be transformed to its atlas allocation coordinates and for the clip to be applied
    // with a translation relative to the atlas slot.
    //
    // Repeatedly encoding the clip stack should be relatively cheap (depending on how deep the
    // clips get) however it is wasteful both in terms of time and memory. If this proves to hurt
    // performance, future work will explore building an atlas-oriented element processing stage
    // that applies the atlas-relative translation while evaluating the stack monoid on the GPU.

    // Clip the mask to the bounds of the atlas slot, which are already inset by 1px relative to
    // the bounds that the Rectanizer assigned.
    SkPath clipRect = SkPath::Rect(atlasBounds.asSkRect());
    scene->pushClipLayer(clipRect, Transform::Identity());

    // The atlas transform of the shape is `localToDevice` translated by the top-left offset of the
    // 'atlasBounds' and the inverse of the base mask transform offset.
    Transform atlasTransform = localToDevice.postTranslate(
            atlasBounds.x() - transformedMaskOffset.x(),
            atlasBounds.y() - transformedMaskOffset.y());
    SkPath devicePath = shape.asPath();

    // For stroke-and-fill, draw two masks into the same atlas slot: one for the stroke and one for
    // the fill.
    SkStrokeRec::Style styleType = style.getStyle();
    if (styleType == SkStrokeRec::kStroke_Style ||
        styleType == SkStrokeRec::kHairline_Style ||
        styleType == SkStrokeRec::kStrokeAndFill_Style) {
        // We need to special-case hairline strokes and strokes with sub-pixel width as Vello
        // draws these with aliasing and the results are barely visible. Draw the stroke with a
        // device-space width of 1 pixel and scale down the alpha by the true width to approximate
        // the sampled area.
        float width = style.getWidth();
        float deviceWidth = width * atlasTransform.maxScaleFactor();
        if (style.isHairlineStyle() || deviceWidth <= 1.0) {
            // Both strokes get 1/2 weight scaled by the theoretical area (1 for hairlines,
            // `deviceWidth` otherwise).
            SkColor4f color = SkColors::kRed;
            color.fR *= style.isHairlineStyle() ? 1.0 : deviceWidth;

            // Transform the stroke's width to its local coordinate space since it'll get drawn with
            // `atlasTransform`.
            float transformedWidth = 1.0f / atlasTransform.maxScaleFactor();
            SkStrokeRec adjustedStyle(style);
            adjustedStyle.setStrokeStyle(transformedWidth);
            scene->solidStroke(devicePath, color, adjustedStyle, atlasTransform);
        } else {
            scene->solidStroke(devicePath, SkColors::kRed, style, atlasTransform);
        }
    }
    if (styleType == SkStrokeRec::kFill_Style || styleType == SkStrokeRec::kStrokeAndFill_Style) {
        scene->solidFill(devicePath, SkColors::kRed, shape.fillType(), atlasTransform);
    }

    scene->popClipLayer();
}

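// Record dispatches for the cached DrawAtlas pages first, then for the uncached atlas texture if
// any shapes were placed in it.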
bool VelloComputePathAtlas::recordDispatches(Recorder* recorder,
                                             ComputeTask::DispatchGroupList* dispatches) const {
    bool addedDispatches = fCachedAtlasMgr.recordDispatches(recorder, dispatches);

    if (this->texture() && !fUncachedOccupiedArea.isEmpty()) {
        SkASSERT(recorder && recorder == fRecorder);

        VelloAaConfig config = get_vello_aa_config(recorder);
        std::unique_ptr<DispatchGroup> dispatchGroup =
                render_vello_scene(recorder,
                                   sk_ref_sp(this->texture()),
                                   fUncachedScene,
                                   fUncachedOccupiedArea,
                                   config);
        if (dispatchGroup) {
            TRACE_EVENT_INSTANT1("skia.gpu", TRACE_FUNC, TRACE_EVENT_SCOPE_THREAD,
                                 "# dispatches", dispatchGroup->dispatches().size());
            dispatches->emplace_back(std::move(dispatchGroup));
            return true;
        } else {
            SKGPU_LOG_E("VelloComputePathAtlas:: Failed to create dispatch group.");
        }
    }

    return addedDispatches;
}

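// Add a shape's coverage mask to the atlas: shapes with unique keys go to the cached DrawAtlas
// when possible; everything else falls back to the uncached texture managed by the Rectanizer.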
const TextureProxy* VelloComputePathAtlas::onAddShape(
        const Shape& shape,
        const Transform& localToDevice,
        const SkStrokeRec& style,
        skvx::half2 maskOrigin,
        skvx::half2 maskSize,
        SkIVector transformedMaskOffset,
        skvx::half2* outPos) {

    skgpu::UniqueKey maskKey;
    bool hasKey = shape.hasKey();
    if (hasKey) {
        // Try to locate or add to cached DrawAtlas
        const TextureProxy* proxy = fCachedAtlasMgr.findOrCreateEntry(fRecorder,
                                                                      shape,
                                                                      localToDevice,
                                                                      style,
                                                                      maskOrigin,
                                                                      maskSize,
                                                                      transformedMaskOffset,
                                                                      outPos);
        if (proxy) {
            return proxy;
        }
    }

    // Try to add to uncached texture
    SkIPoint16 iPos;
    const TextureProxy* texProxy = this->addRect(maskSize, &iPos);
    if (!texProxy) {
        return nullptr;
    }
    *outPos = skvx::half2(iPos.x(), iPos.y());
    // If the mask is empty, just return.
    // TODO: This may not be needed if we can handle clipped out bounds with inverse fills
    // another way. See PathAtlas::addShape().
    if (!all(maskSize)) {
        return texProxy;
    }

    // TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
    // appropriately transformed in that case.
    SkASSERT(localToDevice.type() != Transform::Type::kPerspective);

    // Restrict the render to the occupied area of the atlas, including entry padding so that the
    // padded row/column is cleared when Vello renders.
    Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x(), iPos.y()), skvx::cast<float>(maskSize));

    add_shape_to_scene(shape, localToDevice, style, atlasBounds, transformedMaskOffset,
                       &fUncachedScene, &fUncachedOccupiedArea);

    return texProxy;
}

/////////////////////////////////////////////////////////////////////////////////////////

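// Encode the shape into the vello scene for the atlas page assigned by the DrawAtlas locator and
// grow that page's occupied area accordingly.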
bool VelloComputePathAtlas::VelloAtlasMgr::onAddToAtlas(const Shape& shape,
                                                        const Transform& localToDevice,
                                                        const SkStrokeRec& style,
                                                        SkIRect shapeBounds,
                                                        SkIVector transformedMaskOffset,
                                                        const AtlasLocator& locator) {
    uint32_t index = locator.pageIndex();
    const TextureProxy* texProxy = fDrawAtlas->getProxies()[index].get();
    if (!texProxy) {
        return false;
    }

    // TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
    // appropriately transformed in that case.
    SkASSERT(localToDevice.type() != Transform::Type::kPerspective);

    // Restrict the render to the occupied area of the atlas, including entry padding so that the
    // padded row/column is cleared when Vello renders.
    SkIPoint iPos = locator.topLeft();
    Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x() + kEntryPadding, iPos.y() + kEntryPadding),
                                  skvx::float2(shapeBounds.width(), shapeBounds.height()));

    add_shape_to_scene(shape, localToDevice, style, atlasBounds, transformedMaskOffset,
                       &fScenes[index], &fOccupiedAreas[index]);

    return true;
}

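// Record one dispatch group per atlas page that has pending scene content.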
bool VelloComputePathAtlas::VelloAtlasMgr::recordDispatches(
        Recorder* recorder, ComputeTask::DispatchGroupList* dispatches) const {
    SkASSERT(recorder);
    VelloAaConfig config = get_vello_aa_config(recorder);

    bool addedDispatches = false;
    for (int i = 0; i < PlotLocator::kMaxMultitexturePages; ++i) {
        if (!fOccupiedAreas[i].isEmpty()) {
            std::unique_ptr<DispatchGroup> dispatchGroup =
                    render_vello_scene(recorder,
                                       fDrawAtlas->getProxies()[i],
                                       fScenes[i],
                                       fOccupiedAreas[i],
                                       config);
            if (dispatchGroup) {
                TRACE_EVENT_INSTANT1("skia.gpu", TRACE_FUNC, TRACE_EVENT_SCOPE_THREAD,
                                     "# dispatches", dispatchGroup->dispatches().size());
                dispatches->emplace_back(std::move(dispatchGroup));
                addedDispatches = true;
            } else {
                SKGPU_LOG_E("VelloComputePathAtlas:: Failed to create dispatch group.");
            }
        }
    }
    return addedDispatches;
}


#endif  // SK_ENABLE_VELLO_SHADERS

std::unique_ptr<ComputePathAtlas> ComputePathAtlas::CreateDefault(Recorder* recorder) {
#ifdef SK_ENABLE_VELLO_SHADERS
    return std::make_unique<VelloComputePathAtlas>(recorder);
#else
    return nullptr;
#endif
}

}  // namespace skgpu::graphite