/*
 * Copyright 2021 Google LLC.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
8 #ifndef skgpu_tessellate_PatchWriter_DEFINED
9 #define skgpu_tessellate_PatchWriter_DEFINED
10 
11 #include "include/core/SkScalar.h"
12 #include "include/core/SkTypes.h"
13 #include "include/private/base/SkDebug.h"
14 #include "include/private/base/SkFloatingPoint.h"
15 #include "include/private/base/SkPoint_impl.h"
16 #include "include/private/base/SkTemplates.h"
17 #include "src/base/SkUtils.h"
18 #include "src/base/SkVx.h"
19 #include "src/core/SkColorData.h"
20 #include "src/gpu/BufferWriter.h"
21 #include "src/gpu/tessellate/LinearTolerances.h"
22 #include "src/gpu/tessellate/MiddleOutPolygonTriangulator.h"
23 #include "src/gpu/tessellate/Tessellation.h"
24 #include "src/gpu/tessellate/WangsFormula.h"
25 
26 #include <algorithm>
27 #include <cstdint>
28 #include <cstring>
29 #include <math.h>
30 #include <tuple>
31 #include <type_traits>
32 #include <utility>
33 #include <variant>
34 
35 namespace skgpu::tess {
36 
37 /**
38  * PatchWriter writes out tessellation patches, formatted with their specific attribs, to a GPU
39  * buffer.
40  *
41  * PatchWriter is a template class that takes traits to configure both its compile-time and runtime
42  * behavior for the different tessellation rendering algorithms and GPU backends. The complexity of
43  * this system is worthwhile because the attribute writing operations and math already require
44  * heavy inlining for performance, and the algorithmic variations tend to only differ slightly, but
45  * do so in the inner most loops. Additionally, Graphite and Ganesh use the same fundamental
46  * algorithms, but Graphite's architecture and higher required hardware level mean that its
47  * attribute configurations can be determined entirely at compile time.
48  *
49  * Traits are specified in PatchWriter's single var-args template pack. Traits come in two main
50  * categories: PatchAttribs configuration and feature/processing configuration. A given PatchAttrib
 * can be always enabled, enabled at runtime, or always disabled. Features can be either enabled
 * or disabled and are coupled more closely with the control points of the curve. Across the two
53  * GPU backends and different path rendering strategies, a "patch" has the following structure:
54  *
55  *   - 4 control points (8 floats total) defining the curve's geometry
56  *      - quadratic curves are converted to equivalent cubics on the CPU during writing
57  *      - conic curves store {w, inf} in their last control point
58  *      - triangles store {inf, inf} in their last control point
59  *      - everything else is presumed to be a cubic defined by all 4 control points
60  *   - Enabled PatchAttrib values, constant for the entire instance
61  *      - layout is identical to PatchAttrib's definition, skipping disabled attribs
62  *      - attribs can be enabled/disabled at runtime by building a mask of attrib values
63  *
 * Currently PatchWriter supports the following traits:
 *   - Required<PatchAttrib>
 *   - Optional<PatchAttrib>
 *   - TrackJoinControlPoints
 *   - AddTrianglesWhenChopping
 *   - DiscardFlatCurves
 *   - ReplicateLineEndPoints
70  *
71  * In addition to variable traits, PatchWriter's first template argument defines the type used for
72  * allocating the GPU instance data. The templated "PatchAllocator" can be any type that provides:
73  *    // A GPU-backed vertex writer for a single instance worth of data. The provided
74  *    // LinearTolerances value represents the tolerances for the curve that will be written to the
75  *    // returned vertex space.
76  *    skgpu::VertexWriter append(const LinearTolerances&);
77  *
78  * Additionally, it must have a constructor that takes the stride as its first argument.
79  * PatchWriter forwards any additional constructor args from its ctor to the allocator after
80  * computing the necessary stride for its PatchAttribs configuration.
81  */
82 
83 // *** TRAITS ***
84 
85 // Marks a PatchAttrib is enabled at compile time, i.e. it must always be set and will always be
86 // written to each patch's instance data. If present, will assert if the runtime attribs do not fit.
87 template <PatchAttribs A> struct Required {};
88 // Marks a PatchAttrib as supported, i.e. it can be enabled or disabled at runtime. Optional<A> is
89 // overridden by Required<A>. If neither Required<A> nor Optional<A> are in a PatchWriter's trait
90 // list, then the attrib is disabled at compile time and it will assert if the runtime attribs
91 // attempt to enable it.
92 template <PatchAttribs A> struct Optional {};
93 
// Trait tag: turns on automatic tracking of the kJoinControlPoint attrib from the control points
// of the previously written patch (curve chopping is accounted for automatically). The first
// patch written has no prior patch to define its join control point, so the PatchWriter records
// it to a temporary buffer--without the join--until writeDeferredStrokePatch() is called, at
// which point the now-defined join control point is filled in.
//
// Must be combined with Required<PatchAttribs::kJoinControlPoint>.
struct TrackJoinControlPoints {};
102 
// Trait tag: emit extra triangular patches that fill the gap left behind when a curve is chopped.
//
// A patch's geometry normally covers its curve up to the implicit closing edge between the first
// and last control points. Once a curve is chopped to fit within the maximum segment count, the
// region between the original closing edge and the new closing edges is left unfilled unless the
// shader covers it some other way (e.g. a fan point or stroking). With this trait the writer
// fills that region automatically.
struct AddTrianglesWhenChopping {};
113 
// Trait tag: skip curves that need at most 1 segment to render accurately. Such a curve is
// effectively a straight line, and this trait assumes another render pass (e.g. middle-out or
// inner triangulation) produces the equivalent geometry.
struct DiscardFlatCurves {};
118 
// Trait tag: upload lines as the cubic {a, a, b, b} rather than the truly linear cubic
// {a, 2/3a + 1/3b, 1/3a + 2/3b, b}. Wang's formula does not return a tight bound on the number of
// segments for this arrangement, but it is convenient to detect in the vertex shader, which can
// then assume a single segment. Doing so sidesteps numerical stability issues in Wang's formula
// when it is evaluated on the ideal linear cubic with very large control point coordinates.
// Other curve types with large coordinates do not need this treatment since they would be
// pre-chopped and culled to lines.
struct ReplicateLineEndPoints {};
127 
128 // *** PatchWriter internals ***
129 
130 // AttribValue exposes a consistent store and write interface for a PatchAttrib's value while
131 // abstracting over compile-time enabled, conditionally-enabled, or compile-time disabled attribs.
132 template <PatchAttribs A, typename T, bool Required, bool Optional>
133 struct AttribValue {
134     using DataType = std::conditional_t<Required, T,
135                      std::conditional_t<Optional, std::pair<T, bool>,
136                                        /* else */ std::monostate>>;
137 
138     static constexpr bool kEnabled = Required || Optional;
139 
AttribValueAttribValue140     explicit AttribValue(PatchAttribs attribs) : AttribValue(attribs, {}) {}
AttribValueAttribValue141     AttribValue(PatchAttribs attribs, const T& t) {
142         (void) attribs; // may be unused on release builds
143         if constexpr (Required) {
144             SkASSERT(attribs & A);
145         } else if constexpr (Optional) {
146             std::get<1>(fV) = attribs & A;
147         } else {
148             SkASSERT(!(attribs & A));
149         }
150         *this = t;
151     }
152 
153     AttribValue& operator=(const T& v) {
154         if constexpr (Required) {
155             fV = v;
156         } else if constexpr (Optional) {
157             // for simplicity, store even if disabled and won't be written out to VertexWriter
158             std::get<0>(fV) = v;
159         } // else ignore for disabled values
160         return *this;
161     }
162 
163     DataType fV;
164 };
165 
166 template <PatchAttribs A, typename T, bool Required, bool Optional>
167 VertexWriter& operator<<(VertexWriter& w, const AttribValue<A, T, Required, Optional>& v) {
168     if constexpr (Required) {
169         w << v.fV; // always write
170     } else if constexpr (Optional) {
171         if (std::get<1>(v.fV)) {
172             w << std::get<0>(v.fV); // write if enabled
173         }
174     } // else never write
175     return w;
176 }
177 
// Holds the deferral state and the bytes of one deferred patch for PatchWriters that use
// TrackJoinControlPoints.
template <size_t Stride>
struct PatchStorage {
    // Sentinel stored in fN_p4 while no patch is pending.
    static constexpr float kNoPendingPatch = -1.f;

    float fN_p4      = kNoPendingPatch; // Parametric segment value to restore on LinearTolerances
    bool  fMustDefer = true;            // True means the next patch must be deferred

    // One whole patch; its join control point is left undefined until the patch is flushed.
    char fData[Stride];

    // A patch is pending once a non-negative parametric segment value has been recorded.
    bool hasPending() const { return fN_p4 >= 0.f; }

    // Forgets any pending patch and requires the next patch to be deferred again.
    void reset() {
        fMustDefer = true;
        fN_p4 = kNoPendingPatch;
    }
};
195 
196 // An empty object that has the same constructor signature as MiddleOutPolygonTriangulator, used
197 // as a stand-in when AddTrianglesWhenChopping is not a defined trait.
198 struct NullTriangulator {
NullTriangulatorNullTriangulator199     NullTriangulator(int, SkPoint) {}
200 };
201 
// Shorthand for forced inlining of the hot write paths below.
#define AI SK_ALWAYS_INLINE
// Declares a member function template returning void that only participates in overload
// resolution when 'cond' is true. Usage: ENABLE_IF(cond) name(args) { ... }
#define ENABLE_IF(cond) template <typename Void=void> std::enable_if_t<cond, Void>
204 
205 // *** PatchWriter ***
206 template <typename PatchAllocator, typename... Traits>
207 class PatchWriter {
208     // Helpers to extract specifics from the template traits pack.
209     template <typename F>     struct has_trait  : std::disjunction<std::is_same<F, Traits>...> {};
210     template <PatchAttribs A> using  req_attrib = has_trait<Required<A>>;
211     template <PatchAttribs A> using  opt_attrib = has_trait<Optional<A>>;
212 
213     // Enabled features and attribute configuration
214     static constexpr bool kTrackJoinControlPoints   = has_trait<TrackJoinControlPoints>::value;
215     static constexpr bool kAddTrianglesWhenChopping = has_trait<AddTrianglesWhenChopping>::value;
216     static constexpr bool kDiscardFlatCurves        = has_trait<DiscardFlatCurves>::value;
217     static constexpr bool kReplicateLineEndPoints   = has_trait<ReplicateLineEndPoints>::value;
218 
219     // NOTE: MSVC 19.24 cannot compile constexpr fold expressions referenced in templates, so
220     // extract everything into constexpr bool's instead of using `req_attrib` directly, etc. :(
221     template <PatchAttribs A, typename T, bool Req/*=req_attrib<A>*/, bool Opt/*=opt_attrib<A>*/>
222     using attrib_t = AttribValue<A, T, Req, Opt>;
223 
224     // TODO: Remove when MSVC compiler is fixed, in favor of `using Name = attrib_t<>` directly.
225 #define DEF_ATTRIB_TYPE(name, A, T) \
226     static constexpr bool kRequire##name = req_attrib<A>::value; \
227     static constexpr bool kOptional##name = opt_attrib<A>::value; \
228     using name = attrib_t<A, T, kRequire##name, kOptional##name>
229 
230     DEF_ATTRIB_TYPE(JoinAttrib,      PatchAttribs::kJoinControlPoint,  SkPoint);
231     DEF_ATTRIB_TYPE(FanPointAttrib,  PatchAttribs::kFanPoint,          SkPoint);
232     DEF_ATTRIB_TYPE(StrokeAttrib,    PatchAttribs::kStrokeParams,      StrokeParams);
233 
234     // kWideColorIfEnabled does not define an attribute, but changes the type of the kColor attrib.
235     static constexpr bool kRequireWideColor  = req_attrib<PatchAttribs::kWideColorIfEnabled>::value;
236     static constexpr bool kOptionalWideColor = opt_attrib<PatchAttribs::kWideColorIfEnabled>::value;
237     using Color = std::conditional_t<kRequireWideColor,  SkPMColor4f,
238                   std::conditional_t<kOptionalWideColor, VertexColor,
239                                               /* else */ uint32_t>>;
240 
241     DEF_ATTRIB_TYPE(ColorAttrib,     PatchAttribs::kColor,             Color);
242     DEF_ATTRIB_TYPE(DepthAttrib,     PatchAttribs::kPaintDepth,        float);
243     DEF_ATTRIB_TYPE(CurveTypeAttrib, PatchAttribs::kExplicitCurveType, float);
244     DEF_ATTRIB_TYPE(SsboIndexAttrib, PatchAttribs::kSsboIndex,         skvx::uint2);
245 #undef DEF_ATTRIB_TYPE
246 
247     static constexpr size_t kMaxStride = 4 * sizeof(SkPoint) + // control points
248             (JoinAttrib::kEnabled      ? sizeof(SkPoint)                              : 0) +
249             (FanPointAttrib::kEnabled  ? sizeof(SkPoint)                              : 0) +
250             (StrokeAttrib::kEnabled    ? sizeof(StrokeParams)                         : 0) +
251             (ColorAttrib::kEnabled     ? std::min(sizeof(Color), sizeof(SkPMColor4f)) : 0) +
252             (DepthAttrib::kEnabled     ? sizeof(float)                                : 0) +
253             (CurveTypeAttrib::kEnabled ? sizeof(float)                                : 0) +
254             (SsboIndexAttrib::kEnabled ? 2 * sizeof(uint32_t)                         : 0);
255 
256     // Types that vary depending on the activated features, but do not define the patch data.
257     using DeferredPatch = std::conditional_t<kTrackJoinControlPoints,
258             PatchStorage<kMaxStride>, std::monostate>;
259     using InnerTriangulator = std::conditional_t<kAddTrianglesWhenChopping,
260             MiddleOutPolygonTriangulator, NullTriangulator>;
261 
262     using float2 = skvx::float2;
263     using float4 = skvx::float4;
264 
265     static_assert(!kTrackJoinControlPoints || req_attrib<PatchAttribs::kJoinControlPoint>::value,
266                   "Deferred patches and auto-updating joins requires kJoinControlPoint attrib");
267 public:
268     template <typename... Args> // forwarded to PatchAllocator
PatchWriter(PatchAttribs attribs,Args &&...allocArgs)269     PatchWriter(PatchAttribs attribs,
270                 Args&&... allocArgs)
271             : fAttribs(attribs)
272             , fPatchAllocator(PatchStride(attribs), std::forward<Args>(allocArgs)...)
273             , fJoin(attribs)
274             , fFanPoint(attribs)
275             , fStrokeParams(attribs)
276             , fColor(attribs)
277             , fDepth(attribs)
278             , fSsboIndex(attribs) {
279         // Explicit curve types are provided on the writePatch signature, and not a field of
280         // PatchWriter, so initialize one in the ctor to validate the provided runtime attribs.
281         SkDEBUGCODE((void) CurveTypeAttrib(attribs);)
282         // Validate the kWideColorIfEnabled attribute variant flag as well
283         if constexpr (req_attrib<PatchAttribs::kWideColorIfEnabled>::value) {
284             SkASSERT(attribs & PatchAttribs::kWideColorIfEnabled);    // required
285         } else if constexpr (!opt_attrib<PatchAttribs::kWideColorIfEnabled>::value) {
286             SkASSERT(!(attribs & PatchAttribs::kWideColorIfEnabled)); // disabled
287         }
288     }
289 
~PatchWriter()290     ~PatchWriter() {
291         if constexpr (kTrackJoinControlPoints) {
292             // flush any pending patch
293             this->writeDeferredStrokePatch();
294         }
295     }
296 
attribs()297     PatchAttribs attribs() const { return fAttribs; }
298 
299     // The max scale factor should be derived from the same matrix that 'xform' was. It's only used
300     // in stroking calculations, so can be ignored for path filling.
301     void setShaderTransform(const wangs_formula::VectorXform& xform,
302                             float maxScale = 1.f) {
303         fApproxTransform = xform;
304         fMaxScale = maxScale;
305     }
306 
307     // Completes a closed contour of a stroke by rewriting a deferred patch with now-available
308     // join control point information. Automatically resets the join control point attribute.
writeDeferredStrokePatch()309     ENABLE_IF(kTrackJoinControlPoints) writeDeferredStrokePatch() {
310         if (fDeferredPatch.hasPending()) {
311             SkASSERT(!fDeferredPatch.fMustDefer);
312             // Overwrite join control point with updated value, which is the first attribute
313             // after the 4 control points.
314             memcpy(SkTAddOffset<void>(fDeferredPatch.fData, 4 * sizeof(SkPoint)),
315                    &fJoin, sizeof(SkPoint));
316             // Assuming that the stroke parameters aren't changing within a contour, we only have
317             // to set the parametric segments in order to recover the LinearTolerances state at the
318             // time the deferred patch was recorded.
319             fTolerances.setParametricSegments(fDeferredPatch.fN_p4);
320             if (VertexWriter vw = fPatchAllocator.append(fTolerances)) {
321                 vw << VertexWriter::Array<char>(fDeferredPatch.fData, PatchStride(fAttribs));
322             }
323         }
324 
325         fDeferredPatch.reset();
326     }
327 
328     // Updates the stroke's join control point that will be written out with each patch. This is
329     // automatically adjusted when appending various geometries (e.g. Conic/Cubic), but sometimes
330     // must be set explicitly.
updateJoinControlPointAttrib(SkPoint lastControlPoint)331     ENABLE_IF(JoinAttrib::kEnabled) updateJoinControlPointAttrib(SkPoint lastControlPoint) {
332         SkASSERT(fAttribs & PatchAttribs::kJoinControlPoint); // must be runtime enabled as well
333         fJoin = lastControlPoint;
334         if constexpr (kTrackJoinControlPoints) {
335             fDeferredPatch.fMustDefer = false;
336         }
337     }
338 
339     // Updates the fan point that will be written out with each patch (i.e., the point that wedges
340     // fan around).
updateFanPointAttrib(SkPoint fanPoint)341     ENABLE_IF(FanPointAttrib::kEnabled) updateFanPointAttrib(SkPoint fanPoint) {
342         SkASSERT(fAttribs & PatchAttribs::kFanPoint);
343         fFanPoint = fanPoint;
344     }
345 
346     // Updates the stroke params that are written out with each patch.
updateStrokeParamsAttrib(StrokeParams strokeParams)347     ENABLE_IF(StrokeAttrib::kEnabled) updateStrokeParamsAttrib(StrokeParams strokeParams) {
348         SkASSERT(fAttribs & PatchAttribs::kStrokeParams);
349         fStrokeParams = strokeParams;
350         fTolerances.setStroke(strokeParams, fMaxScale);
351     }
352     // Updates tolerances to account for stroke params that are stored as uniforms instead of
353     // dynamic instance attributes.
updateUniformStrokeParams(StrokeParams strokeParams)354     ENABLE_IF(StrokeAttrib::kEnabled) updateUniformStrokeParams(StrokeParams strokeParams) {
355         SkASSERT(!(fAttribs & PatchAttribs::kStrokeParams));
356         fTolerances.setStroke(strokeParams, fMaxScale);
357     }
358 
359     // Updates the color that will be written out with each patch.
updateColorAttrib(const SkPMColor4f & color)360     ENABLE_IF(ColorAttrib::kEnabled) updateColorAttrib(const SkPMColor4f& color) {
361         SkASSERT(fAttribs & PatchAttribs::kColor);
362         // Converts SkPMColor4f to the selected 'Color' attrib type. The always-wide and never-wide
363         // branches match what VertexColor does based on the runtime check.
364         if constexpr (req_attrib<PatchAttribs::kWideColorIfEnabled>::value) {
365             fColor = color;
366         } else if constexpr (opt_attrib<PatchAttribs::kWideColorIfEnabled>::value) {
367             fColor = VertexColor(color, fAttribs & PatchAttribs::kWideColorIfEnabled);
368         } else {
369             fColor = color.toBytes_RGBA();
370         }
371     }
372 
373     // Updates the paint depth written out with each patch.
updatePaintDepthAttrib(float depth)374     ENABLE_IF(DepthAttrib::kEnabled) updatePaintDepthAttrib(float depth) {
375         SkASSERT(fAttribs & PatchAttribs::kPaintDepth);
376         fDepth = depth;
377     }
378 
379     // Updates the storage buffer index used to access uniforms.
380     ENABLE_IF(SsboIndexAttrib::kEnabled)
updateSsboIndexAttrib(skvx::uint2 ssboIndex)381     updateSsboIndexAttrib(skvx::uint2 ssboIndex) {
382         SkASSERT(fAttribs & PatchAttribs::kSsboIndex);
383         fSsboIndex = ssboIndex;
384     }
385 
386     /**
387      * writeX functions for supported patch geometry types. Every geometric type is converted to an
388      * equivalent cubic or conic, so this will always write at minimum 8 floats for the four control
389      * points (cubic) or three control points and {w, inf} (conics). The PatchWriter additionally
390      * writes the current values of all attributes enabled in its PatchAttribs flags.
391      */
392 
393     // Write a cubic curve with its four control points.
writeCubic(float2 p0,float2 p1,float2 p2,float2 p3)394     AI void writeCubic(float2 p0, float2 p1, float2 p2, float2 p3) {
395         float n4 = wangs_formula::cubic_p4(kPrecision, p0, p1, p2, p3, fApproxTransform);
396         if constexpr (kDiscardFlatCurves) {
397             if (n4 <= 1.f) {
398                 // This cubic only needs one segment (e.g. a line) but we're not filling space with
399                 // fans or stroking, so nothing actually needs to be drawn.
400                 return;
401             }
402         }
403         if (int numPatches = this->accountForCurve(n4)) {
404             this->chopAndWriteCubics(p0, p1, p2, p3, numPatches);
405         } else {
406             this->writeCubicPatch(p0, p1, p2, p3);
407         }
408     }
writeCubic(const SkPoint pts[4])409     AI void writeCubic(const SkPoint pts[4]) {
410         float4 p0p1 = float4::Load(pts);
411         float4 p2p3 = float4::Load(pts + 2);
412         this->writeCubic(p0p1.lo, p0p1.hi, p2p3.lo, p2p3.hi);
413     }
414 
415     // Write a conic curve with three control points and 'w', with the last coord of the last
416     // control point signaling a conic by being set to infinity.
writeConic(float2 p0,float2 p1,float2 p2,float w)417     AI void writeConic(float2 p0, float2 p1, float2 p2, float w) {
418         float n2 = wangs_formula::conic_p2(kPrecision, p0, p1, p2, w, fApproxTransform);
419         if constexpr (kDiscardFlatCurves) {
420             if (n2 <= 1.f) {
421                 // This conic only needs one segment (e.g. a line) but we're not filling space with
422                 // fans or stroking, so nothing actually needs to be drawn.
423                 return;
424             }
425         }
426         if (int numPatches = this->accountForCurve(n2 * n2)) {
427             this->chopAndWriteConics(p0, p1, p2, w, numPatches);
428         } else {
429             this->writeConicPatch(p0, p1, p2, w);
430         }
431     }
writeConic(const SkPoint pts[3],float w)432     AI void writeConic(const SkPoint pts[3], float w) {
433         this->writeConic(sk_bit_cast<float2>(pts[0]),
434                          sk_bit_cast<float2>(pts[1]),
435                          sk_bit_cast<float2>(pts[2]),
436                          w);
437     }
438 
439     // Write a quadratic curve that automatically converts its three control points into an
440     // equivalent cubic.
writeQuadratic(float2 p0,float2 p1,float2 p2)441     AI void writeQuadratic(float2 p0, float2 p1, float2 p2) {
442         float n4 = wangs_formula::quadratic_p4(kPrecision, p0, p1, p2, fApproxTransform);
443         if constexpr (kDiscardFlatCurves) {
444             if (n4 <= 1.f) {
445                 // This quad only needs one segment (e.g. a line) but we're not filling space with
446                 // fans or stroking, so nothing actually needs to be drawn.
447                 return;
448             }
449         }
450         if (int numPatches = this->accountForCurve(n4)) {
451             this->chopAndWriteQuads(p0, p1, p2, numPatches);
452         } else {
453             this->writeQuadPatch(p0, p1, p2);
454         }
455     }
writeQuadratic(const SkPoint pts[3])456     AI void writeQuadratic(const SkPoint pts[3]) {
457         this->writeQuadratic(sk_bit_cast<float2>(pts[0]),
458                              sk_bit_cast<float2>(pts[1]),
459                              sk_bit_cast<float2>(pts[2]));
460     }
461 
462     // Write a line that is automatically converted into an equivalent cubic.
writeLine(float4 p0p1)463     AI void writeLine(float4 p0p1) {
464         // No chopping needed, a line only ever requires one segment (the minimum required already).
465         fTolerances.setParametricSegments(1.f);
466         if constexpr (kReplicateLineEndPoints) {
467             // Visually this cubic is still a line, but 't' does not move linearly over the line,
468             // so Wang's formula is more pessimistic. Shaders should avoid evaluating Wang's
469             // formula when a patch has control points in this arrangement.
470             this->writeCubicPatch(p0p1.lo, p0p1.lo, p0p1.hi, p0p1.hi);
471         } else {
472             // In exact math, this cubic structure should have Wang's formula return 0. Due to
473             // floating point math, this isn't always the case, so shaders need some way to restrict
474             // the number of parametric segments if Wang's formula numerically blows up.
475             this->writeCubicPatch(p0p1.lo, (p0p1.zwxy() - p0p1) * (1/3.f) + p0p1, p0p1.hi);
476         }
477     }
writeLine(float2 p0,float2 p1)478     AI void writeLine(float2 p0, float2 p1) { this->writeLine({p0, p1}); }
writeLine(SkPoint p0,SkPoint p1)479     AI void writeLine(SkPoint p0, SkPoint p1) {
480         this->writeLine(sk_bit_cast<float2>(p0), sk_bit_cast<float2>(p1));
481     }
482 
483     // Write a triangle by setting it to a conic with w=Inf, and using a distinct
484     // explicit curve type for when inf isn't supported in shaders.
writeTriangle(float2 p0,float2 p1,float2 p2)485     AI void writeTriangle(float2 p0, float2 p1, float2 p2) {
486         // No chopping needed, the max supported segment count should always support 2 lines
487         // (which form a triangle when implicitly closed).
488         static constexpr float kTriangleSegments_p4 = 2.f * 2.f * 2.f * 2.f;
489         fTolerances.setParametricSegments(kTriangleSegments_p4);
490         this->writePatch(p0, p1, p2, {SK_FloatInfinity, SK_FloatInfinity},
491                          kTriangularConicCurveType);
492     }
writeTriangle(SkPoint p0,SkPoint p1,SkPoint p2)493     AI void writeTriangle(SkPoint p0, SkPoint p1, SkPoint p2) {
494         this->writeTriangle(sk_bit_cast<float2>(p0),
495                             sk_bit_cast<float2>(p1),
496                             sk_bit_cast<float2>(p2));
497     }
498 
499     // Writes a circle used for round caps and joins in stroking, encoded as a cubic with
500     // identical control points and an empty join.
writeCircle(SkPoint p)501     AI void writeCircle(SkPoint p) {
502         // This does not use writePatch() because it uses its own location as the join attribute
503         // value instead of fJoin and never defers.
504         fTolerances.setParametricSegments(0.f);
505         if (VertexWriter vw = fPatchAllocator.append(fTolerances)) {
506             vw << VertexWriter::Repeat<4>(p); // p0,p1,p2,p3 = p -> 4 copies
507             this->emitPatchAttribs(std::move(vw), {fAttribs, p}, kCubicCurveType);
508         }
509     }
510 
511 private:
emitPatchAttribs(VertexWriter vertexWriter,const JoinAttrib & join,float explicitCurveType)512     AI void emitPatchAttribs(VertexWriter vertexWriter,
513                              const JoinAttrib& join,
514                              float explicitCurveType) {
515         // NOTE: operator<< overrides automatically handle optional and disabled attribs.
516         vertexWriter << join << fFanPoint << fStrokeParams << fColor << fDepth
517                      << CurveTypeAttrib{fAttribs, explicitCurveType} << fSsboIndex;
518     }
519 
appendPatch()520     AI VertexWriter appendPatch() {
521         if constexpr (kTrackJoinControlPoints) {
522             if (fDeferredPatch.fMustDefer) {
523                 SkASSERT(!fDeferredPatch.hasPending());
524                 SkASSERT(PatchStride(fAttribs) <= kMaxStride);
525                 // Save the computed parametric segment tolerance value so that we can pass that to
526                 // the PatchAllocator when flushing the deferred patch.
527                 fDeferredPatch.fN_p4 = fTolerances.numParametricSegments_p4();
528                 return {fDeferredPatch.fData, PatchStride(fAttribs)};
529             }
530         }
531         return fPatchAllocator.append(fTolerances);
532     }
533 
writePatch(float2 p0,float2 p1,float2 p2,float2 p3,float explicitCurveType)534     AI void writePatch(float2 p0, float2 p1, float2 p2, float2 p3, float explicitCurveType) {
535         if (VertexWriter vw = this->appendPatch()) {
536             // NOTE: fJoin will be undefined if we're writing to a deferred patch. If that's the
537             // case, correct data will overwrite it when the contour is closed (this is fine since a
538             // deferred patch writes to CPU memory instead of directly to the GPU buffer).
539             vw << p0 << p1 << p2 << p3;
540             this->emitPatchAttribs(std::move(vw), fJoin, explicitCurveType);
541 
542             // Automatically update join control point for next patch.
543             if constexpr (kTrackJoinControlPoints) {
544                 if (explicitCurveType == kCubicCurveType && any(p3 != p2)) {
545                     // p2 is control point defining the tangent vector into the next patch.
546                     p2.store(&fJoin);
547                 } else if (any(p2 != p1)) {
548                     // p1 is the control point defining the tangent vector.
549                     p1.store(&fJoin);
550                 } else {
551                     // p0 is the control point defining the tangent vector.
552                     p0.store(&fJoin);
553                 }
554                 fDeferredPatch.fMustDefer = false;
555             }
556         }
557     }
558 
559     // Helpers that normalize curves to a generic patch, but do no other work.
writeCubicPatch(float2 p0,float2 p1,float2 p2,float2 p3)560     AI void writeCubicPatch(float2 p0, float2 p1, float2 p2, float2 p3) {
561         this->writePatch(p0, p1, p2, p3, kCubicCurveType);
562     }
writeCubicPatch(float2 p0,float4 p1p2,float2 p3)563     AI void writeCubicPatch(float2 p0, float4 p1p2, float2 p3) {
564         this->writeCubicPatch(p0, p1p2.lo, p1p2.hi, p3);
565     }
writeQuadPatch(float2 p0,float2 p1,float2 p2)566     AI void writeQuadPatch(float2 p0, float2 p1, float2 p2) {
567         this->writeCubicPatch(p0, mix(float4(p0, p2), p1.xyxy(), 2/3.f), p2);
568     }
writeConicPatch(float2 p0,float2 p1,float2 p2,float w)569     AI void writeConicPatch(float2 p0, float2 p1, float2 p2, float w) {
570         this->writePatch(p0, p1, p2, {w, SK_FloatInfinity}, kConicCurveType);
571     }
572 
accountForCurve(float n4)573     int accountForCurve(float n4) {
574         if (n4 <= kMaxParametricSegments_p4) {
575             // Record n^4 and return 0 to signal no chopping
576             fTolerances.setParametricSegments(n4);
577             return 0;
578         } else {
579             // Clamp to max allowed segmentation for a patch and return required number of chops
580             // to achieve visual correctness.
581             fTolerances.setParametricSegments(kMaxParametricSegments_p4);
582             return SkScalarCeilToInt(wangs_formula::root4(std::min(n4, kMaxSegmentsPerCurve_p4) /
583                                                           kMaxParametricSegments_p4));
584         }
585     }
586 
587     // This does not return b when t==1, but it otherwise seems to get better precision than
588     // "a*(1 - t) + b*t" for things like chopping cubics on exact cusp points.
589     // The responsibility falls on the caller to check that t != 1 before calling.
mix(float4 a,float4 b,float4 T)590     static AI float4 mix(float4 a, float4 b, float4 T) {
591         SkASSERT(all((0 <= T) & (T < 1)));
592         return (b - a)*T + a;
593     }
594 
595     // Helpers that chop the curve type into 'numPatches' parametrically uniform curves. It is
596     // assumed that 'numPatches' is calculated such that the resulting curves require the maximum
597     // number of segments to draw appropriately (since the original presumably needed even more).
chopAndWriteQuads(float2 p0,float2 p1,float2 p2,int numPatches)598     void chopAndWriteQuads(float2 p0, float2 p1, float2 p2, int numPatches) {
599         InnerTriangulator triangulator(numPatches, sk_bit_cast<SkPoint>(p0));
600         for (; numPatches >= 3; numPatches -= 2) {
601             // Chop into 3 quads.
602             float4 T = float4(1,1,2,2) / numPatches;
603             float4 ab = mix(p0.xyxy(), p1.xyxy(), T);
604             float4 bc = mix(p1.xyxy(), p2.xyxy(), T);
605             float4 abc = mix(ab, bc, T);
606             // p1 & p2 of the cubic representation of the middle quad.
607             float4 middle = mix(ab, bc, mix(T, T.zwxy(), 2/3.f));
608 
609             this->writeQuadPatch(p0, ab.lo, abc.lo);  // Write the 1st quad.
610             if constexpr (kAddTrianglesWhenChopping) {
611                 this->writeTriangle(p0, abc.lo, abc.hi);
612             }
613             this->writeCubicPatch(abc.lo, middle, abc.hi);  // Write the 2nd quad (already a cubic)
614             if constexpr (kAddTrianglesWhenChopping) {
615                 this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(abc.hi)));
616             }
617             std::tie(p0, p1) = {abc.hi, bc.hi};  // Save the 3rd quad.
618         }
619         if (numPatches == 2) {
620             // Chop into 2 quads.
621             float2 ab = (p0 + p1) * .5f;
622             float2 bc = (p1 + p2) * .5f;
623             float2 abc = (ab + bc) * .5f;
624 
625             this->writeQuadPatch(p0, ab, abc);  // Write the 1st quad.
626             if constexpr (kAddTrianglesWhenChopping) {
627                 this->writeTriangle(p0, abc, p2);
628             }
629             this->writeQuadPatch(abc, bc, p2);  // Write the 2nd quad.
630         } else {
631             SkASSERT(numPatches == 1);
632             this->writeQuadPatch(p0, p1, p2);  // Write the single remaining quad.
633         }
634         if constexpr (kAddTrianglesWhenChopping) {
635             this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(p2)));
636             this->writeTriangleStack(triangulator.close());
637         }
638     }
639 
chopAndWriteConics(float2 p0,float2 p1,float2 p2,float w,int numPatches)640     void chopAndWriteConics(float2 p0, float2 p1, float2 p2, float w, int numPatches) {
641         InnerTriangulator triangulator(numPatches, sk_bit_cast<SkPoint>(p0));
642         // Load the conic in 3d homogeneous (unprojected) space.
643         float4 h0 = float4(p0,1,1);
644         float4 h1 = float4(p1,1,1) * w;
645         float4 h2 = float4(p2,1,1);
646         for (; numPatches >= 2; --numPatches) {
647             // Chop in homogeneous space.
648             float T = 1.f/numPatches;
649             float4 ab = mix(h0, h1, T);
650             float4 bc = mix(h1, h2, T);
651             float4 abc = mix(ab, bc, T);
652 
653             // Project and write the 1st conic.
654             float2 midpoint = abc.xy() / abc.w();
655             this->writeConicPatch(h0.xy() / h0.w(),
656                                   ab.xy() / ab.w(),
657                                   midpoint,
658                                   ab.w() / sqrtf(h0.w() * abc.w()));
659             if constexpr (kAddTrianglesWhenChopping) {
660                 this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(midpoint)));
661             }
662             std::tie(h0, h1) = {abc, bc};  // Save the 2nd conic (in homogeneous space).
663         }
664         // Project and write the remaining conic.
665         SkASSERT(numPatches == 1);
666         this->writeConicPatch(h0.xy() / h0.w(),
667                               h1.xy() / h1.w(),
668                               h2.xy(), // h2.w == 1
669                               h1.w() / sqrtf(h0.w()));
670         if constexpr (kAddTrianglesWhenChopping) {
671             this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(h2.xy())));
672             this->writeTriangleStack(triangulator.close());
673         }
674     }
675 
chopAndWriteCubics(float2 p0,float2 p1,float2 p2,float2 p3,int numPatches)676     void chopAndWriteCubics(float2 p0, float2 p1, float2 p2, float2 p3, int numPatches) {
677         InnerTriangulator triangulator(numPatches, sk_bit_cast<SkPoint>(p0));
678         for (; numPatches >= 3; numPatches -= 2) {
679             // Chop into 3 cubics.
680             float4 T = float4(1,1,2,2) / numPatches;
681             float4 ab = mix(p0.xyxy(), p1.xyxy(), T);
682             float4 bc = mix(p1.xyxy(), p2.xyxy(), T);
683             float4 cd = mix(p2.xyxy(), p3.xyxy(), T);
684             float4 abc = mix(ab, bc, T);
685             float4 bcd = mix(bc, cd, T);
686             float4 abcd = mix(abc, bcd, T);
687             float4 middle = mix(abc, bcd, T.zwxy());  // p1 & p2 of the middle cubic.
688 
689             this->writeCubicPatch(p0, ab.lo, abc.lo, abcd.lo);  // Write the 1st cubic.
690             if constexpr (kAddTrianglesWhenChopping) {
691                 this->writeTriangle(p0, abcd.lo, abcd.hi);
692             }
693             this->writeCubicPatch(abcd.lo, middle, abcd.hi);  // Write the 2nd cubic.
694             if constexpr (kAddTrianglesWhenChopping) {
695                 this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(abcd.hi)));
696             }
697             std::tie(p0, p1, p2) = {abcd.hi, bcd.hi, cd.hi};  // Save the 3rd cubic.
698         }
699         if (numPatches == 2) {
700             // Chop into 2 cubics.
701             float2 ab = (p0 + p1) * .5f;
702             float2 bc = (p1 + p2) * .5f;
703             float2 cd = (p2 + p3) * .5f;
704             float2 abc = (ab + bc) * .5f;
705             float2 bcd = (bc + cd) * .5f;
706             float2 abcd = (abc + bcd) * .5f;
707 
708             this->writeCubicPatch(p0, ab, abc, abcd);  // Write the 1st cubic.
709             if constexpr (kAddTrianglesWhenChopping) {
710                 this->writeTriangle(p0, abcd, p3);
711             }
712             this->writeCubicPatch(abcd, bcd, cd, p3);  // Write the 2nd cubic.
713         } else {
714             SkASSERT(numPatches == 1);
715             this->writeCubicPatch(p0, p1, p2, p3);  // Write the single remaining cubic.
716         }
717         if constexpr (kAddTrianglesWhenChopping) {
718             this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(p3)));
719             this->writeTriangleStack(triangulator.close());
720         }
721     }
722 
723     ENABLE_IF(kAddTrianglesWhenChopping)
writeTriangleStack(MiddleOutPolygonTriangulator::PoppedTriangleStack && stack)724     writeTriangleStack(MiddleOutPolygonTriangulator::PoppedTriangleStack&& stack) {
725         for (auto [p0, p1, p2] : stack) {
726             this->writeTriangle(p0, p1, p2);
727         }
728     }
729 
730     // Runtime configuration, will always contain required attribs but may not have all optional
731     // attribs enabled (e.g. depending on caps or batching).
732     const PatchAttribs fAttribs;
733 
734     // The 2x2 approximation of the local-to-device transform that will affect subsequently
735     // recorded curves (when fully transformed in the vertex shader).
736     wangs_formula::VectorXform fApproxTransform = {};
737     // A maximum scale factor extracted from the current approximate transform.
738     float fMaxScale = 1.0f;
739     // Tracks the linear tolerances for the most recently written patches.
        // accountForCurve() records each curve's parametric segment count (as n^4) here via
        // setParametricSegments().
740     LinearTolerances fTolerances;
741 
        // NOTE(review): presumably provides the storage that patches are written into; its use is
        // not visible in this part of the file -- confirm against the allocator's definition.
742     PatchAllocator fPatchAllocator;
        // fMustDefer is cleared once a join control point has been recorded.
743     DeferredPatch  fDeferredPatch; // only usable if kTrackJoinControlPoints is true
744 
745     // Instance attribute state written after the 4 control points of a patch
        // When kTrackJoinControlPoints is set, fJoin is overwritten with the control point that
        // defines the tangent vector into the next patch.
746     JoinAttrib     fJoin;
747     FanPointAttrib fFanPoint;
748     StrokeAttrib   fStrokeParams;
749     ColorAttrib    fColor;
750     DepthAttrib    fDepth;
751 
752     // Index into a shared storage buffer containing this PatchWriter's patches' corresponding
753     // uniforms. Written out as an attribute with every patch, to read the appropriate uniform
754     // values from the storage buffer on draw.
755     SsboIndexAttrib fSsboIndex;
756 };
757 
758 }  // namespace skgpu::tess
759 
760 #undef ENABLE_IF
761 #undef AI
762 
763 #endif  // skgpu_tessellate_PatchWriter_DEFINED
764