1 /*
2 * Copyright 2020 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/gpu/tessellate/GrStrokeIndirectTessellator.h"
9
10 #include "src/core/SkGeometry.h"
11 #include "src/core/SkPathPriv.h"
12 #include "src/gpu/GrRecordingContextPriv.h"
13 #include "src/gpu/GrVertexWriter.h"
14 #include "src/gpu/GrVx.h"
15 #include "src/gpu/geometry/GrPathUtils.h"
16 #include "src/gpu/geometry/GrWangsFormula.h"
17 #include "src/gpu/tessellate/GrStrokeIterator.h"
18 #include "src/gpu/tessellate/GrStrokeTessellateShader.h"
19
20 namespace {
21
22 // Only use SIMD if SkVx will use a built-in compiler extensions for vectors.
23 #if !defined(SKNX_NO_SIMD) && (defined(__clang__) || defined(__GNUC__))
24 #define USE_SIMD 1
25 #else
26 #define USE_SIMD 0
27 #endif
28
29 #if USE_SIMD
30 using grvx::vec;
31 using grvx::ivec;
32 using grvx::uvec;
33
34 // Muxes between "N" (Nx2/2) 2d vectors in SIMD based on the provided conditions. This is equivalent
35 // to returning the following at each point:
36 //
37 // (conds.lo[i] & conds.hi[i])) ? {t[i].lo, t[i].hi} : {e[i].lo, e[i].hi}.
38 //
39 template<int Nx2>
if_both_then_else(ivec<Nx2> conds,vec<Nx2> t,vec<Nx2> e)40 static SK_ALWAYS_INLINE vec<Nx2> if_both_then_else(ivec<Nx2> conds, vec<Nx2> t, vec<Nx2> e) {
41 auto both = conds.lo & conds.hi;
42 return skvx::if_then_else(skvx::join(both, both), t, e);
43 }
44
45 // Returns the lengths squared of "N" (Nx2/2) 2d vectors in SIMD. The x values are in "v.lo" and
46 // the y values are in "v.hi".
length_pow2(vec<Nx2> v)47 template<int Nx2> static SK_ALWAYS_INLINE vec<Nx2/2> length_pow2(vec<Nx2> v) {
48 auto vv = v*v;
49 return vv.lo + vv.hi;
50 }
51
52 // Interpolates between "a" and "b" by a factor of T. T must be <1 and >= 0.
53 //
54 // NOTE: This does not return b when T==1. It's implemented as-is because it otherwise seems to get
55 // better precision than "a*(1 - T) + b*T" for things like chopping cubics on exact cusp points.
56 // The responsibility falls on the caller to check that T != 1 before calling.
unchecked_mix(vec<N> a,vec<N> b,vec<N> T)57 template<int N> SK_ALWAYS_INLINE vec<N> unchecked_mix(vec<N> a, vec<N> b, vec<N> T) {
58 return grvx::fast_madd(b - a, T, a);
59 }
60 #endif
61
62 // Computes and writes out the resolveLevels for individual strokes. Maintains a counter of the
63 // number of instances at each resolveLevel. If SIMD is available, then these calculations are done
64 // in batches.
65 class ResolveLevelCounter {
66 public:
67 constexpr static int8_t kMaxResolveLevel = GrStrokeIndirectTessellator::kMaxResolveLevel;
68
ResolveLevelCounter(const SkMatrix & viewMatrix,int * resolveLevelCounts)69 ResolveLevelCounter(const SkMatrix& viewMatrix, int* resolveLevelCounts)
70 : fResolveLevelCounts(resolveLevelCounts) {
71 if (!viewMatrix.getMinMaxScales(fMatrixMinMaxScales.data())) {
72 fMatrixMinMaxScales.fill(1);
73 }
74 }
75
updateTolerances(float strokeWidth,bool isRoundJoin)76 void updateTolerances(float strokeWidth, bool isRoundJoin) {
77 this->flush();
78 fTolerances = GrStrokeTolerances::Make(fMatrixMinMaxScales.data(), strokeWidth);
79 fResolveLevelForCircles = SkTPin<float>(
80 sk_float_nextlog2(fTolerances.fNumRadialSegmentsPerRadian * SK_ScalarPI),
81 1, kMaxResolveLevel);
82 fIsRoundJoin = isRoundJoin;
83 #if USE_SIMD
84 fWangsTermQuadratic = GrWangsFormula::length_term<2>(fTolerances.fParametricPrecision);
85 fWangsTermCubic = GrWangsFormula::length_term<3>(fTolerances.fParametricPrecision);
86 #endif
87 }
88
isRoundJoin() const89 bool isRoundJoin() const { return fIsRoundJoin; }
90
91 // Accounts for 180-degree point strokes, which render as circles with diameters equal to the
92 // stroke width. We draw circles at cusp points on curves and for round caps.
93 //
94 // Returns the resolveLevel to use when drawing these circles.
countCircles(int numCircles)95 int8_t countCircles(int numCircles) {
96 fResolveLevelCounts[fResolveLevelForCircles] += numCircles;
97 return fResolveLevelForCircles;
98 }
99
100 #if !USE_SIMD
countLine(const SkPoint pts[2],SkPoint lastControlPoint,int8_t * resolveLevelPtr)101 bool SK_WARN_UNUSED_RESULT countLine(const SkPoint pts[2], SkPoint lastControlPoint,
102 int8_t* resolveLevelPtr) {
103 if (!fIsRoundJoin) {
104 // There is no resolve level to track. It's always zero.
105 ++fResolveLevelCounts[0];
106 return false;
107 }
108 float rotation = SkMeasureAngleBetweenVectors(pts[0] - lastControlPoint, pts[1] - pts[0]);
109 this->writeResolveLevel(0, rotation, resolveLevelPtr);
110 return true;
111 }
112
countQuad(const SkPoint pts[3],SkPoint lastControlPoint,int8_t * resolveLevelPtr)113 void countQuad(const SkPoint pts[3], SkPoint lastControlPoint, int8_t* resolveLevelPtr) {
114 float numParametricSegments =
115 GrWangsFormula::quadratic(fTolerances.fParametricPrecision, pts);
116 float rotation = SkMeasureQuadRotation(pts);
117 if (fIsRoundJoin) {
118 SkVector nextTan = ((pts[0] == pts[1]) ? pts[2] : pts[1]) - pts[0];
119 rotation += SkMeasureAngleBetweenVectors(pts[0] - lastControlPoint, nextTan);
120 }
121 this->writeResolveLevel(numParametricSegments, rotation, resolveLevelPtr);
122 }
123
countCubic(const SkPoint pts[4],SkPoint lastControlPoint,int8_t * resolveLevelPtr)124 void countCubic(const SkPoint pts[4], SkPoint lastControlPoint, int8_t* resolveLevelPtr) {
125 float numParametricSegments = GrWangsFormula::cubic(fTolerances.fParametricPrecision, pts);
126 SkVector tan0 = ((pts[0] == pts[1]) ? pts[2] : pts[1]) - pts[0];
127 SkVector tan1 = pts[3] - ((pts[3] == pts[2]) ? pts[1] : pts[2]);
128 float rotation = SkMeasureAngleBetweenVectors(tan0, tan1);
129 if (fIsRoundJoin && pts[0] != lastControlPoint) {
130 SkVector nextTan = (tan0.isZero()) ? tan1 : tan0;
131 rotation += SkMeasureAngleBetweenVectors(pts[0] - lastControlPoint, nextTan);
132 }
133 this->writeResolveLevel(numParametricSegments, rotation, resolveLevelPtr);
134 }
135
countChoppedCubic(const SkPoint pts[4],const float chopT,SkPoint lastControlPoint,int8_t * resolveLevelPtr)136 void countChoppedCubic(const SkPoint pts[4], const float chopT, SkPoint lastControlPoint,
137 int8_t* resolveLevelPtr) {
138 SkPoint chops[7];
139 SkChopCubicAt(pts, chops, chopT);
140 this->countCubic(chops, lastControlPoint, resolveLevelPtr);
141 this->countCubic(chops + 3, chops[3], resolveLevelPtr + 1);
142 }
143
flush()144 void flush() {}
145
146 private:
writeResolveLevel(float numParametricSegments,float rotation,int8_t * resolveLevelPtr) const147 void writeResolveLevel(float numParametricSegments, float rotation,
148 int8_t* resolveLevelPtr) const {
149 float numCombinedSegments =
150 fTolerances.fNumRadialSegmentsPerRadian * rotation + numParametricSegments;
151 int8_t resolveLevel = sk_float_nextlog2(numCombinedSegments);
152 resolveLevel = std::min(resolveLevel, kMaxResolveLevel);
153 ++fResolveLevelCounts[(*resolveLevelPtr = resolveLevel)];
154 }
155
156 #else // USE_SIMD
~ResolveLevelCounter()157 ~ResolveLevelCounter() {
158 // Always call flush() when finished.
159 SkASSERT(fLineQueue.fCount == 0);
160 SkASSERT(fQuadQueue.fCount == 0);
161 SkASSERT(fCubicQueue.fCount == 0);
162 SkASSERT(fChoppedCubicQueue.fCount == 0);
163 }
164
countLine(const SkPoint pts[2],SkPoint lastControlPoint,int8_t * resolveLevelPtr)165 bool SK_WARN_UNUSED_RESULT countLine(const SkPoint pts[2], SkPoint lastControlPoint,
166 int8_t* resolveLevelPtr) {
167 if (!fIsRoundJoin) {
168 // There is no resolve level to track. It's always zero.
169 ++fResolveLevelCounts[0];
170 return false;
171 }
172 if (fLineQueue.push(pts, fIsRoundJoin, lastControlPoint, resolveLevelPtr) == 3) {
173 this->flushLines<4>();
174 }
175 return true;
176 }
177
countQuad(const SkPoint pts[3],SkPoint lastControlPoint,int8_t * resolveLevelPtr)178 void countQuad(const SkPoint pts[3], SkPoint lastControlPoint, int8_t* resolveLevelPtr) {
179 if (fQuadQueue.push(pts, fIsRoundJoin, lastControlPoint, resolveLevelPtr) == 3) {
180 this->flushQuads<4>();
181 }
182 }
183
countCubic(const SkPoint pts[4],SkPoint lastControlPoint,int8_t * resolveLevelPtr)184 void countCubic(const SkPoint pts[4], SkPoint lastControlPoint, int8_t* resolveLevelPtr) {
185 if (fCubicQueue.push(pts, fIsRoundJoin, lastControlPoint, resolveLevelPtr) == 3) {
186 this->flushCubics<4>();
187 }
188 }
189
countChoppedCubic(const SkPoint pts[4],const float chopT,SkPoint lastControlPoint,int8_t * resolveLevelPtr)190 void countChoppedCubic(const SkPoint pts[4], const float chopT, SkPoint lastControlPoint,
191 int8_t* resolveLevelPtr) {
192 int i = fChoppedCubicQueue.push(pts, fIsRoundJoin, lastControlPoint, resolveLevelPtr);
193 fCubicChopTs[i] = fCubicChopTs[4 + i] = chopT;
194 if (i == 3) {
195 this->flushChoppedCubics<4>();
196 }
197 }
198
flush()199 void flush() {
200 // Flush each queue, crunching either 2 curves in SIMD or 4. We do 2 when the queue is low
201 // because it allows us to expand two points into a single float4: [x0,x1,y0,y1].
202 if (fLineQueue.fCount) {
203 SkASSERT(fIsRoundJoin);
204 if (fLineQueue.fCount <= 2) {
205 this->flushLines<2>();
206 } else {
207 this->flushLines<4>();
208 }
209 }
210 if (fQuadQueue.fCount) {
211 if (fQuadQueue.fCount <= 2) {
212 this->flushQuads<2>();
213 } else {
214 this->flushQuads<4>();
215 }
216 }
217 if (fCubicQueue.fCount) {
218 if (fCubicQueue.fCount <= 2) {
219 this->flushCubics<2>();
220 } else {
221 this->flushCubics<4>();
222 }
223 }
224 if (fChoppedCubicQueue.fCount) {
225 if (fChoppedCubicQueue.fCount <= 2) {
226 this->flushChoppedCubics<2>();
227 } else {
228 this->flushChoppedCubics<4>();
229 }
230 }
231 }
232
233 private:
234 // This struct stores deferred resolveLevel calculations for performing in SIMD batches.
235 template<int NumPts> struct SIMDQueue {
236 // Enqueues a stroke.
push__anon90f4518c0111::ResolveLevelCounter::SIMDQueue237 SK_ALWAYS_INLINE int push(const SkPoint pts[NumPts], bool pushRoundJoin,
238 SkPoint lastControlPoint, int8_t* resolveLevelPtr) {
239 SkASSERT(0 <= fCount && fCount < 4);
240 for (int i = 0; i < NumPts; ++i) {
241 fPts[i][fCount] = pts[i].fX;
242 fPts[i][4 + fCount] = pts[i].fY;
243 }
244 if (pushRoundJoin) {
245 fLastControlPoints[fCount] = lastControlPoint.fX;
246 fLastControlPoints[4 + fCount] = lastControlPoint.fY;
247 }
248 fResolveLevelPtrs[fCount] = resolveLevelPtr;
249 return fCount++;
250 }
251
252 // Loads pts[idx] in SIMD for fCount strokes, with the x values in the "vec.lo" and the y
253 // values in "vec.hi".
loadPoint__anon90f4518c0111::ResolveLevelCounter::SIMDQueue254 template<int N> vec<N*2> loadPoint(int idx) const {
255 SkASSERT(0 <= idx && idx < NumPts);
256 return this->loadPointFromArray<N>(fPts[idx]);
257 }
258
259 // Loads fCount lastControlPoints in SIMD, with the x values in "vec.lo" and the y values in
260 // "vec.hi".
loadLastControlPoint__anon90f4518c0111::ResolveLevelCounter::SIMDQueue261 template<int N> vec<N*2> loadLastControlPoint() const {
262 return this->loadPointFromArray<N>(fLastControlPoints);
263 }
264
265 // Loads fCount points from the given array in SIMD, with the x values in "vec.lo" and the y
266 // values in "vec.hi". The array must be ordered as: [x0,x1,x2,x3,y0,y1,y2,y3].
loadPointFromArray__anon90f4518c0111::ResolveLevelCounter::SIMDQueue267 template<int N> vec<N*2> loadPointFromArray(const float array[8]) const {
268 if constexpr (N == 4) {
269 if (fCount == 4) {
270 return vec<8>::Load(array);
271 } else {
272 SkASSERT(fCount == 3);
273 return {array[0], array[1], array[2], 0, array[4], array[5], array[6], 0};
274 }
275 } else {
276 if (fCount == 2) {
277 return {array[0], array[1], array[4], array[5]};
278 } else {
279 SkASSERT(fCount == 1);
280 return {array[0], 0, array[4], 0};
281 }
282 }
283 }
284
285 struct alignas(sizeof(float) * 8) {
286 float fPts[NumPts][8];
287 float fLastControlPoints[8];
288 };
289 int8_t* fResolveLevelPtrs[4];
290 int fCount = 0;
291 };
292
flushLines()293 template<int N> void flushLines() {
294 SkASSERT(fLineQueue.fCount > 0);
295
296 // Find the angle of rotation in the preceding round join.
297 auto a = fLineQueue.loadLastControlPoint<N>();
298 auto b = fLineQueue.loadPoint<N>(0);
299 auto c = fLineQueue.loadPoint<N>(1);
300 auto rotation = grvx::approx_angle_between_vectors(b - a, c - b);
301
302 this->writeResolveLevels<N>(0, rotation, fLineQueue.fCount, fLineQueue.fResolveLevelPtrs);
303 fLineQueue.fCount = 0;
304 }
305
flushQuads()306 template<int N> void flushQuads() {
307 SkASSERT(fQuadQueue.fCount > 0);
308 auto p0 = fQuadQueue.loadPoint<N>(0);
309 auto p1 = fQuadQueue.loadPoint<N>(1);
310 auto p2 = fQuadQueue.loadPoint<N>(2);
311
312 // Execute Wang's formula to determine how many parametric segments the curve needs to be
313 // divided into. (See GrWangsFormula::quadratic().)
314 auto l = length_pow2(grvx::fast_madd<N*2>(-2, p1, p2) + p0);
315 auto numParametricSegments = skvx::sqrt(fWangsTermQuadratic * skvx::sqrt(l));
316
317 // Find the curve's rotation. Since quads cannot inflect or rotate more than 180 degrees,
318 // this is equal to the angle between the beginning and ending tangents.
319 // NOTE: If p0==p1 or p1==p2, this will give rotation=0.
320 auto tan0 = p1 - p0;
321 auto tan1 = p2 - p1;
322 auto rotation = grvx::approx_angle_between_vectors(tan0, tan1);
323 if (fIsRoundJoin) {
324 // Add rotation for the preceding round join.
325 auto lastControlPoint = fQuadQueue.loadLastControlPoint<N>();
326 auto nextTan = if_both_then_else((tan0 == 0), tan1, tan0);
327 rotation += grvx::approx_angle_between_vectors(p0 - lastControlPoint, nextTan);
328 }
329
330 this->writeResolveLevels<N>(numParametricSegments, rotation, fQuadQueue.fCount,
331 fQuadQueue.fResolveLevelPtrs);
332 fQuadQueue.fCount = 0;
333 }
334
flushCubics()335 template<int N> void flushCubics() {
336 SkASSERT(fCubicQueue.fCount > 0);
337 auto p0 = fCubicQueue.loadPoint<N>(0);
338 auto p1 = fCubicQueue.loadPoint<N>(1);
339 auto p2 = fCubicQueue.loadPoint<N>(2);
340 auto p3 = fCubicQueue.loadPoint<N>(3);
341 this->flushCubics<N>(fCubicQueue, p0, p1, p2, p3, fIsRoundJoin, 0);
342 fCubicQueue.fCount = 0;
343 }
344
flushChoppedCubics()345 template<int N> void flushChoppedCubics() {
346 SkASSERT(fChoppedCubicQueue.fCount > 0);
347 auto p0 = fChoppedCubicQueue.loadPoint<N>(0);
348 auto p1 = fChoppedCubicQueue.loadPoint<N>(1);
349 auto p2 = fChoppedCubicQueue.loadPoint<N>(2);
350 auto p3 = fChoppedCubicQueue.loadPoint<N>(3);
351 auto T = fChoppedCubicQueue.loadPointFromArray<N>(fCubicChopTs);
352
353 // Chop the cubic at its chopT and find the resolve level for each half.
354 auto ab = unchecked_mix(p0, p1, T);
355 auto bc = unchecked_mix(p1, p2, T);
356 auto cd = unchecked_mix(p2, p3, T);
357 auto abc = unchecked_mix(ab, bc, T);
358 auto bcd = unchecked_mix(bc, cd, T);
359 auto abcd = unchecked_mix(abc, bcd, T);
360 this->flushCubics<N>(fChoppedCubicQueue, p0, ab, abc, abcd, fIsRoundJoin, 0);
361 this->flushCubics<N>(fChoppedCubicQueue, abcd, bcd, cd, p3, false/*countRoundJoin*/, 1);
362
363 fChoppedCubicQueue.fCount = 0;
364 }
365
flushCubics(const SIMDQueue<4> & queue,vec<N * 2> p0,vec<N * 2> p1,vec<N * 2> p2,vec<N * 2> p3,bool countRoundJoin,int resultOffset) const366 template<int N> SK_ALWAYS_INLINE void flushCubics(const SIMDQueue<4>& queue, vec<N*2> p0,
367 vec<N*2> p1, vec<N*2> p2, vec<N*2> p3,
368 bool countRoundJoin, int resultOffset) const {
369 // Execute Wang's formula to determine how many parametric segments the curve needs to be
370 // divided into. (See GrWangsFormula::cubic().)
371 auto l0 = length_pow2(grvx::fast_madd<N*2>(-2, p1, p2) + p0);
372 auto l1 = length_pow2(grvx::fast_madd<N*2>(-2, p2, p3) + p1);
373 auto numParametricSegments = skvx::sqrt(fWangsTermCubic * skvx::sqrt(skvx::max(l0, l1)));
374
375 // Find the starting tangent (or zero if p0==p1==p2).
376 auto tan0 = p1 - p0;
377 tan0 = if_both_then_else((tan0 == 0), p2 - p0, tan0);
378
379 // Find the ending tangent (or zero if p1==p2==p3).
380 auto tan1 = p3 - p2;
381 tan1 = if_both_then_else((tan1 == 0), p3 - p1, tan1);
382
383 // Find the curve's rotation. Since it cannot inflect or rotate more than 180 degrees at
384 // this point, this is equal to the angle between the beginning and ending tangents.
385 auto rotation = grvx::approx_angle_between_vectors(tan0, tan1);
386 if (countRoundJoin) {
387 // Add rotation for the preceding round join.
388 auto lastControlPoint = queue.loadLastControlPoint<N>();
389 auto nextTan = if_both_then_else((tan0 == 0), tan1, tan0);
390 rotation += grvx::approx_angle_between_vectors(p0 - lastControlPoint, nextTan);
391 }
392
393 this->writeResolveLevels<N>(numParametricSegments, rotation, queue.fCount,
394 queue.fResolveLevelPtrs, resultOffset);
395 }
396
writeResolveLevels(vec<N> numParametricSegments,vec<N> rotation,int count,int8_t * const * resolveLevelPtrs,int offset=0) const397 template<int N> SK_ALWAYS_INLINE void writeResolveLevels(
398 vec<N> numParametricSegments, vec<N> rotation, int count,
399 int8_t* const* resolveLevelPtrs, int offset = 0) const {
400 auto numCombinedSegments = grvx::fast_madd<N>(
401 fTolerances.fNumRadialSegmentsPerRadian, rotation, numParametricSegments);
402
403 // Find ceil(log2(numCombinedSegments)) by twiddling the exponents. See sk_float_nextlog2().
404 auto bits = skvx::bit_pun<uvec<N>>(numCombinedSegments);
405 bits += (1u << 23) - 1u; // Increment the exponent for non-powers-of-2.
406 // This will make negative values, denorms, and negative exponents all < 0.
407 auto exp = (skvx::bit_pun<ivec<N>>(bits) >> 23) - 127;
408 auto level = skvx::pin<N,int>(exp, 0, kMaxResolveLevel);
409
410 switch (count) {
411 default: SkUNREACHABLE;
412 case 4: ++fResolveLevelCounts[resolveLevelPtrs[3][offset] = level[3]]; [[fallthrough]];
413 case 3: ++fResolveLevelCounts[resolveLevelPtrs[2][offset] = level[2]]; [[fallthrough]];
414 case 2: ++fResolveLevelCounts[resolveLevelPtrs[1][offset] = level[1]]; [[fallthrough]];
415 case 1: ++fResolveLevelCounts[resolveLevelPtrs[0][offset] = level[0]]; break;
416 }
417 }
418
419 SIMDQueue<2> fLineQueue;
420 SIMDQueue<3> fQuadQueue;
421 SIMDQueue<4> fCubicQueue;
422 SIMDQueue<4> fChoppedCubicQueue;
423 struct alignas(sizeof(float) * 8) {
424 float fCubicChopTs[8];
425 };
426
427 float fWangsTermQuadratic;
428 float fWangsTermCubic;
429
430 #endif
431 int* const fResolveLevelCounts;
432 std::array<float, 2> fMatrixMinMaxScales;
433 GrStrokeTolerances fTolerances;
434 int fResolveLevelForCircles;
435 bool fIsRoundJoin;
436 };
437
438 } // namespace
439
GrStrokeIndirectTessellator(ShaderFlags shaderFlags,const SkMatrix & viewMatrix,PathStrokeList * pathStrokeList,int totalCombinedVerbCnt,SkArenaAlloc * alloc)440 GrStrokeIndirectTessellator::GrStrokeIndirectTessellator(ShaderFlags shaderFlags,
441 const SkMatrix& viewMatrix,
442 PathStrokeList* pathStrokeList,
443 int totalCombinedVerbCnt,
444 SkArenaAlloc* alloc)
445 : GrStrokeTessellator(GrStrokeTessellateShader::Mode::kLog2Indirect, shaderFlags,
446 viewMatrix, pathStrokeList) {
447 // The maximum potential number of values we will need in fResolveLevels is:
448 //
449 // * 3 segments per verb (from two chops)
450 // * Plus 1 extra resolveLevel per verb that says how many chops it needs
451 // * Plus 2 final resolveLevels for square or round caps at the very end not initiated by a
452 // "kMoveTo".
453 int resolveLevelAllocCount = totalCombinedVerbCnt * (3 + 1) + 2;
454 fResolveLevels = alloc->makeArrayDefault<int8_t>(resolveLevelAllocCount);
455 int8_t* nextResolveLevel = fResolveLevels;
456
457 // The maximum potential number of chopT values we will need is 2 per verb.
458 int chopTAllocCount = totalCombinedVerbCnt * 2;
459 fChopTs = alloc->makeArrayDefault<float>(chopTAllocCount);
460 float* nextChopTs = fChopTs;
461
462 ResolveLevelCounter counter(viewMatrix, fResolveLevelCounts);
463
464 float lastStrokeWidth = -1;
465 SkPoint lastControlPoint = {0,0};
466 for (PathStrokeList* pathStroke = fPathStrokeList; pathStroke; pathStroke = pathStroke->fNext) {
467 const SkStrokeRec& stroke = pathStroke->fStroke;
468 SkASSERT(stroke.getWidth() >= 0); // Otherwise we can't initialize lastStrokeWidth=-1.
469 if (stroke.getWidth() != lastStrokeWidth ||
470 (stroke.getJoin() == SkPaint::kRound_Join) != counter.isRoundJoin()) {
471 counter.updateTolerances(stroke.getWidth(), (stroke.getJoin() == SkPaint::kRound_Join));
472 lastStrokeWidth = stroke.getWidth();
473 }
474 fMaxNumExtraEdgesInJoin = std::max(fMaxNumExtraEdgesInJoin,
475 GrStrokeTessellateShader::NumFixedEdgesInJoin(stroke.getJoin()));
476 // Iterate through each verb in the stroke, counting its resolveLevel(s).
477 GrStrokeIterator iter(pathStroke->fPath, &stroke, &viewMatrix);
478 while (iter.next()) {
479 using Verb = GrStrokeIterator::Verb;
480 Verb verb = iter.verb();
481 if (!GrStrokeIterator::IsVerbGeometric(verb)) {
482 // We don't need to handle non-geomtric verbs.
483 continue;
484 }
485 const SkPoint* pts = iter.pts();
486 if (counter.isRoundJoin()) {
487 // Round joins need a "lastControlPoint" so we can measure the angle of the previous
488 // join. This doesn't have to be the exact control point we will send the GPU after
489 // any chopping; we just need a direction.
490 const SkPoint* prevPts = iter.prevPts();
491 switch (iter.prevVerb()) {
492 case Verb::kCubic:
493 if (prevPts[2] != prevPts[3]) {
494 lastControlPoint = prevPts[2];
495 break;
496 }
497 [[fallthrough]];
498 case Verb::kQuad:
499 case Verb::kConic:
500 if (prevPts[1] != prevPts[2]) {
501 lastControlPoint = prevPts[1];
502 break;
503 }
504 [[fallthrough]];
505 case Verb::kLine:
506 lastControlPoint = prevPts[0];
507 break;
508 case Verb::kMoveWithinContour:
509 case Verb::kCircle:
510 // There is no previous stroke to join to. Set lastControlPoint equal to the
511 // current point, which makes the direction 0 and the number of radial
512 // segments in the join 0.
513 lastControlPoint = pts[0];
514 break;
515 case Verb::kContourFinished:
516 SkUNREACHABLE;
517 }
518 }
519 switch (verb) {
520 case Verb::kLine:
521 if (counter.countLine(pts, lastControlPoint, nextResolveLevel)) {
522 ++nextResolveLevel;
523 }
524 break;
525 case Verb::kConic:
526 // We use the same quadratic formula for conics, ignoring w. This is pretty
527 // close to what the actual number of subdivisions would have been.
528 [[fallthrough]];
529 case Verb::kQuad: {
530 if (GrPathUtils::conicHasCusp(pts)) {
531 // The curve has a cusp. Draw two lines and a circle instead of a quad.
532 int8_t cuspResolveLevel = counter.countCircles(1);
533 *nextResolveLevel++ = -cuspResolveLevel; // Negative signals a cusp.
534 if (counter.countLine(pts, lastControlPoint, nextResolveLevel)) {
535 ++nextResolveLevel;
536 }
537 ++fResolveLevelCounts[0]; // Second line instance.
538 } else {
539 counter.countQuad(pts, lastControlPoint, nextResolveLevel++);
540 }
541 break;
542 }
543 case Verb::kCubic: {
544 int8_t cuspResolveLevel = 0;
545 bool areCusps;
546 int numChops = GrPathUtils::findCubicConvex180Chops(pts, nextChopTs, &areCusps);
547 if (areCusps && numChops > 0) {
548 cuspResolveLevel = counter.countCircles(numChops);
549 }
550 if (numChops == 0) {
551 counter.countCubic(pts, lastControlPoint, nextResolveLevel);
552 } else if (numChops == 1) {
553 // A negative resolveLevel indicates how many chops the curve needs, and
554 // whether they are cusps.
555 static_assert(kMaxResolveLevel <= 0xf);
556 SkASSERT(cuspResolveLevel <= 0xf);
557 *nextResolveLevel++ = -((1 << 4) | cuspResolveLevel);
558 counter.countChoppedCubic(pts, nextChopTs[0], lastControlPoint,
559 nextResolveLevel);
560 } else {
561 SkASSERT(numChops == 2);
562 // A negative resolveLevel indicates how many chops the curve needs, and
563 // whether they are cusps.
564 static_assert(kMaxResolveLevel <= 0xf);
565 SkASSERT(cuspResolveLevel <= 0xf);
566 *nextResolveLevel++ = -((2 << 4) | cuspResolveLevel);
567 SkPoint pts_[10];
568 SkChopCubicAt(pts, pts_, nextChopTs, 2);
569 counter.countCubic(pts_, lastControlPoint, nextResolveLevel);
570 counter.countCubic(pts_ + 3, pts_[3], nextResolveLevel + 1);
571 counter.countCubic(pts_ + 6, pts_[6], nextResolveLevel + 2);
572 }
573 nextResolveLevel += numChops + 1;
574 nextChopTs += numChops;
575 break;
576 }
577 case Verb::kCircle:
578 // The iterator implements round caps as circles.
579 *nextResolveLevel++ = counter.countCircles(1);
580 break;
581 case Verb::kMoveWithinContour:
582 case Verb::kContourFinished:
583 // We should have continued early for non-geometric verbs.
584 SkUNREACHABLE;
585 break;
586 }
587 }
588 }
589 counter.flush();
590
591 for (int resolveLevelInstanceCount : fResolveLevelCounts) {
592 fTotalInstanceCount += resolveLevelInstanceCount;
593 if (resolveLevelInstanceCount) {
594 ++fChainedDrawIndirectCount;
595 }
596 }
597 fChainedInstanceCount = fTotalInstanceCount;
598
599 #ifdef SK_DEBUG
600 SkASSERT(nextResolveLevel <= fResolveLevels + resolveLevelAllocCount);
601 fResolveLevelArrayCount = nextResolveLevel - fResolveLevels;
602 SkASSERT(nextChopTs <= fChopTs + chopTAllocCount);
603 fChopTsArrayCount = nextChopTs - fChopTs;
604 fChopTsArrayCount = nextChopTs - fChopTs;
605 #endif
606 }
607
addToChain(GrStrokeIndirectTessellator * tessellator)608 void GrStrokeIndirectTessellator::addToChain(GrStrokeIndirectTessellator* tessellator) {
609 SkASSERT(tessellator->fShaderFlags == fShaderFlags);
610
611 fChainedInstanceCount += tessellator->fChainedInstanceCount;
612 tessellator->fChainedInstanceCount = 0;
613
614 fChainedDrawIndirectCount += tessellator->fChainedDrawIndirectCount;
615 tessellator->fChainedDrawIndirectCount = 0;
616
617 fMaxNumExtraEdgesInJoin = std::max(tessellator->fMaxNumExtraEdgesInJoin,
618 fMaxNumExtraEdgesInJoin);
619 tessellator->fMaxNumExtraEdgesInJoin = 0;
620
621 *fChainTail = tessellator;
622 fChainTail = tessellator->fChainTail;
623 tessellator->fChainTail = nullptr;
624 }
625
626 namespace {
627
num_edges_in_resolve_level(int resolveLevel)628 constexpr static int num_edges_in_resolve_level(int resolveLevel) {
629 // A "resolveLevel" means the stroke is composed of 2^resolveLevel line segments.
630 int numSegments = 1 << resolveLevel;
631 // There are edges both at the beginning and end of a stroke, so there is always one more edge
632 // than there are segments.
633 int numStrokeEdges = numSegments + 1;
634 return numStrokeEdges;
635 }
636
637 // Partitions the instance buffer into bins for each resolveLevel. Writes out indirect draw commands
638 // per bin. Provides methods to write strokes to their respective bins.
639 class BinningInstanceWriter {
640 public:
641 using ShaderFlags = GrStrokeTessellateShader::ShaderFlags;
642 using DynamicStroke = GrStrokeTessellateShader::DynamicStroke;
643 constexpr static int kNumBins = GrStrokeIndirectTessellator::kMaxResolveLevel + 1;
644
BinningInstanceWriter(GrDrawIndirectWriter * indirectWriter,GrVertexWriter * instanceWriter,ShaderFlags shaderFlags,size_t instanceStride,int baseInstance,int numExtraEdgesInJoin,const int resolveLevelCounts[kNumBins])645 BinningInstanceWriter(GrDrawIndirectWriter* indirectWriter, GrVertexWriter* instanceWriter,
646 ShaderFlags shaderFlags, size_t instanceStride, int baseInstance,
647 int numExtraEdgesInJoin, const int resolveLevelCounts[kNumBins])
648 : fShaderFlags(shaderFlags) {
649 SkASSERT(numExtraEdgesInJoin == 3 || numExtraEdgesInJoin == 4);
650 // Partition the instance buffer into bins and write out indirect draw commands per bin.
651 int runningInstanceCount = 0;
652 for (int i = 0; i < kNumBins; ++i) {
653 if (resolveLevelCounts[i]) {
654 int numEdges = numExtraEdgesInJoin + num_edges_in_resolve_level(i);
655 indirectWriter->write(resolveLevelCounts[i], baseInstance + runningInstanceCount,
656 numEdges * 2, 0);
657 fInstanceWriters[i] = instanceWriter->makeOffset(instanceStride *
658 runningInstanceCount);
659 fNumEdgesPerResolveLevel[i] = numEdges;
660 #ifdef SK_DEBUG
661 } else {
662 fInstanceWriters[i] = {nullptr};
663 }
664 if (i > 0) {
665 fEndWriters[i - 1] = instanceWriter->makeOffset(instanceStride *
666 runningInstanceCount);
667 #endif
668 }
669 runningInstanceCount += resolveLevelCounts[i];
670 }
671 SkDEBUGCODE(fEndWriters[kNumBins - 1] =
672 instanceWriter->makeOffset(instanceStride * runningInstanceCount));
673 *instanceWriter = instanceWriter->makeOffset(instanceStride * runningInstanceCount);
674 }
675
updateDynamicStroke(const SkStrokeRec & stroke)676 void updateDynamicStroke(const SkStrokeRec& stroke) {
677 SkASSERT(fShaderFlags & ShaderFlags::kDynamicStroke);
678 fDynamicStroke.set(stroke);
679 }
680
updateDynamicColor(const SkPMColor4f & color)681 void updateDynamicColor(const SkPMColor4f& color) {
682 SkASSERT(fShaderFlags & ShaderFlags::kDynamicColor);
683 bool wideColor = fShaderFlags & ShaderFlags::kWideColor;
684 SkASSERT(wideColor || color.fitsInBytes());
685 fDynamicColor.set(color, wideColor);
686 }
687
writeStroke(int8_t resolveLevel,const SkPoint pts[4],SkPoint prevControlPoint,bool isInternalChop=false)688 void writeStroke(int8_t resolveLevel, const SkPoint pts[4], SkPoint prevControlPoint,
689 bool isInternalChop = false) {
690 SkASSERT(0 <= resolveLevel && resolveLevel < kNumBins);
691 float numEdges = fNumEdgesPerResolveLevel[resolveLevel];
692 fInstanceWriters[resolveLevel].writeArray(pts, 4);
693 fInstanceWriters[resolveLevel].write(prevControlPoint,
694 // Negative numEdges will tell the GPU that this stroke
695 // instance follows a chop, and round joins from
696 // chopping always get exactly one segment.
697 (isInternalChop) ? -numEdges : +numEdges);
698 this->writeDynamicAttribs(resolveLevel);
699 }
700
701 // Writes out a 180-degree point stroke, which renders as a circle with a diameter equal to the
702 // stroke width. These should be drawn at at cusp points on curves and for round caps.
writeCircle(int8_t resolveLevel,SkPoint center)703 void writeCircle(int8_t resolveLevel, SkPoint center) {
704 SkASSERT(0 <= resolveLevel && resolveLevel < kNumBins);
705 // An empty stroke is a special case that denotes a circle, or 180-degree point stroke.
706 fInstanceWriters[resolveLevel].fill(center, 5);
707 // Mark numTotalEdges negative so the shader assigns the least possible number of edges to
708 // its (empty) preceding join.
709 fInstanceWriters[resolveLevel].write(-fNumEdgesPerResolveLevel[resolveLevel]);
710 this->writeDynamicAttribs(resolveLevel);
711 }
712
713 #ifdef SK_DEBUG
~BinningInstanceWriter()714 ~BinningInstanceWriter() {
715 for (int i = 0; i < kNumBins; ++i) {
716 if (fInstanceWriters[i]) {
717 SkASSERT(fInstanceWriters[i] == fEndWriters[i]);
718 }
719 }
720 }
721 #endif
722
723 private:
writeDynamicAttribs(int8_t resolveLevel)724 void writeDynamicAttribs(int8_t resolveLevel) {
725 if (fShaderFlags & ShaderFlags::kDynamicStroke) {
726 fInstanceWriters[resolveLevel].write(fDynamicStroke);
727 }
728 if (fShaderFlags & ShaderFlags::kDynamicColor) {
729 fInstanceWriters[resolveLevel].write(fDynamicColor);
730 }
731 }
732
733 const ShaderFlags fShaderFlags;
734 GrVertexWriter fInstanceWriters[kNumBins];
735 float fNumEdgesPerResolveLevel[kNumBins];
736 SkDEBUGCODE(GrVertexWriter fEndWriters[kNumBins];)
737
738 // Stateful values for the dynamic state (if any) that will get written out with each instance.
739 DynamicStroke fDynamicStroke;
740 GrVertexColor fDynamicColor;
741 };
742
743 } // namespace
744
prepare(GrMeshDrawOp::Target * target,int)745 void GrStrokeIndirectTessellator::prepare(GrMeshDrawOp::Target* target,
746 int /*totalCombinedVerbCnt*/) {
747 SkASSERT(fResolveLevels);
748 SkASSERT(!fDrawIndirectBuffer);
749 SkASSERT(!fInstanceBuffer);
750
751 if (!fChainedDrawIndirectCount) {
752 return;
753 }
754 SkASSERT(fChainedDrawIndirectCount > 0);
755 SkASSERT(fChainedInstanceCount > 0);
756
757 // Allocate indirect draw commands.
758 GrDrawIndirectWriter indirectWriter = target->makeDrawIndirectSpace(fChainedDrawIndirectCount,
759 &fDrawIndirectBuffer,
760 &fDrawIndirectOffset);
761 if (!indirectWriter.isValid()) {
762 SkASSERT(!fDrawIndirectBuffer);
763 return;
764 }
765 SkDEBUGCODE(auto endIndirectWriter = indirectWriter.makeOffset(fChainedDrawIndirectCount));
766
767 // We already know the instance count. Allocate an instance for each.
768 int baseInstance;
769 GrVertexWriter instanceWriter = {target->makeVertexSpace(fShader.instanceStride(),
770 fChainedInstanceCount,
771 &fInstanceBuffer, &baseInstance)};
772 if (!instanceWriter) {
773 SkASSERT(!fInstanceBuffer);
774 fDrawIndirectBuffer.reset();
775 return;
776 }
777 SkDEBUGCODE(auto endInstanceWriter = instanceWriter.makeOffset(fShader.instanceStride() *
778 fChainedInstanceCount);)
779
780 // Fill in the indirect-draw and instance buffers.
781 for (auto* tess = this; tess; tess = tess->fNextInChain) {
782 tess->writeBuffers(&indirectWriter, &instanceWriter, fShader.viewMatrix(),
783 fShader.instanceStride(), baseInstance, fMaxNumExtraEdgesInJoin);
784 baseInstance += tess->fTotalInstanceCount;
785 }
786
787 SkASSERT(indirectWriter == endIndirectWriter);
788 SkASSERT(instanceWriter == endInstanceWriter);
789 }
790
writeBuffers(GrDrawIndirectWriter * indirectWriter,GrVertexWriter * instanceWriter,const SkMatrix & viewMatrix,size_t instanceStride,int baseInstance,int numExtraEdgesInJoin)791 void GrStrokeIndirectTessellator::writeBuffers(GrDrawIndirectWriter* indirectWriter,
792 GrVertexWriter* instanceWriter,
793 const SkMatrix& viewMatrix,
794 size_t instanceStride, int baseInstance,
795 int numExtraEdgesInJoin) {
796 BinningInstanceWriter binningWriter(indirectWriter, instanceWriter, fShaderFlags,
797 instanceStride, baseInstance, numExtraEdgesInJoin,
798 fResolveLevelCounts);
799
800 SkPoint scratchBuffer[4 + 10];
801 SkPoint* scratch = scratchBuffer;
802
803 int8_t* nextResolveLevel = fResolveLevels;
804 float* nextChopTs = fChopTs;
805
806 SkPoint lastControlPoint = {0,0};
807 const SkPoint* firstCubic = nullptr;
808 int8_t firstResolveLevel = -1;
809 int8_t resolveLevel;
810
811 // Now write out each instance to its resolveLevel's designated location in the instance buffer.
812 for (PathStrokeList* pathStroke = fPathStrokeList; pathStroke; pathStroke = pathStroke->fNext) {
813 const SkStrokeRec& stroke = pathStroke->fStroke;
814 SkASSERT(stroke.getJoin() != SkPaint::kMiter_Join || numExtraEdgesInJoin == 4);
815 bool isRoundJoin = (stroke.getJoin() == SkPaint::kRound_Join);
816 if (fShaderFlags & ShaderFlags::kDynamicStroke) {
817 binningWriter.updateDynamicStroke(stroke);
818 }
819 if (fShaderFlags & ShaderFlags::kDynamicColor) {
820 binningWriter.updateDynamicColor(pathStroke->fColor);
821 }
822 GrStrokeIterator iter(pathStroke->fPath, &stroke, &viewMatrix);
823 bool hasLastControlPoint = false;
824 while (iter.next()) {
825 using Verb = GrStrokeIterator::Verb;
826 int numChops = 0;
827 const SkPoint* pts=iter.pts(), *pts_=pts;
828 Verb verb = iter.verb();
829 switch (verb) {
830 case Verb::kCircle:
831 binningWriter.writeCircle(*nextResolveLevel++, pts[0]);
832 [[fallthrough]];
833 case Verb::kMoveWithinContour:
834 // The next verb won't be joined to anything.
835 lastControlPoint = pts[0];
836 hasLastControlPoint = true;
837 continue;
838 case Verb::kContourFinished:
839 SkASSERT(hasLastControlPoint);
840 if (firstCubic) {
841 // Emit the initial cubic that we deferred at the beginning.
842 binningWriter.writeStroke(firstResolveLevel, firstCubic, lastControlPoint);
843 firstCubic = nullptr;
844 }
845 hasLastControlPoint = false;
846 // Restore "scratch" to the original scratchBuffer.
847 scratch = scratchBuffer;
848 continue;
849 case Verb::kLine:
850 resolveLevel = (isRoundJoin) ? *nextResolveLevel++ : 0;
851 scratch[0] = scratch[1] = pts[0];
852 scratch[2] = scratch[3] = pts[1];
853 pts_ = scratch;
854 break;
855 case Verb::kQuad:
856 resolveLevel = *nextResolveLevel++;
857 if (resolveLevel < 0) {
858 // The curve has a cusp. Draw two lines and a circle instead of a quad.
859 int8_t cuspResolveLevel = -resolveLevel;
860 float cuspT = SkFindQuadMidTangent(pts);
861 SkPoint cusp = SkEvalQuadAt(pts, cuspT);
862 numChops = 1;
863 scratch[0] = scratch[1] = pts[0];
864 scratch[2] = scratch[3] = scratch[4] = cusp;
865 scratch[5] = scratch[6] = pts[2];
866 binningWriter.writeCircle(cuspResolveLevel, cusp);
867 resolveLevel = (isRoundJoin) ? *nextResolveLevel++ : 0;
868 } else {
869 GrPathUtils::convertQuadToCubic(pts, scratch);
870 }
871 pts_ = scratch;
872 break;
873 case Verb::kConic:
874 resolveLevel = *nextResolveLevel++;
875 if (resolveLevel < 0) {
876 // The curve has a cusp. Draw two lines and a cusp instead of a conic.
877 int8_t cuspResolveLevel = -resolveLevel;
878 SkPoint cusp;
879 SkConic conic(pts, iter.w());
880 float cuspT = conic.findMidTangent();
881 conic.evalAt(cuspT, &cusp);
882 numChops = 1;
883 scratch[0] = scratch[1] = pts[0];
884 scratch[2] = scratch[3] = scratch[4] = cusp;
885 scratch[5] = scratch[6] = pts[2];
886 binningWriter.writeCircle(cuspResolveLevel, cusp);
887 resolveLevel = (isRoundJoin) ? *nextResolveLevel++ : 0;
888 } else {
889 GrPathShader::WriteConicPatch(pts, iter.w(), scratch);
890 }
891 pts_ = scratch;
892 break;
893 case Verb::kCubic:
894 resolveLevel = *nextResolveLevel++;
895 if (resolveLevel < 0) {
896 // A negative resolveLevel indicates how many chops the curve needs, and
897 // whether they are cusps.
898 numChops = -resolveLevel >> 4;
899 SkChopCubicAt(pts, scratch, nextChopTs, numChops);
900 nextChopTs += numChops;
901 pts_ = scratch;
902 // Are the chop points cusps?
903 if (int8_t cuspResolveLevel = (-resolveLevel & 0xf)) {
904 for (int i = 1; i <= numChops; ++i) {
905 binningWriter.writeCircle(cuspResolveLevel, pts_[i*3]);
906 }
907 }
908 resolveLevel = *nextResolveLevel++;
909 }
910 break;
911 }
912 for (int i = 0;;) {
913 if (!hasLastControlPoint) {
914 SkASSERT(!firstCubic);
915 // Defer the initial cubic until we know its previous control point.
916 firstCubic = pts_;
917 firstResolveLevel = resolveLevel;
918 // Increment the scratch pts in case that's where our first cubic is stored.
919 scratch += 4;
920 } else {
921 binningWriter.writeStroke(resolveLevel, pts_, lastControlPoint, (i != 0));
922 }
923 // Determine the last control point.
924 if (pts_[2] != pts_[3] && verb != Verb::kConic) { // Conics use pts_[3] for w.
925 lastControlPoint = pts_[2];
926 } else if (pts_[1] != pts_[2]) {
927 lastControlPoint = pts_[1];
928 } else if (pts_[0] != pts_[1]) {
929 lastControlPoint = pts_[0];
930 } else {
931 // This is very unusual, but all chops became degenerate. Don't update the
932 // lastControlPoint.
933 }
934 hasLastControlPoint = true;
935 if (i++ == numChops) {
936 break;
937 }
938 pts_ += 3;
939 // If a non-cubic got chopped, it means it was chopped into lines and a circle.
940 resolveLevel = (verb == Verb::kCubic) ? *nextResolveLevel++ : 0;
941 SkASSERT(verb == Verb::kQuad || verb == Verb::kConic || verb == Verb::kCubic);
942 }
943 }
944 }
945
946 SkASSERT(nextResolveLevel == fResolveLevels + fResolveLevelArrayCount);
947 SkASSERT(nextChopTs == fChopTs + fChopTsArrayCount);
948 }
949
draw(GrOpFlushState * flushState) const950 void GrStrokeIndirectTessellator::draw(GrOpFlushState* flushState) const {
951 if (!fDrawIndirectBuffer) {
952 return;
953 }
954
955 SkASSERT(fChainedDrawIndirectCount > 0);
956 SkASSERT(fChainedInstanceCount > 0);
957
958 flushState->bindBuffers(nullptr, fInstanceBuffer, nullptr);
959 flushState->drawIndirect(fDrawIndirectBuffer.get(), fDrawIndirectOffset,
960 fChainedDrawIndirectCount);
961 }
962