1 /*
2 * Copyright 2020 Google LLC.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/gpu/tessellate/shaders/GrStrokeTessellationShader.h"
9
10 #include "src/gpu/glsl/GrGLSLFragmentShaderBuilder.h"
11 #include "src/gpu/glsl/GrGLSLVarying.h"
12 #include "src/gpu/glsl/GrGLSLVertexGeoBuilder.h"
13 #include "src/gpu/tessellate/StrokeTessellator.h"
14
GrStrokeTessellationShader(const GrShaderCaps & shaderCaps,Mode mode,PatchAttribs attribs,const SkMatrix & viewMatrix,const SkStrokeRec & stroke,SkPMColor4f color,int8_t maxParametricSegments_log2)15 GrStrokeTessellationShader::GrStrokeTessellationShader(const GrShaderCaps& shaderCaps,
16 Mode mode,
17 PatchAttribs attribs,
18 const SkMatrix& viewMatrix,
19 const SkStrokeRec& stroke,
20 SkPMColor4f color,
21 int8_t maxParametricSegments_log2)
22 : GrTessellationShader(kTessellate_GrStrokeTessellationShader_ClassID,
23 (mode == Mode::kHardwareTessellation)
24 ? GrPrimitiveType::kPatches
25 : GrPrimitiveType::kTriangleStrip,
26 (mode == Mode::kHardwareTessellation) ? 1 : 0, viewMatrix, color)
27 , fMode(mode)
28 , fPatchAttribs(attribs)
29 , fStroke(stroke)
30 , fMaxParametricSegments_log2(maxParametricSegments_log2) {
31 // We should use explicit curve type when, and only when, there isn't infinity support.
32 // Otherwise the GPU can infer curve type based on infinity.
33 SkASSERT(shaderCaps.infinitySupport() != (attribs & PatchAttribs::kExplicitCurveType));
34 if (fMode == Mode::kHardwareTessellation) {
35 // Explicit curve type is not implemented for tessellation shaders.
36 SkASSERT(!(attribs & PatchAttribs::kExplicitCurveType));
37 }
38 if (fMode == Mode::kHardwareTessellation) {
39 // A join calculates its starting angle using prevCtrlPtAttr.
40 fAttribs.emplace_back("prevCtrlPtAttr", kFloat2_GrVertexAttribType, kFloat2_GrSLType);
41 // pts 0..3 define the stroke as a cubic bezier. If p3.y is infinity, then it's a conic
42 // with w=p3.x.
43 //
44 // If p0 == prevCtrlPtAttr, then no join is emitted.
45 //
46 // pts=[p0, p3, p3, p3] is a reserved pattern that means this patch is a join only,
47 // whose start and end tangents are (p0 - inputPrevCtrlPt) and (p3 - p0).
48 //
49 // pts=[p0, p0, p0, p3] is a reserved pattern that means this patch is a "bowtie", or
50 // double-sided round join, anchored on p0 and rotating from (p0 - prevCtrlPtAttr) to
51 // (p3 - p0).
52 fAttribs.emplace_back("pts01Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
53 fAttribs.emplace_back("pts23Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
54 } else {
55 // pts 0..3 define the stroke as a cubic bezier. If p3.y is infinity, then it's a conic
56 // with w=p3.x.
57 //
58 // An empty stroke (p0==p1==p2==p3) is a special case that denotes a circle, or
59 // 180-degree point stroke.
60 fAttribs.emplace_back("pts01Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
61 fAttribs.emplace_back("pts23Attr", kFloat4_GrVertexAttribType, kFloat4_GrSLType);
62 if (fMode == Mode::kLog2Indirect) {
63 // argsAttr.xy contains the lastControlPoint for setting up the join.
64 //
65 // "argsAttr.z=numTotalEdges" tells the shader the literal number of edges in the
66 // triangle strip being rendered (i.e., it should be vertexCount/2). If
67 // numTotalEdges is negative and the join type is "kRound", it also instructs the
68 // shader to only allocate one segment the preceding round join.
69 fAttribs.emplace_back("argsAttr", kFloat3_GrVertexAttribType, kFloat3_GrSLType);
70 } else {
71 SkASSERT(fMode == Mode::kFixedCount);
72 // argsAttr contains the lastControlPoint for setting up the join.
73 fAttribs.emplace_back("argsAttr", kFloat2_GrVertexAttribType, kFloat2_GrSLType);
74 }
75 }
76 if (fPatchAttribs & PatchAttribs::kStrokeParams) {
77 fAttribs.emplace_back("dynamicStrokeAttr", kFloat2_GrVertexAttribType,
78 kFloat2_GrSLType);
79 }
80 if (fPatchAttribs & PatchAttribs::kColor) {
81 fAttribs.emplace_back("dynamicColorAttr",
82 (fPatchAttribs & PatchAttribs::kWideColorIfEnabled)
83 ? kFloat4_GrVertexAttribType
84 : kUByte4_norm_GrVertexAttribType,
85 kHalf4_GrSLType);
86 }
87 if (fPatchAttribs & PatchAttribs::kExplicitCurveType) {
88 // A conic curve is written out with p3=[w,Infinity], but GPUs that don't support
89 // infinity can't detect this. On these platforms we write out an extra float with each
90 // patch that explicitly tells the shader what type of curve it is.
91 fAttribs.emplace_back("curveTypeAttr", kFloat_GrVertexAttribType, kFloat_GrSLType);
92 }
93 if (fMode == Mode::kHardwareTessellation) {
94 this->setVertexAttributes(fAttribs.data(), fAttribs.count());
95 SkASSERT(this->vertexStride() == sizeof(SkPoint) * 5 + PatchAttribsStride(fPatchAttribs));
96 } else {
97 this->setInstanceAttributes(fAttribs.data(), fAttribs.count());
98 SkASSERT(this->instanceStride() == sizeof(SkPoint) * 5 + PatchAttribsStride(fPatchAttribs));
99 if (!shaderCaps.vertexIDSupport()) {
100 constexpr static Attribute kVertexAttrib("edgeID", kFloat_GrVertexAttribType,
101 kFloat_GrSLType);
102 this->setVertexAttributes(&kVertexAttrib, 1);
103 }
104 }
105 SkASSERT(fAttribs.count() <= kMaxAttribCount);
106 }
107
// Shader source defining cosine_between_vectors(a, b).
//
// The emitted function returns dot(a,b) * inversesqrt(dot(a,a) * dot(b,b)), clamped to [-1, 1].
// When the product of the squared lengths is exactly zero it returns 1.0 instead of dividing.
const char* GrStrokeTessellationShader::Impl::kCosineBetweenVectorsFn = R"(
float cosine_between_vectors(float2 a, float2 b) {
// FIXME(crbug.com/800804,skbug.com/11268): This can overflow if we don't normalize exponents.
float ab_cosTheta = dot(a,b);
float ab_pow2 = dot(a,a) * dot(b,b);
return (ab_pow2 == 0.0) ? 1.0 : clamp(ab_cosTheta * inversesqrt(ab_pow2), -1.0, 1.0);
})";
115
// Shader source defining miter_extent(cosTheta, miterLimit).
//
// Extends the middle radius to either the miter point, or the bevel edge if we surpassed the miter
// limit and need to revert to a bevel join.
//
// With x = .5 * cosTheta + .5, the emitted function returns inversesqrt(x) when
// x * miterLimit^2 >= 1.0, and sqrt(x) otherwise.
const char* GrStrokeTessellationShader::Impl::kMiterExtentFn = R"(
float miter_extent(float cosTheta, float miterLimit) {
float x = fma(cosTheta, .5, .5);
return (x * miterLimit * miterLimit >= 1.0) ? inversesqrt(x) : sqrt(x);
})";
123
// Shader source defining num_radial_segments_per_radian(parametricPrecision, strokeRadius).
//
// Returns the number of radial segments required for each radian of rotation, in order for the
// curve to appear "smooth" as defined by the parametricPrecision.
//
// The emitted expression is .5 / acos(1 - 1/(parametricPrecision * strokeRadius)), where the
// argument to acos() is first raised to at least -1.0 by max().
const char* GrStrokeTessellationShader::Impl::kNumRadialSegmentsPerRadianFn = R"(
float num_radial_segments_per_radian(float parametricPrecision, float strokeRadius) {
return .5 / acos(max(1.0 - 1.0/(parametricPrecision * strokeRadius), -1.0));
})";
130
// Shader source defining three unchecked_mix() overloads, each computed as fma(b - a, T, a):
//   float  unchecked_mix(float  a, float  b, float  T)
//   float2 unchecked_mix(float2 a, float2 b, float  T)   (T is widened with float2(T))
//   float4 unchecked_mix(float4 a, float4 b, float4 T)
//
// Unlike mix(), this does not return b when t==1. But it otherwise seems to get better
// precision than "a*(1 - t) + b*t" for things like chopping cubics on exact cusp points.
// We override this result anyway when t==1 so it shouldn't be a problem.
const char* GrStrokeTessellationShader::Impl::kUncheckedMixFn = R"(
float unchecked_mix(float a, float b, float T) {
return fma(b - a, T, a);
}
float2 unchecked_mix(float2 a, float2 b, float T) {
return fma(b - a, float2(T), a);
}
float4 unchecked_mix(float4 a, float4 b, float4 T) {
return fma(b - a, T, a);
})";
144
// Appends to `code` the shader text that converts an edge ID into a stroke vertex, and records in
// `gpArgs` the names of the shader variables that hold the resulting position and local coords.
//
// The appended text does the following, in order:
//   1. For edges that are neither the first (combinedEdgeID == 0) nor the final edge, it finds the
//      stroke location and tangent for "combinedEdgeID". A bounded loop determines the highest
//      parametric edge on or before combinedEdgeID; the loop's iteration count is
//      shader.maxParametricSegments_log2(), substituted for the "%i" in the format string. The
//      remaining edges are radial, and the final T is the larger of the parametric and radial T.
//   2. The first and final edges use (p0, tan0) and (p3, tan1) directly.
//   3. strokeCoord is pushed outward along the normalized vector orthogonal to the tangent by
//      STROKE_RADIUS * strokeOutset.
//   4. Non-hairline strokes: devCoord = AFFINE_MATRIX * strokeCoord + TRANSLATE, and the local
//      coord variable is "strokeCoord".
//      Hairline strokes: devCoord = strokeCoord + TRANSLATE, and the local coord variable is
//      "localCoord" = inverse(AFFINE_MATRIX) * strokeCoord.
//
// `shaderCaps` is not referenced by this body.
void GrStrokeTessellationShader::Impl::emitTessellationCode(
        const GrStrokeTessellationShader& shader, SkString* code, GrGPArgs* gpArgs,
        const GrShaderCaps& shaderCaps) const {
    // The subclass is responsible to define the following symbols before calling this method:
    //
    // // Functions.
    // float2 unchecked_mix(float2, float2, float);
    // float unchecked_mix(float, float, float);
    //
    // // Values provided by either uniforms or attribs.
    // float2 p0, p1, p2, p3;
    // float w;
    // float STROKE_RADIUS;
    // float2x2 AFFINE_MATRIX;
    // float2 TRANSLATE;
    //
    // // Values calculated by the specific subclass.
    // float combinedEdgeID;
    // bool isFinalEdge;
    // float numParametricSegments;
    // float radsPerSegment;
    // float2 tan0;
    // float2 tan1;
    // float strokeOutset;
    //
    // Note: this is a printf-style format string; its only conversion is the "%i" loop bound.
    code->appendf(R"(
float2 tangent, strokeCoord;
if (combinedEdgeID != 0 && !isFinalEdge) {
// Compute the location and tangent direction of the stroke edge with the integral id
// "combinedEdgeID", where combinedEdgeID is the sorted-order index of parametric and radial
// edges. Start by finding the tangent function's power basis coefficients. These define a
// tangent direction (scaled by some uniform value) as:
// |T^2|
// Tangent_Direction(T) = dx,dy = |A 2B C| * |T |
// |. . .| |1 |
float2 A, B, C = p1 - p0;
float2 D = p3 - p0;
if (w >= 0.0) {
// P0..P2 represent a conic and P3==P2. The derivative of a conic has a cumbersome
// order-4 denominator. However, this isn't necessary if we are only interested in a
// vector in the same *direction* as a given tangent line. Since the denominator scales
// dx and dy uniformly, we can throw it out completely after evaluating the derivative
// with the standard quotient rule. This leaves us with a simpler quadratic function
// that we use to find a tangent.
C *= w;
B = .5*D - C;
A = (w - 1.0) * D;
p1 *= w;
} else {
float2 E = p2 - p1;
B = E - C;
A = fma(float2(-3), E, D);
}
// FIXME(crbug.com/800804,skbug.com/11268): Consider normalizing the exponents in A,B,C at
// this point in order to prevent fp32 overflow.

// Now find the coefficients that give a tangent direction from a parametric edge ID:
//
// |parametricEdgeID^2|
// Tangent_Direction(parametricEdgeID) = dx,dy = |A B_ C_| * |parametricEdgeID |
// |. . .| |1 |
//
float2 B_ = B * (numParametricSegments * 2.0);
float2 C_ = C * (numParametricSegments * numParametricSegments);

// Run a binary search to determine the highest parametric edge that is located on or before
// the combinedEdgeID. A combined ID is determined by the sum of complete parametric and
// radial segments behind it. i.e., find the highest parametric edge where:
//
// parametricEdgeID + floor(numRadialSegmentsAtParametricT) <= combinedEdgeID
//
float lastParametricEdgeID = 0.0;
float maxParametricEdgeID = min(numParametricSegments - 1.0, combinedEdgeID);
// FIXME(crbug.com/800804,skbug.com/11268): This normalize() can overflow.
float2 tan0norm = normalize(tan0);
float negAbsRadsPerSegment = -abs(radsPerSegment);
float maxRotation0 = (1.0 + combinedEdgeID) * abs(radsPerSegment);
for (int exp = %i - 1; exp >= 0; --exp) {
// Test the parametric edge at lastParametricEdgeID + 2^exp.
float testParametricID = lastParametricEdgeID + exp2(float(exp));
if (testParametricID <= maxParametricEdgeID) {
float2 testTan = fma(float2(testParametricID), A, B_);
testTan = fma(float2(testParametricID), testTan, C_);
float cosRotation = dot(normalize(testTan), tan0norm);
float maxRotation = fma(testParametricID, negAbsRadsPerSegment, maxRotation0);
maxRotation = min(maxRotation, PI);
// Is rotation <= maxRotation? (i.e., is the number of complete radial segments
// behind testT, + testParametricID <= combinedEdgeID?)
if (cosRotation >= cos(maxRotation)) {
// testParametricID is on or before the combinedEdgeID. Keep it!
lastParametricEdgeID = testParametricID;
}
}
}

// Find the T value of the parametric edge at lastParametricEdgeID.
float parametricT = lastParametricEdgeID / numParametricSegments;

// Now that we've identified the highest parametric edge on or before the
// combinedEdgeID, the highest radial edge is easy:
float lastRadialEdgeID = combinedEdgeID - lastParametricEdgeID;

// Find the angle of tan0, or the angle between tan0norm and the positive x axis.
float angle0 = acos(clamp(tan0norm.x, -1.0, 1.0));
angle0 = tan0norm.y >= 0.0 ? angle0 : -angle0;

// Find the tangent vector on the edge at lastRadialEdgeID.
float radialAngle = fma(lastRadialEdgeID, radsPerSegment, angle0);
tangent = float2(cos(radialAngle), sin(radialAngle));
float2 norm = float2(-tangent.y, tangent.x);

// Find the T value where the tangent is orthogonal to norm. This is a quadratic:
//
// dot(norm, Tangent_Direction(T)) == 0
//
// |T^2|
// norm * |A 2B C| * |T | == 0
// |. . .| |1 |
//
float a=dot(norm,A), b_over_2=dot(norm,B), c=dot(norm,C);
float discr_over_4 = max(b_over_2*b_over_2 - a*c, 0.0);
float q = sqrt(discr_over_4);
if (b_over_2 > 0.0) {
q = -q;
}
q -= b_over_2;

// Roots are q/a and c/q. Since each curve section does not inflect or rotate more than 180
// degrees, there can only be one tangent orthogonal to "norm" inside 0..1. Pick the root
// nearest .5.
float _5qa = -.5*q*a;
float2 root = (abs(fma(q,q,_5qa)) < abs(fma(a,c,_5qa))) ? float2(q,a) : float2(c,q);
float radialT = (root.t != 0.0) ? root.s / root.t : 0.0;
radialT = clamp(radialT, 0.0, 1.0);

if (lastRadialEdgeID == 0.0) {
// The root finder above can become unstable when lastRadialEdgeID == 0 (e.g., if
// there are roots at exatly 0 and 1 both). radialT should always == 0 in this case.
radialT = 0.0;
}

// Now that we've identified the T values of the last parametric and radial edges, our final
// T value for combinedEdgeID is whichever is larger.
float T = max(parametricT, radialT);

// Evaluate the cubic at T. Use De Casteljau's for its accuracy and stability.
float2 ab = unchecked_mix(p0, p1, T);
float2 bc = unchecked_mix(p1, p2, T);
float2 cd = unchecked_mix(p2, p3, T);
float2 abc = unchecked_mix(ab, bc, T);
float2 bcd = unchecked_mix(bc, cd, T);
float2 abcd = unchecked_mix(abc, bcd, T);

// Evaluate the conic weight at T.
float u = unchecked_mix(1.0, w, T);
float v = w + 1 - u; // == mix(w, 1, T)
float uv = unchecked_mix(u, v, T);

// If we went with T=parametricT, then update the tangent. Otherwise leave it at the radial
// tangent found previously. (In the event that parametricT == radialT, we keep the radial
// tangent.)
if (T != radialT) {
tangent = (w >= 0.0) ? bc*u - ab*v : bcd - abc;
}

strokeCoord = (w >= 0.0) ? abc/uv : abcd;
} else {
// Edges at the beginning and end of the strip use exact endpoints and tangents. This
// ensures crack-free seaming between instances.
tangent = (combinedEdgeID == 0) ? tan0 : tan1;
strokeCoord = (combinedEdgeID == 0) ? p0 : p3;
})", shader.maxParametricSegments_log2() /* Parametric/radial sort loop count. */);

    // Offset the point on the curve outward, perpendicular to the tangent, to reach the stroke's
    // edge.
    code->append(R"(
// FIXME(crbug.com/800804,skbug.com/11268): This normalize() can overflow.
float2 ortho = normalize(float2(tangent.y, -tangent.x));
strokeCoord += ortho * (STROKE_RADIUS * strokeOutset);)");

    if (!shader.stroke().isHairlineStyle()) {
        // Normal case. Do the transform after tessellation.
        code->append(R"(
float2 devCoord = AFFINE_MATRIX * strokeCoord + TRANSLATE;)");
        gpArgs->fPositionVar.set(kFloat2_GrSLType, "devCoord");
        gpArgs->fLocalCoordVar.set(kFloat2_GrSLType, "strokeCoord");
    } else {
        // Hairline case. The scale and skew already happened before tessellation, so only the
        // translate remains, and local coords are recovered with the inverse matrix.
        code->append(R"(
float2 devCoord = strokeCoord + TRANSLATE;
float2 localCoord = inverse(AFFINE_MATRIX) * strokeCoord;)");
        gpArgs->fPositionVar.set(kFloat2_GrSLType, "devCoord");
        gpArgs->fLocalCoordVar.set(kFloat2_GrSLType, "localCoord");
    }
}
338
emitFragmentCode(const GrStrokeTessellationShader & shader,const EmitArgs & args)339 void GrStrokeTessellationShader::Impl::emitFragmentCode(const GrStrokeTessellationShader& shader,
340 const EmitArgs& args) {
341 if (!shader.hasDynamicColor()) {
342 // The fragment shader just outputs a uniform color.
343 const char* colorUniformName;
344 fColorUniform = args.fUniformHandler->addUniform(nullptr, kFragment_GrShaderFlag,
345 kHalf4_GrSLType, "color",
346 &colorUniformName);
347 args.fFragBuilder->codeAppendf("half4 %s = %s;", args.fOutputColor, colorUniformName);
348 } else {
349 args.fFragBuilder->codeAppendf("half4 %s = %s;", args.fOutputColor,
350 fDynamicColorName.c_str());
351 }
352 args.fFragBuilder->codeAppendf("const half4 %s = half4(1);", args.fOutputCoverage);
353 }
354
setData(const GrGLSLProgramDataManager & pdman,const GrShaderCaps &,const GrGeometryProcessor & geomProc)355 void GrStrokeTessellationShader::Impl::setData(const GrGLSLProgramDataManager& pdman,
356 const GrShaderCaps&,
357 const GrGeometryProcessor& geomProc) {
358 const auto& shader = geomProc.cast<GrStrokeTessellationShader>();
359 const auto& stroke = shader.stroke();
360
361 if (!shader.hasDynamicStroke()) {
362 // Set up the tessellation control uniforms.
363 skgpu::StrokeTolerances tolerances;
364 if (!stroke.isHairlineStyle()) {
365 tolerances = skgpu::StrokeTolerances::MakeNonHairline(shader.viewMatrix().getMaxScale(),
366 stroke.getWidth());
367 } else {
368 // In the hairline case we transform prior to tessellation. Set up tolerances for an
369 // identity viewMatrix and a strokeWidth of 1.
370 tolerances = skgpu::StrokeTolerances::MakeNonHairline(1, 1);
371 }
372 float strokeRadius = (stroke.isHairlineStyle()) ? .5f : stroke.getWidth() * .5;
373 pdman.set4f(fTessControlArgsUniform,
374 tolerances.fParametricPrecision, // PARAMETRIC_PRECISION
375 tolerances.fNumRadialSegmentsPerRadian, // NUM_RADIAL_SEGMENTS_PER_RADIAN
376 skgpu::GetJoinType(stroke), // JOIN_TYPE
377 strokeRadius); // STROKE_RADIUS
378 } else {
379 SkASSERT(!stroke.isHairlineStyle());
380 float maxScale = shader.viewMatrix().getMaxScale();
381 pdman.set1f(fTessControlArgsUniform,
382 skgpu::StrokeTolerances::CalcParametricPrecision(maxScale));
383 }
384
385 if (shader.mode() == GrStrokeTessellationShader::Mode::kFixedCount) {
386 SkASSERT(shader.fixedCountNumTotalEdges() != 0);
387 pdman.set1f(fEdgeCountUniform, (float)shader.fixedCountNumTotalEdges());
388 }
389
390 // Set up the view matrix, if any.
391 const SkMatrix& m = shader.viewMatrix();
392 pdman.set2f(fTranslateUniform, m.getTranslateX(), m.getTranslateY());
393 pdman.set4f(fAffineMatrixUniform, m.getScaleX(), m.getSkewY(), m.getSkewX(),
394 m.getScaleY());
395
396 if (!shader.hasDynamicColor()) {
397 pdman.set4fv(fColorUniform, 1, shader.color().vec());
398 }
399 }
400
addToKey(const GrShaderCaps &,GrProcessorKeyBuilder * b) const401 void GrStrokeTessellationShader::addToKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const {
402 bool keyNeedsJoin = (fMode != Mode::kHardwareTessellation) &&
403 !(fPatchAttribs & PatchAttribs::kStrokeParams);
404 SkASSERT((int)fMode >> 2 == 0);
405 SkASSERT(fStroke.getJoin() >> 2 == 0);
406 // Attribs get worked into the key automatically during GrGeometryProcessor::getAttributeKey().
407 // When color is in a uniform, it's always wide. kWideColor doesn't need to be considered here.
408 uint32_t key = (uint32_t)(fPatchAttribs & ~PatchAttribs::kColor);
409 key = (key << 2) | (uint32_t)fMode;
410 key = (key << 2) | ((keyNeedsJoin) ? fStroke.getJoin() : 0);
411 key = (key << 1) | (uint32_t)fStroke.isHairlineStyle();
412 key = (key << 8) | fMaxParametricSegments_log2;
413 b->add32(key);
414 }
415
makeProgramImpl(const GrShaderCaps &) const416 std::unique_ptr<GrGeometryProcessor::ProgramImpl> GrStrokeTessellationShader::makeProgramImpl(
417 const GrShaderCaps&) const {
418 switch (fMode) {
419 case Mode::kHardwareTessellation:
420 return std::make_unique<HardwareImpl>();
421 case Mode::kLog2Indirect:
422 case Mode::kFixedCount:
423 return std::make_unique<InstancedImpl>();
424 }
425 SkUNREACHABLE;
426 }
427