• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Graphite-specific vertex shader code
2
// Pi as a float constant; e.g. used below as the upper clamp on a rotation angle during stroke
// tessellation.
3const float $PI = 3.141592653589793238;
4
5///////////////////////////////////////////////////////////////////////////////////////////////////
6// Support functions for tessellating path renderers
7
// Identifiers for the 'curveType' argument taken by the tessellation functions in this file. Each
// value mirrors the skgpu::tess constant named in its trailing comment.
8const float $kCubicCurveType = 0;            // skgpu::tess::kCubicCurveType
9const float $kConicCurveType = 1;            // skgpu::tess::kConicCurveType
10const float $kTriangularConicCurveType = 2;  // skgpu::tess::kTriangularConicCurveType
11
// Infers the curve type from the control-point encoding used by the tessellating path renderers:
// an infinite p23.z marks a triangular conic, an infinite p23.w marks a conic, and anything else
// is treated as a cubic. This only works on GPUs with infinity support; calling it on a platform
// that lacks infinity support may result in a shader compilation error.
$pure float curve_type_using_inf_support(float4 p23) {
    if (isinf(p23.z)) {
        return $kTriangularConicCurveType;
    } else if (isinf(p23.w)) {
        return $kConicCurveType;
    } else {
        return $kCubicCurveType;
    }
}
20
// Returns true for every curve type other than the integral cubic (i.e., for both conic types).
$pure bool $is_conic_curve(float curveType) {
    bool isCubic = (curveType == $kCubicCurveType);
    return !isCubic;
}
24
// Returns true only when the curve type is the triangular conic type.
$pure bool $is_triangular_conic_curve(float curveType) {
    bool isTriangle = ($kTriangularConicCurveType == curveType);
    return isTriangle;
}
28
29// Wang's formula gives the minimum number of evenly spaced (in the parametric sense) line segments
30// that a bezier curve must be chopped into in order to guarantee all lines stay within a distance
31// of "1/precision" pixels from the true curve. Its definition for a bezier curve of degree "n" is
32// as follows:
33//
34//     maxLength = max([length(p[i+2] - 2p[i+1] + p[i]) for (0 <= i <= n-2)])
35//     numParametricSegments = sqrt(maxLength * precision * n*(n - 1)/8)
36//
37// (Goldman, Ron. (2003). 5.6.3 Wang's Formula. "Pyramid Algorithms: A Dynamic Programming Approach
38// to Curves and Surfaces for Geometric Modeling". Morgan Kaufmann Publishers.)
39
// Constants for Wang's formula as applied to cubics (degree n = 3).
40const float $kDegree = 3;
41const float $kPrecision = 4; // Must match skgpu::tess::kPrecision
// n*(n - 1)/8 * precision: the factor that multiplies maxLength inside the sqrt of Wang's formula.
42const float $kLengthTerm     = ($kDegree * ($kDegree - 1) / 8.0) * $kPrecision;
// The square of $kLengthTerm, for use with a squared length.
43const float $kLengthTermPow2 = (($kDegree * $kDegree) * (($kDegree - 1) * ($kDegree - 1)) / 64.0) *
44                               ($kPrecision * $kPrecision);
45
// Computes the squared length of the larger of the two forward differences used by Wang's cubic
// formula. Each difference is transformed by 'matrix' before its length is measured.
$pure float $wangs_formula_max_fdiff_p2(float2 p0, float2 p1, float2 p2, float2 p3,
                                        float2x2 matrix) {
    // Forward differences: (p2 - 2*p1 + p0) and (p3 - 2*p2 + p1).
    float2 fdiff0 = fma(float2(-2), p1, p2) + p0;
    float2 fdiff1 = fma(float2(-2), p2, p3) + p1;

    float2 xformed0 = matrix * fdiff0;
    float2 xformed1 = matrix * fdiff1;

    float lenSq0 = dot(xformed0, xformed0);
    float lenSq1 = dot(xformed1, xformed1);
    return max(lenSq0, lenSq1);
}
53
// Evaluates Wang's formula for a cubic whose forward differences are transformed by 'matrix'.
// The result is rounded up to a whole number of parametric segments and is never less than one.
$pure float $wangs_formula_cubic(float2 p0, float2 p1, float2 p2, float2 p3,
                                 float2x2 matrix) {
    float maxFdiffSq = $wangs_formula_max_fdiff_p2(p0, p1, p2, p3, matrix);
    float maxFdiffLen = sqrt(maxFdiffSq);
    float segments = ceil(sqrt($kLengthTerm * maxFdiffLen));
    return max(segments, 1.0);
}
59
// Evaluates the log2 of Wang's formula for a cubic, rounded up. It works on the fourth power of
// the segment count ($kLengthTermPow2 times the squared forward difference), clamped to at least
// one, so no square roots are needed: log2(n) == log2(n^4) / 4.
$pure float $wangs_formula_cubic_log2(float2 p0, float2 p1, float2 p2, float2 p3,
                                      float2x2 matrix) {
    float maxFdiffSq = $wangs_formula_max_fdiff_p2(p0, p1, p2, p3, matrix);
    float segmentsPow4 = max($kLengthTermPow2 * maxFdiffSq, 1.0);
    return ceil(log2(segmentsPow4) * .25);
}
65
// Evaluates Wang's formula for the conic (p0, p1, p2) with weight 'w' and returns the value whose
// square root is the segment count (callers apply the sqrt or a halved log2).
$pure float $wangs_formula_conic_p2(float2 p0, float2 p1, float2 p2, float w) {
    // Recenter the control points so the center of their bounding box sits at the origin.
    float2 boundsMin = min(min(p0, p1), p2);
    float2 boundsMax = max(max(p0, p1), p2);
    float2 center = (boundsMin + boundsMax) * 0.5;
    float2 q0 = p0 - center;
    float2 q1 = p1 - center;
    float2 q2 = p2 - center;

    // Largest distance from the origin to any recentered control point.
    float maxLenSq = max(max(dot(q0,q0), dot(q1,q1)), dot(q2,q2));
    float maxLen = sqrt(maxLenSq);

    // Forward differences of the weighted points and of the weights.
    float2 dp = fma(float2(-2.0 * w), q1, q0) + q2;
    float dw = abs(fma(-2.0, w, 2.0));

    // Numerator and denominator for the parametric step size of the linearization. The epsilon
    // referenced from the cited paper is 1/precision.
    float rpMinus1 = max(0.0, fma(maxLen, $kPrecision, -1.0));
    float numer = length(dp) * $kPrecision + rpMinus1 * dw;
    float denom = 4 * min(w, 1.0);

    return numer / denom;
}
88
// Number of parametric segments for a conic according to Wang's formula, rounded up and never
// less than one.
$pure float $wangs_formula_conic(float2 p0, float2 p1, float2 p2, float w) {
    float segmentsSq = $wangs_formula_conic_p2(p0, p1, p2, w);
    float segments = ceil(sqrt(segmentsSq));
    return max(segments, 1.0);
}
93
// log2 of Wang's formula for a conic, rounded up. The squared segment count is clamped to at
// least one, and its log2 is halved in place of taking a square root.
$pure float $wangs_formula_conic_log2(float2 p0, float2 p1, float2 p2, float w) {
    float segmentsSq = max($wangs_formula_conic_p2(p0, p1, p2, w), 1.0);
    return ceil(log2(segmentsSq) * .5);
}
98
// Computes normalize(a - b) in a way that tolerates 'a' and/or 'b' having large coordinates.
// Returns the zero vector when a == b.
$pure float2 $robust_normalize_diff(float2 a, float2 b) {
    float2 delta = a - b;
    if (delta != float2(0.0)) {
        // Scale by the reciprocal of the largest component magnitude before normalizing, so the
        // vector handed to normalize() has components no larger than 1 in magnitude.
        float invMaxComponent = 1.0 / max(abs(delta.x), abs(delta.y));
        return normalize(invMaxComponent * delta);
    } else {
        return float2(0.0);
    }
}
110
// Cosine of the angle between 'a' and 'b', which are assumed to already be unit vectors. The
// result is always within [-1, 1].
$pure float $cosine_between_unit_vectors(float2 a, float2 b) {
    // For unit vectors the dot product is the cosine; the clamp keeps it within [-1, 1].
    float cosine = dot(a, b);
    return clamp(cosine, -1.0, 1.0);
}
118
// Scale factor that extends the middle radius of a join either out to the miter point, or back to
// the bevel edge when the miter limit is exceeded and the join must revert to a bevel.
$pure float $miter_extent(float cosTheta, float miterLimit) {
    // (1 + cosTheta) / 2
    float halfAngleTerm = fma(cosTheta, .5, .5);
    if (halfAngleTerm * miterLimit * miterLimit >= 1.0) {
        // Within the miter limit: extend to the miter point.
        return inversesqrt(halfAngleTerm);
    } else {
        // Miter limit exceeded: pull back to the bevel edge.
        return sqrt(halfAngleTerm);
    }
}
125
// Number of radial segments needed per radian of rotation for the curve to appear "smooth", given
// the approximate device-space stroke radius.
$pure float $num_radial_segments_per_radian(float approxDevStrokeRadius) {
    // The max() keeps the argument of acos() from dropping below -1.
    float cosArg = max(1.0 - (1.0 / $kPrecision) / approxDevStrokeRadius, -1.0);
    float radians = acos(cosArg);
    return .5 / radians;
}
131
// Linear interpolation written as a single fma. Unlike mix(), this is not guaranteed to return
// 'b' when T == 1, but it otherwise appears to be more precise than "a*(1 - T) + b*T" for things
// like chopping cubics on exact cusp points. Callers override the result when T == 1 anyway, so
// the difference should not matter.
$pure float $unchecked_mix(float a, float b, float T) {
    float delta = b - a;
    return fma(delta, T, a);
}
$pure float2 $unchecked_mix(float2 a, float2 b, float T) {
    float2 delta = b - a;
    return fma(delta, float2(T), a);
}
$pure float4 $unchecked_mix(float4 a, float4 b, float4 T) {
    float4 delta = b - a;
    return fma(delta, T, a);
}
144
// Computes the local-space position of one vertex of a filled curve. The curve is described by
// the packed control points p01/p23, 'resolveLevel' is the level it is tessellated to, and
// 'idxInResolveLevel' selects the vertex within that level. 'vectorXform' is the 2x2 transform
// applied when evaluating Wang's formula to find the maximum useful resolve level.
$pure float2 tessellate_filled_curve(float2x2 vectorXform,
                                     float resolveLevel, float idxInResolveLevel,
                                     float4 p01, float4 p23,
                                     float curveType) {
    if ($is_triangular_conic_curve(curveType)) {
        // The patch is an exact triangle, so one of its three corners is returned directly.
        if (resolveLevel != 0) {
            return p01.zw;
        } else if (idxInResolveLevel != 0) {
            return p23.xy;
        } else {
            return p01.xy;
        }
    }

    float2 p0 = p01.xy;
    float2 p1 = p01.zw;
    float2 p2 = p23.xy;
    float2 p3 = p23.zw;

    // A negative weight marks the instance as an integral cubic.
    float w = -1;
    float maxResolveLevel;
    if (!$is_conic_curve(curveType)) {
        // Integral cubic.
        maxResolveLevel = $wangs_formula_cubic_log2(p0, p1, p2, p3, vectorXform);
    } else {
        // Conics only have three points; the weight is stored in p3.x.
        w = p3.x;
        maxResolveLevel = $wangs_formula_conic_log2(vectorXform * p0,
                                                    vectorXform * p1,
                                                    vectorXform * p2, w);
        p1 *= w;  // Unproject p1.
        p3 = p2;  // Duplicate the endpoint so the code shared with cubics works unchanged.
    }

    if (resolveLevel > maxResolveLevel) {
        // The vertex sits at a higher resolve level than this curve needs. Demote it to the
        // maximum level, which yields a degenerate triangle.
        int levelShift = int(maxResolveLevel - resolveLevel);
        idxInResolveLevel = floor(ldexp(idxInResolveLevel, levelShift));
        resolveLevel = maxResolveLevel;
    }

    // Map the vertex to a discrete position in the fixed maximum resolve level (32 segments).
    // This is extra paranoia so that colocated points coming from different resolve levels
    // (e.g., T=3/4 and T=6/8) evaluate to exactly the same fp32 coordinates.
    float fixedVertexID = floor(.5 + ldexp(idxInResolveLevel, int(5 - resolveLevel)));
    bool isInteriorVertex = (0 < fixedVertexID && fixedVertexID < 32);
    if (!isInteriorVertex) {
        // The first and last vertices use the exact endpoints.
        return (fixedVertexID == 0) ? p0.xy : p3.xy;
    }

    float T = fixedVertexID * (1 / 32.0);

    // Evaluate at T with De Casteljau's algorithm for its accuracy and stability.
    float2 ab = mix(p0, p1, T);
    float2 bc = mix(p1, p2, T);
    float2 cd = mix(p2, p3, T);
    float2 abc = mix(ab, bc, T);
    float2 bcd = mix(bc, cd, T);
    float2 abcd = mix(abc, bcd, T);

    // Evaluate the conic weight at T.
    float u = mix(1.0, w, T);
    float v = w + 1 - u;  // == mix(w, 1, T)
    float uv = mix(u, v, T);

    return (w < 0) ? /*cubic*/ abcd : /*conic*/ abc/uv;
}
208
209// Device coords are in xy, local coords are in zw, since for now perspective isn't supported.
//
// Computes one vertex of the triangle strip for a stroked curve, including the join that precedes
// the curve.
//   edgeID           - abs(edgeID) is the edge's index within the strip (join edges come first,
//                      then the stroke's edges); sign(edgeID) picks the side the vertex is outset to.
//   maxEdges         - edge budget of the strip; join and stroke segment counts are clamped to it.
//   affineMatrix     - 2x2 scale/skew part of the local-to-device transform.
//   translate        - translation added after affineMatrix to reach device coords.
//   maxScale         - derived from affineMatrix; scales the stroke radius when choosing how many
//                      radial segments are needed per radian.
//   p01, p23         - packed control points p0..p3. For conics, p23.z holds the weight.
//   lastControlPoint - combined with p0 to form the incoming tangent of the join.
//   strokeParams     - x: stroke radius (0 selects the hairline path), y: join type
//                      (<0 round, ==0 bevel, >0 miter limit).
//   curveType        - curve type identifier; anything other than the cubic type is a conic.
210$pure float4 tessellate_stroked_curve(float edgeID, float maxEdges,
211                                      float2x2 affineMatrix,
212                                      float2 translate,
213                                      float maxScale /* derived from affineMatrix */,
214                                      float4 p01, float4 p23,
215                                      float2 lastControlPoint,
216                                      float2 strokeParams,
217                                      float curveType) {
218    float2 p0=p01.xy, p1=p01.zw, p2=p23.xy, p3=p23.zw;
219    float w = -1;  // w<0 means the curve is an integral cubic.
220    if ($is_conic_curve(curveType)) {
221        // Conics are 3 points, with the weight in p3.
222        w = p3.x;
223        p3 = p2;  // Setting p3 equal to p2 works for the remaining rotational logic.
224    }
225
226    // Call Wang's formula to determine parametric segments before transforming points for hairlines
227    // so that it is consistent with how the CPU tested the control points for chopping.
228    float numParametricSegments;
229    if (w < 0) {
230        if (p0 == p1 && p2 == p3) {
231            numParametricSegments = 1; // a line
232        } else {
233            numParametricSegments = $wangs_formula_cubic(p0, p1, p2, p3, affineMatrix);
234        }
235    } else {
236        numParametricSegments = $wangs_formula_conic(affineMatrix * p0,
237                                                     affineMatrix * p1,
238                                                     affineMatrix * p2, w);
239    }
240
241    // Matches skgpu::tess::StrokeParams
242    float strokeRadius = strokeParams.x;
243    float joinType = strokeParams.y; // <0 = round join, ==0 = bevel join, >0 encodes miter limit
244    bool isHairline = strokeParams.x == 0.0;
245    float numRadialSegmentsPerRadian;
246    if (isHairline) {
        // Hairlines are tessellated in device space (see below) with a fixed radius of 0.5.
247        numRadialSegmentsPerRadian = $num_radial_segments_per_radian(1.0);
248        strokeRadius = 0.5;
249    } else {
250        numRadialSegmentsPerRadian = $num_radial_segments_per_radian(maxScale * strokeParams.x);
251    }
252
253    if (isHairline) {
254        // Hairline case. Transform the points before tessellation. We can still hold off on the
255        // translate until the end; we just need to perform the scale and skew right now.
256        p0 = affineMatrix * p0;
257        p1 = affineMatrix * p1;
258        p2 = affineMatrix * p2;
259        p3 = affineMatrix * p3;
260        lastControlPoint = affineMatrix * lastControlPoint;
261    }
262
263    // Find the starting and ending tangents.
    // Coincident control points are skipped so that a tangent is only zero if all points coincide.
264    float2 tan0 = $robust_normalize_diff((p0 == p1) ? ((p1 == p2) ? p3 : p2) : p1, p0);
265    float2 tan1 = $robust_normalize_diff(p3, (p3 == p2) ? ((p2 == p1) ? p0 : p1) : p2);
266    if (tan0 == float2(0)) {
267        // The stroke is a point. This special case tells us to draw a stroke-width circle as a
268        // 180 degree point stroke instead.
269        tan0 = float2(1,0);
270        tan1 = float2(-1,0);
271    }
272
273    // Determine how many edges to give to the join. We emit the first and final edges
274    // of the join twice: once full width and once restricted to half width. This guarantees
275    // perfect seaming by matching the vertices from the join as well as from the strokes on
276    // either side.
277    float numEdgesInJoin;
278    if (joinType >= 0 /*Is the join not a round type?*/) {
279        // Bevel(0) and miter(+) joins get 1 and 2 segments respectively.
280        // +2 because we emit the beginning and ending edges twice (see above comments).
281        numEdgesInJoin = sign(joinType) + (1 + 2);
282    } else {
283        float2 prevTan = $robust_normalize_diff(p0, lastControlPoint);
284        float joinRads = acos($cosine_between_unit_vectors(prevTan, tan0));
285        float numRadialSegmentsInJoin = max(ceil(joinRads * numRadialSegmentsPerRadian), 1);
286        // +2 because we emit the beginning and ending edges twice (see above comment).
287        numEdgesInJoin = numRadialSegmentsInJoin + 2;
288        // The stroke section needs at least two edges. Don't assign more to the join than
289        // "maxEdges - 2". (This is only relevant when the ideal max edge count calculated
290        // on the CPU had to be limited to maxEdges in the draw call).
291        numEdgesInJoin = min(numEdgesInJoin, maxEdges - 2);
292    }
293
294    // Find which direction the curve turns.
295    // NOTE: Since the curve is not allowed to inflect, we can just check F'(.5) x F''(.5).
296    // NOTE: F'(.5) x F''(.5) has the same sign as (P2 - P0) x (P3 - P1)
297    float turn = cross_length_2d(p2 - p0, p3 - p1);
    // A negative combinedEdgeID means this edge belongs to the join rather than the stroke.
298    float combinedEdgeID = abs(edgeID) - numEdgesInJoin;
299    if (combinedEdgeID < 0) {
300        tan1 = tan0;
301        // Don't let tan0 become zero. The code as-is isn't built to handle that case. tan0=0
302        // means the join is disabled, and to disable it with the existing code we can leave
303        // tan0 equal to tan1.
304        if (lastControlPoint != p0) {
305            tan0 = $robust_normalize_diff(p0, lastControlPoint);
306        }
307        turn = cross_length_2d(tan0, tan1);
308    }
309
310    // Calculate the curve's starting angle and rotation.
311    float cosTheta = $cosine_between_unit_vectors(tan0, tan1);
312    float rotation = acos(cosTheta);
313    if (turn < 0) {
314        // Adjust sign of rotation to match the direction the curve turns.
315        rotation = -rotation;
316    }
317
318    float numRadialSegments;
319    float strokeOutset = sign(edgeID);
320    if (combinedEdgeID < 0) {
321        // We belong to the preceding join. The first and final edges get duplicated, so we only
322        // have "numEdgesInJoin - 2" segments.
323        numRadialSegments = numEdgesInJoin - 2;
324        numParametricSegments = 1;  // Joins don't have parametric segments.
325        p3 = p2 = p1 = p0;  // Colocate all points on the junction point.
326        // Shift combinedEdgeID to the range [-1, numRadialSegments]. This duplicates the first
327        // edge and lands one edge at the very end of the join. (The duplicated final edge will
328        // actually come from the section of our strip that belongs to the stroke.)
329        combinedEdgeID += numRadialSegments + 1;
330        if (combinedEdgeID < 0) {
331            combinedEdgeID = 0;
332        } else {
333            // We normally restrict the join on one side of the junction, but if the tangents are
334            // nearly equivalent this could theoretically result in bad seaming and/or cracks on the
335            // side we don't put it on. If the tangents are nearly equivalent then we leave the join
336            // double-sided.
337            const float sinEpsilon = 1e-2;  // ~= sin(180deg / 3000)
            // NOTE(review): sin(180deg / 3000) is ~1e-3, not 1e-2, so the trailing comment on
            // sinEpsilon and its value disagree -- confirm which one is intended.
338            bool tangentsNearlyParallel =
339                    (abs(turn) * inversesqrt(dot(tan0, tan0) * dot(tan1, tan1))) < sinEpsilon;
340            if (!tangentsNearlyParallel || dot(tan0, tan1) < 0) {
341                // There are two edges colocated at the beginning. Leave the first one double sided
342                // for seaming with the previous stroke. (The double sided edge at the end will
343                // actually come from the section of our strip that belongs to the stroke.)
344                strokeOutset = (turn < 0) ? min(strokeOutset, 0) : max(strokeOutset, 0);
345            }
346        }
347    } else {
348        // We belong to the stroke. Unless numRadialSegmentsPerRadian is incredibly high,
349        // clamping to maxCombinedSegments will be a no-op because the draw call was invoked with
350        // sufficient vertices to cover the worst case scenario of 180 degree rotation.
351        float maxCombinedSegments = maxEdges - numEdgesInJoin - 1;
352        numRadialSegments = max(ceil(abs(rotation) * numRadialSegmentsPerRadian), 1);
353        numRadialSegments = min(numRadialSegments, maxCombinedSegments);
354        numParametricSegments = min(numParametricSegments,
355                                    maxCombinedSegments - numRadialSegments + 1);
356    }
357
358    // Additional parameters for final tessellation evaluation.
359    float radsPerSegment = rotation / numRadialSegments;
360    float numCombinedSegments = numParametricSegments + numRadialSegments - 1;
361    bool isFinalEdge = (combinedEdgeID >= numCombinedSegments);
362    if (combinedEdgeID > numCombinedSegments) {
363        strokeOutset = 0;  // The strip has more edges than we need. Drop this one.
364    }
365    // Edge #2 extends to the miter point.
366    if (abs(edgeID) == 2 && joinType > 0/*Is the join a miter type?*/) {
367        strokeOutset *= $miter_extent(cosTheta, joinType/*miterLimit*/);
368    }
369
370    float2 tangent, strokeCoord;
371    if (combinedEdgeID != 0 && !isFinalEdge) {
372        // Compute the location and tangent direction of the stroke edge with the integral id
373        // "combinedEdgeID", where combinedEdgeID is the sorted-order index of parametric and radial
374        // edges. Start by finding the tangent function's power basis coefficients. These define a
375        // tangent direction (scaled by some uniform value) as:
376        //                                                 |T^2|
377        //     Tangent_Direction(T) = dx,dy = |A  2B  C| * |T  |
378        //                                    |.   .  .|   |1  |
379        float2 A, B, C = p1 - p0;
380        float2 D = p3 - p0;
381        if (w >= 0.0) {
382            // P0..P2 represent a conic and P3==P2. The derivative of a conic has a cumbersome
383            // order-4 denominator. However, this isn't necessary if we are only interested in a
384            // vector in the same *direction* as a given tangent line. Since the denominator scales
385            // dx and dy uniformly, we can throw it out completely after evaluating the derivative
386            // with the standard quotient rule. This leaves us with a simpler quadratic function
387            // that we use to find a tangent.
388            C *= w;
389            B = .5*D - C;
390            A = (w - 1.0) * D;
391            p1 *= w;
392        } else {
393            float2 E = p2 - p1;
394            B = E - C;
395            A = fma(float2(-3), E, D);
396        }
397        // FIXME(crbug.com/800804,skbug.com/11268): Consider normalizing the exponents in A,B,C at
398        // this point in order to prevent fp32 overflow.
399
400        // Now find the coefficients that give a tangent direction from a parametric edge ID:
401        //
402        //                                                                 |parametricEdgeID^2|
403        //     Tangent_Direction(parametricEdgeID) = dx,dy = |A  B_  C_| * |parametricEdgeID  |
404        //                                                   |.   .   .|   |1                 |
405        //
406        float2 B_ = B * (numParametricSegments * 2.0);
407        float2 C_ = C * (numParametricSegments * numParametricSegments);
408
409        // Run a binary search to determine the highest parametric edge that is located on or before
410        // the combinedEdgeID. A combined ID is determined by the sum of complete parametric and
411        // radial segments behind it. i.e., find the highest parametric edge where:
412        //
413        //    parametricEdgeID + floor(numRadialSegmentsAtParametricT) <= combinedEdgeID
414        //
415        float lastParametricEdgeID = 0.0;
416        float maxParametricEdgeID = min(numParametricSegments - 1.0, combinedEdgeID);
417        float negAbsRadsPerSegment = -abs(radsPerSegment);
418        float maxRotation0 = (1.0 + combinedEdgeID) * abs(radsPerSegment);
419        for (float exp = 32.0; exp >= 1.0; exp *= 0.5) {
420            // Test the parametric edge at lastParametricEdgeID + (32, 16, 8, 4, 2, 1).
421            float testParametricID = lastParametricEdgeID + exp;
422            if (testParametricID <= maxParametricEdgeID) {
423                float2 testTan = fma(float2(testParametricID), A, B_);
424                testTan = fma(float2(testParametricID), testTan, C_);
425                float cosRotation = dot(normalize(testTan), tan0);
426                float maxRotation = fma(testParametricID, negAbsRadsPerSegment, maxRotation0);
427                maxRotation = min(maxRotation, $PI);
428                // Is rotation <= maxRotation? (i.e., is the number of complete radial segments
429                // behind testT, + testParametricID <= combinedEdgeID?)
430                if (cosRotation >= cos(maxRotation)) {
431                    // testParametricID is on or before the combinedEdgeID. Keep it!
432                    lastParametricEdgeID = testParametricID;
433                }
434            }
435        }
436
437        // Find the T value of the parametric edge at lastParametricEdgeID.
438        float parametricT = lastParametricEdgeID / numParametricSegments;
439
440        // Now that we've identified the highest parametric edge on or before the
441        // combinedEdgeID, the highest radial edge is easy:
442        float lastRadialEdgeID = combinedEdgeID - lastParametricEdgeID;
443
444        // Find the angle of tan0, i.e. the angle between tan0 and the positive x axis.
445        float angle0 = acos(clamp(tan0.x, -1.0, 1.0));
446        angle0 = tan0.y >= 0.0 ? angle0 : -angle0;
447
448        // Find the tangent vector on the edge at lastRadialEdgeID. By construction it is already
449        // normalized.
450        float radialAngle = fma(lastRadialEdgeID, radsPerSegment, angle0);
451        tangent = float2(cos(radialAngle), sin(radialAngle));
452        float2 norm = float2(-tangent.y, tangent.x);
453
454        // Find the T value where the tangent is orthogonal to norm. This is a quadratic:
455        //
456        //     dot(norm, Tangent_Direction(T)) == 0
457        //
458        //                         |T^2|
459        //     norm * |A  2B  C| * |T  | == 0
460        //            |.   .  .|   |1  |
461        //
462        float a=dot(norm,A), b_over_2=dot(norm,B), c=dot(norm,C);
463        float discr_over_4 = max(b_over_2*b_over_2 - a*c, 0.0);
464        float q = sqrt(discr_over_4);
        // Give the sqrt term the opposite sign of b_over_2 so the subtraction below does not cancel.
465        if (b_over_2 > 0.0) {
466            q = -q;
467        }
468        q -= b_over_2;
469
470        // Roots are q/a and c/q. Since each curve section does not inflect or rotate more than 180
471        // degrees, there can only be one tangent orthogonal to "norm" inside 0..1. Pick the root
472        // nearest .5.
473        float _5qa = -.5*q*a;
474        float2 root = (abs(fma(q,q,_5qa)) < abs(fma(a,c,_5qa))) ? float2(q,a) : float2(c,q);
475
476        // The root finder above can become unstable when lastRadialEdgeID == 0 (e.g., if there are
477        // roots at exactly 0 and 1 both). radialT should always equal 0 in this case.
478        float radialT = (lastRadialEdgeID != 0.0 && root.t != 0.0)
479                            ? saturate(root.s / root.t)
480                            : 0.0;
481
482        // Now that we've identified the T values of the last parametric and radial edges, our final
483        // T value for combinedEdgeID is whichever is larger.
484        float T = max(parametricT, radialT);
485
486        // Evaluate the cubic at T. Use De Casteljau's for its accuracy and stability.
487        float2 ab = $unchecked_mix(p0, p1, T);
488        float2 bc = $unchecked_mix(p1, p2, T);
489        float2 cd = $unchecked_mix(p2, p3, T);
490        float2 abc = $unchecked_mix(ab, bc, T);
491        float2 bcd = $unchecked_mix(bc, cd, T);
492        float2 abcd = $unchecked_mix(abc, bcd, T);
493
494        // Evaluate the conic weight at T.
495        float u = $unchecked_mix(1.0, w, T);
496        float v = w + 1 - u;  // == mix(w, 1, T)
497        float uv = $unchecked_mix(u, v, T);
498
499        // If we went with T=parametricT, then update the tangent. Otherwise leave it at the radial
500        // tangent found previously. (In the event that parametricT == radialT, we keep the radial
501        // tangent.)
502        if (T != radialT) {
503            // We must re-normalize here because the tangent is determined by the curve coefficients
504            tangent = w >= 0.0 ? $robust_normalize_diff(bc*u, ab*v)
505                               : $robust_normalize_diff(bcd, abc);
506        }
507
508        strokeCoord = (w >= 0.0) ? abc/uv : abcd;
509    } else {
510        // Edges at the beginning and end of the strip use exact endpoints and tangents. This
511        // ensures crack-free seaming between instances.
512        tangent = (combinedEdgeID == 0) ? tan0 : tan1;
513        strokeCoord = (combinedEdgeID == 0) ? p0 : p3;
514    }
515
516    // At this point 'tangent' is normalized, so the orthogonal vector is also normalized.
517    float2 ortho = float2(tangent.y, -tangent.x);
518    strokeCoord += ortho * (strokeRadius * strokeOutset);
519
520    if (isHairline) {
521        // Hairline case. The scale and skew already happened before tessellation.
522        // TODO: There's probably a more efficient way to tessellate the hairline that lets us
523        // avoid inverting the affine matrix to get back to local coords, but it's just a 2x2 so
524        // this works for now.
525        return float4(strokeCoord + translate, inverse(affineMatrix) * strokeCoord);
526    } else {
527        // Normal case. Do the transform after tessellation.
528        return float4(affineMatrix * strokeCoord + translate, strokeCoord);
529    }
530}
531
532float4 analytic_rrect_vertex_fn(// Vertex Attributes
533                                float2 position,
534                                float2 normal,
535                                float normalScale,
536                                float centerWeight,
537                                // Instance Attributes
538                                float4 xRadiiOrFlags,
539                                float4 radiiOrQuadXs,
540                                float4 ltrbOrQuadYs,
541                                float4 center,
542                                float depth,
543                                float3x3 localToDevice,
544                                // Varyings
545                                out float4 jacobian,
546                                out float4 edgeDistances,
547                                out float4 xRadii,
548                                out float4 yRadii,
549                                out float2 strokeParams,
550                                out float2 perPixelControl,
551                                // Render Step
552                                out float2 stepLocalCoords) {
553    const uint kCornerVertexCount = 9; // KEEP IN SYNC WITH C++'s
554                                       // AnalyticRRectRenderStep::kCornerVertexCount
555    const float kMiterScale = 1.0;
556    const float kBevelScale = 0.0;
557    const float kRoundScale = 0.41421356237; // sqrt(2)-1
558
559    const float kEpsilon = 0.00024; // SK_ScalarNearlyZero
560
561    // Default to miter'ed vertex positioning. Corners with sufficiently large corner radii, or
562    // bevel'ed strokes will adjust vertex placement on a per corner basis. This will not affect
563    // the final coverage calculations in the fragment shader.
564    float joinScale = kMiterScale;
565
566    // Unpack instance-level state that determines the vertex placement and style of shape.
567    bool bidirectionalCoverage = center.z <= 0.0;
568    bool deviceSpaceDistances = false;
569    float4 xs, ys; // ordered TL, TR, BR, BL
570    float4 edgeAA = float4(1.0); // ordered L,T,R,B. 1 = AA, 0 = no AA
571    bool strokedLine = false;
572    if (xRadiiOrFlags.x < -1.0) {
573        // Stroked [round] rect or line
574        // If y > 0, unpack the line end points, otherwise unpack the rect edges
575        strokedLine = xRadiiOrFlags.y > 0.0;
576        xs = strokedLine ? ltrbOrQuadYs.LLRR : ltrbOrQuadYs.LRRL;
577        ys = ltrbOrQuadYs.TTBB;
578
579        if (xRadiiOrFlags.y < 0.0) {
580            // A hairline [r]rect so the X radii are encoded as negative values in this field,
581            // and Y radii are stored directly in the subsequent float4.
582            xRadii = -xRadiiOrFlags - 2.0;
583            yRadii = radiiOrQuadXs;
584
585            // All hairlines use miter joins (join style > 0)
586            strokeParams = float2(0.0, 1.0);
587        } else {
588            xRadii = radiiOrQuadXs;
589            yRadii = xRadii; // regular strokes are circular
590            strokeParams = xRadiiOrFlags.zw;
591
592            // `sign(strokeParams.y)` evaluates to kMiterScale (1.0) when the
593            // input is positive, and kBevelScale (0.0) when it is zero.
594            // kRoundScale uses the stroke radius to round rectangular corners.
595            joinScale = (strokeParams.y < 0.0) ? kRoundScale
596                                               : sign(strokeParams.y);
597        }
598    } else if (any(greaterThan(xRadiiOrFlags, float4(0.0)))) {
599        // Filled round rect
600        xs = ltrbOrQuadYs.LRRL;
601        ys = ltrbOrQuadYs.TTBB;
602
603        xRadii = xRadiiOrFlags;
604        yRadii = radiiOrQuadXs;
605
606        strokeParams = float2(0.0, -1.0); // A negative join style is "round"
607    } else {
608        // Per-edge quadrilateral, so we have to calculate the corner's basis from the
609        // quad's edges.
610        xs = radiiOrQuadXs;
611        ys = ltrbOrQuadYs;
612        edgeAA = -xRadiiOrFlags; // AA flags needed to be < 0 on upload, so flip the sign.
613
614        xRadii = float4(0.0);
615        yRadii = float4(0.0);
616
617        strokeParams = float2(0.0, 1.0); // Will be ignored, but set to a "miter"
618        deviceSpaceDistances = true;
619    }
620
621    // Adjust state on a per-corner basis
622    uint cornerID = uint(sk_VertexID) / kCornerVertexCount;
623    float2 cornerRadii = float2(xRadii[cornerID], yRadii[cornerID]);
624    if (cornerID % 2 != 0) {
625        // Corner radii are uploaded in the local coordinate frame, but vertex placement happens
626        // in a consistent winding before transforming to final local coords, so swap the
627        // radii for odd corners.
628        cornerRadii = cornerRadii.yx;
629    }
630
631    float2 cornerAspectRatio = float2(1.0);
632    if (all(greaterThan(cornerRadii, float2(0.0)))) {
633        // Position vertices for an elliptical corner; overriding any previous join style since
634        // that only applies when radii are 0.
635        joinScale = kRoundScale;
636        cornerAspectRatio = cornerRadii.yx;
637    }
638
639    // Calculate the local edge vectors, ordered L, T, R, B starting from the bottom left point.
640    // For quadrilaterals these are not necessarily axis-aligned, but in all cases they orient
641    // the +X/+Y normalized vertex template for each corner.
642    float4 dx = xs - xs.wxyz;
643    float4 dy = ys - ys.wxyz;
644    // This is a specialized application of `robust_normalized_diff` for a quad, with an extra
645    // max against 1.0 to not scale small edges. This is to avoid overflows for extremely large
646    // coordinates when squaring dx or dy.
647    float4 invMag = 1.0 / max(abs(dx), max(abs(dy), float4(1.0)));
648    dx *= invMag;
649    dy *= invMag;
650    float4 edgeSquaredLen = dx*dx + dy*dy;
651
652    float4 edgeMask = sign(edgeSquaredLen); // 0 for zero-length edge, 1 for non-zero edge.
653    float4 edgeBias = float4(0.0); // adjustment to edge distance for butt cap correction
654    float2 strokeRadius = float2(strokeParams.x);
655    if (any(equal(edgeMask, float4(0.0)))) {
656        // Must clean up (dx,dy) depending on the empty edge configuration
657        if (all(equal(edgeMask, float4(0.0)))) {
658            // A point so use the canonical basis
659            dx = float4( 0.0, 1.0, 0.0, -1.0);
660            dy = float4(-1.0, 0.0, 1.0,  0.0);
661            edgeSquaredLen = float4(1.0);
662        } else {
663            // Triangles (3 non-zero edges) copy the adjacent edge. Otherwise it's a line so
664            // replace empty edges with the left-hand normal vector of the adjacent edge.
665            bool triangle = (edgeMask[0] + edgeMask[1] + edgeMask[2] + edgeMask[3]) > 2.5;
666            float4 edgeX = triangle ? dx.yzwx :  dy.yzwx;
667            float4 edgeY = triangle ? dy.yzwx : -dx.yzwx;
668
669            dx = mix(edgeX, dx, edgeMask);
670            dy = mix(edgeY, dy, edgeMask);
671            edgeSquaredLen = mix(edgeSquaredLen.yzwx, edgeSquaredLen, edgeMask);
672            edgeAA = mix(edgeAA.yzwx, edgeAA, edgeMask);
673
674            if (!triangle && joinScale == kBevelScale) {
675                // Don't outset by stroke radius for butt caps on the zero-length edge, but
676                // adjust edgeBias and strokeParams to calculate an AA miter'ed shape with the
677                // non-uniform stroke outset.
678                strokeRadius *= float2(edgeMask[cornerID], edgeMask.yzwx[cornerID]);
679                edgeBias = (edgeMask - 1.0) * strokeParams.x;
680                strokeParams.y = 1.0;
681                joinScale = kMiterScale;
682            }
683        }
684    }
685
686    float4 inverseEdgeLen = inversesqrt(edgeSquaredLen);
687    dx *= inverseEdgeLen;
688    dy *= inverseEdgeLen;
689
690    // Calculate local coordinate for the vertex (relative to xAxis and yAxis at first).
691    float2 xAxis = -float2(dx.yzwx[cornerID], dy.yzwx[cornerID]);
692    float2 yAxis =  float2(dx.xyzw[cornerID], dy.xyzw[cornerID]);
693    float2 localPos;
694    bool snapToCenter = false;
695    if (normalScale < 0.0) {
696        // Vertex is inset from the base shape, so we scale by (cornerRadii - strokeRadius)
697        // and have to check for the possibility of an inner miter. It is always inset by an
698        // additional conservative AA amount.
699        if (center.w < 0.0 || centerWeight * center.z != 0.0) {
700            snapToCenter = true;
701        } else {
702            float localAARadius = center.w;
703            float2 insetRadii =
704                    cornerRadii + (bidirectionalCoverage ? -strokeRadius : strokeRadius);
705            if (joinScale == kMiterScale ||
706                any(lessThanEqual(insetRadii, float2(localAARadius)))) {
707                // Miter the inset position
708                localPos = (insetRadii - localAARadius);
709            } else {
710                localPos = insetRadii*position - localAARadius*normal;
711            }
712        }
713    } else {
714        // Vertex is outset from the base shape (and possibly with an additional AA outset later
715        // in device space).
716        localPos = (cornerRadii + strokeRadius) * (position + joinScale*position.yx);
717    }
718
719    if (snapToCenter) {
720        // Center is already relative to true local coords, not the corner basis.
721        localPos = center.xy;
722    } else {
723        // Transform from corner basis to true local coords.
724        localPos -= cornerRadii;
725        localPos = float2(xs[cornerID], ys[cornerID]) + xAxis*localPos.x + yAxis*localPos.y;
726    }
727
728    // Calculate edge distances and device space coordinate for the vertex
729    edgeDistances = dy*(xs - localPos.x) - dx*(ys - localPos.y) + edgeBias;
730
731    // NOTE: This 3x3 inverse is different than just taking the 1st two columns of the 4x4
732    // inverse of the original SkM44 local-to-device matrix. We could calculate the 3x3 inverse
733    // and upload it, but it does not seem to be a bottleneck and saves on bandwidth to
734    // calculate it here instead.
735    float3x3 deviceToLocal = inverse(localToDevice);
736    float3 devPos = localToDevice * localPos.xy1;
737    jacobian = float4(deviceToLocal[0].xy - deviceToLocal[0].z*localPos,
738                      deviceToLocal[1].xy - deviceToLocal[1].z*localPos);
739
740    if (deviceSpaceDistances) {
741        // Apply the Jacobian in the vertex shader so any quadrilateral normals do not have to
742        // be passed to the fragment shader. However, it's important to use the Jacobian at a
743        // vertex on the edge, not the current vertex's Jacobian.
744        float4 gx = -dy*(deviceToLocal[0].x - deviceToLocal[0].z*xs) +
745                     dx*(deviceToLocal[0].y - deviceToLocal[0].z*ys);
746        float4 gy = -dy*(deviceToLocal[1].x - deviceToLocal[1].z*xs) +
747                     dx*(deviceToLocal[1].y - deviceToLocal[1].z*ys);
748        // NOTE: The gradient is missing a W term so edgeDistances must still be multiplied by
749        // 1/w in the fragment shader. The same goes for the encoded coverage scale.
750        edgeDistances *= inversesqrt(gx*gx + gy*gy);
751
752        // Bias non-AA edge distances by device W so its coverage contribution is >= 1.0
753        edgeDistances += (1 - edgeAA)*abs(devPos.z);
754
755        // Mixed edge AA shapes do not use subpixel scale+bias for coverage, since they tile
756        // to a large shape of unknown--but likely not subpixel--size. Triangles and quads do
757        // not use subpixel coverage since the scale+bias is not constant over the shape, but
758        // we can't evaluate per-fragment since we aren't passing down their arbitrary normals.
759        bool subpixelCoverage = edgeAA == float4(1.0) &&
760                                dot(abs(dx*dx.yzwx + dy*dy.yzwx), float4(1.0)) < kEpsilon;
761        if (subpixelCoverage) {
762            // Reconstructs the actual device-space width and height for all rectangle vertices.
763            float2 dim = edgeDistances.xy + edgeDistances.zw;
764            perPixelControl.y = 1.0 + min(min(dim.x, dim.y), abs(devPos.z));
765        } else {
766            perPixelControl.y = 1.0 + abs(devPos.z); // standard 1px width pre W division.
767        }
768    }
769
770    // Only outset for a vertex that is in front of the w=0 plane to avoid dealing with outset
771    // triangles rasterizing differently from the main triangles as w crosses 0.
772    if (normalScale > 0.0 && devPos.z > 0.0) {
773        // Note that when there's no perspective, the jacobian is equivalent to the normal
774        // matrix (inverse transpose), but produces correct results when there's perspective
775        // because it accounts for the position's influence on a line's projected direction.
776        float2x2 J = float2x2(jacobian);
777
778        float2 edgeAANormal = float2(edgeAA[cornerID], edgeAA.yzwx[cornerID]) * normal;
779        float2 nx = cornerAspectRatio.x * edgeAANormal.x * perp(-yAxis) * J;
780        float2 ny = cornerAspectRatio.y * edgeAANormal.y * perp( xAxis) * J;
781
782        bool isMidVertex = all(notEqual(edgeAANormal, float2(0)));
783        if (joinScale == kMiterScale && isMidVertex) {
784            // Produce a bisecting vector in device space.
785            nx = normalize(nx);
786            ny = normalize(ny);
787            if (dot(nx, ny) < -0.8) {
788                // Normals are in nearly opposite directions, so adjust to avoid float error.
789                float s = sign(cross_length_2d(nx, ny));
790                nx =  s*perp(nx);
791                ny = -s*perp(ny);
792            }
793        }
794        // Adding the normal components together directly results in what we'd have
795        // calculated if we'd just transformed 'normal' in one go, assuming they weren't
796        // normalized in the if-block above. If they were normalized, the sum equals the
797        // bisector between the original nx and ny.
798        //
799        // We multiply by W so that after perspective division the new point is offset by the
800        // now-unit normal.
801        // NOTE: (nx + ny) can become the zero vector if the device outset is for an edge
802        // marked as non-AA. In this case normalize() could produce the zero vector or NaN.
803        // Until a counter-example is found, GPUs seem to discard triangles with NaN vertices,
804        // which has the same effect as outsetting by the zero vector with this mesh, so we
805        // don't bother guarding the normalize() (yet).
806        devPos.xy += devPos.z * normalize(nx + ny);
807
808        // By construction these points are 1px away from the outer edge in device space.
809        if (deviceSpaceDistances) {
810            // Apply directly to edgeDistances to save work per pixel later on.
811            edgeDistances -= devPos.z;
812        } else {
813            // Otherwise store separately so edgeDistances can be used to reconstruct corner pos
814            perPixelControl.y = -devPos.z;
815        }
816    } else if (!deviceSpaceDistances) {
817        // Triangles are within the original shape so there's no additional outsetting to
818        // take into account for coverage calculations.
819        perPixelControl.y = 0.0;
820    }
821
822    perPixelControl.x = (centerWeight != 0.0)
823            // A positive value signals that a pixel is trivially full coverage.
824            ? 1.0
825            // A negative value signals bidirectional coverage, and a zero value signals a solid
826            // interior with per-pixel coverage.
827            : bidirectionalCoverage ? -1.0 : 0.0;
828
829    // The fragment shader operates in a canonical basis (x-axis = (1,0), y-axis = (0,1)). For
830    // stroked lines, incorporate their local orientation into the Jacobian to preserve this.
831    if (strokedLine) {
832        // The updated Jacobian is J' = B^-1 * J, where B is float2x2(xAxis, yAxis) for the
833        // top-left corner (so that B^-1 is constant over the whole shape). Since it's a line
834        // the basis was constructed to be orthonormal, det(B) = 1 and B^-1 is trivial.
835        // NOTE: float2x2 is column-major.
836        jacobian = float4(float2x2(dy[0], -dy[1], -dx[0], dx[1]) * float2x2(jacobian));
837    }
838
839    // Write out final results
840    stepLocalCoords = localPos;
841    return float4(devPos.xy, devPos.z*depth, devPos.z);
842}
843
// Vertex function for a quadrilateral with per-edge anti-aliasing flags.
//
// Each group of kCornerVertexCount vertices (selected by sk_VertexID) is anchored to one corner
// of the quad. The function computes the corner's local position, the device-space position,
// and the four edge distances used for coverage.
//
// Inputs:
//   normal          - per-vertex template normal; a non-zero value requests a device-space
//                     outset (only applied when the vertex is in front of the w=0 plane).
//   edgeAA          - per-edge AA flags; edges with a flag of 0 receive an extra bias so that
//                     their coverage contribution is >= 1.0.
//   xs, ys          - quad corner coordinates, ordered TL, TR, BR, BL.
//   depth           - depth value, multiplied by the device W in the returned position.
//   localToDevice   - 3x3 local-to-device transform.
// Outputs:
//   edgeDistances   - biased distances to the four edges (still needs a 1/w multiply in the
//                     fragment shader, see the NOTE below).
//   stepLocalCoords - local coordinates of the vertex (the un-outset quad corner).
// Returns float4(devPos.xy, devPos.z*depth, devPos.z), i.e. the third component of the 3x3
// transform result is used as the homogeneous W.
float4 per_edge_aa_quad_vertex_fn(// Vertex Attributes
                                  float2 normal,
                                  // Instance Attributes
                                  float4 edgeAA,
                                  float4 xs, // ordered TL, TR, BR, BL
                                  float4 ys,
                                  float depth,
                                  float3x3 localToDevice,
                                  // Varyings
                                  out float4 edgeDistances,
                                  // Render Step
                                  out float2 stepLocalCoords) {
    const uint kCornerVertexCount = 4; // KEEP IN SYNC WITH C++'s
                                       // PerEdgeAAQuadRenderStep::kCornerVertexCount

    // Calculate the local edge vectors, ordered L, T, R, B starting from the bottom left point.
    // For quadrilaterals these are not necessarily axis-aligned, but in all cases they orient
    // the +X/+Y normalized vertex template for each corner.
    float4 dx = xs - xs.wxyz;
    float4 dy = ys - ys.wxyz;
    // This is a specialized application of `robust_normalized_diff` for a quad, with an extra
    // max against 1.0 to not scale small edges. This is to avoid overflows for extremely large
    // coordinates when squaring dx or dy.
    float4 invMag = 1.0 / max(abs(dx), max(abs(dy), float4(1.0)));
    dx *= invMag;
    dy *= invMag;
    float4 edgeSquaredLen = dx*dx + dy*dy;

    float4 edgeMask = sign(edgeSquaredLen); // 0 for zero-length edge, 1 for non-zero edge.
    if (any(equal(edgeMask, float4(0.0)))) {
        // Must clean up (dx,dy) depending on the empty edge configuration
        if (all(equal(edgeMask, float4(0.0)))) {
            // A point so use the canonical basis
            dx = float4( 0.0, 1.0, 0.0, -1.0);
            dy = float4(-1.0, 0.0, 1.0,  0.0);
            edgeSquaredLen = float4(1.0);
        } else {
            // Triangles (3 non-zero edges) copy the adjacent edge. Otherwise it's a line so
            // replace empty edges with the left-hand normal vector of the adjacent edge.
            bool triangle = (edgeMask[0] + edgeMask[1] + edgeMask[2] + edgeMask[3]) > 2.5;
            float4 edgeX = triangle ? dx.yzwx :  dy.yzwx;
            float4 edgeY = triangle ? dy.yzwx : -dx.yzwx;

            // edgeMask is exactly 0 or 1 per lane, so each mix() is a per-edge select: keep
            // the value of a non-empty edge, otherwise take the replacement.
            dx = mix(edgeX, dx, edgeMask);
            dy = mix(edgeY, dy, edgeMask);
            edgeSquaredLen = mix(edgeSquaredLen.yzwx, edgeSquaredLen, edgeMask);
            edgeAA = mix(edgeAA.yzwx, edgeAA, edgeMask);
        }
    }

    // Finish normalizing the edge vectors to unit length.
    float4 inverseEdgeLen = inversesqrt(edgeSquaredLen);
    dx *= inverseEdgeLen;
    dy *= inverseEdgeLen;

    // Select the corner this vertex belongs to and the two edge directions that form its basis.
    uint cornerID = uint(sk_VertexID) / kCornerVertexCount;
    float2 xAxis = -float2(dx.yzwx[cornerID], dy.yzwx[cornerID]);
    float2 yAxis =  float2(dx.xyzw[cornerID], dy.xyzw[cornerID]);

    // The vertex sits exactly on its quad corner in local space; any AA outset is applied
    // later, in device space.
    float2 localPos = float2(xs[cornerID], ys[cornerID]);

    // Calculate edge distances and device space coordinate for the vertex
    edgeDistances = dy*(xs - localPos.x) - dx*(ys - localPos.y);

    // NOTE: This 3x3 inverse is different than just taking the 1st two columns of the 4x4
    // inverse of the original SkM44 local-to-device matrix. We could calculate the 3x3 inverse
    // and upload it, but it does not seem to be a bottleneck and saves on bandwidth to
    // calculate it here instead.
    float3x3 deviceToLocal = inverse(localToDevice);
    float3 devPos = localToDevice * localPos.xy1;

    // Apply the Jacobian in the vertex shader so any quadrilateral normals do not have to
    // be passed to the fragment shader. However, it's important to use the Jacobian at a
    // vertex on the edge, not the current vertex's Jacobian.
    float4 gx = -dy*(deviceToLocal[0].x - deviceToLocal[0].z*xs) +
                 dx*(deviceToLocal[0].y - deviceToLocal[0].z*ys);
    float4 gy = -dy*(deviceToLocal[1].x - deviceToLocal[1].z*xs) +
                 dx*(deviceToLocal[1].y - deviceToLocal[1].z*ys);
    // NOTE: The gradient is missing a W term so edgeDistances must still be multiplied by
    // 1/w in the fragment shader. The same goes for the encoded coverage scale.
    edgeDistances *= inversesqrt(gx*gx + gy*gy);

    // Bias non-AA edge distances by device W so its coverage contribution is >= 1.0
    // Add additional 1/2 bias here so we don't have to do so in the fragment shader.
    edgeDistances += (1.5 - edgeAA)*abs(devPos.z);

    // Only outset for a vertex that is in front of the w=0 plane to avoid dealing with outset
    // triangles rasterizing differently from the main triangles as w crosses 0.
    if (any(notEqual(normal, float2(0.0))) && devPos.z > 0.0) {
        // Note that when there's no perspective, the jacobian is equivalent to the normal
        // matrix (inverse transpose), but produces correct results when there's perspective
        // because it accounts for the position's influence on a line's projected direction.
        float2x2 J = float2x2(deviceToLocal[0].xy - deviceToLocal[0].z*localPos,
                              deviceToLocal[1].xy - deviceToLocal[1].z*localPos);

        // Zero out the normal component along any edge that is not anti-aliased.
        float2 edgeAANormal = float2(edgeAA[cornerID], edgeAA.yzwx[cornerID]) * normal;
        float2 nx = edgeAANormal.x * perp(-yAxis) * J;
        float2 ny = edgeAANormal.y * perp( xAxis) * J;

        bool isMidVertex = all(notEqual(edgeAANormal, float2(0)));
        if (isMidVertex) {
            // Produce a bisecting vector in device space.
            nx = normalize(nx);
            ny = normalize(ny);
            if (dot(nx, ny) < -0.8) {
                // Normals are in nearly opposite directions, so adjust to avoid float error.
                float s = sign(cross_length_2d(nx, ny));
                nx =  s*perp(nx);
                ny = -s*perp(ny);
            }
        }
        // Adding the normal components together directly results in what we'd have
        // calculated if we'd just transformed 'normal' in one go, assuming they weren't
        // normalized in the if-block above. If they were normalized, the sum equals the
        // bisector between the original nx and ny.
        //
        // We multiply by W so that after perspective division the new point is offset by the
        // now-unit normal.
        // NOTE: (nx + ny) can become the zero vector if the device outset is for an edge
        // marked as non-AA. In this case normalize() could produce the zero vector or NaN.
        // Until a counter-example is found, GPUs seem to discard triangles with NaN vertices,
        // which has the same effect as outsetting by the zero vector with this mesh, so we
        // don't bother guarding the normalize() (yet).
        devPos.xy += devPos.z * normalize(nx + ny);

        // By construction these points are 1px away from the outer edge in device space.
        // Apply directly to edgeDistances to save work per pixel later on.
        edgeDistances -= devPos.z;
    }

    // Write out final results
    stepLocalCoords = localPos;
    return float4(devPos.xy, devPos.z*depth, devPos.z);
}
982
// Vertex function for circular arcs. Positions a template vertex around the arc center,
// applies a geometric clip plane and an AA offset in device space, and forwards the
// fragment-stage clip planes and round-cap data. No perspective is assumed, so the returned
// position always has w = 1.
float4 circular_arc_vertex_fn(float3 position,
                              // Instance Attributes
                              float4 centerScales,
                              float3 radiiAndFlags,
                              float3 geoClipPlane,
                              float3 fragClipPlane0,
                              float3 fragClipPlane1,
                              float4 inRoundCapPos,
                              float depth,
                              float3x3 localToDevice,
                              // Varyings
                              out float4 circleEdge,
                              out float3 clipPlane,
                              out float3 isectPlane,
                              out float3 unionPlane,
                              out float  roundCapRadius,
                              out float4 roundCapPos,
                              // Render Step
                              out float2 stepLocalCoords) {
    // TODO: clip offset against clip planes

    // Geometric clip, performed in normalized space. The plane evaluation is clamped to be
    // <= 0, so only vertices with a negative evaluation are moved (along geoClipPlane.xy).
    float planeDist = min(dot(position.xy, geoClipPlane.xy) + geoClipPlane.z, 0);
    position.xy -= geoClipPlane.xy * planeDist;
    // Length of the clipped template position, used below to rescale the offset.
    float clippedLength = length(position.xy);

    // position.z > 0 selects the centerScales.z scale and the octagon-based offset length;
    // otherwise centerScales.w and the inner-radius-based offset length are used.
    bool isOuterVertex = position.z > 0;

    // Scale and translate the template vertex into local space.
    float2 arcCenter = centerScales.xy;
    float2 localPos = arcCenter
                    + position.xy * (isOuterVertex ? centerScales.z : centerScales.w);

    float3 devPos = localToDevice * localPos.xy1;
    float3 devCenter = localToDevice * arcCenter.xy1;

    // Offset for AA, then correct the length of the offset that is handed to the fragment
    // shader. A vertex that coincides with the center has no direction and is left alone.
    float2 offset = devPos.xy - devCenter.xy;
    if (offset != float2(0)) {
        offset = normalize(offset);
        devPos.xy += position.z*offset;
        // Outer: scale using the distance from the center of the unit octagon to the vertex.
        // Geometry clipping requires 1.0823922*newLength/origLength, but the original length
        // is 1.0823922 so the factor is just newLength.
        // Inner: geometry clipping requires innerRadius*newLength/origLength, but the
        // original length is 1 so the factor is innerRadius*newLength.
        offset *= isOuterVertex ? clippedLength : clippedLength*radiiAndFlags.y;
    }
    circleEdge = float4(offset, radiiAndFlags.xy);

    // The sign of the flag decides whether the second fragment plane is used as the
    // intersection plane or as the union plane; the unused slot gets a constant plane.
    bool secondPlaneIntersects = radiiAndFlags.z > 0;
    clipPlane  = fragClipPlane0;
    isectPlane = secondPlaneIntersects ? fragClipPlane1 : float3(0, 0, 1);
    unionPlane = secondPlaneIntersects ? float3(0, 0, 0) : fragClipPlane1;

    // A flag magnitude above 1 enables round caps. The cap radius is expressed in normalized
    // space where the outer radius is 1 and radiiAndFlags.y is the normalized inner radius.
    roundCapRadius = (abs(radiiAndFlags.z) > 1) ? (1.0 - radiiAndFlags.y) / 2.0 : 0.0;
    roundCapPos = inRoundCapPos;

    stepLocalCoords = localPos;

    // We assume no perspective
    return float4(devPos.xy, depth, 1);
}
1061
// Vertex function for text (glyph quad) rendering. Scales the unit quad coordinate to the
// glyph size, derives both atlas texture coordinates and the device position from it, and
// maps the device position back to local coordinates for shading.
float4 text_vertex_fn(float2 baseCoords,
                      // Uniforms
                      float4x4 subRunDeviceMatrix,
                      float4x4 deviceToLocal,
                      float2 atlasSizeInv,
                      // Instance Attributes
                      float2 size,
                      float2 uvPos,
                      float2 xyPos,
                      float strikeToSourceScale,
                      float depth,
                      // Varyings
                      out float2 textureCoords,
                      out float2 unormTexCoords,  // used as varying in SDFText
                      // Render Step
                      out float2 stepLocalCoords) {
    // Offset of this vertex within the glyph, in the same units as `size`.
    float2 glyphCoords = baseCoords * size;

    // Atlas coordinates: unnormalized first, then scaled by the inverse atlas dimensions.
    float2 atlasCoords = glyphCoords + uvPos;
    unormTexCoords = atlasCoords;
    textureCoords = atlasCoords * atlasSizeInv;

    // Sub runs have a decomposed transform and are sometimes already transformed into device
    // space, in which `subRunCoords` represents the bounds projected to device space without
    // the local-to-device translation and `subRunDeviceMatrix` contains the translation.
    float2 subRunCoords = strikeToSourceScale * glyphCoords + xyPos;
    float4 devicePos = subRunDeviceMatrix * subRunCoords.xy01;

    // Calculate the local coords used for shading.
    // TODO(b/246963258): This is incorrect if the transform has perspective, which would
    // require a division + a valid z coordinate (which is currently set to 0).
    stepLocalCoords = (deviceToLocal * devicePos).xy;

    return float4(devicePos.xy, depth*devicePos.w, devicePos.w);
}
1096
// Vertex function for drawing a coverage mask sampled from an atlas. Produces the device
// position of one corner of the draw bounds, the matching texture and local coordinates, and
// the (sorted) mask bounds plus an invert flag for inverse fills.
float4 coverage_mask_vertex_fn(float2 quadCoords,
                               // Uniforms
                               float3x3 maskToDeviceRemainder,
                               // Instance Attributes
                               float4 drawBounds,
                               float4 maskBoundsIn,
                               float2 deviceOrigin,
                               float depth,
                               float3x3 deviceToLocal,
                               // Varyings
                               out float4 maskBounds,
                               out float2 textureCoords,
                               out half invert,
                               // Render Step
                               out float2 stepLocalCoords) {
    // An atlas shape is an axis-aligned rectangle tessellated as a triangle strip.
    //
    // The bounds coordinates are in an intermediate space, pixel-aligned with the mask texture
    // that's sampled in the fragment shader. The coords must be transformed by both
    // maskToDeviceRemainder and translated by deviceOrigin to get device coords.
    float2 maskCoords = mix(drawBounds.xy, drawBounds.zw, quadCoords);
    textureCoords = maskCoords;
    float3 devCoords = maskToDeviceRemainder*((maskCoords + deviceOrigin).xy1);

    // Local coordinates used for shading are derived from the final device coords and the
    // inverse of the original local-to-device matrix.
    // TODO: Support float3 local coordinates if the matrix has perspective so that W is
    // interpolated correctly to the fragment shader.
    float3 homogeneousLocal = deviceToLocal * devCoords;
    stepLocalCoords = homogeneousLocal.xy / homogeneousLocal.z;

    // Sorted bounds (LT <= RB on both axes) denote a regular fill; anything else is an
    // inverse fill.
    //
    // For an inverse fill, `textureCoords` will get clamped to `maskBounds` and the edge pixels
    // will always land on a 0-coverage border pixel assuming the atlas was prepared with 1px
    // padding around each mask entry. This includes inverse fills where the mask was fully
    // clipped out, since then maskBounds.RBLT == (0,0,-1,-1) and we sample the top-left-most
    // pixel of the atlas, which is guaranteed to be transparent.
    bool isInverseFill = !all(lessThanEqual(maskBoundsIn.LT, maskBoundsIn.RB));
    if (isInverseFill) {
        // Re-arrange the mask bounds to sorted order for texture clamping in the fragment shader
        maskBounds = maskBoundsIn.RBLT;
        invert = 1;
    } else {
        // Regular fill: bounds are already sorted.
        maskBounds = maskBoundsIn;
        invert = 0;
    }

    return float4(devCoords.xy, depth*devCoords.z, devCoords.z);
}
1144
// Vertex function that covers a bounding rectangle. `corner` is the interpolation factor
// between the bounds' corners. Sorted bounds (LT <= RB) are treated as a regular fill given in
// local space; unsorted bounds are treated as an inverse fill whose corners are interpolated
// directly in device space and mapped back to local space with inverse(matrix).
float4 cover_bounds_vertex_fn(float2 corner,
                              float4 bounds,
                              float depth,
                              float3x3 matrix,
                              out float2 stepLocalCoords) {
    bool regularFill = all(lessThanEqual(bounds.LT, bounds.RB));
    if (regularFill) {
        // A regular fill: interpolate the local-space corner and transform it to device space.
        float2 localCorner = mix(bounds.LT, bounds.RB, corner);
        stepLocalCoords = localCorner;
        float3 devPos = matrix * localCorner.xy1;
        return float4(devPos.xy, depth*devPos.z, devPos.z);
    }

    // An inverse fill: the bounds are stored swapped, so interpolate from RB to LT.
    float2 devCorner = mix(bounds.RB, bounds.LT, corner);
    // TODO(b/351923375): Get the 3x3 inverse of the local-to-device transform from the CPU
    // if it can be computed fast enough on the CPU from the cached 4x4 inverse.
    float3 homogeneousLocal = inverse(matrix) * devCorner.xy1;
    // Dividing the inverse mapped local coords by its homogeneous coordinate reconstructs the
    // original local coords.
    float invW = 1.0 / homogeneousLocal.z;
    stepLocalCoords = homogeneousLocal.xy * invW;

    // 1/W also happens to be equal to (matrix*stepLocalCoords.xy1).z, which is the
    // device-space homogeneous coordinate we want perspective interpolation to respect. We
    // multiply the output position by 1/W and set the output position's homogeneous coord to
    // that same 1/W which ensures the projected vertices are still the device-space corners,
    // but stepLocalCoords will be correctly perspective interpolated by HW.
    return float4(devCorner*invW, depth*invW, invW);
}
1175