• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2    Copyright (c) Microsoft Corporation
3
4    Permission is hereby granted, free of charge, to any person obtaining a copy
5   of this software and associated documentation files (the "Software"), to deal
6   in the Software without restriction, including without limitation the rights
7   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8   copies of the Software, and to permit persons to whom the Software is
9   furnished to do so, subject to the following conditions:
10
11    The above copyright notice and this permission notice shall be included in
12   all copies or substantial portions of the Software.
13
14    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   SOFTWARE.
21*/
22
23#include "geometry.h"
24#include "tessellator.h"
25
26#define LIBAGX_TESS_MIN_ISOLINE_DENSITY_TESSELLATION_FACTOR 1.0f
27#define LIBAGX_TESS_MAX_ISOLINE_DENSITY_TESSELLATION_FACTOR 64.0f
28
29typedef unsigned int FXP; // fixed point number
30
31enum {
32   U = 0, // points on a tri patch
33   V = 1,
34};
35
36enum {
37   Ueq0 = 0, // edges on a tri patch
38   Veq0 = 1,
39   Weq0 = 2,
40};
41
42enum {
43   Ueq1 = 2, // edges on a quad patch: Ueq0, Veq0, Ueq1, Veq1
44   Veq1 = 3,
45};
46
47#define QUAD_AXES  2
48#define QUAD_EDGES 4
49#define TRI_EDGES  3
50
51// The interior can just use a simpler stitch.
52typedef enum DIAGONALS {
53   DIAGONALS_INSIDE_TO_OUTSIDE,
54   DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE,
55   DIAGONALS_MIRRORED
56} DIAGONALS;
57
58typedef struct TESS_FACTOR_CONTEXT {
59   FXP fxpInvNumSegmentsOnFloorTessFactor;
60   FXP fxpInvNumSegmentsOnCeilTessFactor;
61   FXP fxpHalfTessFactorFraction;
62   int numHalfTessFactorPoints;
63   int splitPointOnFloorHalfTessFactor;
64} TESS_FACTOR_CONTEXT;
65
66struct INDEX_PATCH_CONTEXT {
67   int insidePointIndexDeltaToRealValue;
68   int insidePointIndexBadValue;
69   int insidePointIndexReplacementValue;
70   int outsidePointIndexPatchBase;
71   int outsidePointIndexDeltaToRealValue;
72   int outsidePointIndexBadValue;
73   int outsidePointIndexReplacementValue;
74};
75
76struct INDEX_PATCH_CONTEXT2 {
77   int baseIndexToInvert;
78   int indexInversionEndPoint;
79   int cornerCaseBadValue;
80   int cornerCaseReplacementValue;
81};
82
83struct CHWTessellator {
84   enum libagx_tess_mode mode;
85   uint index_bias;
86
87   // array where we will store u/v's for the points we generate
88   global struct libagx_tess_point *Point;
89
90   // array where we will store index topology
91   global void *Index;
92
93   // A second index patch we have to do handles the leftover strip of quads in
94   // the middle of an odd quad patch after finishing all the concentric rings.
95   // This also handles the leftover strip of points in the middle of an even
96   // quad patch, when stitching the row of triangles up the left side (V major
97   // quad) or bottom (U major quad) of the inner ring
98   bool bUsingPatchedIndices;
99   bool bUsingPatchedIndices2;
100   struct INDEX_PATCH_CONTEXT IndexPatchCtx;
101   struct INDEX_PATCH_CONTEXT2 IndexPatchCtx2;
102};
103
104#define FXP_INTEGER_BITS  15
105#define FXP_FRACTION_BITS 16
106#define FXP_FRACTION_MASK 0x0000ffff
107#define FXP_INTEGER_MASK  0x7fff0000
108#define FXP_ONE           (1 << FXP_FRACTION_BITS)
109#define FXP_ONE_THIRD     0x00005555
110#define FXP_TWO_THIRDS    0x0000aaaa
111#define FXP_ONE_HALF      0x00008000
112
113static global float *
114tess_factors(constant struct libagx_tess_args *p, uint patch)
115{
116   return p->tcs_buffer + (patch * p->tcs_stride_el);
117}
118
119static inline uint
120libagx_heap_alloc(global struct agx_geometry_state *heap, uint size_B)
121{
122   // TODO: drop align to 4 I think
123   return atomic_fetch_add((volatile atomic_uint *)(&heap->heap_bottom),
124                           align(size_B, 8));
125}
126
127/*
128 * Generate an indexed draw for a patch with the computed number of indices.
129 * This allocates heap memory for the index buffer, returning the allocated
130 * memory.
131 */
132static global void *
133libagx_draw(constant struct libagx_tess_args *p, enum libagx_tess_mode mode,
134            bool lines, uint patch, uint count)
135{
136   if (mode == LIBAGX_TESS_MODE_COUNT) {
137      p->counts[patch] = count;
138   }
139
140   if (mode == LIBAGX_TESS_MODE_WITH_COUNTS) {
141      /* The index buffer is already allocated, get a pointer inside it.
142       * p->counts has had an inclusive prefix sum hence the subtraction.
143       */
144      uint offset_el = p->counts[sub_sat(patch, 1u)];
145      if (patch == 0)
146         offset_el = 0;
147
148      return &p->index_buffer[offset_el];
149   }
150
151   return NULL;
152}
153
154static void
155libagx_draw_points(private struct CHWTessellator *ctx,
156                   constant struct libagx_tess_args *p, uint patch, uint count)
157{
158   /* For points mode with a single draw, we need to generate a trivial index
159    * buffer to stuff in the patch ID in the right place.
160    */
161   global uint32_t *indices = libagx_draw(p, ctx->mode, false, patch, count);
162
163   if (ctx->mode == LIBAGX_TESS_MODE_COUNT)
164      return;
165
166   for (int i = 0; i < count; ++i) {
167      indices[i] = ctx->index_bias + i;
168   }
169}
170
171static void
172libagx_draw_empty(constant struct libagx_tess_args *p,
173                  enum libagx_tess_mode mode,
174                  uint patch)
175{
176   if (mode == LIBAGX_TESS_MODE_COUNT) {
177      p->counts[patch] = 0;
178   }
179}
180
181/*
182 * Allocate heap memory for domain points for a patch. The allocation
183 * is recorded in the coord_allocs[] array, which is in elements.
184 */
185static global struct libagx_tess_point *
186libagx_heap_alloc_points(constant struct libagx_tess_args *p, uint patch,
187                         uint count)
188{
189   /* If we're recording statistics, increment now. The statistic is for
190    * tessellation evaluation shader invocations, which is equal to the number
191    * of domain points generated.
192    */
193   if (p->statistic) {
194      atomic_fetch_add((volatile atomic_uint *)(p->statistic), count);
195   }
196
197   uint32_t elsize_B = sizeof(struct libagx_tess_point);
198   uint32_t alloc_B = libagx_heap_alloc(p->heap, elsize_B * count);
199   uint32_t alloc_el = alloc_B / elsize_B;
200
201   p->coord_allocs[patch] = alloc_el;
202   return (global struct libagx_tess_point *)(((uintptr_t)p->heap->heap) +
203                                              alloc_B);
204}
205
206// Microsoft D3D11 Fixed Function Tessellator Reference - May 7, 2012
207// amar.patel@microsoft.com
208
209#define LIBAGX_TESS_MIN_ODD_TESSELLATION_FACTOR  1
210#define LIBAGX_TESS_MAX_ODD_TESSELLATION_FACTOR  63
211#define LIBAGX_TESS_MIN_EVEN_TESSELLATION_FACTOR 2
212#define LIBAGX_TESS_MAX_EVEN_TESSELLATION_FACTOR 64
213
214// 2^(-16), min positive fixed point fraction
215#define EPSILON 0.0000152587890625f
216#define MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON                                   \
217   (LIBAGX_TESS_MIN_ODD_TESSELLATION_FACTOR + EPSILON / 2)
218
219static float clamp_factor(float factor,
220                          enum libagx_tess_partitioning partitioning,
221                          float maxf)
222{
223   float lower = (partitioning == LIBAGX_TESS_PARTITIONING_FRACTIONAL_EVEN)
224                    ? LIBAGX_TESS_MIN_EVEN_TESSELLATION_FACTOR
225                    : LIBAGX_TESS_MIN_ODD_TESSELLATION_FACTOR;
226
227   float upper = (partitioning == LIBAGX_TESS_PARTITIONING_FRACTIONAL_ODD)
228                    ? LIBAGX_TESS_MAX_ODD_TESSELLATION_FACTOR
229                    : LIBAGX_TESS_MAX_EVEN_TESSELLATION_FACTOR;
230
231   // If any TessFactor will end up > 1 after floatToFixed conversion later,
232   // then force the inside TessFactors to be > 1 so there is a picture frame.
233   if (partitioning == LIBAGX_TESS_PARTITIONING_FRACTIONAL_ODD &&
234       maxf > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) {
235
236      lower = LIBAGX_TESS_MIN_ODD_TESSELLATION_FACTOR + EPSILON;
237   }
238
239   factor = clamp(factor, lower, upper);
240
241   if (partitioning == LIBAGX_TESS_PARTITIONING_INTEGER) {
242      factor = ceil(factor);
243   }
244
245   return factor;
246}
247
248
249static FXP
250floatToFixed(const float input)
251{
252   return mad(input, FXP_ONE, 0.5f);
253}
254
255static bool
256isOdd(const float input)
257{
258   return ((int)input) & 1;
259}
260
261static FXP
262fxpCeil(const FXP input)
263{
264   if (input & FXP_FRACTION_MASK) {
265      return (input & FXP_INTEGER_MASK) + FXP_ONE;
266   }
267   return input;
268}
269
270static FXP
271fxpFloor(const FXP input)
272{
273   return (input & FXP_INTEGER_MASK);
274}
275
276static int
277PatchIndexValue(private struct CHWTessellator *ctx, int index)
278{
279   if (ctx->bUsingPatchedIndices) {
280      // assumed remapped outide indices are > remapped inside vertices
281      if (index >= ctx->IndexPatchCtx.outsidePointIndexPatchBase) {
282         if (index == ctx->IndexPatchCtx.outsidePointIndexBadValue)
283            return ctx->IndexPatchCtx.outsidePointIndexReplacementValue;
284         else
285            return index + ctx->IndexPatchCtx.outsidePointIndexDeltaToRealValue;
286      } else {
287         if (index == ctx->IndexPatchCtx.insidePointIndexBadValue)
288            return ctx->IndexPatchCtx.insidePointIndexReplacementValue;
289         else
290            return index + ctx->IndexPatchCtx.insidePointIndexDeltaToRealValue;
291      }
292   } else if (ctx->bUsingPatchedIndices2) {
293      if (index == ctx->IndexPatchCtx2.cornerCaseBadValue) {
294         return ctx->IndexPatchCtx2.cornerCaseReplacementValue;
295      } else if (index >= ctx->IndexPatchCtx2.baseIndexToInvert) {
296         return ctx->IndexPatchCtx2.indexInversionEndPoint - index;
297      }
298   }
299
300   return index;
301}
302
303static void
304DefinePoint(global struct libagx_tess_point *out, FXP fxpU, FXP fxpV)
305{
306   out->u = fxpU;
307   out->v = fxpV;
308}
309
310static void
311DefineIndex(private struct CHWTessellator *ctx, int index,
312            int indexStorageOffset)
313{
314   global uint32_t *indices = (global uint32_t *)ctx->Index;
315   indices[indexStorageOffset] = ctx->index_bias + PatchIndexValue(ctx, index);
316}
317
318static void
319DefineTriangle(private struct CHWTessellator *ctx, int index0, int index1,
320               int index2, int indexStorageBaseOffset)
321{
322   index0 = PatchIndexValue(ctx, index0);
323   index1 = PatchIndexValue(ctx, index1);
324   index2 = PatchIndexValue(ctx, index2);
325
326   vstore3(ctx->index_bias + (uint3)(index0, index1, index2), 0,
327           (global uint *)ctx->Index + indexStorageBaseOffset);
328}
329
330static uint32_t
331RemoveMSB(uint32_t val)
332{
333   uint32_t bit = val ? (1 << (31 - clz(val))) : 0;
334   return val & ~bit;
335}
336
337static int
338NumPointsForTessFactor(bool odd, FXP fxpTessFactor)
339{
340   // Add epsilon for rounding and add 1 for odd
341   FXP f = fxpTessFactor + (odd ? (FXP_ONE + 1) : 1);
342   int r = fxpCeil(f / 2) >> (FXP_FRACTION_BITS - 1);
343   return odd ? r : r + 1;
344}
345
346static void
347ComputeTessFactorCtx(bool odd, FXP fxpTessFactor,
348                     private TESS_FACTOR_CONTEXT *TessFactorCtx)
349{
350   // fxpHalfTessFactor == 1/2 if TessFactor is 1,
351   // but we're pretending we are even.
352   FXP fxpHalfTessFactor = (fxpTessFactor + 1 /*round*/) / 2;
353   if (odd || (fxpHalfTessFactor == FXP_ONE_HALF)) {
354      fxpHalfTessFactor += FXP_ONE_HALF;
355   }
356   FXP fxpFloorHalfTessFactor = fxpFloor(fxpHalfTessFactor);
357   FXP fxpCeilHalfTessFactor = fxpCeil(fxpHalfTessFactor);
358   TessFactorCtx->fxpHalfTessFactorFraction = fxpHalfTessFactor - fxpFloorHalfTessFactor;
359   TessFactorCtx->numHalfTessFactorPoints =
360      (fxpCeilHalfTessFactor >> FXP_FRACTION_BITS); // for EVEN, we don't include the point always
361                                                    // fixed at the midpoint of the TessFactor
362   if (fxpCeilHalfTessFactor == fxpFloorHalfTessFactor) {
363      TessFactorCtx->splitPointOnFloorHalfTessFactor =
364         /*pick value to cause this to be ignored*/ TessFactorCtx->numHalfTessFactorPoints + 1;
365   } else if (odd) {
366      if (fxpFloorHalfTessFactor == FXP_ONE) {
367         TessFactorCtx->splitPointOnFloorHalfTessFactor = 0;
368      } else {
369         TessFactorCtx->splitPointOnFloorHalfTessFactor =
370            (RemoveMSB((fxpFloorHalfTessFactor >> FXP_FRACTION_BITS) - 1) << 1) + 1;
371      }
372   } else {
373      TessFactorCtx->splitPointOnFloorHalfTessFactor =
374         (RemoveMSB(fxpFloorHalfTessFactor >> FXP_FRACTION_BITS) << 1) + 1;
375   }
376   int numFloorSegments = (fxpFloorHalfTessFactor * 2) >> FXP_FRACTION_BITS;
377   int numCeilSegments = (fxpCeilHalfTessFactor * 2) >> FXP_FRACTION_BITS;
378   if (odd) {
379      numFloorSegments -= 1;
380      numCeilSegments -= 1;
381   }
382   TessFactorCtx->fxpInvNumSegmentsOnFloorTessFactor =
383      floatToFixed(1.0f / (float)numFloorSegments);
384   TessFactorCtx->fxpInvNumSegmentsOnCeilTessFactor =
385      floatToFixed(1.0f / (float)numCeilSegments);
386}
387
388static FXP
389PlacePointIn1D(private const TESS_FACTOR_CONTEXT *TessFactorCtx, bool odd,
390               int point)
391{
392   bool bFlip = point >= TessFactorCtx->numHalfTessFactorPoints;
393
394   if (bFlip) {
395      point = (TessFactorCtx->numHalfTessFactorPoints << 1) - point - odd;
396   }
397
398   // special casing middle since 16 bit fixed math below can't reproduce 0.5 exactly
399   if (point == TessFactorCtx->numHalfTessFactorPoints)
400      return FXP_ONE_HALF;
401
402   unsigned int indexOnCeilHalfTessFactor = point;
403   unsigned int indexOnFloorHalfTessFactor = indexOnCeilHalfTessFactor;
404   if (point > TessFactorCtx->splitPointOnFloorHalfTessFactor) {
405      indexOnFloorHalfTessFactor -= 1;
406   }
407   // For the fixed point multiplies below, we know the results are <= 16 bits
408   // because the locations on the halfTessFactor are <= half the number of
409   // segments for the total TessFactor. So a number divided by a number that
410   // is at least twice as big will give a result no bigger than 0.5 (which in
411   // fixed point is 16 bits in our case)
412   FXP fxpLocationOnFloorHalfTessFactor =
413      indexOnFloorHalfTessFactor * TessFactorCtx->fxpInvNumSegmentsOnFloorTessFactor;
414   FXP fxpLocationOnCeilHalfTessFactor =
415      indexOnCeilHalfTessFactor * TessFactorCtx->fxpInvNumSegmentsOnCeilTessFactor;
416
417   // Since we know the numbers calculated above are <= fixed point 0.5, and the
418   // equation below is just lerping between two values <= fixed point 0.5
419   // (0x00008000), then we know that the final result before shifting by 16 bits
420   // is no larger than 0x80000000.  Once we shift that down by 16, we get the
421   // result of lerping 2 numbers <= 0.5, which is obviously at most 0.5
422   // (0x00008000)
423   FXP fxpLocation =
424      fxpLocationOnFloorHalfTessFactor * (FXP_ONE - TessFactorCtx->fxpHalfTessFactorFraction) +
425      fxpLocationOnCeilHalfTessFactor * (TessFactorCtx->fxpHalfTessFactorFraction);
426   fxpLocation = (fxpLocation + FXP_ONE_HALF /*round*/) >> FXP_FRACTION_BITS; // get back to n.16
427   if (bFlip) {
428      fxpLocation = FXP_ONE - fxpLocation;
429   }
430   return fxpLocation;
431}
432
433static void
434StitchRegular(private struct CHWTessellator *ctx, bool bTrapezoid,
435              DIAGONALS diagonals, int baseIndexOffset, int numInsideEdgePoints,
436              int insideEdgePointBaseOffset, int outsideEdgePointBaseOffset)
437{
438   int insidePoint = insideEdgePointBaseOffset;
439   int outsidePoint = outsideEdgePointBaseOffset;
440   if (bTrapezoid) {
441      DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint,
442                     baseIndexOffset);
443      baseIndexOffset += 3;
444      outsidePoint++;
445   }
446   int p;
447   switch (diagonals) {
448   case DIAGONALS_INSIDE_TO_OUTSIDE:
449      // Diagonals pointing from inside edge forward towards outside edge
450      for (p = 0; p < numInsideEdgePoints - 1; p++) {
451         DefineTriangle(ctx, insidePoint, outsidePoint, outsidePoint + 1,
452                        baseIndexOffset);
453         baseIndexOffset += 3;
454
455         DefineTriangle(ctx, insidePoint, outsidePoint + 1, insidePoint + 1,
456                        baseIndexOffset);
457         baseIndexOffset += 3;
458         insidePoint++;
459         outsidePoint++;
460      }
461      break;
462   case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
463      // Diagonals pointing from outside edge forward towards inside edge
464
465      // First half
466      for (p = 0; p < numInsideEdgePoints / 2 - 1; p++) {
467         DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint,
468                        baseIndexOffset);
469         baseIndexOffset += 3;
470         DefineTriangle(ctx, insidePoint, outsidePoint + 1, insidePoint + 1,
471                        baseIndexOffset);
472         baseIndexOffset += 3;
473         insidePoint++;
474         outsidePoint++;
475      }
476
477      // Middle
478      DefineTriangle(ctx, outsidePoint, insidePoint + 1, insidePoint,
479                     baseIndexOffset);
480      baseIndexOffset += 3;
481      DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint + 1,
482                     baseIndexOffset);
483      baseIndexOffset += 3;
484      insidePoint++;
485      outsidePoint++;
486      p += 2;
487
488      // Second half
489      for (; p < numInsideEdgePoints; p++) {
490         DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint,
491                        baseIndexOffset);
492         baseIndexOffset += 3;
493         DefineTriangle(ctx, insidePoint, outsidePoint + 1, insidePoint + 1,
494                        baseIndexOffset);
495         baseIndexOffset += 3;
496         insidePoint++;
497         outsidePoint++;
498      }
499      break;
500   case DIAGONALS_MIRRORED:
501      // First half, diagonals pointing from outside of outside edge to inside of
502      // inside edge
503      for (p = 0; p < numInsideEdgePoints / 2; p++) {
504         DefineTriangle(ctx, outsidePoint, insidePoint + 1, insidePoint,
505                        baseIndexOffset);
506         baseIndexOffset += 3;
507         DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint + 1,
508                        baseIndexOffset);
509         baseIndexOffset += 3;
510         insidePoint++;
511         outsidePoint++;
512      }
513      // Second half, diagonals pointing from inside of inside edge to outside of
514      // outside edge
515      for (; p < numInsideEdgePoints - 1; p++) {
516         DefineTriangle(ctx, insidePoint, outsidePoint, outsidePoint + 1,
517                        baseIndexOffset);
518         baseIndexOffset += 3;
519         DefineTriangle(ctx, insidePoint, outsidePoint + 1, insidePoint + 1,
520                        baseIndexOffset);
521         baseIndexOffset += 3;
522         insidePoint++;
523         outsidePoint++;
524      }
525      break;
526   }
527   if (bTrapezoid) {
528      DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint,
529                     baseIndexOffset);
530      baseIndexOffset += 3;
531   }
532}
533
534// loop_start and loop_end give optimal loop bounds for
535// the stitching algorithm further below, for any given halfTssFactor. There
536// is probably a better way to encode this...
537//
538// Return the FIRST entry in finalPointPositionTable awhich is less than
539// halfTessFactor, except entry 0 and 1 which are set up to skip the loop.
540static int
541loop_start(int N)
542{
543   if (N < 2)
544      return 1;
545   else if (N == 2)
546      return 17;
547   else if (N < 5)
548      return 9;
549   else if (N < 9)
550      return 5;
551   else if (N < 17)
552      return 3;
553   else
554      return 2;
555}
556
557// Return the LAST entry in finalPointPositionTable[] which is less than
558// halfTessFactor, except entry 0 and 1 which are set up to skip the loop.
559static int
560loop_end(int N)
561{
562   if (N < 2)
563      return 0;
564   else if (N < 4)
565      return 17;
566   else if (N < 8)
567      return 25;
568   else if (N < 16)
569      return 29;
570   else if (N < 32)
571      return 31;
572   else
573      return 32;
574}
575
576// Tables to assist in the stitching of 2 rows of points having arbitrary
577// TessFactors. The stitching order is governed by Ruler Function vertex
578// split ordering (see external documentation).
579//
580// The contents of the finalPointPositionTable are where vertex i [0..33]
581// ends up on the half-edge at the max tessellation amount given
582// ruler-function split order. Recall the other half of an edge is mirrored,
583// so we only need to deal with one half. This table is used to decide when
584// to advance a point on the interior or exterior. It supports odd TessFactor
585// up to 65 and even TessFactor up to 64.
586
587/* TODO: Is this actually faster than a LUT? */
588static uint32_t
589finalPointPositionTable(uint32_t x)
590{
591   if (x == 0)
592      return 0;
593   if (x == 1)
594      return 0x20;
595
596   uint32_t shift;
597   if ((x & 1) == 0) {
598      shift = 1;
599   } else if ((x & 3) == 3) {
600      shift = 2;
601   } else if ((x & 7) == 5) {
602      shift = 3;
603   } else if (x != 17) {
604      shift = 4;
605   } else {
606      shift = 5;
607   }
608
609   // SWAR vectorized right-shift of (0x20, x)
610   // We're calculating `min(0xf, 0x20 >> shift) + (x >> shift)`.
611   uint32_t items_to_shift = x | (0x20 << 16);
612   uint32_t shifted = items_to_shift >> shift;
613
614   uint32_t bias = min(0xfu, shifted >> 16);
615   return bias + (shifted & 0xffff);
616}
617
618static void
619StitchTransition(private struct CHWTessellator *ctx, int baseIndexOffset,
620                 int insideEdgePointBaseOffset,
621                 int insideNumHalfTessFactorPoints,
622                 bool insideEdgeTessFactorOdd, int outsideEdgePointBaseOffset,
623                 int outsideNumHalfTessFactorPoints, bool outsideTessFactorOdd)
624{
625   if (insideEdgeTessFactorOdd) {
626      insideNumHalfTessFactorPoints -= 1;
627   }
628   if (outsideTessFactorOdd) {
629      outsideNumHalfTessFactorPoints -= 1;
630   }
631   // Walk first half
632   int outsidePoint = outsideEdgePointBaseOffset;
633   int insidePoint = insideEdgePointBaseOffset;
634
635   // iStart,iEnd are a small optimization so the loop below doesn't have to go
636   // from 0 up to 31
637   int iStart = min(loop_start(insideNumHalfTessFactorPoints),
638                    loop_start(outsideNumHalfTessFactorPoints));
639   int iEnd = loop_end(
640      max(insideNumHalfTessFactorPoints, outsideNumHalfTessFactorPoints));
641
642   // since we don't start the loop at 0 below, we need a special case.
643   if (0 < outsideNumHalfTessFactorPoints) {
644      // Advance outside
645      DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint,
646                     baseIndexOffset);
647      baseIndexOffset += 3;
648      outsidePoint++;
649   }
650
651   for (int i = iStart; i <= iEnd; i++) {
652      int bound = finalPointPositionTable(i);
653
654      if (bound < insideNumHalfTessFactorPoints) {
655         // Advance inside
656         DefineTriangle(ctx, insidePoint, outsidePoint, insidePoint + 1,
657                        baseIndexOffset);
658         baseIndexOffset += 3;
659         insidePoint++;
660      }
661      if (bound < outsideNumHalfTessFactorPoints) {
662         // Advance outside
663         DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint,
664                        baseIndexOffset);
665         baseIndexOffset += 3;
666         outsidePoint++;
667      }
668   }
669
670   if ((insideEdgeTessFactorOdd != outsideTessFactorOdd) ||
671       insideEdgeTessFactorOdd) {
672      if (insideEdgeTessFactorOdd == outsideTessFactorOdd) {
673         // Quad in the middle
674         DefineTriangle(ctx, insidePoint, outsidePoint, insidePoint + 1,
675                        baseIndexOffset);
676         baseIndexOffset += 3;
677         DefineTriangle(ctx, insidePoint + 1, outsidePoint, outsidePoint + 1,
678                        baseIndexOffset);
679         baseIndexOffset += 3;
680         insidePoint++;
681         outsidePoint++;
682      } else if (!insideEdgeTessFactorOdd) {
683         // Triangle pointing inside
684         DefineTriangle(ctx, insidePoint, outsidePoint, outsidePoint + 1,
685                        baseIndexOffset);
686         baseIndexOffset += 3;
687         outsidePoint++;
688      } else {
689         // Triangle pointing outside
690         DefineTriangle(ctx, insidePoint, outsidePoint, insidePoint + 1,
691                        baseIndexOffset);
692         baseIndexOffset += 3;
693         insidePoint++;
694      }
695   }
696
697   // Walk second half.
698   for (int i = iEnd; i >= iStart; i--) {
699      int bound = finalPointPositionTable(i);
700
701      if (bound < outsideNumHalfTessFactorPoints) {
702         // Advance outside
703         DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint,
704                        baseIndexOffset);
705         baseIndexOffset += 3;
706         outsidePoint++;
707      }
708      if (bound < insideNumHalfTessFactorPoints) {
709         // Advance inside
710         DefineTriangle(ctx, insidePoint, outsidePoint, insidePoint + 1,
711                        baseIndexOffset);
712         baseIndexOffset += 3;
713         insidePoint++;
714      }
715   }
716   // Below case is not needed if we didn't optimize loop above and made it run
717   // from 31 down to 0.
718   if (0 < outsideNumHalfTessFactorPoints) {
719      DefineTriangle(ctx, outsidePoint, outsidePoint + 1, insidePoint,
720                     baseIndexOffset);
721      baseIndexOffset += 3;
722      outsidePoint++;
723   }
724}
725
726KERNEL(64)
727libagx_tess_isoline(constant struct libagx_tess_args *p,
728                    enum libagx_tess_mode mode__2)
729{
730   enum libagx_tess_mode mode = mode__2;
731   uint patch = get_global_id(0);
732   enum libagx_tess_partitioning partitioning = p->partitioning;
733
734   bool lineDensityOdd;
735   bool lineDetailOdd;
736   TESS_FACTOR_CONTEXT lineDensityTessFactorCtx;
737   TESS_FACTOR_CONTEXT lineDetailTessFactorCtx;
738
739   global float *factors = tess_factors(p, patch);
740   float TessFactor_V_LineDensity = factors[0];
741   float TessFactor_U_LineDetail = factors[1];
742
743   // Is the patch culled? NaN will pass.
744   if (!(TessFactor_V_LineDensity > 0) || !(TessFactor_U_LineDetail > 0)) {
745      libagx_draw_empty(p, mode, patch);
746      return;
747   }
748
749   // Clamp edge TessFactors
750   TessFactor_V_LineDensity =
751      clamp(TessFactor_V_LineDensity,
752            LIBAGX_TESS_MIN_ISOLINE_DENSITY_TESSELLATION_FACTOR,
753            LIBAGX_TESS_MAX_ISOLINE_DENSITY_TESSELLATION_FACTOR);
754   TessFactor_U_LineDetail =
755      clamp_factor(TessFactor_U_LineDetail, partitioning, 0);
756
757   // Process tessFactors
758   if (partitioning == LIBAGX_TESS_PARTITIONING_INTEGER) {
759      lineDetailOdd = isOdd(TessFactor_U_LineDetail);
760   } else {
761      lineDetailOdd = (partitioning == LIBAGX_TESS_PARTITIONING_FRACTIONAL_ODD);
762   }
763
764   FXP fxpTessFactor_U_LineDetail = floatToFixed(TessFactor_U_LineDetail);
765
766   ComputeTessFactorCtx(lineDetailOdd, fxpTessFactor_U_LineDetail,
767                        &lineDetailTessFactorCtx);
768   int numPointsPerLine =
769      NumPointsForTessFactor(lineDetailOdd, fxpTessFactor_U_LineDetail);
770
771   TessFactor_V_LineDensity = ceil(TessFactor_V_LineDensity);
772   lineDensityOdd = isOdd(TessFactor_V_LineDensity);
773   FXP fxpTessFactor_V_LineDensity = floatToFixed(TessFactor_V_LineDensity);
774   ComputeTessFactorCtx(lineDensityOdd, fxpTessFactor_V_LineDensity,
775                        &lineDensityTessFactorCtx);
776
777   // don't draw last line at V == 1.
778   int numLines =
779      NumPointsForTessFactor(lineDensityOdd, fxpTessFactor_V_LineDensity) - 1;
780
781   /* Points */
782   uint num_points = numPointsPerLine * numLines;
783   if (mode != LIBAGX_TESS_MODE_COUNT) {
784      global struct libagx_tess_point *points =
785         libagx_heap_alloc_points(p, patch, num_points);
786
787      for (int line = 0, pointOffset = 0; line < numLines; line++) {
788         FXP fxpV =
789            PlacePointIn1D(&lineDensityTessFactorCtx, lineDensityOdd, line);
790
791         for (int point = 0; point < numPointsPerLine; point++) {
792            FXP fxpU =
793               PlacePointIn1D(&lineDetailTessFactorCtx, lineDetailOdd, point);
794
795            DefinePoint(&points[pointOffset++], fxpU, fxpV);
796         }
797      }
798   }
799
800   struct CHWTessellator ctx = {
801      .mode = mode,
802      .index_bias = patch * LIBAGX_TES_PATCH_ID_STRIDE,
803   };
804
805   /* Connectivity */
806   if (!p->points_mode) {
807      uint num_indices = numLines * (numPointsPerLine - 1) * 2;
808      ctx.Index = libagx_draw(p, mode, true, patch, num_indices);
809
810      if (mode == LIBAGX_TESS_MODE_COUNT)
811         return;
812
813      for (int line = 0, pointOffset = 0, indexOffset = 0; line < numLines;
814           line++) {
815         pointOffset++;
816
817         for (int point = 1; point < numPointsPerLine; point++) {
818            DefineIndex(&ctx, pointOffset - 1, indexOffset++);
819            DefineIndex(&ctx, pointOffset, indexOffset++);
820            pointOffset++;
821         }
822      }
823   } else {
824      libagx_draw_points(&ctx, p, patch, num_points);
825   }
826}
827
828KERNEL(64)
829libagx_tess_tri(constant struct libagx_tess_args *p,
830                enum libagx_tess_mode mode__2)
831{
832   enum libagx_tess_mode mode = mode__2;
833   uint patch = get_global_id(0);
834   enum libagx_tess_partitioning partitioning = p->partitioning;
835
836   global float *factors = tess_factors(p, patch);
837   float tessFactor_Ueq0 = factors[0];
838   float tessFactor_Veq0 = factors[1];
839   float tessFactor_Weq0 = factors[2];
840   float insideTessFactor_f = factors[4];
841
842   struct CHWTessellator ctx = {
843      .mode = mode,
844      .index_bias = patch * LIBAGX_TES_PATCH_ID_STRIDE,
845   };
846
847   // Is the patch culled? NaN will pass.
848   if (!(tessFactor_Ueq0 > 0) || !(tessFactor_Veq0 > 0) ||
849       !(tessFactor_Weq0 > 0)) {
850
851      libagx_draw_empty(p, mode, patch);
852
853      return;
854   }
855
856   FXP outsideTessFactor[TRI_EDGES];
857   FXP insideTessFactor;
858   bool outsideTessFactorOdd[TRI_EDGES];
859   bool insideTessFactorOdd;
860   TESS_FACTOR_CONTEXT outsideTessFactorCtx[TRI_EDGES];
861   TESS_FACTOR_CONTEXT insideTessFactorCtx;
862   // Stuff below is just specific to the traversal order
863   // this code happens to use to generate points/lines
864   int numPointsForOutsideEdge[TRI_EDGES];
865   int numPointsForInsideTessFactor;
866   int insideEdgePointBaseOffset;
867
868   // Clamp TessFactors
869   tessFactor_Ueq0 = clamp_factor(tessFactor_Ueq0, partitioning, 0);
870   tessFactor_Veq0 = clamp_factor(tessFactor_Veq0, partitioning, 0);
871   tessFactor_Weq0 = clamp_factor(tessFactor_Weq0, partitioning, 0);
872
873   float maxf = max(max(tessFactor_Ueq0, tessFactor_Veq0), tessFactor_Weq0);
874   insideTessFactor_f = clamp_factor(insideTessFactor_f, partitioning, maxf);
875   // Note the above clamps map NaN to the lower bound
876
877   // Process tessFactors
878   float outsideTessFactor_f[TRI_EDGES] = {tessFactor_Ueq0, tessFactor_Veq0,
879                                           tessFactor_Weq0};
880   if (partitioning == LIBAGX_TESS_PARTITIONING_INTEGER) {
881      for (int edge = 0; edge < TRI_EDGES; edge++) {
882         outsideTessFactorOdd[edge] = isOdd(outsideTessFactor_f[edge]);
883      }
884      insideTessFactorOdd =
885         isOdd(insideTessFactor_f) && (1.0f != insideTessFactor_f);
886   } else {
887      bool odd = (partitioning == LIBAGX_TESS_PARTITIONING_FRACTIONAL_ODD);
888
889      for (int edge = 0; edge < TRI_EDGES; edge++) {
890         outsideTessFactorOdd[edge] = odd;
891      }
892      insideTessFactorOdd = odd;
893   }
894
895   // Save fixed point TessFactors
896   for (int edge = 0; edge < TRI_EDGES; edge++) {
897      outsideTessFactor[edge] = floatToFixed(outsideTessFactor_f[edge]);
898   }
899   insideTessFactor = floatToFixed(insideTessFactor_f);
900
901   if (partitioning != LIBAGX_TESS_PARTITIONING_FRACTIONAL_EVEN) {
902      // Special case if all TessFactors are 1
903      if ((FXP_ONE == insideTessFactor) &&
904          (FXP_ONE == outsideTessFactor[Ueq0]) &&
905          (FXP_ONE == outsideTessFactor[Veq0]) &&
906          (FXP_ONE == outsideTessFactor[Weq0])) {
907
908         /* Just do minimum tess factor */
909         if (mode == LIBAGX_TESS_MODE_COUNT) {
910            p->counts[patch] = 3;
911            return;
912         }
913
914         global struct libagx_tess_point *points =
915            libagx_heap_alloc_points(p, patch, 3);
916
917         DefinePoint(&points[0], 0,
918                     FXP_ONE);          // V=1 (beginning of Ueq0 edge VW)
919         DefinePoint(&points[1], 0, 0); // W=1 (beginning of Veq0 edge WU)
920         DefinePoint(&points[2], FXP_ONE,
921                     0); // U=1 (beginning of Weq0 edge UV)
922
923         if (!p->points_mode) {
924            ctx.Index = libagx_draw(p, mode, false, patch, 3);
925
926            DefineTriangle(&ctx, 0, 1, 2,
927                           /*indexStorageBaseOffset*/ 0);
928         } else {
929            libagx_draw_points(&ctx, p, patch, 3);
930         }
931
932         return;
933      }
934   }
935
936   // Compute per-TessFactor metadata
937   for (int edge = 0; edge < TRI_EDGES; edge++) {
938      ComputeTessFactorCtx(outsideTessFactorOdd[edge], outsideTessFactor[edge],
939                           &outsideTessFactorCtx[edge]);
940   }
941   ComputeTessFactorCtx(insideTessFactorOdd, insideTessFactor,
942                        &insideTessFactorCtx);
943
944   // Compute some initial data.
945   int NumPoints = 0;
946
947   // outside edge offsets and storage
948   for (int edge = 0; edge < TRI_EDGES; edge++) {
949      numPointsForOutsideEdge[edge] = NumPointsForTessFactor(
950         outsideTessFactorOdd[edge], outsideTessFactor[edge]);
951      NumPoints += numPointsForOutsideEdge[edge];
952   }
953   NumPoints -= 3;
954
955   // inside edge offsets
956   numPointsForInsideTessFactor =
957      NumPointsForTessFactor(insideTessFactorOdd, insideTessFactor);
958   {
959      int pointCountMin = insideTessFactorOdd ? 4 : 3;
960      // max() allows degenerate transition regions when inside TessFactor == 1
961      numPointsForInsideTessFactor =
962         max(pointCountMin, numPointsForInsideTessFactor);
963   }
964
965   insideEdgePointBaseOffset = NumPoints;
966
967   // inside storage, including interior edges above
968   {
969      int interiorRings = (numPointsForInsideTessFactor >> 1) - 1;
970      int even = insideTessFactorOdd ? 0 : 1;
971      NumPoints += TRI_EDGES * (interiorRings * (interiorRings + even)) + even;
972   }
973
974   /* GENERATE POINTS */
975   if (mode != LIBAGX_TESS_MODE_COUNT) {
976      ctx.Point = libagx_heap_alloc_points(p, patch, NumPoints);
977
978      // Generate exterior ring edge points, clockwise starting from point V
979      // (VW, the U==0 edge)
980      int pointOffset = 0;
981      for (int edge = 0; edge < TRI_EDGES; edge++) {
982         int odd = edge & 0x1;
983         int endPoint = numPointsForOutsideEdge[edge] - 1;
984         // don't include end, since next edge starts with it.
985         for (int p = 0; p < endPoint; p++, pointOffset++) {
986            // whether to reverse point order given we are defining V or U (W
987            // implicit): edge0, VW, has V decreasing, so reverse 1D points
988            // below edge1, WU, has U increasing, so don't reverse 1D points
989            // below edge2, UV, has U decreasing, so reverse 1D points below
990            int q = odd ? p : endPoint - p;
991
992            FXP fxpParam = PlacePointIn1D(&outsideTessFactorCtx[edge],
993                                          outsideTessFactorOdd[edge], q);
994            DefinePoint(&ctx.Point[pointOffset], (edge == 0) ? 0 : fxpParam,
995                        (edge == 0)   ? fxpParam
996                        : (edge == 2) ? FXP_ONE - fxpParam
997                                      : 0);
998         }
999      }
1000
1001      // Generate interior ring points, clockwise spiralling in
1002      int numRings = (numPointsForInsideTessFactor >> 1);
1003      for (int ring = 1; ring < numRings; ring++) {
1004         int startPoint = ring;
1005         int endPoint = numPointsForInsideTessFactor - 1 - startPoint;
1006
1007         int perpendicularAxisPoint = startPoint;
1008         FXP fxpPerpParam = PlacePointIn1D(
1009            &insideTessFactorCtx, insideTessFactorOdd, perpendicularAxisPoint);
1010
1011         // Map location to the right size in
1012         // barycentric space. We know this fixed
1013         // point math won't over/underflow
1014         fxpPerpParam *= FXP_TWO_THIRDS;
1015         fxpPerpParam = (fxpPerpParam + FXP_ONE_HALF /*round*/) >>
1016                        FXP_FRACTION_BITS; // get back to n.16
1017
1018         for (int edge = 0; edge < TRI_EDGES; edge++) {
1019            int odd = edge & 0x1;
1020
1021            // don't include end: next edge starts with it.
1022            for (int p = startPoint; p < endPoint; p++, pointOffset++) {
1023               // whether to reverse point given we are defining V or U (W
1024               // implicit): edge0, VW, has V decreasing, so reverse 1D points
1025               // below edge1, WU, has U increasing, so don't reverse 1D points
1026               // below edge2, UV, has U decreasing, so reverse 1D points below
1027               int q = odd ? p : endPoint - (p - startPoint);
1028
1029               FXP fxpParam =
1030                  PlacePointIn1D(&insideTessFactorCtx, insideTessFactorOdd, q);
1031               // edge0 VW, has perpendicular parameter U constant
1032               // edge1 WU, has perpendicular parameter V constant
1033               // edge2 UV, has perpendicular parameter W constant
1034               // reciprocal is the rate of change of edge-parallel parameters
1035               // as they are pushed into the triangle
1036               const unsigned int deriv = 2;
1037
1038               // we know this fixed point math won't over/underflow
1039               FXP tmp = fxpParam - (fxpPerpParam + 1 /*round*/) / deriv;
1040
1041               DefinePoint(&ctx.Point[pointOffset],
1042                           edge > 0 ? tmp : fxpPerpParam,
1043                           edge == 0   ? tmp
1044                           : edge == 1 ? fxpPerpParam
1045                                       : FXP_ONE - tmp - fxpPerpParam);
1046            }
1047         }
1048      }
1049      if (!insideTessFactorOdd) {
1050         // Last point is the point at the center.
1051         DefinePoint(&ctx.Point[pointOffset], FXP_ONE_THIRD, FXP_ONE_THIRD);
1052      }
1053   }
1054
1055   if (p->points_mode) {
1056      libagx_draw_points(&ctx, p, patch, NumPoints);
1057      return;
1058   }
1059
1060   {
1061      // Generate primitives for all the concentric rings, one side at a time
1062      // for each ring +1 is so even tess includes the center point, which we
1063      // want to now
1064      int numRings = ((numPointsForInsideTessFactor + 1) >> 1);
1065
1066      int NumIndices = 0;
1067      {
1068         int OuterPoints = numPointsForOutsideEdge[0] +
1069                           numPointsForOutsideEdge[1] +
1070                           numPointsForOutsideEdge[2];
1071
1072         int numRings18 = numRings * 18;
1073         NumIndices = ((numRings18 - 27) * numPointsForInsideTessFactor) +
1074                      (3 * OuterPoints) - (numRings18 * (numRings - 1)) +
1075                      (insideTessFactorOdd ? 3 : 0);
1076      }
1077
1078      // Generate the draw and allocate the index buffer now that we know the size
1079      ctx.Index = libagx_draw(p, mode, false, patch, NumIndices);
1080
1081      if (mode == LIBAGX_TESS_MODE_COUNT)
1082         return;
1083
1084      int insideOffset = insideEdgePointBaseOffset;
1085      int outsideEdgePointBaseOffset = 0;
1086
1087      NumIndices = 0;
1088      for (int ring = 1; ring < numRings; ring++) {
1089         int numPointsForInsideEdge = numPointsForInsideTessFactor - 2 * ring;
1090         int edge0InsidePointBaseOffset = insideOffset;
1091         int edge0OutsidePointBaseOffset = outsideEdgePointBaseOffset;
1092         for (int edge = 0; edge < TRI_EDGES; edge++) {
1093            int outsidePoints = ring == 1 ? numPointsForOutsideEdge[edge]
1094                                          : (numPointsForInsideEdge + 2);
1095
1096            int numTriangles = numPointsForInsideEdge + outsidePoints - 2;
1097
1098            int insideBaseOffset;
1099            int outsideBaseOffset;
1100            if (edge == 2) {
1101               ctx.IndexPatchCtx.insidePointIndexDeltaToRealValue =
1102                  insideOffset;
1103               ctx.IndexPatchCtx.insidePointIndexBadValue =
1104                  numPointsForInsideEdge - 1;
1105               ctx.IndexPatchCtx.insidePointIndexReplacementValue =
1106                  edge0InsidePointBaseOffset;
1107               ctx.IndexPatchCtx.outsidePointIndexPatchBase =
1108                  ctx.IndexPatchCtx.insidePointIndexBadValue +
1109                  1; // past inside patched index range
1110               ctx.IndexPatchCtx.outsidePointIndexDeltaToRealValue =
1111                  outsideEdgePointBaseOffset -
1112                  ctx.IndexPatchCtx.outsidePointIndexPatchBase;
1113               ctx.IndexPatchCtx.outsidePointIndexBadValue =
1114                  ctx.IndexPatchCtx.outsidePointIndexPatchBase + outsidePoints -
1115                  1;
1116               ctx.IndexPatchCtx.outsidePointIndexReplacementValue =
1117                  edge0OutsidePointBaseOffset;
1118               ctx.bUsingPatchedIndices = true;
1119               insideBaseOffset = 0;
1120               outsideBaseOffset = ctx.IndexPatchCtx.outsidePointIndexPatchBase;
1121            } else {
1122               insideBaseOffset = insideOffset;
1123               outsideBaseOffset = outsideEdgePointBaseOffset;
1124            }
1125            if (ring == 1) {
1126               StitchTransition(
1127                  &ctx, /*baseIndexOffset: */ NumIndices, insideBaseOffset,
1128                  insideTessFactorCtx.numHalfTessFactorPoints,
1129                  insideTessFactorOdd, outsideBaseOffset,
1130                  outsideTessFactorCtx[edge].numHalfTessFactorPoints,
1131                  outsideTessFactorOdd[edge]);
1132            } else {
1133               StitchRegular(&ctx, /*bTrapezoid*/ true, DIAGONALS_MIRRORED,
1134                             /*baseIndexOffset: */ NumIndices,
1135                             numPointsForInsideEdge, insideBaseOffset,
1136                             outsideBaseOffset);
1137            }
1138            if (2 == edge) {
1139               ctx.bUsingPatchedIndices = false;
1140            }
1141            NumIndices += numTriangles * 3;
1142            outsideEdgePointBaseOffset += outsidePoints - 1;
1143            insideOffset += numPointsForInsideEdge - 1;
1144         }
1145      }
1146      if (insideTessFactorOdd) {
1147         // Triangulate center (a single triangle)
1148         DefineTriangle(&ctx, outsideEdgePointBaseOffset,
1149                        outsideEdgePointBaseOffset + 1,
1150                        outsideEdgePointBaseOffset + 2, NumIndices);
1151         NumIndices += 3;
1152      }
1153   }
1154}
1155
1156KERNEL(64)
1157libagx_tess_quad(constant struct libagx_tess_args *p,
1158                 enum libagx_tess_mode mode__2)
1159{
1160   enum libagx_tess_mode mode = mode__2;
1161   uint patch = get_global_id(0);
1162   enum libagx_tess_partitioning partitioning = p->partitioning;
1163   global float *factors = tess_factors(p, patch);
1164
1165   float tessFactor_Ueq0 = factors[0];
1166   float tessFactor_Veq0 = factors[1];
1167   float tessFactor_Ueq1 = factors[2];
1168   float tessFactor_Veq1 = factors[3];
1169
1170   float insideTessFactor_U = factors[4];
1171   float insideTessFactor_V = factors[5];
1172
1173   struct CHWTessellator ctx = {
1174      .mode = mode,
1175      .index_bias = patch * LIBAGX_TES_PATCH_ID_STRIDE,
1176   };
1177
1178   // Is the patch culled?
1179   if (!(tessFactor_Ueq0 > 0) || // NaN will pass
1180       !(tessFactor_Veq0 > 0) || !(tessFactor_Ueq1 > 0) ||
1181       !(tessFactor_Veq1 > 0)) {
1182      libagx_draw_empty(p, mode, patch);
1183      return;
1184   }
1185
1186   FXP outsideTessFactor[QUAD_EDGES];
1187   FXP insideTessFactor[QUAD_AXES];
1188   bool outsideTessFactorOdd[QUAD_EDGES];
1189   bool insideTessFactorOdd[QUAD_AXES];
1190   TESS_FACTOR_CONTEXT outsideTessFactorCtx[QUAD_EDGES];
1191   TESS_FACTOR_CONTEXT insideTessFactorCtx[QUAD_AXES];
1192   // Stuff below is just specific to the traversal order
1193   // this code happens to use to generate points/lines
1194   int numPointsForOutsideEdge[QUAD_EDGES];
1195   int numPointsForInsideTessFactor[QUAD_AXES];
1196   int insideEdgePointBaseOffset;
1197
1198   // Clamp edge TessFactors
1199   tessFactor_Ueq0 = clamp_factor(tessFactor_Ueq0, partitioning, 0);
1200   tessFactor_Veq0 = clamp_factor(tessFactor_Veq0, partitioning, 0);
1201   tessFactor_Ueq1 = clamp_factor(tessFactor_Ueq1, partitioning, 0);
1202   tessFactor_Veq1 = clamp_factor(tessFactor_Veq1, partitioning, 0);
1203
1204   float maxf = max(max(max(tessFactor_Ueq0, tessFactor_Veq0),
1205                        max(tessFactor_Ueq1, tessFactor_Veq1)),
1206                    max(insideTessFactor_U, insideTessFactor_V));
1207
1208   insideTessFactor_U = clamp_factor(insideTessFactor_U, partitioning, maxf);
1209   insideTessFactor_V = clamp_factor(insideTessFactor_V, partitioning, maxf);
1210   // Note the above clamps map NaN to lowerBound
1211
1212   // Process tessFactors
1213   float outsideTessFactor_f[QUAD_EDGES] = {tessFactor_Ueq0, tessFactor_Veq0,
1214                                            tessFactor_Ueq1, tessFactor_Veq1};
1215   float insideTessFactor_f[QUAD_AXES] = {insideTessFactor_U,
1216                                          insideTessFactor_V};
1217   if (partitioning == LIBAGX_TESS_PARTITIONING_INTEGER) {
1218      for (int edge = 0; edge < QUAD_EDGES; edge++) {
1219         outsideTessFactorOdd[edge] = isOdd(outsideTessFactor_f[edge]);
1220      }
1221      for (int axis = 0; axis < QUAD_AXES; axis++) {
1222         insideTessFactorOdd[axis] = isOdd(insideTessFactor_f[axis]) &&
1223                                     (1.0f != insideTessFactor_f[axis]);
1224      }
1225   } else {
1226      bool odd = (partitioning == LIBAGX_TESS_PARTITIONING_FRACTIONAL_ODD);
1227
1228      for (int edge = 0; edge < QUAD_EDGES; edge++) {
1229         outsideTessFactorOdd[edge] = odd;
1230      }
1231      insideTessFactorOdd[U] = insideTessFactorOdd[V] = odd;
1232   }
1233
1234   // Save fixed point TessFactors
1235   for (int edge = 0; edge < QUAD_EDGES; edge++) {
1236      outsideTessFactor[edge] = floatToFixed(outsideTessFactor_f[edge]);
1237   }
1238   for (int axis = 0; axis < QUAD_AXES; axis++) {
1239      insideTessFactor[axis] = floatToFixed(insideTessFactor_f[axis]);
1240   }
1241
1242   if (partitioning != LIBAGX_TESS_PARTITIONING_FRACTIONAL_EVEN) {
1243      // Special case if all TessFactors are 1
1244      if ((FXP_ONE == insideTessFactor[U]) &&
1245          (FXP_ONE == insideTessFactor[V]) &&
1246          (FXP_ONE == outsideTessFactor[Ueq0]) &&
1247          (FXP_ONE == outsideTessFactor[Veq0]) &&
1248          (FXP_ONE == outsideTessFactor[Ueq1]) &&
1249          (FXP_ONE == outsideTessFactor[Veq1])) {
1250
1251         /* Just do minimum tess factor */
1252         if (!p->points_mode) {
1253            ctx.Index = libagx_draw(p, mode, false, patch, 6);
1254            if (mode == LIBAGX_TESS_MODE_COUNT)
1255               return;
1256
1257            DefineTriangle(&ctx, 0, 1, 3, /*indexStorageOffset*/ 0);
1258            DefineTriangle(&ctx, 1, 2, 3, /*indexStorageOffset*/ 3);
1259         } else {
1260            libagx_draw_points(&ctx, p, patch, 4);
1261            if (mode == LIBAGX_TESS_MODE_COUNT)
1262               return;
1263         }
1264
1265         global struct libagx_tess_point *points =
1266            libagx_heap_alloc_points(p, patch, 4);
1267
1268         DefinePoint(&points[0], 0, 0);
1269         DefinePoint(&points[1], FXP_ONE, 0);
1270         DefinePoint(&points[2], FXP_ONE, FXP_ONE);
1271         DefinePoint(&points[3], 0, FXP_ONE);
1272         return;
1273      }
1274   }
1275
1276   // Compute TessFactor-specific metadata
1277   for (int edge = 0; edge < QUAD_EDGES; edge++) {
1278      ComputeTessFactorCtx(outsideTessFactorOdd[edge], outsideTessFactor[edge],
1279                           &outsideTessFactorCtx[edge]);
1280   }
1281
1282   for (int axis = 0; axis < QUAD_AXES; axis++) {
1283      ComputeTessFactorCtx(insideTessFactorOdd[axis], insideTessFactor[axis],
1284                           &insideTessFactorCtx[axis]);
1285   }
1286
1287   int NumPoints = 0;
1288
1289   // outside edge offsets and storage
1290   for (int edge = 0; edge < QUAD_EDGES; edge++) {
1291      numPointsForOutsideEdge[edge] = NumPointsForTessFactor(
1292         outsideTessFactorOdd[edge], outsideTessFactor[edge]);
1293      NumPoints += numPointsForOutsideEdge[edge];
1294   }
1295   NumPoints -= 4;
1296
1297   // inside edge offsets
1298   for (int axis = 0; axis < QUAD_AXES; axis++) {
1299      numPointsForInsideTessFactor[axis] = NumPointsForTessFactor(
1300         insideTessFactorOdd[axis], insideTessFactor[axis]);
1301      int pointCountMin = insideTessFactorOdd[axis] ? 4 : 3;
1302      // max() allows degenerate transition regions when inside TessFactor == 1
1303      numPointsForInsideTessFactor[axis] =
1304         max(pointCountMin, numPointsForInsideTessFactor[axis]);
1305   }
1306
1307   insideEdgePointBaseOffset = NumPoints;
1308
1309   // inside storage, including interior edges above
1310   int numInteriorPoints = (numPointsForInsideTessFactor[U] - 2) *
1311                           (numPointsForInsideTessFactor[V] - 2);
1312   NumPoints += numInteriorPoints;
1313
1314   if (mode != LIBAGX_TESS_MODE_COUNT) {
1315      ctx.Point = libagx_heap_alloc_points(p, patch, NumPoints);
1316
1317      // Generate exterior ring edge points, clockwise from top-left
1318      int pointOffset = 0;
1319      for (int edge = 0; edge < QUAD_EDGES; edge++) {
1320         int odd = edge & 0x1;
1321         // don't include end, since next edge starts with it.
1322         int endPoint = numPointsForOutsideEdge[edge] - 1;
1323         for (int p = 0; p < endPoint; p++, pointOffset++) {
1324            int q =
1325               ((edge == 1) || (edge == 2)) ? p : endPoint - p; // reverse order
1326            FXP fxpParam = PlacePointIn1D(&outsideTessFactorCtx[edge],
1327                                          outsideTessFactorOdd[edge], q);
1328
1329            FXP u = odd ? fxpParam : ((edge == 2) ? FXP_ONE : 0);
1330            FXP v = odd ? ((edge == 3) ? FXP_ONE : 0) : fxpParam;
1331            DefinePoint(&ctx.Point[pointOffset], u, v);
1332         }
1333      }
1334
1335      // Generate interior ring points, clockwise from (U==0,V==1) (bottom-left)
1336      // spiralling toward center
1337      int minNumPointsForTessFactor =
1338         min(numPointsForInsideTessFactor[U], numPointsForInsideTessFactor[V]);
1339      // note for even tess we aren't counting center point here.
1340      int numRings = (minNumPointsForTessFactor >> 1);
1341
1342      for (int ring = 1; ring < numRings; ring++) {
1343         int startPoint = ring;
1344         int endPoint[QUAD_AXES] = {
1345            numPointsForInsideTessFactor[U] - 1 - startPoint,
1346            numPointsForInsideTessFactor[V] - 1 - startPoint,
1347         };
1348
1349         for (int edge = 0; edge < QUAD_EDGES; edge++) {
1350            int odd[QUAD_AXES] = {edge & 0x1, ((edge + 1) & 0x1)};
1351            int perpendicularAxisPoint =
1352               (edge < 2) ? startPoint : endPoint[odd[0]];
1353            FXP fxpPerpParam = PlacePointIn1D(&insideTessFactorCtx[odd[0]],
1354                                              insideTessFactorOdd[odd[0]],
1355                                              perpendicularAxisPoint);
1356
1357            for (int p = startPoint; p < endPoint[odd[1]]; p++,
1358                     pointOffset++) // don't include end: next edge starts with
1359                                    // it.
1360            {
1361               bool odd_ = odd[1];
1362               int q = ((edge == 1) || (edge == 2))
1363                          ? p
1364                          : endPoint[odd_] - (p - startPoint);
1365               FXP fxpParam = PlacePointIn1D(&insideTessFactorCtx[odd_],
1366                                             insideTessFactorOdd[odd_], q);
1367               DefinePoint(&ctx.Point[pointOffset],
1368                           odd_ ? fxpPerpParam : fxpParam,
1369                           odd_ ? fxpParam : fxpPerpParam);
1370            }
1371         }
1372      }
1373      // For even tessellation, the inner "ring" is degenerate - a row of points
1374      if ((numPointsForInsideTessFactor[U] > numPointsForInsideTessFactor[V]) &&
1375          !insideTessFactorOdd[V]) {
1376         int startPoint = numRings;
1377         int endPoint = numPointsForInsideTessFactor[U] - 1 - startPoint;
1378         for (int p = startPoint; p <= endPoint; p++, pointOffset++) {
1379            FXP fxpParam = PlacePointIn1D(&insideTessFactorCtx[U],
1380                                          insideTessFactorOdd[U], p);
1381            DefinePoint(&ctx.Point[pointOffset], fxpParam, FXP_ONE_HALF);
1382         }
1383      } else if ((numPointsForInsideTessFactor[V] >=
1384                  numPointsForInsideTessFactor[U]) &&
1385                 !insideTessFactorOdd[U]) {
1386         int startPoint = numRings;
1387         int endPoint = numPointsForInsideTessFactor[V] - 1 - startPoint;
1388         for (int p = endPoint; p >= startPoint; p--, pointOffset++) {
1389            FXP fxpParam = PlacePointIn1D(&insideTessFactorCtx[V],
1390                                          insideTessFactorOdd[V], p);
1391            DefinePoint(&ctx.Point[pointOffset], FXP_ONE_HALF, fxpParam);
1392         }
1393      }
1394   }
1395
1396   if (p->points_mode) {
1397      libagx_draw_points(&ctx, p, patch, NumPoints);
1398      return;
1399   }
1400
1401   /* CONNECTIVITY */
1402   {
1403      // Generate primitives for all the concentric rings, one side at a time
1404      // for each ring. +1 is so even tess includes the center point
1405      int numPointRowsToCenter[QUAD_AXES] = {
1406         (numPointsForInsideTessFactor[U] + 1) >> 1,
1407         (numPointsForInsideTessFactor[V] + 1) >> 1,
1408      };
1409
1410      int numRings = min(numPointRowsToCenter[U], numPointRowsToCenter[V]);
1411
1412      /* Calculate # of indices so we can allocate */
1413      {
1414         /* Handle main case */
1415         int OuterPoints =
1416            numPointsForOutsideEdge[0] + numPointsForOutsideEdge[1] +
1417            numPointsForOutsideEdge[2] + numPointsForOutsideEdge[3];
1418
1419         int InnerPoints =
1420            numPointsForInsideTessFactor[U] + numPointsForInsideTessFactor[V];
1421
1422         int NumIndices = (OuterPoints * 3) + (12 * numRings * InnerPoints) -
1423                          (InnerPoints * 18) - (24 * numRings * (numRings - 1));
1424
1425         /* Determine major/minor axes */
1426         bool U_major =
1427            (numPointsForInsideTessFactor[U] > numPointsForInsideTessFactor[V]);
1428         unsigned M = U_major ? U : V;
1429         unsigned m = U_major ? V : U;
1430
1431         /* Handle degenerate ring */
1432         if (insideTessFactorOdd[m]) {
1433            NumIndices += 12 * ((numPointsForInsideTessFactor[M] >> 1) -
1434                                (numPointsForInsideTessFactor[m] >> 1));
1435            NumIndices += (insideTessFactorOdd[M] ? 6 : 12);
1436         }
1437
1438         // Generate the draw and allocate the index buffer with the size
1439         ctx.Index = libagx_draw(p, mode, false, patch, NumIndices);
1440      }
1441
1442      if (mode == LIBAGX_TESS_MODE_COUNT)
1443         return;
1444
1445      int degeneratePointRing[QUAD_AXES] = {
1446         // Even partitioning causes degenerate row of points,
1447         // which results in exceptions to the point ordering conventions
1448         // when travelling around the rings counterclockwise.
1449         !insideTessFactorOdd[V] ? numPointRowsToCenter[V] - 1 : -1,
1450         !insideTessFactorOdd[U] ? numPointRowsToCenter[U] - 1 : -1,
1451      };
1452
1453      int numPointsForOutsideEdge_[QUAD_EDGES] = {
1454         numPointsForOutsideEdge[Ueq0],
1455         numPointsForOutsideEdge[Veq0],
1456         numPointsForOutsideEdge[Ueq1],
1457         numPointsForOutsideEdge[Veq1],
1458      };
1459
1460      int insideEdgePointBaseOffset_ = insideEdgePointBaseOffset;
1461      int outsideEdgePointBaseOffset = 0;
1462
1463      int NumIndices = 0;
1464
1465      for (int ring = 1; ring < numRings; ring++) {
1466         int numPointsForInsideEdge[QUAD_AXES] = {
1467            numPointsForInsideTessFactor[U] - 2 * ring,
1468            numPointsForInsideTessFactor[V] - 2 * ring};
1469
1470         int edge0InsidePointBaseOffset = insideEdgePointBaseOffset_;
1471         int edge0OutsidePointBaseOffset = outsideEdgePointBaseOffset;
1472
1473         for (int edge = 0; edge < QUAD_EDGES; edge++) {
1474            int odd = (edge + 1) & 0x1;
1475
1476            int numTriangles =
1477               numPointsForInsideEdge[odd] + numPointsForOutsideEdge_[edge] - 2;
1478            int insideBaseOffset;
1479            int outsideBaseOffset;
1480
1481            // We need to patch the indexing so Stitch() can think it sees 2
1482            // sequentially increasing rows of points, even though we have
1483            // wrapped around to the end of the inner and outer ring's points,
1484            // so the last point is really the first point for the ring. We make
1485            // it so that when Stitch() calls AddIndex(), that function will do
1486            // any necessary index adjustment.
1487            if (edge == 3) {
1488               if (ring == degeneratePointRing[odd]) {
1489                  ctx.IndexPatchCtx2.baseIndexToInvert =
1490                     insideEdgePointBaseOffset_ + 1;
1491                  ctx.IndexPatchCtx2.cornerCaseBadValue =
1492                     outsideEdgePointBaseOffset +
1493                     numPointsForOutsideEdge_[edge] - 1;
1494                  ctx.IndexPatchCtx2.cornerCaseReplacementValue =
1495                     edge0OutsidePointBaseOffset;
1496                  ctx.IndexPatchCtx2.indexInversionEndPoint =
1497                     (ctx.IndexPatchCtx2.baseIndexToInvert << 1) - 1;
1498                  insideBaseOffset = ctx.IndexPatchCtx2.baseIndexToInvert;
1499                  outsideBaseOffset = outsideEdgePointBaseOffset;
1500                  ctx.bUsingPatchedIndices2 = true;
1501               } else {
1502                  ctx.IndexPatchCtx.insidePointIndexDeltaToRealValue =
1503                     insideEdgePointBaseOffset_;
1504                  ctx.IndexPatchCtx.insidePointIndexBadValue =
1505                     numPointsForInsideEdge[odd] - 1;
1506                  ctx.IndexPatchCtx.insidePointIndexReplacementValue =
1507                     edge0InsidePointBaseOffset;
1508                  ctx.IndexPatchCtx.outsidePointIndexPatchBase =
1509                     ctx.IndexPatchCtx.insidePointIndexBadValue +
1510                     1; // past inside patched index range
1511                  ctx.IndexPatchCtx.outsidePointIndexDeltaToRealValue =
1512                     outsideEdgePointBaseOffset -
1513                     ctx.IndexPatchCtx.outsidePointIndexPatchBase;
1514                  ctx.IndexPatchCtx.outsidePointIndexBadValue =
1515                     ctx.IndexPatchCtx.outsidePointIndexPatchBase +
1516                     numPointsForOutsideEdge_[edge] - 1;
1517                  ctx.IndexPatchCtx.outsidePointIndexReplacementValue =
1518                     edge0OutsidePointBaseOffset;
1519
1520                  insideBaseOffset = 0;
1521                  outsideBaseOffset =
1522                     ctx.IndexPatchCtx.outsidePointIndexPatchBase;
1523                  ctx.bUsingPatchedIndices = true;
1524               }
1525            } else if ((edge == 2) && (ring == degeneratePointRing[odd])) {
1526               ctx.IndexPatchCtx2.baseIndexToInvert =
1527                  insideEdgePointBaseOffset_;
1528               ctx.IndexPatchCtx2.cornerCaseBadValue = -1;         // unused
1529               ctx.IndexPatchCtx2.cornerCaseReplacementValue = -1; // unused
1530               ctx.IndexPatchCtx2.indexInversionEndPoint =
1531                  ctx.IndexPatchCtx2.baseIndexToInvert << 1;
1532               insideBaseOffset = ctx.IndexPatchCtx2.baseIndexToInvert;
1533               outsideBaseOffset = outsideEdgePointBaseOffset;
1534               ctx.bUsingPatchedIndices2 = true;
1535            } else {
1536               insideBaseOffset = insideEdgePointBaseOffset_;
1537               outsideBaseOffset = outsideEdgePointBaseOffset;
1538            }
1539            if (ring == 1) {
1540               StitchTransition(
1541                  &ctx, /*baseIndexOffset: */ NumIndices, insideBaseOffset,
1542                  insideTessFactorCtx[odd].numHalfTessFactorPoints,
1543                  insideTessFactorOdd[odd], outsideBaseOffset,
1544                  outsideTessFactorCtx[edge].numHalfTessFactorPoints,
1545                  outsideTessFactorOdd[edge]);
1546            } else {
1547               StitchRegular(&ctx, /*bTrapezoid*/ true, DIAGONALS_MIRRORED,
1548                             /*baseIndexOffset: */ NumIndices,
1549                             numPointsForInsideEdge[odd], insideBaseOffset,
1550                             outsideBaseOffset);
1551            }
1552            ctx.bUsingPatchedIndices = false;
1553            ctx.bUsingPatchedIndices2 = false;
1554            NumIndices += numTriangles * 3;
1555            outsideEdgePointBaseOffset += numPointsForOutsideEdge_[edge] - 1;
1556            if ((edge == 2) && (ring == degeneratePointRing[odd])) {
1557               insideEdgePointBaseOffset_ -= numPointsForInsideEdge[odd] - 1;
1558            } else {
1559               insideEdgePointBaseOffset_ += numPointsForInsideEdge[odd] - 1;
1560            }
1561            numPointsForOutsideEdge_[edge] = numPointsForInsideEdge[odd];
1562         }
1563      }
1564
1565      // Triangulate center - a row of quads if odd
1566      // This triangulation may be producing diagonals that are asymmetric about
1567      // the center of the patch in this region.
1568      if ((numPointsForInsideTessFactor[U] > numPointsForInsideTessFactor[V]) &&
1569          insideTessFactorOdd[V]) {
1570         ctx.bUsingPatchedIndices2 = true;
1571         int stripNumQuads = (((numPointsForInsideTessFactor[U] >> 1) -
1572                               (numPointsForInsideTessFactor[V] >> 1))
1573                              << 1) +
1574                             (insideTessFactorOdd[U] ? 1 : 2);
1575         ctx.IndexPatchCtx2.baseIndexToInvert =
1576            outsideEdgePointBaseOffset + stripNumQuads + 2;
1577         ctx.IndexPatchCtx2.cornerCaseBadValue =
1578            ctx.IndexPatchCtx2.baseIndexToInvert;
1579         ctx.IndexPatchCtx2.cornerCaseReplacementValue =
1580            outsideEdgePointBaseOffset;
1581         ctx.IndexPatchCtx2.indexInversionEndPoint =
1582            ctx.IndexPatchCtx2.baseIndexToInvert +
1583            ctx.IndexPatchCtx2.baseIndexToInvert + stripNumQuads;
1584         StitchRegular(
1585            &ctx, /*bTrapezoid*/ false, DIAGONALS_INSIDE_TO_OUTSIDE,
1586            /*baseIndexOffset: */ NumIndices,
1587            /*numInsideEdgePoints:*/ stripNumQuads + 1,
1588            /*insideEdgePointBaseOffset*/ ctx.IndexPatchCtx2.baseIndexToInvert,
1589            outsideEdgePointBaseOffset + 1);
1590         ctx.bUsingPatchedIndices2 = false;
1591         NumIndices += stripNumQuads * 6;
1592      } else if ((numPointsForInsideTessFactor[V] >=
1593                  numPointsForInsideTessFactor[U]) &&
1594                 insideTessFactorOdd[U]) {
1595         ctx.bUsingPatchedIndices2 = true;
1596         int stripNumQuads = (((numPointsForInsideTessFactor[V] >> 1) -
1597                               (numPointsForInsideTessFactor[U] >> 1))
1598                              << 1) +
1599                             (insideTessFactorOdd[V] ? 1 : 2);
1600         ctx.IndexPatchCtx2.baseIndexToInvert =
1601            outsideEdgePointBaseOffset + stripNumQuads + 1;
1602         ctx.IndexPatchCtx2.cornerCaseBadValue = -1; // unused
1603         ctx.IndexPatchCtx2.indexInversionEndPoint =
1604            ctx.IndexPatchCtx2.baseIndexToInvert +
1605            ctx.IndexPatchCtx2.baseIndexToInvert + stripNumQuads;
1606         DIAGONALS diag = insideTessFactorOdd[V]
1607                             ? DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE
1608                             : DIAGONALS_INSIDE_TO_OUTSIDE;
1609         StitchRegular(
1610            &ctx, /*bTrapezoid*/ false, diag,
1611            /*baseIndexOffset: */ NumIndices,
1612            /*numInsideEdgePoints:*/ stripNumQuads + 1,
1613            /*insideEdgePointBaseOffset*/ ctx.IndexPatchCtx2.baseIndexToInvert,
1614            outsideEdgePointBaseOffset);
1615         ctx.bUsingPatchedIndices2 = false;
1616         NumIndices += stripNumQuads * 6;
1617      }
1618   }
1619}
1620