/*
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "geometry.h"
#include "tessellator.h"
#include <agx_pack.h>

uint
libagx_tcs_patch_vertices_in(constant struct libagx_tess_args *p)
{
   return p->input_patch_size;
}

uint
libagx_tes_patch_vertices_in(constant struct libagx_tess_args *p)
{
   return p->output_patch_size;
}

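/* Flatten a (patch, instance) workgroup ID into a single patch index. Per the
 * TCS grid set up in libagx_tess_setup_indirect below, wg_id.x is the patch
 * within the instance and wg_id.y is the instance.
 */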
uint
libagx_tcs_unrolled_id(constant struct libagx_tess_args *p, uint3 wg_id)
{
   return (wg_id.y * p->patches_per_instance) + wg_id.x;
}

uint64_t
libagx_tes_buffer(constant struct libagx_tess_args *p)
{
   return p->tes_buffer;
}

/*
 * Helper to lower indexing for a tess eval shader run as a compute shader.
 * This handles the tess+geom case. It is simpler than the general input
 * assembly lowering, as we know:
 *
 * 1. the index buffer is U32
 * 2. the index is in bounds
 *
 * Therefore we can do a simple load with no bounds checking.
 */
uint32_t
libagx_load_tes_index(constant struct libagx_tess_args *p, uint32_t index)
{
   /* Swap second and third vertices of each triangle to flip winding order
    * dynamically if needed.
    */
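   /* For example, with p->ccw set, a triangle fetched with raw indices
    * (0, 1, 2) instead reads index buffer elements 0, 2, 1.
    */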
   if (p->ccw) {
      uint id = index % 3;

      if (id == 1)
         index++;
      else if (id == 2)
         index--;
   }

   return p->index_buffer[index];
}

ushort
libagx_tcs_in_offset(uint vtx, gl_varying_slot location,
                     uint64_t crosslane_vs_out_mask)
{
   return libagx_tcs_in_offs(vtx, location, crosslane_vs_out_mask);
}

uintptr_t
libagx_tcs_out_address(constant struct libagx_tess_args *p, uint patch_id,
                       uint vtx_id, gl_varying_slot location, uint nr_patch_out,
                       uint out_patch_size, uint64_t vtx_out_mask)
{
   uint stride_el =
      libagx_tcs_out_stride_el(nr_patch_out, out_patch_size, vtx_out_mask);

   uint offs_el =
      libagx_tcs_out_offs_el(vtx_id, location, nr_patch_out, vtx_out_mask);

   offs_el += patch_id * stride_el;

   /* Written to match the AGX addressing mode */
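   /* The shift by 2 scales the 32-bit element offset to a byte offset;
    * presumably keeping it as an explicit shift lets the compiler fold it
    * into the hardware's scaled register-offset addressing.
    */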
   return (uintptr_t)(p->tcs_buffer) + (((uintptr_t)offs_el) << 2);
}

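/* TES invocations use a padded ID space: raw_id = (unrolled patch index *
 * LIBAGX_TES_PATCH_ID_STRIDE) + the vertex within the patch. The helpers
 * below extract the two components.
 */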
static uint
libagx_tes_unrolled_patch_id(uint raw_id)
{
   return raw_id / LIBAGX_TES_PATCH_ID_STRIDE;
}

uint
libagx_tes_patch_id(constant struct libagx_tess_args *p, uint raw_id)
{
   return libagx_tes_unrolled_patch_id(raw_id) % p->patches_per_instance;
}

static uint
tes_vertex_id_in_patch(uint raw_id)
{
   return raw_id % LIBAGX_TES_PATCH_ID_STRIDE;
}

float2
libagx_load_tess_coord(constant struct libagx_tess_args *p, uint raw_id)
{
   uint patch = libagx_tes_unrolled_patch_id(raw_id);
   uint vtx = tes_vertex_id_in_patch(raw_id);

   global struct libagx_tess_point *t =
      &p->patch_coord_buffer[p->coord_allocs[patch] + vtx];

   /* Written weirdly because NIR struggles with loads of structs */
   uint2 fixed = *((global uint2 *)t);

   /* Convert fixed point to float */
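   /* The coordinates are unsigned 16.16 fixed point, so e.g. 0x8000 maps to
    * 0.5 and 0x10000 to 1.0.
    */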
   return convert_float2(fixed) / (1u << 16);
}

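/* TES inputs are the TCS outputs, so address them with the same layout the
 * TCS used when writing: per-patch constant count, output patch size, and
 * per-vertex output mask.
 */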
uintptr_t
libagx_tes_in_address(constant struct libagx_tess_args *p, uint raw_id,
                      uint vtx_id, gl_varying_slot location)
{
   uint patch = libagx_tes_unrolled_patch_id(raw_id);

   return libagx_tcs_out_address(p, patch, vtx_id, location,
                                 p->tcs_patch_constants, p->output_patch_size,
                                 p->tcs_per_vertex_outputs);
}

float4
libagx_tess_level_outer_default(constant struct libagx_tess_args *p)
{
   return (float4)(p->tess_level_outer_default[0],
                   p->tess_level_outer_default[1],
                   p->tess_level_outer_default[2],
                   p->tess_level_outer_default[3]);
}

float2
libagx_tess_level_inner_default(constant struct libagx_tess_args *p)
{
   return (float2)(p->tess_level_inner_default[0],
                   p->tess_level_inner_default[1]);
}

KERNEL(1)
libagx_tess_setup_indirect(
   global struct libagx_tess_args *p,
   global uint32_t *grids /* output: VS then TCS then tess */,
   global struct agx_ia_state *ia /* output */, global uint32_t *indirect,
   global uint64_t *vertex_output_buffer_ptr, uint64_t in_index_buffer,
   uint32_t in_index_buffer_range_el, uint32_t in_index_size_B,
   uint64_t vertex_outputs /* bitfield */,

   /* Tess control invocation counter if active, else zero */
   global uint32_t *tcs_statistic)
{
   uint count = indirect[0], instance_count = indirect[1];
   unsigned in_patches = count / p->input_patch_size;

   /* The TCS invocation counter increments once per patch */
   if (tcs_statistic) {
      *tcs_statistic += in_patches;
   }

   size_t draw_stride = 5 * sizeof(uint32_t);
   unsigned unrolled_patches = in_patches * instance_count;

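   /* Lay out one heap blob holding, in order: TCS outputs, per-patch
    * coordinate allocations, per-patch counts, and the VS output buffer.
    * Each section's offset is recorded as the running total grows.
    */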
   uint32_t alloc = 0;
   uint32_t tcs_out_offs = alloc;
   alloc += unrolled_patches * p->tcs_stride_el * 4;

   uint32_t patch_coord_offs = alloc;
   alloc += unrolled_patches * 4;

   uint32_t count_offs = alloc;
   alloc += unrolled_patches * sizeof(uint32_t);

   uint vb_offs = alloc;
   uint vb_size = libagx_tcs_in_size(count * instance_count, vertex_outputs);
   alloc += vb_size;

   /* Make all of the above allocations with a single heap bump */
   global uchar *blob = p->heap->heap + p->heap->heap_bottom;
   p->heap->heap_bottom += alloc;

   p->tcs_buffer = (global float *)(blob + tcs_out_offs);
   p->patches_per_instance = in_patches;
   p->coord_allocs = (global uint *)(blob + patch_coord_offs);
   p->nr_patches = unrolled_patches;

   *vertex_output_buffer_ptr = (uintptr_t)(blob + vb_offs);
   p->counts = (global uint32_t *)(blob + count_offs);

   ia->verts_per_instance = count;

   /* If indexing is enabled, the third word of the indirect draw is the
    * offset into the index buffer in elements. Apply that offset now that we
    * have it. For a hardware indirect draw, the hardware would do this for
    * us, but for software input assembly we need to do it ourselves.
    *
    * XXX: Deduplicate?
    */
   if (in_index_size_B) {
      ia->index_buffer =
         libagx_index_buffer(in_index_buffer, in_index_buffer_range_el,
                             indirect[2], in_index_size_B, 0);

      ia->index_buffer_range_el =
         libagx_index_buffer_range_el(in_index_buffer_range_el, indirect[2]);
   }

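   /* Emit the three dispatches. Each gets six words: grid size (x, y, z)
    * followed by workgroup size (x, y, z), in the order VS, TCS, tess.
    */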
   /* VS grid size */
   grids[0] = count;
   grids[1] = instance_count;
   grids[2] = 1;

   /* VS workgroup size */
   grids[3] = 64;
   grids[4] = 1;
   grids[5] = 1;

   /* TCS grid size */
   grids[6] = in_patches * p->output_patch_size;
   grids[7] = instance_count;
   grids[8] = 1;

   /* TCS workgroup size */
   grids[9] = p->output_patch_size;
   grids[10] = 1;
   grids[11] = 1;

   /* Tess grid size */
   grids[12] = unrolled_patches;
   grids[13] = 1;
   grids[14] = 1;

   /* Tess workgroup size */
   grids[15] = 64;
   grids[16] = 1;
   grids[17] = 1;
}