/*
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "geometry.h"
#include "tessellator.h"
#include <agx_pack.h>

/* Patch size seen by the tess control shader: the input patch size. */
uint
libagx_tcs_patch_vertices_in(constant struct libagx_tess_args *p)
{
   return p->input_patch_size;
}

/* Patch size seen by the tess eval shader: the output patch size. */
uint
libagx_tes_patch_vertices_in(constant struct libagx_tess_args *p)
{
   return p->output_patch_size;
}

/*
 * Flatten a TCS workgroup ID into a single patch index across all instances.
 *
 * Computes wg_id.y * patches_per_instance + wg_id.x. With the grid written by
 * libagx_tess_setup_indirect (x spans in_patches workgroups, y spans
 * instance_count), x is the patch within the instance and y is the instance.
 */
uint
libagx_tcs_unrolled_id(constant struct libagx_tess_args *p, uint3 wg_id)
{
   return (wg_id.y * p->patches_per_instance) + wg_id.x;
}

/* Return the 64-bit tes_buffer value stored in the tessellation arguments. */
uint64_t
libagx_tes_buffer(constant struct libagx_tess_args *p)
{
   return p->tes_buffer;
}

/*
 * Helper to lower indexing for a tess eval shader ran as a compute shader. This
 * handles the tess+geom case. This is simpler than the general input assembly
 * lowering, as we know:
 *
 * 1. the index buffer is U32
 * 2. the index is in bounds
 *
 * Therefore we do a simple load. No bounds checking needed.
 */
uint32_t
libagx_load_tes_index(constant struct libagx_tess_args *p, uint32_t index)
{
   /* Swap second and third vertices of each triangle to flip winding order
    * dynamically if needed.
    */
   if (p->ccw) {
      uint id = index % 3;

      if (id == 1)
         index++;
      else if (id == 2)
         index--;
   }

   return p->index_buffer[index];
}

/*
 * Offset of a TCS input for vertex `vtx` at varying slot `location`, given the
 * mask of outputs written by the vertex shader. Forwards directly to
 * libagx_tcs_in_offs, which is defined outside this file; the result is
 * returned as a ushort.
 */
ushort
libagx_tcs_in_offset(uint vtx, gl_varying_slot location,
                     uint64_t crosslane_vs_out_mask)
{
   return libagx_tcs_in_offs(vtx, location, crosslane_vs_out_mask);
}

/*
 * Address of a TCS output inside p->tcs_buffer.
 *
 * The per-patch stride and the offset within the patch are both obtained in
 * 4-byte elements from the libagx_tcs_out_* helpers (defined outside this
 * file). The element offset of the patch (patch_id * stride) is added, and the
 * total is scaled to bytes with a shift by 2.
 */
uintptr_t
libagx_tcs_out_address(constant struct libagx_tess_args *p, uint patch_id,
                       uint vtx_id, gl_varying_slot location, uint nr_patch_out,
                       uint out_patch_size, uint64_t vtx_out_mask)
{
   uint stride_el =
      libagx_tcs_out_stride_el(nr_patch_out, out_patch_size, vtx_out_mask);

   uint offs_el =
      libagx_tcs_out_offs_el(vtx_id, location, nr_patch_out, vtx_out_mask);

   offs_el += patch_id * stride_el;

   /* Written to match the AGX addressing mode */
   return (uintptr_t)(p->tcs_buffer) + (((uintptr_t)offs_el) << 2);
}

/*
 * A raw TES ID packs the unrolled patch index and the vertex index within the
 * patch as (patch * LIBAGX_TES_PATCH_ID_STRIDE) + vertex. This extracts the
 * unrolled patch index.
 */
static uint
libagx_tes_unrolled_patch_id(uint raw_id)
{
   return raw_id / LIBAGX_TES_PATCH_ID_STRIDE;
}

/* Patch index within its instance: the unrolled index modulo the number of
 * patches per instance.
 */
uint
libagx_tes_patch_id(constant struct libagx_tess_args *p, uint raw_id)
{
   return libagx_tes_unrolled_patch_id(raw_id) % p->patches_per_instance;
}

/* Vertex index within the patch, extracted from a raw TES ID. */
static uint
tes_vertex_id_in_patch(uint raw_id)
{
   return raw_id % LIBAGX_TES_PATCH_ID_STRIDE;
}

/*
 * Load the tessellation coordinate for a raw TES ID.
 *
 * The point is read from p->patch_coord_buffer at index
 * coord_allocs[patch] + vertex, as two 32-bit unsigned values that are then
 * divided by 2^16 to produce floats.
 */
float2
libagx_load_tess_coord(constant struct libagx_tess_args *p, uint raw_id)
{
   uint patch = libagx_tes_unrolled_patch_id(raw_id);
   uint vtx = tes_vertex_id_in_patch(raw_id);

   global struct libagx_tess_point *t =
      &p->patch_coord_buffer[p->coord_allocs[patch] + vtx];

   /* Written weirdly because NIR struggles with loads of structs */
   uint2 fixed = *((global uint2 *)t);

   /* Convert fixed point to float */
   return convert_float2(fixed) / (1u << 16);
}

/*
 * Address of a TES input. TES inputs are the TCS outputs, so this resolves the
 * unrolled patch from the raw ID and reuses libagx_tcs_out_address with the
 * output layout recorded in the tessellation arguments.
 */
uintptr_t
libagx_tes_in_address(constant struct libagx_tess_args *p, uint raw_id,
                      uint vtx_id, gl_varying_slot location)
{
   uint patch = libagx_tes_unrolled_patch_id(raw_id);

   return libagx_tcs_out_address(p, patch, vtx_id, location,
                                 p->tcs_patch_constants, p->output_patch_size,
                                 p->tcs_per_vertex_outputs);
}

/* Default outer tessellation levels, gathered into a float4. */
float4
libagx_tess_level_outer_default(constant struct libagx_tess_args *p)
{
   return (float4)(p->tess_level_outer_default[0],
                   p->tess_level_outer_default[1],
                   p->tess_level_outer_default[2],
                   p->tess_level_outer_default[3]);
}

/* Default inner tessellation levels, gathered into a float2. */
float2
libagx_tess_level_inner_default(constant struct libagx_tess_args *p)
{
   return (float2)(p->tess_level_inner_default[0],
                   p->tess_level_inner_default[1]);
}

/*
 * Set up an indirect tessellated draw.
 *
 * Reads the vertex/index count and instance count from the first two words of
 * `indirect`, then:
 *
 *  - bumps the TCS invocation statistic, if a counter is provided,
 *  - carves the TCS output buffer, the per-patch coordinate allocation table,
 *    the per-patch counts and the vertex shader output buffer out of a single
 *    allocation taken from p->heap,
 *  - records the resulting pointers and patch counts in `p`,
 *  - fills in the input assembly state `ia`, applying the index buffer offset
 *    from indirect[2] when the draw is indexed (in_index_size_B != 0),
 *  - writes six 3-word entries to `grids`: grid size then workgroup size for
 *    the VS, the TCS and the tessellator, in that order.
 *
 * NOTE(review): the heap bump below is a plain read-modify-write and no check
 * against the heap's capacity is visible here; presumably the caller sizes the
 * heap and serializes this kernel -- confirm.
 */
KERNEL(1)
libagx_tess_setup_indirect(
   global struct libagx_tess_args *p,
   global uint32_t *grids /* output: VS then TCS then tess */,
   global struct agx_ia_state *ia /* output */, global uint32_t *indirect,
   global uint64_t *vertex_output_buffer_ptr, uint64_t in_index_buffer,
   uint32_t in_index_buffer_range_el, uint32_t in_index_size_B,
   uint64_t vertex_outputs /* bitfield */,

   /* Tess control invocation counter if active, else zero */
   global uint32_t *tcs_statistic)
{
   uint count = indirect[0], instance_count = indirect[1];
   unsigned in_patches = count / p->input_patch_size;

   /* TCS invocation counter increments once per-patch */
   if (tcs_statistic) {
      *tcs_statistic += in_patches;
   }

   unsigned unrolled_patches = in_patches * instance_count;

   /* Lay out every sub-buffer as a byte offset into one allocation. */
   uint32_t alloc = 0;
   uint32_t tcs_out_offs = alloc;
   alloc += unrolled_patches * p->tcs_stride_el * 4;

   uint32_t patch_coord_offs = alloc;
   alloc += unrolled_patches * 4;

   uint32_t count_offs = alloc;
   alloc += unrolled_patches * sizeof(uint32_t);

   uint vb_offs = alloc;
   uint vb_size = libagx_tcs_in_size(count * instance_count, vertex_outputs);
   alloc += vb_size;

   /* Allocate all patch calculations in one go */
   global uchar *blob = p->heap->heap + p->heap->heap_bottom;
   p->heap->heap_bottom += alloc;

   p->tcs_buffer = (global float *)(blob + tcs_out_offs);
   p->patches_per_instance = in_patches;
   p->coord_allocs = (global uint *)(blob + patch_coord_offs);
   p->nr_patches = unrolled_patches;

   *vertex_output_buffer_ptr = (uintptr_t)(blob + vb_offs);
   p->counts = (global uint32_t *)(blob + count_offs);

   ia->verts_per_instance = count;

   /* If indexing is enabled, the third word is the offset into the index buffer
    * in elements. Apply that offset now that we have it. For a hardware
    * indirect draw, the hardware would do this for us, but for software input
    * assembly we need to do it ourselves.
    *
    * XXX: Deduplicate?
    */
   if (in_index_size_B) {
      ia->index_buffer =
         libagx_index_buffer(in_index_buffer, in_index_buffer_range_el,
                             indirect[2], in_index_size_B, 0);

      ia->index_buffer_range_el =
         libagx_index_buffer_range_el(in_index_buffer_range_el, indirect[2]);
   }

   /* VS grid size */
   grids[0] = count;
   grids[1] = instance_count;
   grids[2] = 1;

   /* VS workgroup size */
   grids[3] = 64;
   grids[4] = 1;
   grids[5] = 1;

   /* TCS grid size */
   grids[6] = in_patches * p->output_patch_size;
   grids[7] = instance_count;
   grids[8] = 1;

   /* TCS workgroup size */
   grids[9] = p->output_patch_size;
   grids[10] = 1;
   grids[11] = 1;

   /* Tess grid size */
   grids[12] = unrolled_patches;
   grids[13] = 1;
   grids[14] = 1;

   /* Tess workgroup size */
   grids[15] = 64;
   grids[16] = 1;
   grids[17] = 1;
}