/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "compiler/libcl/libcl.h"
#include "compiler/shader_enums.h"

#ifndef __OPENCL_VERSION__
#include "util/bitscan.h"
#define libagx_popcount(x)   util_bitcount64(x)
#define libagx_sub_sat(x, y) (((x) >= (y)) ? ((x) - (y)) : 0)
#else
#define libagx_popcount(x)   popcount(x)
#define libagx_sub_sat(x, y) sub_sat(x, y)
#endif

#ifndef LIBAGX_GEOMETRY_H
#define LIBAGX_GEOMETRY_H

#define MAX_SO_BUFFERS     4
#define MAX_VERTEX_STREAMS 4

/* Packed geometry state buffer */
struct agx_geometry_state {
   /* Heap to allocate from. */
   DEVICE(uchar) heap;
   uint32_t heap_bottom, heap_size;
} PACKED;
static_assert(sizeof(struct agx_geometry_state) == 4 * 4);

#ifdef __OPENCL_VERSION__
static inline global void *
agx_heap_alloc_nonatomic(global struct agx_geometry_state *heap, size_t size)
{
   global void *out = heap->heap + heap->heap_bottom;
   heap->heap_bottom += size;
   return out;
}
#endif
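
#ifdef __OPENCL_VERSION__
/*
 * Illustrative sketch, not part of the driver interface (the helper name
 * is hypothetical): carving a transient index buffer out of the heap.
 * The bump allocator above is nonatomic, so a single invocation (e.g. one
 * thread of a setup kernel) must perform the allocation.
 */
static inline global uint *
libagx_example_alloc_indices(global struct agx_geometry_state *state,
                             uint index_count)
{
   /* Each index is a 32-bit element; no alignment padding is added. */
   return (global uint *)agx_heap_alloc_nonatomic(state,
                                                  index_count * sizeof(uint));
}
#endif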

struct agx_ia_state {
   /* Index buffer if present. */
   uint64_t index_buffer;

   /* Size of the bound index buffer in elements, for bounds checking */
   uint32_t index_buffer_range_el;

   /* Number of vertices per instance. Written by the CPU for a direct draw,
    * or by the indirect setup kernel for an indirect draw. This is used for
    * VS->GS and VS->TCS indexing.
    */
   uint32_t verts_per_instance;
} PACKED;
static_assert(sizeof(struct agx_ia_state) == 4 * 4);

static inline uint64_t
libagx_index_buffer(uint64_t index_buffer, uint size_el, uint offset_el,
                    uint elsize_B, uint64_t zero_sink)
{
   if (offset_el < size_el)
      return index_buffer + (offset_el * elsize_B);
   else
      return zero_sink;
}

static inline uint
libagx_index_buffer_range_el(uint size_el, uint offset_el)
{
   return libagx_sub_sat(size_el, offset_el);
}
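
/*
 * Illustrative sketch (hypothetical helper, not driver API): how the two
 * functions above combine when binding an index buffer with a first-index
 * offset. An out-of-bounds offset redirects the base address to zero_sink
 * and the saturating subtraction clamps the range to 0, so fetches are
 * bounds-checked against an empty buffer instead of walking off the end.
 */
static inline void
libagx_example_bind_index_buffer(struct agx_ia_state *ia,
                                 uint64_t index_buffer, uint size_el,
                                 uint first_index_el, uint elsize_B,
                                 uint64_t zero_sink)
{
   ia->index_buffer = libagx_index_buffer(index_buffer, size_el,
                                          first_index_el, elsize_B, zero_sink);
   ia->index_buffer_range_el =
      libagx_index_buffer_range_el(size_el, first_index_el);
}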

struct agx_geometry_params {
   /* Persistent (cross-draw) geometry state */
   DEVICE(struct agx_geometry_state) state;

   /* Address of associated indirect draw buffer */
   DEVICE(uint) indirect_desc;

   /* Address of count buffer. For an indirect draw, this will be written by
    * the indirect setup kernel.
    */
   DEVICE(uint) count_buffer;

   /* Address of the primitives generated counters */
   DEVICE(uint) prims_generated_counter[MAX_VERTEX_STREAMS];
   DEVICE(uint) xfb_prims_generated_counter[MAX_VERTEX_STREAMS];
   DEVICE(uint) xfb_overflow[MAX_VERTEX_STREAMS];
   DEVICE(uint) xfb_any_overflow;

   /* Pointers to transform feedback buffer offsets in bytes */
   DEVICE(uint) xfb_offs_ptrs[MAX_SO_BUFFERS];

   /* Output index buffer, allocated by pre-GS. */
   DEVICE(uint) output_index_buffer;

   /* Base address of each transform feedback buffer, as supplied by the CPU. */
   DEVICE(uchar) xfb_base_original[MAX_SO_BUFFERS];

   /* Address of transform feedback for the current primitive. Written by the
    * pre-GS program.
    */
   DEVICE(uchar) xfb_base[MAX_SO_BUFFERS];

   /* Address and present mask for the input to the geometry shader. These will
    * reflect the vertex shader for VS->GS or instead the tessellation
    * evaluation shader for TES->GS.
    */
   uint64_t input_buffer;
   uint64_t input_mask;

   /* Location-indexed mask of flat outputs, used for lowering GL edge flags. */
   uint64_t flat_outputs;

   uint32_t xfb_size[MAX_SO_BUFFERS];

   /* Number of primitives emitted by transform feedback per stream. Written by
    * the pre-GS program.
    */
   uint32_t xfb_prims[MAX_VERTEX_STREAMS];

   /* For an indirect GS draw, the grids used to dispatch the VS/GS. Written by
    * the GS indirect setup kernel, or by the CPU for a direct draw.
    */
   uint32_t vs_grid[3];
   uint32_t gs_grid[3];

   /* Number of input primitives across all instances, calculated by the CPU
    * for a direct draw or by the GS indirect setup kernel for an indirect
    * draw.
    */
   uint32_t input_primitives;

   /* Number of input primitives per instance, rounded up to a power-of-two and
    * with the base-2 log taken. This is used to partition the output vertex IDs
    * efficiently.
    */
   uint32_t primitives_log2;

   /* Number of bytes output by the GS count shader per input primitive (may be
    * 0). Written by the CPU and consumed by the indirect draw setup shader for
    * allocating counts.
    */
   uint32_t count_buffer_stride;

   /* Dynamic input topology. Must be compatible with the geometry shader's
    * layout() declared input class.
    */
   uint32_t input_topology;
} PACKED;
static_assert(sizeof(struct agx_geometry_params) == 82 * 4);

/* TCS shared memory layout:
 *
 *    vec4 vs_outputs[VERTICES_IN_INPUT_PATCH][TOTAL_VERTEX_OUTPUTS];
 *
 * TODO: compact.
 */
static inline uint
libagx_tcs_in_offs_el(uint vtx, gl_varying_slot location,
                      uint64_t crosslane_vs_out_mask)
{
   uint base = vtx * libagx_popcount(crosslane_vs_out_mask);
   uint offs = libagx_popcount(crosslane_vs_out_mask &
                               (((uint64_t)(1) << location) - 1));

   return base + offs;
}

static inline uint
libagx_tcs_in_offs(uint vtx, gl_varying_slot location,
                   uint64_t crosslane_vs_out_mask)
{
   return libagx_tcs_in_offs_el(vtx, location, crosslane_vs_out_mask) * 16;
}

static inline uint
libagx_tcs_in_size(uint32_t vertices_in_patch, uint64_t crosslane_vs_out_mask)
{
   return vertices_in_patch * libagx_popcount(crosslane_vs_out_mask) * 16;
}
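
/*
 * Worked example of the indexing above (values illustrative): with a
 * crosslane mask covering just VARYING_SLOT_POS and VARYING_SLOT_VAR0
 * (two outputs per vertex), vertex 2's VAR0 lands at element
 * (2 * 2) + 1 = 5, i.e. byte offset 5 * 16 = 80, and a 3-vertex input
 * patch occupies 3 * 2 * 16 = 96 bytes of shared memory.
 */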

/*
 * TCS out buffer layout, per-patch:
 *
 *    float tess_level_outer[4];
 *    float tess_level_inner[2];
 *    vec4 patch_out[MAX_PATCH_OUTPUTS];
 *    vec4 vtx_out[OUT_PATCH_SIZE][TOTAL_VERTEX_OUTPUTS];
 *
 * Vertex outputs are compacted based on the mask of outputs written. Patch
 * outputs are used as-is.
 *
 * Bounding boxes are ignored.
 */
static inline uint
libagx_tcs_out_offs_el(uint vtx_id, gl_varying_slot location, uint nr_patch_out,
                       uint64_t vtx_out_mask)
{
   uint off = 0;
   if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
      return off;

   off += 4;
   if (location == VARYING_SLOT_TESS_LEVEL_INNER)
      return off;

   off += 2;
   if (location >= VARYING_SLOT_PATCH0)
      return off + (4 * (location - VARYING_SLOT_PATCH0));

   /* Anything else is a per-vtx output */
   off += 4 * nr_patch_out;
   off += 4 * vtx_id * libagx_popcount(vtx_out_mask);

   uint idx = libagx_popcount(vtx_out_mask & (((uint64_t)(1) << location) - 1));
   return off + (4 * idx);
}
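
/*
 * Worked example of the layout above (values illustrative): with
 * nr_patch_out = 1 and a vertex output mask covering VARYING_SLOT_POS and
 * VARYING_SLOT_VAR0, vertex 1's VAR0 sits at element
 * 6 + (4 * 1) + (4 * 1 * 2) + (4 * 1) = 22, i.e. byte offset 88: six
 * elements of tess levels, one vec4 patch output, vertex 0's two vec4
 * outputs, then vertex 1's POS.
 */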

static inline uint
libagx_tcs_out_offs(uint vtx_id, gl_varying_slot location, uint nr_patch_out,
                    uint64_t vtx_out_mask)
{
   return libagx_tcs_out_offs_el(vtx_id, location, nr_patch_out, vtx_out_mask) *
          4;
}

static inline uint
libagx_tcs_out_stride_el(uint nr_patch_out, uint out_patch_size,
                         uint64_t vtx_out_mask)
{
   return libagx_tcs_out_offs_el(out_patch_size, 0, nr_patch_out, vtx_out_mask);
}

static inline uint
libagx_tcs_out_stride(uint nr_patch_out, uint out_patch_size,
                      uint64_t vtx_out_mask)
{
   return libagx_tcs_out_stride_el(nr_patch_out, out_patch_size, vtx_out_mask) *
          4;
}

/* In a tess eval shader, stride for hw vertex ID */
#define LIBAGX_TES_PATCH_ID_STRIDE 8192

static uint
libagx_compact_prim(enum mesa_prim prim)
{
   static_assert(MESA_PRIM_QUAD_STRIP == MESA_PRIM_QUADS + 1);
   static_assert(MESA_PRIM_POLYGON == MESA_PRIM_QUADS + 2);

#ifndef __OPENCL_VERSION__
   assert(prim != MESA_PRIM_QUADS);
   assert(prim != MESA_PRIM_QUAD_STRIP);
   assert(prim != MESA_PRIM_POLYGON);
   assert(prim != MESA_PRIM_PATCHES);
#endif

   return (prim >= MESA_PRIM_QUADS) ? (prim - 3) : prim;
}

static enum mesa_prim
libagx_uncompact_prim(uint packed)
{
   return (packed >= MESA_PRIM_QUADS) ? (packed + 3) : packed;
}
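
/*
 * Example of the compaction above: with QUADS, QUAD_STRIP, and POLYGON
 * excluded by the assertions, primitive types at or above MESA_PRIM_QUADS
 * (e.g. MESA_PRIM_LINES_ADJACENCY) pack down by 3 and unpack back to their
 * original value, while anything below MESA_PRIM_QUADS is unchanged. The
 * round trip libagx_uncompact_prim(libagx_compact_prim(p)) == p holds for
 * every primitive type the assertions allow.
 */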

#endif