• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_xfb_info.h"
27 
28 /**
29  * \file nir_lower_gs_intrinsics.c
30  *
31  * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an
32  * arbitrary number of vertices.  However, the shader must declare the maximum
33  * number of vertices that it will ever output - further attempts to emit
34  * vertices result in undefined behavior according to the GLSL specification.
35  *
36  * Drivers might use this maximum number of vertices to allocate enough space
37  * to hold the geometry shader's output.  Some drivers (such as i965) need to
38  * implement "safety checks" which ensure that the shader hasn't emitted too
39  * many vertices, to avoid overflowing that space and trashing other memory.
40  *
41  * The count of emitted vertices can also be useful in buffer offset
42  * calculations, so drivers know where to write the GS output.
43  *
44  * However, for simple geometry shaders that emit a statically determinable
45  * number of vertices, this extra bookkeeping is unnecessary and inefficient.
46  * By tracking the vertex count in NIR, we allow constant folding/propagation
47  * and dead control flow optimizations to eliminate most of it where possible.
48  *
 * This pass introduces function-local variables which store the current
 * vertex count (initialized to 0) and, when requested, the primitive count
 * and the per-primitive vertex count.  It converts emit_vertex/end_primitive
 * intrinsics to their *_with_counter variants.  emit_vertex is also wrapped
 * in a safety check to avoid buffer overflows.  Finally, it adds a
 * set_vertex_and_primitive_count intrinsic at the end of the program,
 * informing the driver of the final vertex and primitive counts.
55  */
56 
57 struct state {
58    nir_builder *builder;
59    nir_variable *vertex_count_vars[NIR_MAX_XFB_STREAMS];
60    nir_variable *vtxcnt_per_prim_vars[NIR_MAX_XFB_STREAMS];
61    nir_variable *primitive_count_vars[NIR_MAX_XFB_STREAMS];
62    bool per_stream;
63    bool count_prims;
64    bool count_vtx_per_prim;
65    bool overwrite_incomplete;
66    bool is_points;
67    bool progress;
68 };
69 
70 /**
71  * Replace emit_vertex intrinsics with:
72  *
73  * if (vertex_count < max_vertices) {
74  *    emit_vertex_with_counter vertex_count, vertex_count_per_primitive (optional) ...
75  *    vertex_count += 1
76  *    vertex_count_per_primitive += 1
77  * }
78  */
79 static void
rewrite_emit_vertex(nir_intrinsic_instr * intrin,struct state * state)80 rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
81 {
82    nir_builder *b = state->builder;
83    unsigned stream = nir_intrinsic_stream_id(intrin);
84 
85    /* Load the vertex count */
86    b->cursor = nir_before_instr(&intrin->instr);
87    assert(state->vertex_count_vars[stream] != NULL);
88    nir_ssa_def *count = nir_load_var(b, state->vertex_count_vars[stream]);
89    nir_ssa_def *count_per_primitive;
90 
91    if (state->count_vtx_per_prim)
92       count_per_primitive = nir_load_var(b, state->vtxcnt_per_prim_vars[stream]);
93    else if (state->is_points)
94       count_per_primitive = nir_imm_int(b, 0);
95    else
96       count_per_primitive = nir_ssa_undef(b, 1, 32);
97 
98    nir_ssa_def *max_vertices =
99       nir_imm_int(b, b->shader->info.gs.vertices_out);
100 
101    /* Create: if (vertex_count < max_vertices) and insert it.
102     *
103     * The new if statement needs to be hooked up to the control flow graph
104     * before we start inserting instructions into it.
105     */
106    nir_push_if(b, nir_ilt(b, count, max_vertices));
107 
108    nir_emit_vertex_with_counter(b, count, count_per_primitive, stream);
109 
110    /* Increment the vertex count by 1 */
111    nir_store_var(b, state->vertex_count_vars[stream],
112                  nir_iadd_imm(b, count, 1),
113                  0x1); /* .x */
114 
115    if (state->count_vtx_per_prim) {
116       /* Increment the per-primitive vertex count by 1 */
117       nir_variable *var = state->vtxcnt_per_prim_vars[stream];
118       nir_ssa_def *vtx_per_prim_cnt = nir_load_var(b, var);
119       nir_store_var(b, var,
120                     nir_iadd_imm(b, vtx_per_prim_cnt, 1),
121                     0x1); /* .x */
122    }
123 
124    nir_pop_if(b, NULL);
125 
126    nir_instr_remove(&intrin->instr);
127 
128    state->progress = true;
129 }
130 
131 /**
132  * Emits code that overwrites incomplete primitives and their vertices.
133  *
134  * A primitive is considered incomplete when it doesn't have enough vertices.
135  * For example, a triangle strip that has 2 or fewer vertices, or a line strip
136  * with 1 vertex are considered incomplete.
137  *
138  * After each end_primitive and at the end of the shader before emitting
139  * set_vertex_and_primitive_count, we check if the primitive that is being
140  * emitted has enough vertices or not, and we adjust the vertex and primitive
141  * counters accordingly.
142  *
143  * This means that the following emit_vertex can reuse the vertex index of
144  * a previous vertex, if the previous primitive was incomplete, so the compiler
145  * backend is expected to simply overwrite any data that belonged to those.
146  */
147 static void
overwrite_incomplete_primitives(struct state * state,unsigned stream)148 overwrite_incomplete_primitives(struct state *state, unsigned stream)
149 {
150    assert(state->count_vtx_per_prim);
151 
152    nir_builder *b = state->builder;
153    enum shader_prim outprim = b->shader->info.gs.output_primitive;
154    unsigned outprim_min_vertices;
155 
156    if (outprim == SHADER_PRIM_POINTS)
157       outprim_min_vertices = 1;
158    else if (outprim == SHADER_PRIM_LINE_STRIP)
159       outprim_min_vertices = 2;
160    else if (outprim == SHADER_PRIM_TRIANGLE_STRIP)
161       outprim_min_vertices = 3;
162    else
163       unreachable("Invalid GS output primitive type.");
164 
165    /* Total count of vertices emitted so far. */
166    nir_ssa_def *vtxcnt_total =
167       nir_load_var(b, state->vertex_count_vars[stream]);
168 
169    /* Number of vertices emitted for the last primitive */
170    nir_ssa_def *vtxcnt_per_primitive =
171       nir_load_var(b, state->vtxcnt_per_prim_vars[stream]);
172 
173    /* See if the current primitive is a incomplete */
174    nir_ssa_def *is_inc_prim =
175       nir_ilt(b, vtxcnt_per_primitive, nir_imm_int(b, outprim_min_vertices));
176 
177    /* Number of vertices in the incomplete primitive */
178    nir_ssa_def *num_inc_vtx =
179       nir_bcsel(b, is_inc_prim, vtxcnt_per_primitive, nir_imm_int(b, 0));
180 
181    /* Store corrected total vertex count */
182    nir_store_var(b, state->vertex_count_vars[stream],
183                  nir_isub(b, vtxcnt_total, num_inc_vtx),
184                  0x1); /* .x */
185 
186    if (state->count_prims) {
187       /* Number of incomplete primitives (0 or 1) */
188       nir_ssa_def *num_inc_prim = nir_b2i32(b, is_inc_prim);
189 
190       /* Store corrected primitive count */
191       nir_ssa_def *prim_cnt = nir_load_var(b, state->primitive_count_vars[stream]);
192       nir_store_var(b, state->primitive_count_vars[stream],
193                     nir_isub(b, prim_cnt, num_inc_prim),
194                     0x1); /* .x */
195    }
196 }
197 
198 /**
199  * Replace end_primitive with end_primitive_with_counter.
200  */
201 static void
rewrite_end_primitive(nir_intrinsic_instr * intrin,struct state * state)202 rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state)
203 {
204    nir_builder *b = state->builder;
205    unsigned stream = nir_intrinsic_stream_id(intrin);
206 
207    b->cursor = nir_before_instr(&intrin->instr);
208    assert(state->vertex_count_vars[stream] != NULL);
209    nir_ssa_def *count = nir_load_var(b, state->vertex_count_vars[stream]);
210    nir_ssa_def *count_per_primitive;
211 
212    if (state->count_vtx_per_prim)
213       count_per_primitive = nir_load_var(b, state->vtxcnt_per_prim_vars[stream]);
214    else if (state->is_points)
215       count_per_primitive = nir_imm_int(b, 0);
216    else
217       count_per_primitive = nir_ssa_undef(b, count->num_components, count->bit_size);
218 
219    nir_end_primitive_with_counter(b, count, count_per_primitive, stream);
220 
221    if (state->count_prims) {
222       /* Increment the primitive count by 1 */
223       nir_ssa_def *prim_cnt = nir_load_var(b, state->primitive_count_vars[stream]);
224       nir_store_var(b, state->primitive_count_vars[stream],
225                     nir_iadd_imm(b, prim_cnt, 1),
226                     0x1); /* .x */
227    }
228 
229    if (state->count_vtx_per_prim) {
230       if (state->overwrite_incomplete)
231          overwrite_incomplete_primitives(state, stream);
232 
233       /* Store 0 to per-primitive vertex count */
234       nir_store_var(b, state->vtxcnt_per_prim_vars[stream],
235                     nir_imm_int(b, 0),
236                     0x1); /* .x */
237    }
238 
239    nir_instr_remove(&intrin->instr);
240 
241    state->progress = true;
242 }
243 
244 static bool
rewrite_intrinsics(nir_block * block,struct state * state)245 rewrite_intrinsics(nir_block *block, struct state *state)
246 {
247    nir_foreach_instr_safe(instr, block) {
248       if (instr->type != nir_instr_type_intrinsic)
249          continue;
250 
251       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
252       switch (intrin->intrinsic) {
253       case nir_intrinsic_emit_vertex:
254       case nir_intrinsic_emit_vertex_with_counter:
255          rewrite_emit_vertex(intrin, state);
256          break;
257       case nir_intrinsic_end_primitive:
258       case nir_intrinsic_end_primitive_with_counter:
259          rewrite_end_primitive(intrin, state);
260          break;
261       default:
262          /* not interesting; skip this */
263          break;
264       }
265    }
266 
267    return true;
268 }
269 
270 /**
271  * Add a set_vertex_and_primitive_count intrinsic at the end of the program
272  * (representing the final total vertex and primitive count).
273  */
274 static void
append_set_vertex_and_primitive_count(nir_block * end_block,struct state * state)275 append_set_vertex_and_primitive_count(nir_block *end_block, struct state *state)
276 {
277    nir_builder *b = state->builder;
278    nir_shader *shader = state->builder->shader;
279 
280    /* Insert the new intrinsic in all of the predecessors of the end block,
281     * but before any jump instructions (return).
282     */
283    set_foreach(end_block->predecessors, entry) {
284       nir_block *pred = (nir_block *) entry->key;
285       b->cursor = nir_after_block_before_jump(pred);
286 
287       for (unsigned stream = 0; stream < NIR_MAX_XFB_STREAMS; ++stream) {
288          /* When it's not per-stream, we only need to write one variable. */
289          if (!state->per_stream && stream != 0)
290             continue;
291 
292          nir_ssa_def *vtx_cnt;
293          nir_ssa_def *prim_cnt;
294 
295          if (state->per_stream && !(shader->info.gs.active_stream_mask & (1 << stream))) {
296             /* Inactive stream: vertex count is 0, primitive count is 0 or undef. */
297             vtx_cnt = nir_imm_int(b, 0);
298             prim_cnt = state->count_prims || state->is_points
299                        ? nir_imm_int(b, 0)
300                        : nir_ssa_undef(b, 1, 32);
301          } else {
302             if (state->overwrite_incomplete)
303                overwrite_incomplete_primitives(state, stream);
304 
305             vtx_cnt = nir_load_var(b, state->vertex_count_vars[stream]);
306 
307             if (state->count_prims)
308                prim_cnt = nir_load_var(b, state->primitive_count_vars[stream]);
309             else if (state->is_points)
310                /* EndPrimitive does not affect primitive count for points,
311                 * just use vertex count instead
312                 */
313                prim_cnt = vtx_cnt;
314             else
315                prim_cnt = nir_ssa_undef(b, 1, 32);
316          }
317 
318          nir_set_vertex_and_primitive_count(b, vtx_cnt, prim_cnt, stream);
319          state->progress = true;
320       }
321    }
322 }
323 
324 /**
325  * Check to see if there are any blocks that need set_vertex_and_primitive_count
326  *
327  * If every block that could need the set_vertex_and_primitive_count intrinsic
328  * already has one, there is nothing for this pass to do.
329  */
330 static bool
a_block_needs_set_vertex_and_primitive_count(nir_block * end_block,bool per_stream)331 a_block_needs_set_vertex_and_primitive_count(nir_block *end_block, bool per_stream)
332 {
333    set_foreach(end_block->predecessors, entry) {
334       nir_block *pred = (nir_block *) entry->key;
335 
336 
337       for (unsigned stream = 0; stream < NIR_MAX_XFB_STREAMS; ++stream) {
338          /* When it's not per-stream, we only need to write one variable. */
339          if (!per_stream && stream != 0)
340             continue;
341 
342          bool found = false;
343 
344          nir_foreach_instr_reverse(instr, pred) {
345             if (instr->type != nir_instr_type_intrinsic)
346                continue;
347 
348             const nir_intrinsic_instr *const intrin =
349                nir_instr_as_intrinsic(instr);
350 
351             if (intrin->intrinsic == nir_intrinsic_set_vertex_and_primitive_count &&
352                 intrin->const_index[0] == stream) {
353                found = true;
354                break;
355             }
356          }
357 
358          if (!found)
359             return true;
360       }
361    }
362 
363    return false;
364 }
365 
366 bool
nir_lower_gs_intrinsics(nir_shader * shader,nir_lower_gs_intrinsics_flags options)367 nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options)
368 {
369    bool per_stream = options & nir_lower_gs_intrinsics_per_stream;
370    bool count_primitives = options & nir_lower_gs_intrinsics_count_primitives;
371    bool overwrite_incomplete = options & nir_lower_gs_intrinsics_overwrite_incomplete;
372    bool count_vtx_per_prim =
373       overwrite_incomplete ||
374       (options & nir_lower_gs_intrinsics_count_vertices_per_primitive);
375 
376    bool is_points = shader->info.gs.output_primitive == SHADER_PRIM_POINTS;
377    /* points are always complete primitives with a single vertex, so these are
378     * not needed when primitive is points.
379     */
380    if (is_points) {
381       count_primitives = false;
382       overwrite_incomplete = false;
383       count_vtx_per_prim = false;
384    }
385 
386    struct state state;
387    state.progress = false;
388    state.count_prims = count_primitives;
389    state.count_vtx_per_prim = count_vtx_per_prim;
390    state.overwrite_incomplete = overwrite_incomplete;
391    state.per_stream = per_stream;
392    state.is_points = is_points;
393 
394    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
395    assert(impl);
396 
397    if (!a_block_needs_set_vertex_and_primitive_count(impl->end_block, per_stream))
398       return false;
399 
400    nir_builder b;
401    nir_builder_init(&b, impl);
402    state.builder = &b;
403 
404    b.cursor = nir_before_cf_list(&impl->body);
405 
406    for (unsigned i = 0; i < NIR_MAX_XFB_STREAMS; i++) {
407       if (per_stream && !(shader->info.gs.active_stream_mask & (1 << i)))
408          continue;
409 
410       if (i == 0 || per_stream) {
411          state.vertex_count_vars[i] =
412             nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
413          /* initialize to 0 */
414          nir_store_var(&b, state.vertex_count_vars[i], nir_imm_int(&b, 0), 0x1);
415 
416          if (count_primitives) {
417             state.primitive_count_vars[i] =
418                nir_local_variable_create(impl, glsl_uint_type(), "primitive_count");
419             /* initialize to 1 */
420             nir_store_var(&b, state.primitive_count_vars[i], nir_imm_int(&b, 1), 0x1);
421          }
422          if (count_vtx_per_prim) {
423             state.vtxcnt_per_prim_vars[i] =
424                nir_local_variable_create(impl, glsl_uint_type(), "vertices_per_primitive");
425             /* initialize to 0 */
426             nir_store_var(&b, state.vtxcnt_per_prim_vars[i], nir_imm_int(&b, 0), 0x1);
427          }
428       } else {
429          /* If per_stream is false, we only have one counter of each kind which we
430           * want to use for all streams. Duplicate the counter pointers so all
431           * streams use the same counters.
432           */
433          state.vertex_count_vars[i] = state.vertex_count_vars[0];
434 
435          if (count_primitives)
436             state.primitive_count_vars[i] = state.primitive_count_vars[0];
437          if (count_vtx_per_prim)
438             state.vtxcnt_per_prim_vars[i] = state.vtxcnt_per_prim_vars[0];
439       }
440    }
441 
442    nir_foreach_block_safe(block, impl)
443       rewrite_intrinsics(block, &state);
444 
445    /* This only works because we have a single main() function. */
446    append_set_vertex_and_primitive_count(impl->end_block, &state);
447 
448    nir_metadata_preserve(impl, 0);
449 
450    return state.progress;
451 }
452