• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "ac_nir.h"
8 #include "ac_nir_helpers.h"
9 
10 #include "nir_builder.h"
11 
typedef struct {
   /* Output values buffered by store_output lowering, indexed by
    * [varying slot][component]; flushed to the GSVS ring and reset to NULL
    * on each emit_vertex_with_counter.
    */
   nir_def *outputs[64][4];
   /* Low/high 16-bit halves for the dedicated 16-bit varying slots
    * (VARYING_SLOT_VAR0_16BIT and up), same [slot][component] layout.
    */
   nir_def *outputs_16bit_lo[16][4];
   nir_def *outputs_16bit_hi[16][4];

   /* Driver-provided per-slot usage masks and stream assignments. */
   ac_nir_gs_output_info *info;

   /* Per-stream counts captured from set_vertex_and_primitive_count,
    * consumed later by the shader-query emission.
    */
   nir_def *vertex_count[4];
   nir_def *primitive_count[4];
} lower_legacy_gs_state;
22 
23 static bool
lower_legacy_gs_store_output(nir_builder * b,nir_intrinsic_instr * intrin,lower_legacy_gs_state * s)24 lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin,
25                              lower_legacy_gs_state *s)
26 {
27    /* Assume:
28     * - the shader used nir_lower_io_to_temporaries
29     * - 64-bit outputs are lowered
30     * - no indirect indexing is present
31     */
32    assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]));
33 
34    b->cursor = nir_before_instr(&intrin->instr);
35 
36    unsigned component = nir_intrinsic_component(intrin);
37    unsigned write_mask = nir_intrinsic_write_mask(intrin);
38    nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
39 
40    nir_def **outputs;
41    if (sem.location < VARYING_SLOT_VAR0_16BIT) {
42       outputs = s->outputs[sem.location];
43    } else {
44       unsigned index = sem.location - VARYING_SLOT_VAR0_16BIT;
45       if (sem.high_16bits)
46          outputs = s->outputs_16bit_hi[index];
47       else
48          outputs = s->outputs_16bit_lo[index];
49    }
50 
51    nir_def *store_val = intrin->src[0].ssa;
52    /* 64bit output has been lowered to 32bit */
53    assert(store_val->bit_size <= 32);
54 
55    /* 16-bit output stored in a normal varying slot that isn't a dedicated 16-bit slot. */
56    const bool non_dedicated_16bit = sem.location < VARYING_SLOT_VAR0_16BIT && store_val->bit_size == 16;
57 
58    u_foreach_bit (i, write_mask) {
59       unsigned comp = component + i;
60       nir_def *store_component = nir_channel(b, store_val, i);
61 
62       if (non_dedicated_16bit) {
63          if (sem.high_16bits) {
64             nir_def *lo = outputs[comp] ? nir_unpack_32_2x16_split_x(b, outputs[comp]) : nir_imm_intN_t(b, 0, 16);
65             outputs[comp] = nir_pack_32_2x16_split(b, lo, store_component);
66          } else {
67             nir_def *hi = outputs[comp] ? nir_unpack_32_2x16_split_y(b, outputs[comp]) : nir_imm_intN_t(b, 0, 16);
68             outputs[comp] = nir_pack_32_2x16_split(b, store_component, hi);
69          }
70       } else {
71          outputs[comp] = store_component;
72       }
73    }
74 
75    nir_instr_remove(&intrin->instr);
76    return true;
77 }
78 
/* Lower emit_vertex_with_counter: flush every buffered output that belongs to
 * this vertex's stream into the GSVS ring buffer, then signal the vertex
 * emission to the hardware with a sendmsg.
 */
static bool
lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intrin,
                                         lower_legacy_gs_state *s)
{
   b->cursor = nir_before_instr(&intrin->instr);

   unsigned stream = nir_intrinsic_stream_id(intrin);
   nir_def *vtxidx = intrin->src[0].ssa;

   nir_def *gsvs_ring = nir_load_ring_gsvs_amd(b, .stream_id = stream);
   nir_def *soffset = nir_load_ring_gs2vs_offset_amd(b);

   /* Running index of ring slots; each used component of each output gets its
    * own slot of gs.vertices_out entries (outputs are laid out
    * component-major, not interleaved per vertex).
    */
   unsigned offset = 0;

   /* Flush the 32-bit outputs. */
   u_foreach_bit64 (i, b->shader->info.outputs_written) {
      for (unsigned j = 0; j < 4; j++) {
         nir_def *output = s->outputs[i][j];
         /* The next vertex emit needs fresh values, so reset all outputs. */
         s->outputs[i][j] = NULL;

         const uint8_t usage_mask = s->info->varying_mask[i] | s->info->sysval_mask[i];

         /* Skip components that are unused or belong to another stream.
          * Note: offset must NOT advance for these, so this check comes first.
          */
         if (!(usage_mask & (1 << j)) ||
             ((s->info->streams[i] >> (j * 2)) & 0x3) != stream)
            continue;

         /* Byte offset of this component's slot array in the ring. */
         unsigned base = offset * b->shader->info.gs.vertices_out * 4;
         offset++;

         /* no one set this output, skip the buffer store */
         if (!output)
            continue;

         /* vtxidx * 4: byte offset of this vertex within the slot array. */
         nir_def *voffset = nir_ishl_imm(b, vtxidx, 2);

         /* extend 8/16 bit to 32 bit, 64 bit has been lowered */
         nir_def *data = nir_u2uN(b, output, 32);

         nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
                              .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
                                        ACCESS_IS_SWIZZLED_AMD,
                              .base = base,
                              /* For ACO to not reorder this store around EmitVertex/EndPrimitive */
                              .memory_modes = nir_var_shader_out);
      }
   }

   /* Flush the dedicated 16-bit outputs; lo/hi halves of a component share
    * one 32-bit ring slot.
    */
   u_foreach_bit (i, b->shader->info.outputs_written_16bit) {
      for (unsigned j = 0; j < 4; j++) {
         nir_def *output_lo = s->outputs_16bit_lo[i][j];
         nir_def *output_hi = s->outputs_16bit_hi[i][j];
         /* The next vertex emit needs fresh values, so reset all outputs. */
         s->outputs_16bit_lo[i][j] = NULL;
         s->outputs_16bit_hi[i][j] = NULL;

         bool has_lo_16bit = (s->info->varying_mask_16bit_lo[i] & (1 << j)) &&
            ((s->info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream;
         bool has_hi_16bit = (s->info->varying_mask_16bit_hi[i] & (1 << j)) &&
            ((s->info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream;
         if (!has_lo_16bit && !has_hi_16bit)
            continue;

         /* Here base is in vertex entries, not bytes: it is added to vtxidx
          * below and converted to bytes by the <<2 together with it.
          */
         unsigned base = offset * b->shader->info.gs.vertices_out;
         offset++;

         bool has_lo_16bit_out = has_lo_16bit && output_lo;
         bool has_hi_16bit_out = has_hi_16bit && output_hi;

         /* no one set needed output, skip the buffer store */
         if (!has_lo_16bit_out && !has_hi_16bit_out)
            continue;

         /* Fill the unwritten half with undef so the pack below is valid. */
         if (!has_lo_16bit_out)
            output_lo = nir_undef(b, 1, 16);

         if (!has_hi_16bit_out)
            output_hi = nir_undef(b, 1, 16);

         nir_def *voffset = nir_iadd_imm(b, vtxidx, base);
         voffset = nir_ishl_imm(b, voffset, 2);

         nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
                              gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
                              .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
                                        ACCESS_IS_SWIZZLED_AMD,
                              /* For ACO to not reorder this store around EmitVertex/EndPrimitive */
                              .memory_modes = nir_var_shader_out);
      }
   }

   /* Signal vertex emission. */
   nir_sendmsg_amd(b, nir_load_gs_wave_id_amd(b),
                   .base = AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8));

   nir_instr_remove(&intrin->instr);
   return true;
}
175 
176 static bool
lower_legacy_gs_set_vertex_and_primitive_count(nir_builder * b,nir_intrinsic_instr * intrin,lower_legacy_gs_state * s)177 lower_legacy_gs_set_vertex_and_primitive_count(nir_builder *b, nir_intrinsic_instr *intrin,
178                                                lower_legacy_gs_state *s)
179 {
180    b->cursor = nir_before_instr(&intrin->instr);
181 
182    unsigned stream = nir_intrinsic_stream_id(intrin);
183 
184    s->vertex_count[stream] = intrin->src[0].ssa;
185    s->primitive_count[stream] = intrin->src[1].ssa;
186 
187    nir_instr_remove(&intrin->instr);
188    return true;
189 }
190 
191 static bool
lower_legacy_gs_end_primitive_with_counter(nir_builder * b,nir_intrinsic_instr * intrin,lower_legacy_gs_state * s)192 lower_legacy_gs_end_primitive_with_counter(nir_builder *b, nir_intrinsic_instr *intrin,
193                                                lower_legacy_gs_state *s)
194 {
195    b->cursor = nir_before_instr(&intrin->instr);
196    const unsigned stream = nir_intrinsic_stream_id(intrin);
197 
198    /* Signal primitive emission. */
199    nir_sendmsg_amd(b, nir_load_gs_wave_id_amd(b),
200                    .base = AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8));
201 
202    nir_instr_remove(&intrin->instr);
203    return true;
204 }
205 
206 static bool
lower_legacy_gs_intrinsic(nir_builder * b,nir_instr * instr,void * state)207 lower_legacy_gs_intrinsic(nir_builder *b, nir_instr *instr, void *state)
208 {
209    lower_legacy_gs_state *s = (lower_legacy_gs_state *) state;
210 
211    if (instr->type != nir_instr_type_intrinsic)
212       return false;
213 
214    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
215 
216    if (intrin->intrinsic == nir_intrinsic_store_output)
217       return lower_legacy_gs_store_output(b, intrin, s);
218    else if (intrin->intrinsic == nir_intrinsic_emit_vertex_with_counter)
219       return lower_legacy_gs_emit_vertex_with_counter(b, intrin, s);
220    else if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter)
221       return lower_legacy_gs_end_primitive_with_counter(b, intrin, s);
222    else if (intrin->intrinsic == nir_intrinsic_set_vertex_and_primitive_count)
223       return lower_legacy_gs_set_vertex_and_primitive_count(b, intrin, s);
224 
225    return false;
226 }
227 
228 void
ac_nir_lower_legacy_gs(nir_shader * nir,bool has_gen_prim_query,bool has_pipeline_stats_query,ac_nir_gs_output_info * output_info)229 ac_nir_lower_legacy_gs(nir_shader *nir,
230                        bool has_gen_prim_query,
231                        bool has_pipeline_stats_query,
232                        ac_nir_gs_output_info *output_info)
233 {
234    lower_legacy_gs_state s = {
235       .info = output_info,
236    };
237 
238    unsigned num_vertices_per_primitive = 0;
239    switch (nir->info.gs.output_primitive) {
240    case MESA_PRIM_POINTS:
241       num_vertices_per_primitive = 1;
242       break;
243    case MESA_PRIM_LINE_STRIP:
244       num_vertices_per_primitive = 2;
245       break;
246    case MESA_PRIM_TRIANGLE_STRIP:
247       num_vertices_per_primitive = 3;
248       break;
249    default:
250       unreachable("Invalid GS output primitive.");
251       break;
252    }
253 
254    nir_shader_instructions_pass(nir, lower_legacy_gs_intrinsic,
255                                 nir_metadata_control_flow, &s);
256 
257    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
258 
259    nir_builder builder = nir_builder_at(nir_after_impl(impl));
260    nir_builder *b = &builder;
261 
262    /* Emit shader query for mix use legacy/NGG GS */
263    bool progress = ac_nir_gs_shader_query(b,
264                                           has_gen_prim_query,
265                                           has_pipeline_stats_query,
266                                           has_pipeline_stats_query,
267                                           num_vertices_per_primitive,
268                                           64,
269                                           s.vertex_count,
270                                           s.primitive_count);
271 
272    /* Wait for all stores to finish. */
273    nir_barrier(b, .execution_scope = SCOPE_INVOCATION,
274                       .memory_scope = SCOPE_DEVICE,
275                       .memory_semantics = NIR_MEMORY_RELEASE,
276                       .memory_modes = nir_var_shader_out | nir_var_mem_ssbo |
277                                       nir_var_mem_global | nir_var_image);
278 
279    /* Signal that the GS is done. */
280    nir_sendmsg_amd(b, nir_load_gs_wave_id_amd(b),
281                    .base = AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE);
282 
283    if (progress)
284       nir_metadata_preserve(impl, nir_metadata_none);
285 }
286