/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2023 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_nir.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_device.h"
#include "radv_nir.h"
#include "radv_physical_device.h"
#include "radv_shader.h"

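/* Type-size callback for nir_lower_io: I/O is measured in vec4 attribute slots. */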
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

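/* Scalarize I/O before linking so that nir_link_opt_varyings can optimize and
 * remove individual varying components across stages.
 */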
void
radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
{
   bool progress = false;

   NIR_PASS(progress, nir, nir_lower_io_to_scalar_early, mask);
   if (progress) {
      /* Optimize the new vector code and then remove dead vars */
      NIR_PASS(_, nir, nir_copy_prop);
      NIR_PASS(_, nir, nir_opt_shrink_vectors, true);

      if (mask & nir_var_shader_out) {
         /* Optimize swizzled movs of load_const for nir_link_opt_varyings's constant propagation. */
         NIR_PASS(_, nir, nir_opt_constant_folding);

         /* For nir_link_opt_varyings's duplicate input opt */
         NIR_PASS(_, nir, nir_opt_cse);
      }

      /* Run copy-propagation to help remove dead output variables (some shaders have useless copies
       * to/from an output), so compaction later will be more effective.
       *
       * This will have been done earlier, but it might not have worked because the outputs were
       * still vectors.
       */
      if (nir->info.stage == MESA_SHADER_TESS_CTRL)
         NIR_PASS(_, nir, nir_opt_copy_prop_vars);

      NIR_PASS(_, nir, nir_opt_dce);
      NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
   }
}

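/* FS inputs fall into three groups that are laid out back to back: inputs that
 * are always per-vertex, inputs that may or may not be per-primitive depending
 * on the previous stage (VIEWPORT and PRIMITIVE_ID), and inputs that are always
 * per-primitive.
 */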
typedef struct {
   uint64_t always_per_vertex;
   uint64_t potentially_per_primitive;
   uint64_t always_per_primitive;
   unsigned num_always_per_vertex;
   unsigned num_potentially_per_primitive;
} radv_recompute_fs_input_bases_state;

static bool
radv_recompute_fs_input_bases_callback(UNUSED nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   const radv_recompute_fs_input_bases_state *s = (const radv_recompute_fs_input_bases_state *)data;

   /* Filter possible FS input intrinsics */
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_primitive_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_input_vertex:
      break;
   default:
      return false;
   }

   const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
   const uint64_t location_bit = BITFIELD64_BIT(sem.location);
   const uint64_t location_mask = BITFIELD64_MASK(sem.location);
   const unsigned old_base = nir_intrinsic_base(intrin);
   unsigned new_base = 0;

   if (location_bit & s->always_per_vertex) {
      new_base = util_bitcount64(s->always_per_vertex & location_mask);
   } else if (location_bit & s->potentially_per_primitive) {
      new_base = s->num_always_per_vertex;

      /* Potentially per-primitive inputs come right after the always per-vertex
       * ones: VIEWPORT first, then PRIMITIVE_ID.
       */
      switch (location_bit) {
      case VARYING_BIT_VIEWPORT:
         break;
      case VARYING_BIT_PRIMITIVE_ID:
         new_base += !!(s->potentially_per_primitive & VARYING_BIT_VIEWPORT);
         break;
      }
   } else if (location_bit & s->always_per_primitive) {
      new_base = s->num_always_per_vertex + s->num_potentially_per_primitive +
                 util_bitcount64(s->always_per_primitive & location_mask);
   } else {
      unreachable("invalid FS input");
   }

   if (new_base != old_base) {
      nir_intrinsic_set_base(intrin, new_base);
      return true;
   }

   return false;
}

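/* Recompute the base of every FS input load intrinsic so that the bases are
 * contiguous and ordered: always per-vertex inputs first, then the potentially
 * per-primitive ones (VIEWPORT, PRIMITIVE_ID), then always per-primitive inputs.
 * Returns true if any base changed.
 */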
bool
radv_recompute_fs_input_bases(nir_shader *nir)
{
   const uint64_t always_per_vertex = nir->info.inputs_read & ~nir->info.per_primitive_inputs &
                                      ~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);

   const uint64_t potentially_per_primitive = nir->info.inputs_read & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);

   const uint64_t always_per_primitive = nir->info.inputs_read & nir->info.per_primitive_inputs &
                                         ~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);

   radv_recompute_fs_input_bases_state s = {
      .always_per_vertex = always_per_vertex,
      .potentially_per_primitive = potentially_per_primitive,
      .always_per_primitive = always_per_primitive,
      .num_always_per_vertex = util_bitcount64(always_per_vertex),
      .num_potentially_per_primitive = util_bitcount64(potentially_per_primitive),
   };

   return nir_shader_intrinsics_pass(nir, radv_recompute_fs_input_bases_callback, nir_metadata_control_flow, &s);
}

void
radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   /* The nir_lower_io pass currently cannot handle array deref of vectors.
    * Call this here to make sure there are no such derefs left in the shader.
    */
   NIR_PASS(_, nir, nir_lower_array_deref_of_vec, nir_var_shader_in | nir_var_shader_out, NULL,
            nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load |
               nir_lower_direct_array_deref_of_vec_store | nir_lower_indirect_array_deref_of_vec_store);

   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
      NIR_PASS(_, nir, nir_vectorize_tess_levels);
   }

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
   } else {
      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4,
               nir_lower_io_lower_64bit_to_32 | nir_lower_io_use_interpolated_input_intrinsics);
   }

   /* This pass needs actual constants */
   NIR_PASS(_, nir, nir_opt_constant_folding);

   NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);

   if (nir->xfb_info) {
      NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);

      if (pdev->use_ngg_streamout) {
         /* The total number of shader outputs is required for computing the per-vertex LDS size
          * for VS/TES when lowering NGG streamout.
          */
         nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
      }
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Lower explicit input load intrinsics to sysvals for the layer ID. */
      NIR_PASS(_, nir, nir_lower_system_values);

      /* Recompute FS input intrinsic bases to assign a location to each FS input.
       * The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
       */
      radv_recompute_fs_input_bases(nir);
   }

   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out, NULL);
}

/* IO slot layout for stages that aren't linked. */
enum {
   RADV_IO_SLOT_POS = 0,
   RADV_IO_SLOT_CLIP_DIST0,
   RADV_IO_SLOT_CLIP_DIST1,
   RADV_IO_SLOT_PSIZ,
   RADV_IO_SLOT_VAR0, /* 0..31 */
};

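/* Map an I/O semantic to its driver location using the fixed slot layout above.
 * Per-patch I/O (tess levels and patch varyings) uses the common AMD per-patch
 * slot mapping instead.
 */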
unsigned
radv_map_io_driver_location(unsigned semantic)
{
   if ((semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX) ||
       semantic == VARYING_SLOT_TESS_LEVEL_INNER || semantic == VARYING_SLOT_TESS_LEVEL_OUTER)
      return ac_shader_io_get_unique_index_patch(semantic);

   switch (semantic) {
   case VARYING_SLOT_POS:
      return RADV_IO_SLOT_POS;
   case VARYING_SLOT_CLIP_DIST0:
      return RADV_IO_SLOT_CLIP_DIST0;
   case VARYING_SLOT_CLIP_DIST1:
      return RADV_IO_SLOT_CLIP_DIST1;
   case VARYING_SLOT_PSIZ:
      return RADV_IO_SLOT_PSIZ;
   default:
      assert(semantic >= VARYING_SLOT_VAR0 && semantic <= VARYING_SLOT_VAR31);
      return RADV_IO_SLOT_VAR0 + (semantic - VARYING_SLOT_VAR0);
   }
}

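/* Lower I/O intrinsics to LDS/memory accesses for stages that pass data through
 * memory (LS/HS, ES/GS, TCS/TES and task/mesh payloads). Returns true if the
 * stage's I/O was lowered.
 */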
bool
radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_shader_info *info = &stage->info;
   ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location;
   ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
   nir_shader *nir = stage->nir;

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      if (info->vs.as_ls) {
         NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, map_output, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
                    info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
         return true;
      } else if (info->vs.as_es) {
         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
                    info->gs_inputs_read);
         return true;
      }
   } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
      NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
                 info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
      NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, &info->tcs.info, map_output, pdev->info.gfx_level,
                 info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->wave_size);

      return true;
   } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, map_input);

      if (info->tes.as_es) {
         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
                    info->gs_inputs_read);
      }

      return true;
   } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false);
      return true;
   } else if (nir->info.stage == MESA_SHADER_TASK) {
      ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries,
                                       info->cs.has_query);
      return true;
   } else if (nir->info.stage == MESA_SHADER_MESH) {
      ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries);
      return true;
   }

   return false;
}

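/* Replace every load_draw_id intrinsic with a constant zero. */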
static bool
radv_nir_lower_draw_id_to_zero_callback(struct nir_builder *b, nir_intrinsic_instr *intrin, UNUSED void *state)
{
   if (intrin->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   nir_def *replacement = nir_imm_zero(b, intrin->def.num_components, intrin->def.bit_size);
   nir_def_replace(&intrin->def, replacement);
   nir_instr_free(&intrin->instr);

   return true;
}

bool
radv_nir_lower_draw_id_to_zero(nir_shader *shader)
{
   return nir_shader_intrinsics_pass(shader, radv_nir_lower_draw_id_to_zero_callback, nir_metadata_control_flow, NULL);
}