/*
 * Copyright 2023 Pavel Ondračka <pavel.ondracka@gmail.com>
 * SPDX-License-Identifier: MIT
 */

#include "r300_nir.h"

#include "compiler/nir/nir_builder.h"
#include "r300_screen.h"

/* Return true when the result of this ALU instruction is only ever read as
 * a float, i.e. no user consumes it through an int or bool typed source.
 * Pass-through users (mov, vecN, bcsel) are checked recursively.
 */
bool
r300_is_only_used_as_float(const nir_alu_instr *instr)
{
   nir_foreach_use (src, &instr->def) {
      if (nir_src_is_if(src))
         return false;

      nir_instr *user_instr = nir_src_parent_instr(src);
      if (user_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(user_instr);
         switch (alu->op) {
         case nir_op_mov:
         case nir_op_vec2:
         case nir_op_vec3:
         case nir_op_vec4:
         case nir_op_bcsel:
         case nir_op_b32csel:
            if (!r300_is_only_used_as_float(alu))
               return false;
            break;
         default:
            break;
         }

         const nir_op_info *info = &nir_op_infos[alu->op];
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
         int src_idx = alu_src - &alu->src[0];
         if ((info->input_types[src_idx] & nir_type_int) ||
             (info->input_types[src_idx] & nir_type_bool))
            return false;
      }
   }
   return true;
}

static unsigned char
r300_should_vectorize_instr(const nir_instr *instr, const void *data)
{
   bool *too_many_ubos = (bool *)data;

   if (instr->type != nir_instr_type_alu)
      return 0;

   /* Vectorization can make the constant layout worse and increase
    * the constant register usage. The worst scenario is vectorization
    * of lowered indirect register access, where we access the i-th element
    * and later access element i-1 or i+1 (most notably glamor and gsk
    * shaders). In this case we already added constants 1..n, where n is
    * the array size, and we can reuse them unless the lowered ladder gets
    * vectorized later.
    *
    * Thus prevent vectorization of the specific patterns from lowered
    * indirect access.
    *
    * This is quite a heavy hammer; we could in theory estimate how many
    * slots the current UBOs and constants will need and only disable
    * vectorization when we are close to the limit. However, this would
    * likely need a global shader analysis each time
    * r300_should_vectorize_instr is called, which we want to avoid.
    *
    * So for now just don't vectorize anything that loads constants.
    */
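   /* A rough sketch of the ladder this refers to (schematic, not literal
    * NIR): lowering arr[i] produces a chain of compares/selects against
    * the immediates 0, 1, ..., n-1, and a later arr[i - 1] or arr[i + 1]
    * can keep reusing those same immediates only as long as the compares
    * stay scalar and are not merged into new vector constants.
    */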
   if (*too_many_ubos) {
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_src_is_const(alu->src[i].src)) {
            return 0;
         }
      }
   }

   return 4;
}

/* R300 and R400 have just 32 vec4 constant register slots in the fragment
 * shader. Therefore, while it's possible we will be able to compact some
 * of the constants later, we need to be extra careful when adding new
 * constants anyway.
 */
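/* For scale: one vec4 slot holds 16 bytes, so the check below flags any UBO
 * larger than 32 * 16 = 512 bytes; e.g. a 520-byte UBO would already need
 * DIV_ROUND_UP(520, 16) = 33 slots.
 */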
static bool
have_too_many_ubos(nir_shader *s, bool is_r500)
{
   if (s->info.stage != MESA_SHADER_FRAGMENT)
      return false;

   if (is_r500)
      return false;

   nir_foreach_variable_with_modes (var, s, nir_var_mem_ubo) {
      int ubo = var->data.driver_location;
      assert(ubo == 0);

      unsigned size = glsl_get_explicit_size(var->interface_type, false);
      if (DIV_ROUND_UP(size, 16) > 32)
         return true;
   }

   return false;
}

/* Mark UBO loads as speculatable so that later passes (e.g.
 * nir_opt_peephole_select) are allowed to move them out of control flow.
 */
static bool
set_speculate(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
{
   if (intr->intrinsic == nir_intrinsic_load_ubo_vec4) {
      nir_intrinsic_set_access(intr, nir_intrinsic_access(intr) | ACCESS_CAN_SPECULATE);
      return true;
   }
   return false;
}

/* There is no HW support for gl_ClipVertex, so drop any store to it together
 * with the variable's deref; the caller then compacts the output locations.
 */
static bool
remove_clip_vertex(nir_builder *b, nir_instr *instr, UNUSED void *_)
{
   if (instr->type != nir_instr_type_deref)
      return false;
   nir_deref_instr *deref = nir_instr_as_deref(instr);
   if (deref->deref_type == nir_deref_type_var &&
       deref->var->data.mode == nir_var_shader_out &&
       deref->var->data.location == VARYING_SLOT_CLIP_VERTEX) {
      nir_foreach_use_safe(src, &deref->def) {
         nir_instr_remove(nir_src_parent_instr(src));
      }
      nir_instr_remove(instr);
      return true;
   }
   return false;
}

static void
r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
{
   bool is_r500 = r300_screen(screen)->caps.is_r500;

   bool progress;
   if (s->info.stage == MESA_SHADER_VERTEX && r300_screen(screen)->caps.has_tcl) {
      /* There is no HW support for gl_ClipVertex, so we just remove it early. */
      if (nir_shader_instructions_pass(s, remove_clip_vertex,
                                       nir_metadata_control_flow, NULL)) {
         unsigned clip_vertex_location = 0;
         nir_foreach_variable_with_modes(var, s, nir_var_shader_out) {
            if (var->data.location == VARYING_SLOT_CLIP_VERTEX) {
               clip_vertex_location = var->data.driver_location;
            }
         }
         nir_foreach_variable_with_modes(var, s, nir_var_shader_out) {
            if (var->data.driver_location > clip_vertex_location) {
               var->data.driver_location--;
            }
         }
         NIR_PASS_V(s, nir_remove_dead_variables, nir_var_shader_out, NULL);
         fprintf(stderr, "r300: no HW support for clip vertex, expect misrendering.\n");
         fprintf(stderr, "r300: software emulation can be enabled with RADEON_DEBUG=notcl.\n");
      }
   }

   do {
      progress = false;

      NIR_PASS_V(s, nir_lower_vars_to_ssa);

      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, r300_nir_lower_flrp);
      NIR_PASS(progress, s, nir_opt_algebraic);
      if (s->info.stage == MESA_SHADER_VERTEX) {
         if (!is_r500)
            NIR_PASS(progress, s, r300_nir_lower_bool_to_float);
         NIR_PASS(progress, s, r300_nir_fuse_fround_d3d9);
      }
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_conditional_discard);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_find_array_copies);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_opt_dead_write_vars);

      NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      if (is_r500)
         nir_shader_intrinsics_pass(s, set_speculate, nir_metadata_control_flow, NULL);
      NIR_PASS(progress, s, nir_opt_peephole_select, is_r500 ? 8 : ~0, true, true);
      if (s->info.stage == MESA_SHADER_FRAGMENT) {
         NIR_PASS(progress, s, r300_nir_lower_bool_to_float_fs);
      }
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_shrink_stores, true);
      NIR_PASS(progress, s, nir_opt_shrink_vectors, false);
      NIR_PASS(progress, s, nir_opt_loop);

      bool too_many_ubos = have_too_many_ubos(s, is_r500);
      NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr, &too_many_ubos);
      NIR_PASS(progress, s, nir_opt_undef);
      if (!progress)
         NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_loop_unroll);

      /* Try to fold addressing math into ubo_vec4's base to avoid load_consts
       * and ALU ops for it.
       */
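      /* Schematically (assumed shapes, not literal NIR): a load such as
       *    load_ubo_vec4(base = 0, offset = iadd(i, 3))
       * can become
       *    load_ubo_vec4(base = 3, offset = i)
       * as long as the new base stays within ubo_vec4_max below.
       */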
      nir_opt_offsets_options offset_options = {
         .ubo_vec4_max = 255,

         /* No const offset in TGSI for shared accesses. */
         .shared_max = 0,

         /* unused intrinsics */
         .uniform_max = 0,
         .buffer_max = 0,
      };

      NIR_PASS(progress, s, nir_opt_offsets, &offset_options);
   } while (progress);

   NIR_PASS_V(s, nir_lower_var_copies);
   NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp, NULL);
}

/* R300/R400 shaders have no HW control flow, so report any if or loop that
 * survived the optimization loop above and let compilation fail gracefully.
 */
static char *
r300_check_control_flow(nir_shader *s)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(s);
   nir_block *first = nir_start_block(impl);
   nir_cf_node *next = nir_cf_node_next(&first->cf_node);

   if (next) {
      switch (next->type) {
      case nir_cf_node_if:
         return "If/then statements not supported by R300/R400 shaders, should have been "
                "flattened by peephole_select.";
      case nir_cf_node_loop:
         return "Looping not supported by R300/R400 shaders, all loops must be statically "
                "unrollable.";
      default:
         return "Unknown control flow type";
      }
   }

   return NULL;
}

char *
r300_finalize_nir(struct pipe_screen *pscreen, struct nir_shader *s)
{
   r300_optimize_nir(s, pscreen);

   /* st_program.c's parameter list optimization requires that future nir
    * variants don't reallocate the uniform storage, so we have to remove
    * uniforms that occupy storage.  But we don't want to remove samplers,
    * because they're needed for YUV variant lowering.
    */
   nir_remove_dead_derefs(s);
   nir_foreach_uniform_variable_safe (var, s) {
      if (var->data.mode == nir_var_uniform &&
          (glsl_type_get_image_count(var->type) || glsl_type_get_sampler_count(var->type)))
         continue;

      exec_node_remove(&var->node);
   }
   nir_validate_shader(s, "after uniform var removal");

   nir_sweep(s);

   if (!r300_screen(pscreen)->caps.is_r500 &&
       (r300_screen(pscreen)->caps.has_tcl || s->info.stage == MESA_SHADER_FRAGMENT)) {
      char *msg = r300_check_control_flow(s);
      if (msg)
         return strdup(msg);
   }

   return NULL;
}