• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2023 Pavel Ondračka <pavel.ondracka@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22 
23 #include "r300_nir.h"
24 
25 #include "compiler/nir/nir_builder.h"
26 #include "r300_screen.h"
27 
28 bool
r300_is_only_used_as_float(const nir_alu_instr * instr)29 r300_is_only_used_as_float(const nir_alu_instr *instr)
30 {
31    nir_foreach_use(src, &instr->def) {
32       if (nir_src_is_if(src))
33          return false;
34 
35       nir_instr *user_instr = nir_src_parent_instr(src);
36       if (user_instr->type == nir_instr_type_alu) {
37          nir_alu_instr *alu = nir_instr_as_alu(user_instr);
38          switch (alu->op) {
39          case nir_op_mov:
40          case nir_op_vec2:
41          case nir_op_vec3:
42          case nir_op_vec4:
43          case nir_op_bcsel:
44          case nir_op_b32csel:
45             if (!r300_is_only_used_as_float(alu))
46                return false;
47             break;
48          default:
49 	    break;
50          }
51 
52          const nir_op_info *info = &nir_op_infos[alu->op];
53          nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
54          int src_idx = alu_src - &alu->src[0];
55          if ((info->input_types[src_idx] & nir_type_int) ||
56              (info->input_types[src_idx] & nir_type_bool))
57             return false;
58       }
59    }
60    return true;
61 }
62 
63 static unsigned char
r300_should_vectorize_instr(const nir_instr * instr,const void * data)64 r300_should_vectorize_instr(const nir_instr *instr, const void *data)
65 {
66    if (instr->type != nir_instr_type_alu)
67       return 0;
68 
69    return 4;
70 }
71 
72 static bool
r300_should_vectorize_io(unsigned align,unsigned bit_size,unsigned num_components,unsigned high_offset,nir_intrinsic_instr * low,nir_intrinsic_instr * high,void * data)73 r300_should_vectorize_io(unsigned align, unsigned bit_size,
74                         unsigned num_components, unsigned high_offset,
75                         nir_intrinsic_instr *low, nir_intrinsic_instr *high,
76                         void *data)
77 {
78    if (bit_size != 32)
79       return false;
80 
81    /* Our offset alignment should aways be at least 4 bytes */
82    if (align < 4)
83       return false;
84 
85    /* No wrapping off the end of a TGSI reg.  We could do a bit better by
86     * looking at low's actual offset.  XXX: With LOAD_CONSTBUF maybe we don't
87     * need this restriction.
88     */
89    unsigned worst_start_component = align == 4 ? 3 : align / 4;
90    if (worst_start_component + num_components > 4)
91       return false;
92 
93    return true;
94 }
95 
96 static bool
set_speculate(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * _)97 set_speculate(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
98 {
99    if (intr->intrinsic == nir_intrinsic_load_ubo_vec4) {
100       nir_intrinsic_set_access(intr, nir_intrinsic_access(intr) | ACCESS_CAN_SPECULATE);
101       return true;
102    }
103    return false;
104 }
105 
/* Core NIR optimization loop for r300-class hardware: repeatedly runs the
 * pass pipeline until a full iteration makes no further progress.
 */
static void
r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
{
   /* Several passes below are gated on R500-class capabilities. */
   bool is_r500 = r300_screen(screen)->caps.is_r500;

   bool progress;
   do {
      progress = false;

      NIR_PASS_V(s, nir_lower_vars_to_ssa);

      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, r300_nir_lower_flrp);
      NIR_PASS(progress, s, nir_opt_algebraic);
      if (s->info.stage == MESA_SHADER_VERTEX) {
         /* Pre-R500 vertex shaders get booleans lowered to floats. */
         if (!is_r500)
            NIR_PASS(progress, s, r300_nir_lower_bool_to_float);
         NIR_PASS(progress, s, r300_nir_fuse_fround_d3d9);
      }
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_conditional_discard);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_find_array_copies);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_opt_dead_write_vars);

      NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      /* Mark UBO loads speculatable before peephole_select runs, so branches
       * containing them can still be flattened.  NOTE(review): this call does
       * not feed the progress flag — presumably intentional; confirm.
       */
      if (is_r500)
         nir_shader_intrinsics_pass(s, set_speculate,
                                    nir_metadata_block_index |
                                    nir_metadata_dominance, NULL);
      /* Flatten if/else into selects; non-R500 flattens with no size limit
       * (~0) since r300_check_control_flow rejects any surviving branches.
       */
      NIR_PASS(progress, s, nir_opt_peephole_select, is_r500 ? 8 : ~0, true, true);
      if (s->info.stage == MESA_SHADER_FRAGMENT) {
         NIR_PASS(progress, s, r300_nir_lower_bool_to_float_fs);
      }
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      nir_load_store_vectorize_options vectorize_opts = {
         .modes = nir_var_mem_ubo,
         .callback = r300_should_vectorize_io,
         .robust_modes = 0,
      };
      NIR_PASS(progress, s, nir_opt_load_store_vectorize, &vectorize_opts);
      NIR_PASS(progress, s, nir_opt_shrink_stores, true);
      NIR_PASS(progress, s, nir_opt_shrink_vectors);
      NIR_PASS(progress, s, nir_opt_loop);
      NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr, NULL);
      NIR_PASS(progress, s, nir_opt_undef);
      /* Only lower leftover undefs to zero once nothing else can exploit
       * them; doing it earlier would block undef-based optimizations.
       */
      if(!progress)
         NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_loop_unroll);

      /* Try to fold addressing math into ubo_vec4's base to avoid load_consts
       * and ALU ops for it.
       */
      nir_opt_offsets_options offset_options = {
         .ubo_vec4_max = 255,

         /* No const offset in TGSI for shared accesses. */
         .shared_max = 0,

         /* unused intrinsics */
         .uniform_max = 0,
         .buffer_max = 0,
      };

      NIR_PASS(progress, s, nir_opt_offsets, &offset_options);
   } while (progress);

   NIR_PASS_V(s, nir_lower_var_copies);
   NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp,
			NULL);
}
182 
/* Verify the shader's entrypoint contains no control flow, which R300/R400
 * hardware cannot execute.  Returns a static (non-allocated) error message
 * on failure, or NULL when the shader is a single straight-line block.
 */
static char *r300_check_control_flow(nir_shader *s)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(s);
   nir_block *first = nir_start_block(impl);
   nir_cf_node *next = nir_cf_node_next(&first->cf_node);

   /* Any CF node after the first block means real control flow survived
    * the optimization loop.
    */
   if (next) {
      switch (next->type) {
         case nir_cf_node_if:
            return "If/then statements not supported by R300/R400 shaders, should have been flattened by peephole_select.";
         case nir_cf_node_loop:
            /* Fixed diagnostic: original text was missing "by". */
            return "Looping not supported by R300/R400 shaders, all loops must be statically unrollable.";
         default:
            return "Unknown control flow type";
      }
   }

   return NULL;
}
202 
203 char *
r300_finalize_nir(struct pipe_screen * pscreen,void * nir)204 r300_finalize_nir(struct pipe_screen *pscreen, void *nir)
205 {
206    nir_shader *s = nir;
207 
208    r300_optimize_nir(s, pscreen);
209 
210    /* st_program.c's parameter list optimization requires that future nir
211     * variants don't reallocate the uniform storage, so we have to remove
212     * uniforms that occupy storage.  But we don't want to remove samplers,
213     * because they're needed for YUV variant lowering.
214     */
215    nir_remove_dead_derefs(s);
216    nir_foreach_uniform_variable_safe(var, s) {
217       if (var->data.mode == nir_var_uniform &&
218           (glsl_type_get_image_count(var->type) ||
219            glsl_type_get_sampler_count(var->type)))
220          continue;
221 
222       exec_node_remove(&var->node);
223    }
224    nir_validate_shader(s, "after uniform var removal");
225 
226    nir_sweep(s);
227 
228    if (!r300_screen(pscreen)->caps.is_r500 &&
229        (r300_screen(pscreen)->caps.has_tcl || s->info.stage == MESA_SHADER_FRAGMENT)) {
230       char *msg = r300_check_control_flow(s);
231       if (msg)
232          return strdup(msg);
233    }
234 
235    return NULL;
236 }
237