/*
 * Copyright 2023 Pavel Ondračka <pavel.ondracka@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include "r300_nir.h"

#include "compiler/nir/nir_builder.h"
#include "r300_screen.h"
bool
r300_is_only_used_as_float(const nir_alu_instr *instr)
{
   nir_foreach_use(src, &instr->def) {
      if (nir_src_is_if(src))
         return false;

      nir_instr *user_instr = nir_src_parent_instr(src);
      if (user_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(user_instr);
         switch (alu->op) {
         case nir_op_mov:
         case nir_op_vec2:
         case nir_op_vec3:
         case nir_op_vec4:
         case nir_op_bcsel:
         case nir_op_b32csel:
            if (!r300_is_only_used_as_float(alu))
               return false;
            break;
         default:
            break;
         }

         const nir_op_info *info = &nir_op_infos[alu->op];
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
         int src_idx = alu_src - &alu->src[0];
         if ((info->input_types[src_idx] & nir_type_int) ||
             (info->input_types[src_idx] & nir_type_bool))
            return false;
      }
   }
   return true;
}

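/* Callback for nir_opt_vectorize: ALU instructions may be vectorized up to
 * the full vec4 width, everything else stays scalar.
 */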
static unsigned char
r300_should_vectorize_instr(const nir_instr *instr, const void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;

   return 4;
}

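/* Callback for nir_opt_load_store_vectorize: only merge 32-bit UBO loads
 * whose combined components still fit inside a single vec4 register.
 */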
static bool
r300_should_vectorize_io(unsigned align, unsigned bit_size,
                         unsigned num_components, unsigned high_offset,
                         nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                         void *data)
{
   if (bit_size != 32)
      return false;

   /* Our offset alignment should always be at least 4 bytes */
   if (align < 4)
      return false;

   /* No wrapping off the end of a TGSI reg. We could do a bit better by
    * looking at low's actual offset. XXX: With LOAD_CONSTBUF maybe we don't
    * need this restriction.
    */
   unsigned worst_start_component = align == 4 ? 3 : align / 4;
   if (worst_start_component + num_components > 4)
      return false;

   return true;
}

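/* Mark load_ubo_vec4 intrinsics as speculatable, so that later passes
 * (e.g. nir_opt_peephole_select) are allowed to hoist them out of
 * conditional control flow.
 */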
static bool
set_speculate(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
{
   if (intr->intrinsic == nir_intrinsic_load_ubo_vec4) {
      nir_intrinsic_set_access(intr, nir_intrinsic_access(intr) | ACCESS_CAN_SPECULATE);
      return true;
   }
   return false;
}

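/* Main NIR optimization loop: repeat the pass list until no pass reports
 * progress, then lower variable copies and remove dead function temporaries.
 */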
static void
r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
{
   bool is_r500 = r300_screen(screen)->caps.is_r500;

   bool progress;
   do {
      progress = false;

      NIR_PASS_V(s, nir_lower_vars_to_ssa);

      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, r300_nir_lower_flrp);
      NIR_PASS(progress, s, nir_opt_algebraic);
      if (s->info.stage == MESA_SHADER_VERTEX) {
         if (!is_r500)
            NIR_PASS(progress, s, r300_nir_lower_bool_to_float);
         NIR_PASS(progress, s, r300_nir_fuse_fround_d3d9);
      }
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_conditional_discard);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_find_array_copies);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_opt_dead_write_vars);

      NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      if (is_r500)
         nir_shader_intrinsics_pass(s, set_speculate,
                                    nir_metadata_block_index |
                                    nir_metadata_dominance, NULL);
      NIR_PASS(progress, s, nir_opt_peephole_select, is_r500 ? 8 : ~0, true, true);
      if (s->info.stage == MESA_SHADER_FRAGMENT) {
         NIR_PASS(progress, s, r300_nir_lower_bool_to_float_fs);
      }
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      nir_load_store_vectorize_options vectorize_opts = {
         .modes = nir_var_mem_ubo,
         .callback = r300_should_vectorize_io,
         .robust_modes = 0,
      };
      NIR_PASS(progress, s, nir_opt_load_store_vectorize, &vectorize_opts);
      NIR_PASS(progress, s, nir_opt_shrink_stores, true);
      NIR_PASS(progress, s, nir_opt_shrink_vectors);
      NIR_PASS(progress, s, nir_opt_loop);
      NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr, NULL);
      NIR_PASS(progress, s, nir_opt_undef);
      if (!progress)
         NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_loop_unroll);

      /* Try to fold addressing math into ubo_vec4's base to avoid load_consts
       * and ALU ops for it.
       */
      nir_opt_offsets_options offset_options = {
         .ubo_vec4_max = 255,

         /* No const offset in TGSI for shared accesses. */
         .shared_max = 0,

         /* unused intrinsics */
         .uniform_max = 0,
         .buffer_max = 0,
      };

      NIR_PASS(progress, s, nir_opt_offsets, &offset_options);
   } while (progress);

   NIR_PASS_V(s, nir_lower_var_copies);
   NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp,
            NULL);
}

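/* R300/R400 shaders have no real control flow, so after optimization the
 * entrypoint must consist of a single basic block. Returns an error string
 * describing the offending construct, or NULL if the shader is fine.
 */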
static char *r300_check_control_flow(nir_shader *s)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(s);
   nir_block *first = nir_start_block(impl);
   nir_cf_node *next = nir_cf_node_next(&first->cf_node);

   if (next) {
      switch (next->type) {
      case nir_cf_node_if:
         return "If/then statements not supported by R300/R400 shaders, should have been flattened by peephole_select.";
      case nir_cf_node_loop:
         return "Looping not supported in R300/R400 shaders, all loops must be statically unrollable.";
      default:
         return "Unknown control flow type";
      }
   }

   return NULL;
}

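/* Finalize the shader before it is stored: run the optimization loop, drop
 * storage-occupying uniforms (keeping samplers and images), and on
 * R300/R400 reject shaders that still contain control flow.
 */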
char *
r300_finalize_nir(struct pipe_screen *pscreen, void *nir)
{
   nir_shader *s = nir;

   r300_optimize_nir(s, pscreen);

   /* st_program.c's parameter list optimization requires that future nir
    * variants don't reallocate the uniform storage, so we have to remove
    * uniforms that occupy storage. But we don't want to remove samplers,
    * because they're needed for YUV variant lowering.
    */
   nir_remove_dead_derefs(s);
   nir_foreach_uniform_variable_safe(var, s) {
      if (var->data.mode == nir_var_uniform &&
          (glsl_type_get_image_count(var->type) ||
           glsl_type_get_sampler_count(var->type)))
         continue;

      exec_node_remove(&var->node);
   }
   nir_validate_shader(s, "after uniform var removal");

   nir_sweep(s);

   if (!r300_screen(pscreen)->caps.is_r500 &&
       (r300_screen(pscreen)->caps.has_tcl || s->info.stage == MESA_SHADER_FRAGMENT)) {
      char *msg = r300_check_control_flow(s);
      if (msg)
         return strdup(msg);
   }

   return NULL;
}