1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/mesa-sha1.h"
27 #include <math.h>
28
29 /** @file nir_opt_undef.c
30 *
31 * Handles optimization of operations involving ssa_undef.
32 */
33
/** Per-shader knobs for the undef optimizations in this file. */
struct undef_options {
   /* When set, replace_ssa_undef() never materializes NaN for an undef and
    * falls back to the integer constant 0 instead.
    */
   bool disallow_undef_to_nan;
};
37
38 /**
39 * Turn conditional selects between an undef and some other value into a move
40 * of that other value (on the assumption that the condition's going to be
41 * choosing the defined value). This reduces work after if flattening when
42 * each side of the if is defining a variable.
43 */
44 static bool
opt_undef_csel(nir_builder * b,nir_alu_instr * instr)45 opt_undef_csel(nir_builder *b, nir_alu_instr *instr)
46 {
47 if (!nir_op_is_selection(instr->op))
48 return false;
49
50 for (int i = 1; i <= 2; i++) {
51 nir_instr *parent = instr->src[i].src.ssa->parent_instr;
52 if (parent->type != nir_instr_type_undef)
53 continue;
54
55 b->cursor = nir_instr_remove(&instr->instr);
56 nir_def *mov = nir_mov_alu(b, instr->src[i == 1 ? 2 : 1],
57 instr->def.num_components);
58 nir_def_rewrite_uses(&instr->def, mov);
59
60 return true;
61 }
62
63 return false;
64 }
65
66 /**
67 * Replace vecN(undef, undef, ...) with a single undef.
68 */
69 static bool
opt_undef_vecN(nir_builder * b,nir_alu_instr * alu)70 opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
71 {
72 if (!nir_op_is_vec_or_mov(alu->op))
73 return false;
74
75 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
76 if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_undef)
77 return false;
78 }
79
80 b->cursor = nir_before_instr(&alu->instr);
81 nir_def *undef = nir_undef(b, alu->def.num_components,
82 alu->def.bit_size);
83 nir_def_rewrite_uses(&alu->def, undef);
84
85 return true;
86 }
87
88 static uint32_t
nir_get_undef_mask(nir_def * def)89 nir_get_undef_mask(nir_def *def)
90 {
91 nir_instr *instr = def->parent_instr;
92
93 if (instr->type == nir_instr_type_undef)
94 return BITSET_MASK(def->num_components);
95
96 if (instr->type != nir_instr_type_alu)
97 return 0;
98
99 nir_alu_instr *alu = nir_instr_as_alu(instr);
100 unsigned undef = 0;
101
102 /* nir_op_mov of undef is handled by opt_undef_vecN() */
103 if (nir_op_is_vec(alu->op)) {
104 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
105 if (alu->src[i].src.ssa->parent_instr->type ==
106 nir_instr_type_undef) {
107 undef |= BITSET_MASK(nir_ssa_alu_instr_src_components(alu, i)) << i;
108 }
109 }
110 }
111
112 return undef;
113 }
114
115 /**
116 * Remove any store intrinsic writemask channels whose value is undefined (the
117 * existing value is a fine representation of "undefined").
118 */
119 static bool
opt_undef_store(nir_intrinsic_instr * intrin)120 opt_undef_store(nir_intrinsic_instr *intrin)
121 {
122 int arg_index;
123 switch (intrin->intrinsic) {
124 case nir_intrinsic_store_deref:
125 arg_index = 1;
126 break;
127 case nir_intrinsic_store_output:
128 case nir_intrinsic_store_per_vertex_output:
129 case nir_intrinsic_store_per_primitive_output:
130 case nir_intrinsic_store_ssbo:
131 case nir_intrinsic_store_shared:
132 case nir_intrinsic_store_global:
133 case nir_intrinsic_store_scratch:
134 arg_index = 0;
135 break;
136 default:
137 return false;
138 }
139
140 nir_def *def = intrin->src[arg_index].ssa;
141
142 unsigned write_mask = nir_intrinsic_write_mask(intrin);
143 unsigned undef_mask = nir_get_undef_mask(def);
144
145 if (!(write_mask & undef_mask))
146 return false;
147
148 write_mask &= ~undef_mask;
149 if (!write_mask)
150 nir_instr_remove(&intrin->instr);
151 else
152 nir_intrinsic_set_write_mask(intrin, write_mask);
153
154 return true;
155 }
156
/** Facts gathered by visit_undef_use() while walking the uses of an undef. */
struct visit_info {
   /* At least one ALU use was found where a constant operand is expected to
    * help.
    */
   bool replace_undef_with_constant;

   /* At least one such use reads the value through a float-typed input
    * (fmulz, and ffmaz sources other than source 2, do not count).
    */
   bool prefer_nan;

   /* A use was found (an if condition or a non-ALU instruction) for which
    * the undef must be left alone.
    */
   bool must_keep_undef;
};
162
163 /**
164 * Analyze an undef use to see if replacing undef with a constant is
165 * beneficial.
166 */
167 static void
visit_undef_use(nir_src * src,struct visit_info * info)168 visit_undef_use(nir_src *src, struct visit_info *info)
169 {
170 if (nir_src_is_if(src)) {
171 /* If the use is "if", keep undef because the branch will be eliminated
172 * by nir_opt_dead_cf.
173 */
174 info->must_keep_undef = true;
175 return;
176 }
177
178 nir_instr *instr = nir_src_parent_instr(src);
179
180 if (instr->type == nir_instr_type_alu) {
181 /* Replacing undef with a constant is only beneficial with ALU
182 * instructions because it can eliminate them or simplify them.
183 */
184 nir_alu_instr *alu = nir_instr_as_alu(instr);
185
186 /* Follow movs and vecs.
187 *
188 * Note that all vector component uses are followed and swizzles are
189 * ignored.
190 */
191 if (alu->op == nir_op_mov || nir_op_is_vec(alu->op)) {
192 nir_foreach_use_including_if(next_src, &alu->def) {
193 visit_undef_use(next_src, info);
194 }
195 return;
196 }
197
198 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
199
200 for (unsigned i = 0; i < num_srcs; i++) {
201 if (&alu->src[i].src != src)
202 continue;
203
204 if (nir_op_is_selection(alu->op) && i != 0) {
205 /* nir_opt_algebraic can eliminate a select opcode only if src0 is
206 * a constant. If the undef use is src1 or src2, it will be
207 * handled by opt_undef_csel.
208 */
209 continue;
210 }
211
212 info->replace_undef_with_constant = true;
213 if (nir_op_infos[alu->op].input_types[i] & nir_type_float &&
214 alu->op != nir_op_fmulz &&
215 (alu->op != nir_op_ffmaz || i == 2))
216 info->prefer_nan = true;
217 }
218 } else {
219 /* If the use is not ALU, don't replace undef. We need to preserve
220 * undef for stores and phis because those are handled differently,
221 * and replacing undef with a constant would result in worse code.
222 */
223 info->must_keep_undef = true;
224 return;
225 }
226 }
227
228 /**
229 * Replace ssa_undef used by ALU opcodes with 0 or NaN, whichever eliminates
230 * more code.
231 *
232 * Replace it with NaN if an FP opcode uses undef, which will cause the opcode
233 * to be eliminated by nir_opt_algebraic. 0 would not eliminate the FP opcode.
234 */
235 static bool
replace_ssa_undef(nir_builder * b,nir_instr * instr,const struct undef_options * options)236 replace_ssa_undef(nir_builder *b, nir_instr *instr,
237 const struct undef_options *options)
238 {
239 nir_undef_instr *undef = nir_instr_as_undef(instr);
240 struct visit_info info = {0};
241
242 nir_foreach_use_including_if(src, &undef->def) {
243 visit_undef_use(src, &info);
244 }
245
246 if (info.must_keep_undef || !info.replace_undef_with_constant)
247 return false;
248
249 b->cursor = nir_before_instr(&undef->instr);
250 nir_def *replacement;
251
252 /* If undef is used as float, replace it with NaN, which will
253 * eliminate all FP instructions that consume it. Else, replace it
254 * with 0, which is more likely to eliminate non-FP instructions.
255 */
256 if (info.prefer_nan && !options->disallow_undef_to_nan)
257 replacement = nir_imm_floatN_t(b, NAN, undef->def.bit_size);
258 else
259 replacement = nir_imm_intN_t(b, 0, undef->def.bit_size);
260
261 if (undef->def.num_components > 1)
262 replacement = nir_replicate(b, replacement, undef->def.num_components);
263
264 nir_def_rewrite_uses_after(&undef->def, replacement, &undef->instr);
265 nir_instr_remove(&undef->instr);
266 return true;
267 }
268
269 static bool
nir_opt_undef_instr(nir_builder * b,nir_instr * instr,void * data)270 nir_opt_undef_instr(nir_builder *b, nir_instr *instr, void *data)
271 {
272 const struct undef_options *options = data;
273
274 if (instr->type == nir_instr_type_undef) {
275 return replace_ssa_undef(b, instr, options);
276 } else if (instr->type == nir_instr_type_alu) {
277 nir_alu_instr *alu = nir_instr_as_alu(instr);
278 return opt_undef_csel(b, alu) ||
279 opt_undef_vecN(b, alu);
280 } else if (instr->type == nir_instr_type_intrinsic) {
281 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
282 return opt_undef_store(intrin);
283 }
284
285 return false;
286 }
287
288 bool
nir_opt_undef(nir_shader * shader)289 nir_opt_undef(nir_shader *shader)
290 {
291 struct undef_options options = {0};
292
293 /* Disallow the undef->NaN transformation only for those shaders where
294 * it's known to break rendering. These are shader source SHA1s printed by
295 * nir_print_shader().
296 */
297 uint32_t shader_sha1s[][SHA1_DIGEST_LENGTH32] = {
298 /* gputest/gimark */
299 {0x9a1af9e2, 0x68f185bf, 0x11fc1257, 0x1102e80b, 0x5ca350fa},
300
301 /* Viewperf13/CATIA_car_01 */
302 {0x4746a4a4, 0xe3b27d27, 0xe6d2b0fb, 0xb7e9ceb3, 0x973e6152}, /* Taillights */
303 {0xc49cc90d, 0xd7208212, 0x726502ea, 0xe1fe62c0, 0xb62fbd1f}, /* Grill */
304 {0xde23f35b, 0xb6fa45ae, 0x96da7e6b, 0x5a6e4a60, 0xce0b6b31}, /* Headlights */
305 {0xdf36242c, 0x0705db59, 0xf1ddac9b, 0xcd1c8466, 0x4c73203b}, /* Rims */
306
307 /* Viewperf13/CATIA_car_04 */
308 {0x631da72a, 0xc971e849, 0xd6489a15, 0xf7c8dddb, 0xe8efd982}, /* Headlights */
309 {0x85984b88, 0xd16b8fee, 0x0d49d97b, 0x5f6cc66e, 0xadcafad9}, /* Rims */
310 {0xad023488, 0x09930735, 0xb0567e58, 0x336dce36, 0xe3c1e448}, /* Tires */
311 {0xdcc4a549, 0x587873fa, 0xeed94361, 0x9a47cbff, 0x846d0167}, /* Windows */
312 {0xfa0074a2, 0xef868430, 0x87935a0c, 0x19bc96be, 0xb5b95c74}, /* Body */
313 };
314
315 for (unsigned i = 0; i < ARRAY_SIZE(shader_sha1s); i++) {
316 if (_mesa_printed_sha1_equal(shader->info.source_sha1, shader_sha1s[i])) {
317 options.disallow_undef_to_nan = true;
318 break;
319 }
320 }
321
322 return nir_shader_instructions_pass(shader,
323 nir_opt_undef_instr,
324 nir_metadata_block_index |
325 nir_metadata_dominance,
326 &options);
327 }
328