• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/mesa-sha1.h"
27 #include <math.h>
28 
29 /** @file nir_opt_undef.c
30  *
31  * Handles optimization of operations involving ssa_undef.
32  */
33 
/**
 * Options shared by every instruction visited during one run of the pass.
 */
struct undef_options {
   /* When true, replace_ssa_undef() never materializes NaN and falls back
    * to the integer constant 0 instead.
    */
   bool disallow_undef_to_nan;
};
37 
38 /**
39  * Turn conditional selects between an undef and some other value into a move
40  * of that other value (on the assumption that the condition's going to be
41  * choosing the defined value).  This reduces work after if flattening when
42  * each side of the if is defining a variable.
43  */
44 static bool
opt_undef_csel(nir_builder * b,nir_alu_instr * instr)45 opt_undef_csel(nir_builder *b, nir_alu_instr *instr)
46 {
47    if (!nir_op_is_selection(instr->op))
48       return false;
49 
50    for (int i = 1; i <= 2; i++) {
51       nir_instr *parent = instr->src[i].src.ssa->parent_instr;
52       if (parent->type != nir_instr_type_undef)
53          continue;
54 
55       b->cursor = nir_instr_remove(&instr->instr);
56       nir_def *mov = nir_mov_alu(b, instr->src[i == 1 ? 2 : 1],
57                                  instr->def.num_components);
58       nir_def_rewrite_uses(&instr->def, mov);
59 
60       return true;
61    }
62 
63    return false;
64 }
65 
66 /**
67  * Replace vecN(undef, undef, ...) with a single undef.
68  */
69 static bool
opt_undef_vecN(nir_builder * b,nir_alu_instr * alu)70 opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
71 {
72    if (!nir_op_is_vec_or_mov(alu->op))
73       return false;
74 
75    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
76       if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_undef)
77          return false;
78    }
79 
80    b->cursor = nir_before_instr(&alu->instr);
81    nir_def *undef = nir_undef(b, alu->def.num_components,
82                               alu->def.bit_size);
83    nir_def_rewrite_uses(&alu->def, undef);
84 
85    return true;
86 }
87 
88 static uint32_t
nir_get_undef_mask(nir_def * def)89 nir_get_undef_mask(nir_def *def)
90 {
91    nir_instr *instr = def->parent_instr;
92 
93    if (instr->type == nir_instr_type_undef)
94       return BITSET_MASK(def->num_components);
95 
96    if (instr->type != nir_instr_type_alu)
97       return 0;
98 
99    nir_alu_instr *alu = nir_instr_as_alu(instr);
100    unsigned undef = 0;
101 
102    /* nir_op_mov of undef is handled by opt_undef_vecN() */
103    if (nir_op_is_vec(alu->op)) {
104       for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
105          if (alu->src[i].src.ssa->parent_instr->type ==
106              nir_instr_type_undef) {
107             undef |= BITSET_MASK(nir_ssa_alu_instr_src_components(alu, i)) << i;
108          }
109       }
110    }
111 
112    return undef;
113 }
114 
115 /**
116  * Remove any store intrinsic writemask channels whose value is undefined (the
117  * existing value is a fine representation of "undefined").
118  */
119 static bool
opt_undef_store(nir_intrinsic_instr * intrin)120 opt_undef_store(nir_intrinsic_instr *intrin)
121 {
122    int arg_index;
123    switch (intrin->intrinsic) {
124    case nir_intrinsic_store_deref:
125       arg_index = 1;
126       break;
127    case nir_intrinsic_store_output:
128    case nir_intrinsic_store_per_vertex_output:
129    case nir_intrinsic_store_per_primitive_output:
130    case nir_intrinsic_store_ssbo:
131    case nir_intrinsic_store_shared:
132    case nir_intrinsic_store_global:
133    case nir_intrinsic_store_scratch:
134       arg_index = 0;
135       break;
136    default:
137       return false;
138    }
139 
140    nir_def *def = intrin->src[arg_index].ssa;
141 
142    unsigned write_mask = nir_intrinsic_write_mask(intrin);
143    unsigned undef_mask = nir_get_undef_mask(def);
144 
145    if (!(write_mask & undef_mask))
146       return false;
147 
148    write_mask &= ~undef_mask;
149    if (!write_mask)
150       nir_instr_remove(&intrin->instr);
151    else
152       nir_intrinsic_set_write_mask(intrin, write_mask);
153 
154    return true;
155 }
156 
/**
 * Facts gathered by visit_undef_use() while walking the uses of one undef.
 */
struct visit_info {
   /* Some ALU use (other than mov/vec, or a select's src1/src2) reads the
    * undef.
    */
   bool replace_undef_with_constant;

   /* At least one such ALU use reads the undef as a float operand, with
    * fmulz and the first two sources of ffmaz excluded.
    */
   bool prefer_nan;

   /* The undef reaches an if condition or a non-ALU instruction and must
    * therefore be left alone.
    */
   bool must_keep_undef;
};
162 
163 /**
164  * Analyze an undef use to see if replacing undef with a constant is
165  * beneficial.
166  */
167 static void
visit_undef_use(nir_src * src,struct visit_info * info)168 visit_undef_use(nir_src *src, struct visit_info *info)
169 {
170    if (nir_src_is_if(src)) {
171       /* If the use is "if", keep undef because the branch will be eliminated
172        * by nir_opt_dead_cf.
173        */
174       info->must_keep_undef = true;
175       return;
176    }
177 
178    nir_instr *instr = nir_src_parent_instr(src);
179 
180    if (instr->type == nir_instr_type_alu) {
181       /* Replacing undef with a constant is only beneficial with ALU
182        * instructions because it can eliminate them or simplify them.
183        */
184       nir_alu_instr *alu = nir_instr_as_alu(instr);
185 
186       /* Follow movs and vecs.
187        *
188        * Note that all vector component uses are followed and swizzles are
189        * ignored.
190        */
191       if (alu->op == nir_op_mov || nir_op_is_vec(alu->op)) {
192          nir_foreach_use_including_if(next_src, &alu->def) {
193             visit_undef_use(next_src, info);
194          }
195          return;
196       }
197 
198       unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
199 
200       for (unsigned i = 0; i < num_srcs; i++) {
201          if (&alu->src[i].src != src)
202             continue;
203 
204          if (nir_op_is_selection(alu->op) && i != 0) {
205             /* nir_opt_algebraic can eliminate a select opcode only if src0 is
206              * a constant. If the undef use is src1 or src2, it will be
207              * handled by opt_undef_csel.
208              */
209             continue;
210          }
211 
212          info->replace_undef_with_constant = true;
213          if (nir_op_infos[alu->op].input_types[i] & nir_type_float &&
214              alu->op != nir_op_fmulz &&
215              (alu->op != nir_op_ffmaz || i == 2))
216             info->prefer_nan = true;
217       }
218    } else {
219       /* If the use is not ALU, don't replace undef. We need to preserve
220        * undef for stores and phis because those are handled differently,
221        * and replacing undef with a constant would result in worse code.
222        */
223       info->must_keep_undef = true;
224       return;
225    }
226 }
227 
228 /**
229  * Replace ssa_undef used by ALU opcodes with 0 or NaN, whichever eliminates
230  * more code.
231  *
232  * Replace it with NaN if an FP opcode uses undef, which will cause the opcode
233  * to be eliminated by nir_opt_algebraic. 0 would not eliminate the FP opcode.
234  */
235 static bool
replace_ssa_undef(nir_builder * b,nir_instr * instr,const struct undef_options * options)236 replace_ssa_undef(nir_builder *b, nir_instr *instr,
237                   const struct undef_options *options)
238 {
239    nir_undef_instr *undef = nir_instr_as_undef(instr);
240    struct visit_info info = {0};
241 
242    nir_foreach_use_including_if(src, &undef->def) {
243       visit_undef_use(src, &info);
244    }
245 
246    if (info.must_keep_undef || !info.replace_undef_with_constant)
247       return false;
248 
249    b->cursor = nir_before_instr(&undef->instr);
250    nir_def *replacement;
251 
252    /* If undef is used as float, replace it with NaN, which will
253     * eliminate all FP instructions that consume it. Else, replace it
254     * with 0, which is more likely to eliminate non-FP instructions.
255     */
256    if (info.prefer_nan && !options->disallow_undef_to_nan)
257       replacement = nir_imm_floatN_t(b, NAN, undef->def.bit_size);
258    else
259       replacement = nir_imm_intN_t(b, 0, undef->def.bit_size);
260 
261    if (undef->def.num_components > 1)
262       replacement = nir_replicate(b, replacement, undef->def.num_components);
263 
264    nir_def_rewrite_uses_after(&undef->def, replacement, &undef->instr);
265    nir_instr_remove(&undef->instr);
266    return true;
267 }
268 
269 static bool
nir_opt_undef_instr(nir_builder * b,nir_instr * instr,void * data)270 nir_opt_undef_instr(nir_builder *b, nir_instr *instr, void *data)
271 {
272    const struct undef_options *options = data;
273 
274    if (instr->type == nir_instr_type_undef) {
275       return replace_ssa_undef(b, instr, options);
276    } else if (instr->type == nir_instr_type_alu) {
277       nir_alu_instr *alu = nir_instr_as_alu(instr);
278       return opt_undef_csel(b, alu) ||
279              opt_undef_vecN(b, alu);
280    } else if (instr->type == nir_instr_type_intrinsic) {
281       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
282       return opt_undef_store(intrin);
283    }
284 
285    return false;
286 }
287 
288 bool
nir_opt_undef(nir_shader * shader)289 nir_opt_undef(nir_shader *shader)
290 {
291    struct undef_options options = {0};
292 
293    /* Disallow the undef->NaN transformation only for those shaders where
294     * it's known to break rendering. These are shader source SHA1s printed by
295     * nir_print_shader().
296     */
297    uint32_t shader_sha1s[][SHA1_DIGEST_LENGTH32] = {
298       /* gputest/gimark */
299       {0x9a1af9e2, 0x68f185bf, 0x11fc1257, 0x1102e80b, 0x5ca350fa},
300 
301       /* Viewperf13/CATIA_car_01 */
302       {0x4746a4a4, 0xe3b27d27, 0xe6d2b0fb, 0xb7e9ceb3, 0x973e6152}, /* Taillights */
303       {0xc49cc90d, 0xd7208212, 0x726502ea, 0xe1fe62c0, 0xb62fbd1f}, /* Grill */
304       {0xde23f35b, 0xb6fa45ae, 0x96da7e6b, 0x5a6e4a60, 0xce0b6b31}, /* Headlights */
305       {0xdf36242c, 0x0705db59, 0xf1ddac9b, 0xcd1c8466, 0x4c73203b}, /* Rims */
306 
307       /* Viewperf13/CATIA_car_04 */
308       {0x631da72a, 0xc971e849, 0xd6489a15, 0xf7c8dddb, 0xe8efd982}, /* Headlights */
309       {0x85984b88, 0xd16b8fee, 0x0d49d97b, 0x5f6cc66e, 0xadcafad9}, /* Rims */
310       {0xad023488, 0x09930735, 0xb0567e58, 0x336dce36, 0xe3c1e448}, /* Tires */
311       {0xdcc4a549, 0x587873fa, 0xeed94361, 0x9a47cbff, 0x846d0167}, /* Windows */
312       {0xfa0074a2, 0xef868430, 0x87935a0c, 0x19bc96be, 0xb5b95c74}, /* Body */
313    };
314 
315    for (unsigned i = 0; i < ARRAY_SIZE(shader_sha1s); i++) {
316       if (_mesa_printed_sha1_equal(shader->info.source_sha1, shader_sha1s[i])) {
317          options.disallow_undef_to_nan = true;
318          break;
319       }
320    }
321 
322    return nir_shader_instructions_pass(shader,
323                                        nir_opt_undef_instr,
324                                        nir_metadata_block_index |
325                                        nir_metadata_dominance,
326                                        &options);
327 }
328