• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/mesa-blake3.h"
27 #include <math.h>
28 
29 /** @file nir_opt_undef.c
30  *
31  * Handles optimization of operations involving ssa_undef.
32  */
33 
/**
 * Pass-wide options, handed to the per-instruction callbacks through their
 * `data` pointer.
 */
struct undef_options {
   /* When set, replace_ssa_undef() substitutes 0 for an undef even where
    * NaN would otherwise have been preferred.
    */
   bool disallow_undef_to_nan;
};
37 
38 /**
39  * Turn conditional selects between an undef and some other value into a move
40  * of that other value (on the assumption that the condition's going to be
41  * choosing the defined value).  This reduces work after if flattening when
42  * each side of the if is defining a variable.
43  */
44 static bool
opt_undef_csel(nir_builder * b,nir_alu_instr * instr)45 opt_undef_csel(nir_builder *b, nir_alu_instr *instr)
46 {
47    if (!nir_op_is_selection(instr->op))
48       return false;
49 
50    for (int i = 1; i <= 2; i++) {
51       if (!nir_src_is_undef(instr->src[i].src))
52          continue;
53 
54       b->cursor = nir_instr_remove(&instr->instr);
55       nir_def *mov = nir_mov_alu(b, instr->src[i == 1 ? 2 : 1],
56                                  instr->def.num_components);
57       nir_def_rewrite_uses(&instr->def, mov);
58 
59       return true;
60    }
61 
62    return false;
63 }
64 
65 static bool
op_is_mov_or_vec_or_pack_or_unpack(nir_op op)66 op_is_mov_or_vec_or_pack_or_unpack(nir_op op)
67 {
68    switch (op) {
69    case nir_op_pack_32_2x16:
70    case nir_op_pack_32_2x16_split:
71    case nir_op_pack_32_4x8:
72    case nir_op_pack_32_4x8_split:
73    case nir_op_pack_64_2x32:
74    case nir_op_pack_64_2x32_split:
75    case nir_op_pack_64_4x16:
76    case nir_op_unpack_32_2x16:
77    case nir_op_unpack_32_2x16_split_x:
78    case nir_op_unpack_32_2x16_split_y:
79    case nir_op_unpack_32_4x8:
80    case nir_op_unpack_64_2x32:
81    case nir_op_unpack_64_2x32_split_x:
82    case nir_op_unpack_64_2x32_split_y:
83    case nir_op_unpack_64_4x16:
84       return true;
85    default:
86       return nir_op_is_vec_or_mov(op);
87    }
88 }
89 
90 /**
91  * Replace vecN(undef, undef, ...) with a single undef.
92  */
93 static bool
opt_undef_vecN(nir_builder * b,nir_alu_instr * alu)94 opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
95 {
96    if (!op_is_mov_or_vec_or_pack_or_unpack(alu->op))
97       return false;
98 
99    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
100       if (!nir_src_is_undef(alu->src[i].src))
101          return false;
102    }
103 
104    b->cursor = nir_before_instr(&alu->instr);
105    nir_def *undef = nir_undef(b, alu->def.num_components,
106                               alu->def.bit_size);
107    nir_def_replace(&alu->def, undef);
108 
109    return true;
110 }
111 
112 static uint32_t
nir_get_undef_mask(nir_def * def)113 nir_get_undef_mask(nir_def *def)
114 {
115    nir_instr *instr = def->parent_instr;
116 
117    if (instr->type == nir_instr_type_undef)
118       return BITSET_MASK(def->num_components);
119 
120    if (instr->type != nir_instr_type_alu)
121       return 0;
122 
123    nir_alu_instr *alu = nir_instr_as_alu(instr);
124    unsigned undef = 0;
125 
126    /* nir_op_mov of undef is handled by opt_undef_vecN() */
127    if (nir_op_is_vec(alu->op)) {
128       for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
129          if (nir_src_is_undef(alu->src[i].src)) {
130             undef |= BITSET_MASK(nir_ssa_alu_instr_src_components(alu, i)) << i;
131          }
132       }
133    }
134 
135    return undef;
136 }
137 
138 /**
139  * Remove any store intrinsic writemask channels whose value is undefined (the
140  * existing value is a fine representation of "undefined").
141  */
142 static bool
opt_undef_store(nir_intrinsic_instr * intrin)143 opt_undef_store(nir_intrinsic_instr *intrin)
144 {
145    int arg_index;
146    switch (intrin->intrinsic) {
147    case nir_intrinsic_store_deref:
148       arg_index = 1;
149       break;
150    case nir_intrinsic_store_output:
151    case nir_intrinsic_store_per_vertex_output:
152    case nir_intrinsic_store_per_view_output:
153    case nir_intrinsic_store_per_primitive_output:
154    case nir_intrinsic_store_ssbo:
155    case nir_intrinsic_store_shared:
156    case nir_intrinsic_store_global:
157    case nir_intrinsic_store_scratch:
158       arg_index = 0;
159       break;
160    default:
161       return false;
162    }
163 
164    nir_def *def = intrin->src[arg_index].ssa;
165 
166    unsigned write_mask = nir_intrinsic_write_mask(intrin);
167    unsigned undef_mask = nir_get_undef_mask(def);
168 
169    if (!(write_mask & undef_mask))
170       return false;
171 
172    write_mask &= ~undef_mask;
173    if (!write_mask)
174       nir_instr_remove(&intrin->instr);
175    else
176       nir_intrinsic_set_write_mask(intrin, write_mask);
177 
178    return true;
179 }
180 
/**
 * Facts accumulated by visit_undef_use() over the uses of one undef.
 */
struct visit_info {
   /* Set when the undef feeds a source of an ALU instruction other than
    * mov/vec/pack/unpack.
    */
   bool replace_undef_with_constant;

   /* Set when such an ALU source is float-typed, excluding fmulz sources and
    * the first two sources of ffmaz.
    */
   bool prefer_nan;

   /* Set when a use is an if condition, a non-ALU instruction, or a
    * mov/vec/pack/unpack; the undef is then left untouched.
    */
   bool must_keep_undef;
};
186 
187 /**
188  * Analyze an undef use to see if replacing undef with a constant is
189  * beneficial.
190  */
191 static void
visit_undef_use(nir_src * src,struct visit_info * info)192 visit_undef_use(nir_src *src, struct visit_info *info)
193 {
194    if (nir_src_is_if(src)) {
195       /* If the use is "if", keep undef because the branch will be eliminated
196        * by nir_opt_dead_cf.
197        */
198       info->must_keep_undef = true;
199       return;
200    }
201 
202    nir_instr *instr = nir_src_parent_instr(src);
203 
204    if (instr->type == nir_instr_type_alu) {
205       /* Replacing undef with a constant is only beneficial with ALU
206        * instructions because it can eliminate them or simplify them.
207        */
208       nir_alu_instr *alu = nir_instr_as_alu(instr);
209 
210       /* opt_undef_vecN already copy propagated. */
211       if (op_is_mov_or_vec_or_pack_or_unpack(alu->op)) {
212          info->must_keep_undef = true;
213          return;
214       }
215 
216       unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
217 
218       for (unsigned i = 0; i < num_srcs; i++) {
219          if (&alu->src[i].src != src)
220             continue;
221 
222          info->replace_undef_with_constant = true;
223          if (nir_op_infos[alu->op].input_types[i] & nir_type_float &&
224              alu->op != nir_op_fmulz &&
225              (alu->op != nir_op_ffmaz || i == 2))
226             info->prefer_nan = true;
227       }
228    } else {
229       /* If the use is not ALU, don't replace undef. We need to preserve
230        * undef for stores and phis because those are handled differently,
231        * and replacing undef with a constant would result in worse code.
232        */
233       info->must_keep_undef = true;
234       return;
235    }
236 }
237 
238 /**
239  * Replace ssa_undef used by ALU opcodes with 0 or NaN, whichever eliminates
240  * more code.
241  *
242  * Replace it with NaN if an FP opcode uses undef, which will cause the opcode
243  * to be eliminated by nir_opt_algebraic. 0 would not eliminate the FP opcode.
244  */
245 static bool
replace_ssa_undef(nir_builder * b,nir_instr * instr,void * data)246 replace_ssa_undef(nir_builder *b, nir_instr *instr, void *data)
247 {
248    if (instr->type != nir_instr_type_undef)
249       return false;
250 
251    const struct undef_options *options = data;
252 
253    nir_undef_instr *undef = nir_instr_as_undef(instr);
254    struct visit_info info = {0};
255 
256    nir_foreach_use_including_if(src, &undef->def) {
257       visit_undef_use(src, &info);
258    }
259 
260    if (info.must_keep_undef || !info.replace_undef_with_constant)
261       return false;
262 
263    b->cursor = nir_before_instr(&undef->instr);
264    nir_def *replacement;
265 
266    /* If undef is used as float, replace it with NaN, which will
267     * eliminate all FP instructions that consume it. Else, replace it
268     * with 0, which is more likely to eliminate non-FP instructions.
269     */
270    if (info.prefer_nan && !options->disallow_undef_to_nan)
271       replacement = nir_imm_floatN_t(b, NAN, undef->def.bit_size);
272    else
273       replacement = nir_imm_intN_t(b, 0, undef->def.bit_size);
274 
275    if (undef->def.num_components > 1)
276       replacement = nir_replicate(b, replacement, undef->def.num_components);
277 
278    nir_def_replace(&undef->def, replacement);
279    return true;
280 }
281 
282 static bool
opt_undef_uses(nir_builder * b,nir_instr * instr,void * data)283 opt_undef_uses(nir_builder *b, nir_instr *instr, void *data)
284 {
285    if (instr->type == nir_instr_type_alu) {
286       nir_alu_instr *alu = nir_instr_as_alu(instr);
287       return opt_undef_csel(b, alu) ||
288              opt_undef_vecN(b, alu);
289    } else if (instr->type == nir_instr_type_intrinsic) {
290       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
291       return opt_undef_store(intrin);
292    }
293 
294    return false;
295 }
296 
297 bool
nir_opt_undef(nir_shader * shader)298 nir_opt_undef(nir_shader *shader)
299 {
300    struct undef_options options = {0};
301 
302    /* Disallow the undef->NaN transformation only for those shaders where
303     * it's known to break rendering. These are shader source BLAKE3s printed by
304     * nir_print_shader().
305     */
306    uint32_t shader_blake3s[][BLAKE3_OUT_LEN32] = {
307       /* gputest/gimark */
308       {0x582c214b, 0x25478275, 0xc9a835d2, 0x95c9b643, 0x69deae47, 0x213c7427, 0xa9da66a5, 0xac254ed2},
309 
310       /* Viewperf13/CATIA_car_01 */
311       {0x880dfa0f, 0x60e32201, 0xe3a89f59, 0xb1cc6f07, 0xcdbebe66, 0x20122aec, 0x83450d4e, 0x8f42843d}, /* Taillights */
312       {0x624e53bb, 0x8eb635ba, 0xb1e4ed9b, 0x651b0fec, 0x86fcf79a, 0xde0863fb, 0x09ce80c1, 0xd972e40f}, /* Grill */
313       {0x01a8db39, 0xfa175175, 0x621f7302, 0xfcde9177, 0x72d873bf, 0x048d38c1, 0xe669d2de, 0xaa6584af}, /* Headlights */
314       {0x32029770, 0xab295b41, 0x3f1daf07, 0x9dd9153e, 0xd598be73, 0xe555b2f3, 0x6e087eaf, 0x084d329c}, /* Rims */
315 
316       /* Viewperf13/CATIA_car_04 */
317       {0x55207b90, 0x08fa2f8f, 0x9db62464, 0xadba6570, 0xb6d5d962, 0xf434bff5, 0x46a34d64, 0x021bfb45}, /* Headlights */
318       {0x83fbdd6a, 0x231b027e, 0x6f142248, 0x2b3045de, 0xd2a4f460, 0x59dfb8d8, 0x6dbc00f9, 0xcca13143}, /* Rims */
319       {0x88ed3a0a, 0xf128d384, 0x8161fdac, 0xd10cb257, 0x5e63db2d, 0x56798b6f, 0x881e81ee, 0xa4e937d4}, /* Tires */
320       {0xbf84697c, 0x3bc75bb6, 0x9d012175, 0x2dd90bcf, 0x0562f0ed, 0x5aa80e62, 0xb5793ae3, 0x9127bcab}, /* Windows */
321       {0x47a3eb4b, 0x136f676d, 0x94045ed3, 0x57b00972, 0x8cda7550, 0x88327fda, 0x37f7cf37, 0x66db05e3}, /* Body */
322    };
323 
324    for (unsigned i = 0; i < ARRAY_SIZE(shader_blake3s); i++) {
325       if (_mesa_printed_blake3_equal(shader->info.source_blake3, shader_blake3s[i])) {
326          options.disallow_undef_to_nan = true;
327          break;
328       }
329    }
330 
331    if (shader->info.use_legacy_math_rules)
332       options.disallow_undef_to_nan = true;
333 
334    bool progress = nir_shader_instructions_pass(shader,
335                                                 opt_undef_uses,
336                                                 nir_metadata_control_flow,
337                                                 &options);
338    progress |= nir_shader_instructions_pass(shader,
339                                             replace_ssa_undef,
340                                             nir_metadata_control_flow,
341                                             &options);
342 
343    return progress;
344 }
345