• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Google LLC
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file
26  *
27  * Removes unused components of SSA defs.
28  *
29  * Due to various optimization passes (or frontend implementations,
30  * particularly prog_to_nir), we may have instructions generating vectors
31  * whose components don't get read by any instruction.
32  *
33  * For memory loads, while it can be tricky to eliminate unused low components
34  * or channels in the middle of a writemask (you might need to increment some
35  * offset from a load_uniform, for example), it is trivial to just drop the
36  * trailing components.
37  * For vector ALU and load_const, only used by other ALU instructions,
38  * this pass eliminates arbitrary channels as well as duplicate channels,
39  * and reswizzles the uses.
40  *
41  * This pass is probably only of use to vector backends -- scalar backends
42  * typically get unused def channel trimming by scalarizing and dead code
43  * elimination.
44  */
45 
46 #include "nir.h"
47 #include "nir_builder.h"
48 #include "util/u_math.h"
49 
/*
 * Round a channel count up to one that is representable in NIR. Valid
 * vector sizes are currently 1-5, 8, and 16; anything larger than 5 is
 * rounded up to the next power of two so the result passes validation.
 */
static unsigned
round_up_components(unsigned n)
{
   if (n <= 5)
      return n;

   /* Equivalent to util_next_power_of_two(n) for n > 5. */
   unsigned size = 8;
   while (size < n)
      size <<= 1;
   return size;
}
60 
61 static bool
shrink_dest_to_read_mask(nir_ssa_def * def)62 shrink_dest_to_read_mask(nir_ssa_def *def)
63 {
64    /* early out if there's nothing to do. */
65    if (def->num_components == 1)
66       return false;
67 
68    /* don't remove any channels if used by an intrinsic */
69    nir_foreach_use(use_src, def) {
70       if (use_src->parent_instr->type == nir_instr_type_intrinsic)
71          return false;
72    }
73 
74    unsigned mask = nir_ssa_def_components_read(def);
75    int last_bit = util_last_bit(mask);
76 
77    /* If nothing was read, leave it up to DCE. */
78    if (!mask)
79       return false;
80 
81    unsigned rounded = round_up_components(last_bit);
82    assert(rounded <= def->num_components);
83    last_bit = rounded;
84 
85    if (def->num_components > last_bit) {
86       def->num_components = last_bit;
87       return true;
88    }
89 
90    return false;
91 }
92 
93 static void
reswizzle_alu_uses(nir_ssa_def * def,uint8_t * reswizzle)94 reswizzle_alu_uses(nir_ssa_def *def, uint8_t *reswizzle)
95 {
96    nir_foreach_use(use_src, def) {
97       /* all uses must be ALU instructions */
98       assert(use_src->parent_instr->type == nir_instr_type_alu);
99       nir_alu_src *alu_src = (nir_alu_src*)use_src;
100 
101       /* reswizzle ALU sources */
102       for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
103          alu_src->swizzle[i] = reswizzle[alu_src->swizzle[i]];
104    }
105 }
106 
107 static bool
is_only_used_by_alu(nir_ssa_def * def)108 is_only_used_by_alu(nir_ssa_def *def)
109 {
110    nir_foreach_use(use_src, def) {
111       if (use_src->parent_instr->type != nir_instr_type_alu)
112          return false;
113    }
114 
115    return true;
116 }
117 
/*
 * Shrink a vecN instruction: drop channels nobody reads and deduplicate
 * channels holding the same (def, component) scalar, then build a smaller
 * vec and reswizzle all (ALU-only) uses to match. Returns true on change.
 */
static bool
opt_shrink_vector(nir_builder *b, nir_alu_instr *instr)
{
   nir_ssa_def *def = &instr->dest.dest.ssa;
   unsigned mask = nir_ssa_def_components_read(def);

   /* If nothing was read, leave it up to DCE. */
   if (mask == 0)
      return false;

   /* don't remove any channels if used by non-ALU */
   if (!is_only_used_by_alu(def))
      return false;

   /* reswizzle[old_channel] -> new_channel; srcs collects the surviving
    * unique scalars in order. */
   uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
   nir_ssa_scalar srcs[NIR_MAX_VEC_COMPONENTS] = { 0 };
   unsigned num_components = 0;
   for (unsigned i = 0; i < def->num_components; i++) {
      if (!((mask >> i) & 0x1))
         continue;

      /* vecN sources are scalar, so only swizzle[0] is meaningful. */
      nir_ssa_scalar scalar = nir_get_ssa_scalar(instr->src[i].src.ssa, instr->src[i].swizzle[0]);

      /* Try reuse a component with the same value */
      unsigned j;
      for (j = 0; j < num_components; j++) {
         if (scalar.def == srcs[j].def && scalar.comp == srcs[j].comp) {
            reswizzle[i] = j;
            break;
         }
      }

      /* Otherwise, just append the value */
      if (j == num_components) {
         srcs[num_components] = scalar;
         reswizzle[i] = num_components++;
      }
   }

   /* return if no component was removed */
   if (num_components == def->num_components)
      return false;

   /* create new vecN and replace uses. Note the order: rewrite uses to
    * point at new_vec first, then fix up their swizzles via the map. */
   nir_ssa_def *new_vec = nir_vec_scalars(b, srcs, num_components);
   nir_ssa_def_rewrite_uses(def, new_vec);
   reswizzle_alu_uses(new_vec, reswizzle);

   return true;
}
168 
/*
 * Shrink the destination of a vector ALU instruction to its read channels,
 * compacting the source swizzles in place and reswizzling all uses.
 * vecN ops are delegated to opt_shrink_vector. Returns true on change.
 */
static bool
opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
{
   nir_ssa_def *def = &instr->dest.dest.ssa;

   /* Nothing to shrink */
   if (def->num_components == 1)
      return false;

   switch (instr->op) {
      /* don't use nir_op_is_vec() as not all vector sizes are supported. */
      case nir_op_vec4:
      case nir_op_vec3:
      case nir_op_vec2:
         return opt_shrink_vector(b, instr);
      default:
         /* Ops with a fixed output size can't be per-channel shrunk. */
         if (nir_op_infos[instr->op].output_size != 0)
            return false;
         break;
   }

   /* don't remove any channels if used by non-ALU */
   if (!is_only_used_by_alu(def))
      return false;

   unsigned mask = nir_ssa_def_components_read(def);
   unsigned last_bit = util_last_bit(mask);
   unsigned num_components = util_bitcount(mask);

   unsigned rounded = round_up_components(num_components);
   assert(rounded <= def->num_components);
   num_components = rounded;

   /* return, if there is nothing to do */
   if (mask == 0 || num_components == def->num_components)
      return false;

   /* If the read channels are already densely packed at the bottom
    * (mask is a prefix bitfield), no reswizzling is needed. */
   const bool is_bitfield_mask = last_bit == num_components;
   if (is_bitfield_mask) {
      /* just reduce the number of components and return */
      def->num_components = num_components;
      instr->dest.write_mask = mask;
      return true;
   }

   uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
   unsigned index = 0;
   for (unsigned i = 0; i < last_bit; i++) {
      /* skip unused components */
      if (!((mask >> i) & 0x1))
         continue;

      /* reswizzle the sources: compact channel i down to slot `index`.
       * Safe in place because index <= i always holds here. */
      for (int k = 0; k < nir_op_infos[instr->op].num_inputs; k++) {
         instr->src[k].swizzle[index] = instr->src[k].swizzle[i];
         reswizzle[i] = index;
      }
      index++;
   }
   assert(index == num_components);

   /* update dest */
   def->num_components = num_components;
   instr->dest.write_mask = BITFIELD_MASK(num_components);

   /* update uses */
   reswizzle_alu_uses(def, reswizzle);

   return true;
}
239 
240 static bool
opt_shrink_vectors_intrinsic(nir_builder * b,nir_intrinsic_instr * instr)241 opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
242 {
243    switch (instr->intrinsic) {
244    case nir_intrinsic_load_uniform:
245    case nir_intrinsic_load_ubo:
246    case nir_intrinsic_load_input:
247    case nir_intrinsic_load_input_vertex:
248    case nir_intrinsic_load_per_vertex_input:
249    case nir_intrinsic_load_interpolated_input:
250    case nir_intrinsic_load_ssbo:
251    case nir_intrinsic_load_push_constant:
252    case nir_intrinsic_load_constant:
253    case nir_intrinsic_load_shared:
254    case nir_intrinsic_load_global:
255    case nir_intrinsic_load_global_constant:
256    case nir_intrinsic_load_kernel_input:
257    case nir_intrinsic_load_scratch:
258       break;
259    default:
260       return false;
261    }
262 
263    /* Must be a vectorized intrinsic that we can resize. */
264    assert(instr->num_components != 0);
265 
266    /* Trim the dest to the used channels */
267    if (shrink_dest_to_read_mask(&instr->dest.ssa)) {
268       instr->num_components = instr->dest.ssa.num_components;
269       return true;
270    }
271 
272    return false;
273 }
274 
/*
 * Shrink a load_const: drop unread channels and deduplicate channels with
 * identical constant values, compacting instr->value[] in place and
 * reswizzling all (ALU-only) uses. Returns true on change.
 */
static bool
opt_shrink_vectors_load_const(nir_load_const_instr *instr)
{
   nir_ssa_def *def = &instr->def;

   /* early out if there's nothing to do. */
   if (def->num_components == 1)
      return false;

   /* don't remove any channels if used by non-ALU */
   if (!is_only_used_by_alu(def))
      return false;

   unsigned mask = nir_ssa_def_components_read(def);

   /* If nothing was read, leave it up to DCE. */
   if (!mask)
      return false;

   /* reswizzle[old_channel] -> new_channel. Values are compacted in place;
    * safe because the write index never exceeds the read index. */
   uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
   unsigned num_components = 0;
   for (unsigned i = 0; i < def->num_components; i++) {
      if (!((mask >> i) & 0x1))
         continue;

      /* Try reuse a component with the same constant */
      unsigned j;
      for (j = 0; j < num_components; j++) {
         if (instr->value[i].u64 == instr->value[j].u64) {
            reswizzle[i] = j;
            break;
         }
      }

      /* Otherwise, just append the value */
      if (j == num_components) {
         instr->value[num_components] = instr->value[i];
         reswizzle[i] = num_components++;
      }
   }

   /* Pad to a vector size NIR supports; the padding channels are never
    * read since all uses are reswizzled into the compacted range. */
   unsigned rounded = round_up_components(num_components);
   assert(rounded <= def->num_components);
   num_components = rounded;

   if (num_components == def->num_components)
      return false;

   def->num_components = num_components;
   reswizzle_alu_uses(def, reswizzle);

   return true;
}
328 
329 static bool
opt_shrink_vectors_ssa_undef(nir_ssa_undef_instr * instr)330 opt_shrink_vectors_ssa_undef(nir_ssa_undef_instr *instr)
331 {
332    return shrink_dest_to_read_mask(&instr->def);
333 }
334 
335 static bool
opt_shrink_vectors_instr(nir_builder * b,nir_instr * instr)336 opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr)
337 {
338    b->cursor = nir_before_instr(instr);
339 
340    switch (instr->type) {
341    case nir_instr_type_alu:
342       return opt_shrink_vectors_alu(b, nir_instr_as_alu(instr));
343 
344    case nir_instr_type_intrinsic:
345       return opt_shrink_vectors_intrinsic(b, nir_instr_as_intrinsic(instr));
346 
347    case nir_instr_type_load_const:
348       return opt_shrink_vectors_load_const(nir_instr_as_load_const(instr));
349 
350    case nir_instr_type_ssa_undef:
351       return opt_shrink_vectors_ssa_undef(nir_instr_as_ssa_undef(instr));
352 
353    default:
354       return false;
355    }
356 
357    return true;
358 }
359 
360 bool
nir_opt_shrink_vectors(nir_shader * shader)361 nir_opt_shrink_vectors(nir_shader *shader)
362 {
363    bool progress = false;
364 
365    nir_foreach_function(function, shader) {
366       if (!function->impl)
367          continue;
368 
369       nir_builder b;
370       nir_builder_init(&b, function->impl);
371 
372       nir_foreach_block_reverse(block, function->impl) {
373          nir_foreach_instr_reverse(instr, block) {
374             progress |= opt_shrink_vectors_instr(&b, instr);
375          }
376       }
377 
378       if (progress) {
379          nir_metadata_preserve(function->impl,
380                                nir_metadata_block_index |
381                                nir_metadata_dominance);
382       } else {
383          nir_metadata_preserve(function->impl, nir_metadata_all);
384       }
385    }
386 
387    return progress;
388 }
389