• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 
27 /*
28  * This pass tries to reduce the bitsize of phi instructions by either
29  * moving narrowing conversions from the phi's consumers to the phi's
30  * sources, if all the uses of the phi are equivalent narrowing
31  * instructions.  In other words, convert:
32  *
33  *    vec1 32 ssa_124 = load_const (0x00000000)
34  *    ...
35  *    loop {
36  *        ...
37  *        vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
38  *        vec1 16 ssa_8 = i2imp ssa_155
39  *        ...
40  *        vec1 32 ssa_53 = i2i32 ssa_52
41  *    }
42  *
43  * into:
44  *
45  *    vec1 32 ssa_124 = load_const (0x00000000)
46  *    vec1 16 ssa_156 = i2imp ssa_124
47  *    ...
48  *    loop {
49  *        ...
50  *        vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
51  *        ...
52  *        vec1 32 ssa_53 = i2i32 ssa_52
53  *        vec1 16 ssa_157 = i2i16 ssa_53
54  *    }
55  *
 * Or failing that, tries to push widening conversions of the phi srcs to
 * the phi def.  In this case, since load_const is frequently one of the
 * phi sources, this pass checks whether it can be narrowed without a
 * loss of precision:
60  *
61  *    vec1 32 ssa_0 = load_const (0x00000000)
62  *    ...
63  *    loop {
64  *        ...
65  *        vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
66  *        ...
67  *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
68  *        vec1 32 ssa_19 = i2i32 ssa_18
69  *    }
70  *
71  * into:
72  *
73  *    vec1 32 ssa_0 = load_const (0x00000000)
74  *    vec1 16 ssa_22 = i2i16 ssa_0
75  *    ...
76  *    loop {
77  *        ...
78  *        vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
79  *        vec1 32 ssa_23 = i2i32 ssa_8
80  *        ...
81  *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
82  *    }
83  *
 * Note that either transformation can convert x2ymp into x2y16, which
85  * is normally done later in nir_opt_algebraic_late(), losing the option
86  * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
87  * cannot see through phis.
88  */
89 
90 #define INVALID_OP nir_num_opcodes
91 
92 /**
93  * Get the corresponding exact conversion for a x2ymp conversion
94  */
95 static nir_op
concrete_conversion(nir_op op)96 concrete_conversion(nir_op op)
97 {
98    switch (op) {
99    case nir_op_i2imp: return nir_op_i2i16;
100    case nir_op_i2fmp: return nir_op_i2f16;
101    case nir_op_u2fmp: return nir_op_u2f16;
102    case nir_op_f2fmp: return nir_op_f2f16;
103    case nir_op_f2imp: return nir_op_f2i16;
104    case nir_op_f2ump: return nir_op_f2u16;
105    default:           return op;
106    }
107 }
108 
109 static nir_op
narrowing_conversion_op(nir_instr * instr,nir_op current_op)110 narrowing_conversion_op(nir_instr *instr, nir_op current_op)
111 {
112    if (instr->type != nir_instr_type_alu)
113       return INVALID_OP;
114 
115    nir_op op = nir_instr_as_alu(instr)->op;
116    switch (op) {
117    case nir_op_i2imp:
118    case nir_op_i2i16:
119    case nir_op_i2fmp:
120    case nir_op_i2f16:
121    case nir_op_u2fmp:
122    case nir_op_u2f16:
123    case nir_op_f2fmp:
124    case nir_op_f2f16:
125    case nir_op_f2imp:
126    case nir_op_f2i16:
127    case nir_op_f2ump:
128    case nir_op_f2u16:
129    case nir_op_f2f16_rtne:
130    case nir_op_f2f16_rtz:
131       break;
132    default:
133       return INVALID_OP;
134    }
135 
136    /* If we've already picked a conversion op from a previous phi use,
137     * make sure it is compatible with the current use
138     */
139    if (current_op != INVALID_OP) {
140       if (current_op != op) {
141          /* If we have different conversions, but one can be converted
142           * to the other, then let's do that:
143           */
144          if (concrete_conversion(current_op) == concrete_conversion(op)) {
145             op = concrete_conversion(op);
146          } else {
147             return INVALID_OP;
148          }
149       }
150    }
151 
152    return op;
153 }
154 
155 static nir_op
widening_conversion_op(nir_instr * instr,unsigned * bit_size)156 widening_conversion_op(nir_instr *instr, unsigned *bit_size)
157 {
158    if (instr->type != nir_instr_type_alu)
159       return INVALID_OP;
160 
161    nir_alu_instr *alu = nir_instr_as_alu(instr);
162    switch (alu->op) {
163    case nir_op_i2i32:
164    case nir_op_i2f32:
165    case nir_op_u2f32:
166    case nir_op_f2f32:
167    case nir_op_f2i32:
168    case nir_op_f2u32:
169       break;
170    default:
171       return INVALID_OP;
172    }
173 
174    *bit_size = nir_src_bit_size(alu->src[0].src);
175 
176    /* We also need to check that the conversion's dest was actually
177     * wider:
178     */
179    if (nir_dest_bit_size(alu->dest.dest) <= *bit_size)
180       return INVALID_OP;
181 
182    return alu->op;
183 }
184 
185 static nir_alu_type
op_to_type(nir_op op)186 op_to_type(nir_op op)
187 {
188    return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
189 }
190 
/* Try to move narrowing instructions consuming the phi into the phi's
 * sources to reduce the phi's precision:
 *
 * Succeeds only when every (non-if) use of the phi is the same (or a
 * compatible) narrowing conversion.  On success, a new 16b phi is created
 * whose sources are narrowed copies of the original sources, and the old
 * conversions' uses are redirected to the new phi.  Returns true on
 * progress.
 */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same conversion?  (op accumulates the chosen
    * conversion across uses; see narrowing_conversion_op.)
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      op = narrowing_conversion_op(use->parent_instr, op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* an if_uses means the phi is used directly in a conditional, ie.
    * without a conversion
    */
   if (!list_is_empty(&phi->dest.ssa.if_uses))
      return false;

   /* If the phi has no uses, then nothing to do (op was never set): */
   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction; its bit size is the width of
    * the chosen conversion's output type (16b):
    */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     nir_alu_type_get_type_size(nir_op_infos[op].output_type),
                     NULL);

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      /* insert new conversion instr in block of original phi src, after
       * any phis at the top of that block:
       */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_ssa_def *old_src = src->src.ssa;
      /* op is unary, so only the first src is used: */
      nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* and add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And finally rewrite the original uses of the original phi uses to
    * directly use the new phi, skipping the conversion out of the orig
    * phi.  This leaves the old conversion instructions without uses.
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      /* We've previously established that all the uses were alu
       * conversion ops:
       */
      nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);

      assert(alu->dest.dest.is_ssa);

      nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, &new_phi->dest.ssa);
   }

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}
267 
268 static bool
can_convert_load_const(nir_load_const_instr * lc,nir_op op)269 can_convert_load_const(nir_load_const_instr *lc, nir_op op)
270 {
271    nir_alu_type type = op_to_type(op);
272 
273    /* Note that we only handle phi's with bit_size == 32: */
274    assert(lc->def.bit_size == 32);
275 
276    for (unsigned i = 0; i < lc->def.num_components; i++) {
277       switch (type) {
278       case nir_type_int:
279          if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
280             return false;
281          break;
282       case nir_type_uint:
283          if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
284             return false;
285          break;
286       case nir_type_float:
287          if (lc->value[i].f32 != _mesa_half_to_float(
288                _mesa_float_to_half(lc->value[i].f32)))
289             return false;
290          break;
291       default:
292          unreachable("bad type");
293          return false;
294       }
295    }
296 
297    return true;
298 }
299 
300 /* Check all the phi sources to see if they are the same widening op, in
301  * which case we can push the widening op to the other side of the phi
302  */
303 static nir_op
find_widening_op(nir_phi_instr * phi,unsigned * bit_size)304 find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
305 {
306    nir_op op = INVALID_OP;
307 
308    bool has_load_const = false;
309    *bit_size = 0;
310 
311    nir_foreach_phi_src (src, phi) {
312       assert(src->src.is_ssa);
313 
314       nir_instr *instr = src->src.ssa->parent_instr;
315       if (instr->type == nir_instr_type_load_const) {
316          has_load_const = true;
317          continue;
318       }
319 
320       unsigned src_bit_size;
321       nir_op src_op = widening_conversion_op(instr, &src_bit_size);
322 
323       /* Not a widening conversion: */
324       if (src_op == INVALID_OP)
325          return INVALID_OP;
326 
327       /* If it is a widening conversion, it needs to be the same op as
328        * other phi sources:
329        */
330       if ((op != INVALID_OP) && (op != src_op))
331          return INVALID_OP;
332 
333       if (*bit_size && (*bit_size != src_bit_size))
334          return INVALID_OP;
335 
336       op = src_op;
337       *bit_size = src_bit_size;
338    }
339 
340    if ((op == INVALID_OP) || !has_load_const)
341       return op;
342 
343    /* If we could otherwise move widening sources, but load_const is
344     * one of the phi sources (and does not have a widening conversion,
345     * but could have a narrowing->widening sequence inserted without
346     * loss of precision), then we could insert a narrowing->widening
347     * sequence to make the rest of the transformation possible:
348     */
349    nir_foreach_phi_src (src, phi) {
350       assert(src->src.is_ssa);
351 
352       nir_instr *instr = src->src.ssa->parent_instr;
353       if (instr->type != nir_instr_type_load_const)
354          continue;
355 
356       if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
357          return INVALID_OP;
358    }
359 
360    return op;
361 }
362 
/* Try to move widening conversions into the phi to the phi's output
 * to reduce the phi's precision:
 *
 * Succeeds only when every phi source is the same widening conversion
 * (or a load_const that can be losslessly narrowed; see
 * find_widening_op).  On success, a new narrow phi is created from the
 * pre-conversion values, a single widening conversion is emitted after
 * it, and the original phi's uses are redirected to that conversion.
 * Returns true on progress.
 */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction at the narrower
    * (pre-conversion) bit size:
    */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     bit_size, NULL);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      nir_ssa_def *new_src;

      /* any new instruction goes right after the original source: */
      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* if the src is a load_const, we've already verified that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* at this point we know the phi source is a widening
          * conversion (established by find_widening_op):
          */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = nir_dest_num_components(alu->dest.dest);
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi
    * (and after any other phis at the top of the block),
    * and re-write the original phi's uses
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_ssa_def *def = nir_build_alu(b, op, &new_phi->dest.ssa, NULL, NULL, NULL);

   nir_ssa_def_rewrite_uses(&phi->dest.ssa, def);

   return true;
}
437 
438 static bool
lower_phi(nir_builder * b,nir_phi_instr * phi)439 lower_phi(nir_builder *b, nir_phi_instr *phi)
440 {
441    bool progress = try_move_narrowing_dst(b, phi);
442    if (!progress)
443       progress = try_move_widening_src(b, phi);
444    return progress;
445 }
446 
447 bool
nir_opt_phi_precision(nir_shader * shader)448 nir_opt_phi_precision(nir_shader *shader)
449 {
450    bool progress = false;
451 
452    /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
453    unsigned bit_sizes_used = shader->info.bit_sizes_float |
454                              shader->info.bit_sizes_int;
455 
456    if (!bit_sizes_used) {
457       nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
458       bit_sizes_used = shader->info.bit_sizes_float |
459                        shader->info.bit_sizes_int;
460    }
461 
462    if (!(bit_sizes_used & (8 | 16)))
463       return false;
464 
465    nir_foreach_function(function, shader) {
466       if (!function->impl)
467          continue;
468 
469       nir_builder b;
470       nir_builder_init(&b, function->impl);
471 
472       nir_foreach_block (block, function->impl) {
473          nir_foreach_instr_safe (instr, block) {
474             if (instr->type != nir_instr_type_phi)
475                break;
476 
477             progress |= lower_phi(&b, nir_instr_as_phi(instr));
478          }
479       }
480 
481       if (progress) {
482          nir_metadata_preserve(function->impl,
483                                nir_metadata_block_index |
484                                nir_metadata_dominance);
485       } else {
486          nir_metadata_preserve(function->impl, nir_metadata_all);
487       }
488    }
489 
490    return progress;
491 }
492 
493