• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 
27 /*
28  * This pass tries to reduce the bitsize of phi instructions by either
29  * moving narrowing conversions from the phi's consumers to the phi's
30  * sources, if all the uses of the phi are equivalent narrowing
31  * instructions.  In other words, convert:
32  *
33  *    vec1 32 ssa_124 = load_const (0x00000000)
34  *    ...
35  *    loop {
36  *        ...
37  *        vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
38  *        vec1 16 ssa_8 = i2imp ssa_155
39  *        ...
40  *        vec1 32 ssa_53 = i2i32 ssa_52
41  *    }
42  *
43  * into:
44  *
45  *    vec1 32 ssa_124 = load_const (0x00000000)
46  *    vec1 16 ssa_156 = i2imp ssa_124
47  *    ...
48  *    loop {
49  *        ...
50  *        vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
51  *        ...
52  *        vec1 32 ssa_53 = i2i32 ssa_52
53  *        vec1 16 ssa_157 = i2i16 ssa_53
54  *    }
55  *
56  * Or failing that, tries to push widening conversion of phi srcs to
57  * the phi def.  In this case, since load_const is frequently one
 * of the phi sources this pass checks if it can be narrowed without a
59  * loss of precision:
60  *
61  *    vec1 32 ssa_0 = load_const (0x00000000)
62  *    ...
63  *    loop {
64  *        ...
65  *        vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
66  *        ...
67  *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
68  *        vec1 32 ssa_19 = i2i32 ssa_18
69  *    }
70  *
71  * into:
72  *
73  *    vec1 32 ssa_0 = load_const (0x00000000)
74  *    vec1 16 ssa_22 = i2i16 ssa_0
75  *    ...
76  *    loop {
77  *        ...
78  *        vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
79  *        vec1 32 ssa_23 = i2i32 ssa_8
80  *        ...
81  *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
82  *    }
83  *
 * Note that either transformation can convert x2ymp into x2y16, which
85  * is normally done later in nir_opt_algebraic_late(), losing the option
86  * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
87  * cannot see through phis.
88  */
89 
90 #define INVALID_OP nir_num_opcodes
91 
92 /**
93  * Get the corresponding exact conversion for a x2ymp conversion
94  */
95 static nir_op
concrete_conversion(nir_op op)96 concrete_conversion(nir_op op)
97 {
98    switch (op) {
99    case nir_op_i2imp: return nir_op_i2i16;
100    case nir_op_i2fmp: return nir_op_i2f16;
101    case nir_op_u2fmp: return nir_op_u2f16;
102    case nir_op_f2fmp: return nir_op_f2f16;
103    case nir_op_f2imp: return nir_op_f2i16;
104    case nir_op_f2ump: return nir_op_f2u16;
105    default:           return op;
106    }
107 }
108 
109 static nir_op
narrowing_conversion_op(nir_instr * instr,nir_op current_op)110 narrowing_conversion_op(nir_instr *instr, nir_op current_op)
111 {
112    if (instr->type != nir_instr_type_alu)
113       return INVALID_OP;
114 
115    nir_op op = nir_instr_as_alu(instr)->op;
116    switch (op) {
117    case nir_op_i2imp:
118    case nir_op_i2i16:
119    case nir_op_i2fmp:
120    case nir_op_i2f16:
121    case nir_op_u2fmp:
122    case nir_op_u2f16:
123    case nir_op_f2fmp:
124    case nir_op_f2f16:
125    case nir_op_f2imp:
126    case nir_op_f2i16:
127    case nir_op_f2ump:
128    case nir_op_f2u16:
129    case nir_op_f2f16_rtne:
130    case nir_op_f2f16_rtz:
131       break;
132    default:
133       return INVALID_OP;
134    }
135 
136    /* If we've already picked a conversion op from a previous phi use,
137     * make sure it is compatible with the current use
138     */
139    if (current_op != INVALID_OP) {
140       if (current_op != op) {
141          /* If we have different conversions, but one can be converted
142           * to the other, then let's do that:
143           */
144          if (concrete_conversion(current_op) == concrete_conversion(op)) {
145             op = concrete_conversion(op);
146          } else {
147             return INVALID_OP;
148          }
149       }
150    }
151 
152    return op;
153 }
154 
155 static nir_op
widening_conversion_op(nir_instr * instr,unsigned * bit_size)156 widening_conversion_op(nir_instr *instr, unsigned *bit_size)
157 {
158    if (instr->type != nir_instr_type_alu)
159       return INVALID_OP;
160 
161    nir_alu_instr *alu = nir_instr_as_alu(instr);
162    switch (alu->op) {
163    case nir_op_i2i32:
164    case nir_op_i2f32:
165    case nir_op_u2f32:
166    case nir_op_f2f32:
167    case nir_op_f2i32:
168    case nir_op_f2u32:
169       break;
170    default:
171       return INVALID_OP;
172    }
173 
174    *bit_size = nir_src_bit_size(alu->src[0].src);
175 
176    /* We also need to check that the conversion's dest was actually
177     * wider:
178     */
179    if (nir_dest_bit_size(alu->dest.dest) <= *bit_size)
180       return INVALID_OP;
181 
182    return alu->op;
183 }
184 
185 static nir_alu_type
op_to_type(nir_op op)186 op_to_type(nir_op op)
187 {
188    return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
189 }
190 
191 /* Try to move narrowing instructions consuming the phi into the phi's
192  * sources to reduce the phi's precision:
193  */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same (or at least compatible) conversion?
    * narrowing_conversion_op() accumulates the common op across
    * uses, returning INVALID_OP on the first incompatible use.
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      op = narrowing_conversion_op(use->parent_instr, op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* an if_uses means the phi is used directly in a conditional, ie.
    * without a conversion
    */
   if (!list_is_empty(&phi->dest.ssa.if_uses))
      return false;

   /* If the phi has no uses, then nothing to do: */
   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction, sized by the conversion
    * op's (narrower) output type:
    */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     nir_alu_type_get_type_size(nir_op_infos[op].output_type),
                     NULL);

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      /* insert new conversion instr in block of original phi src: */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_ssa_def *old_src = src->src.ssa;
      nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* and add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And finally rewrite the original uses of the original phi uses to
    * directly use the new phi, skipping the conversion out of the orig
    * phi
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      /* We've previously established that all the uses were alu
       * conversion ops.  Turn them into movs instead: once the
       * rewrite below points them at the already-narrow new phi,
       * the conversion is an identity operation.
       */
      nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
      alu->op = nir_op_mov;
   }
   nir_ssa_def_rewrite_uses(&phi->dest.ssa, &new_phi->dest.ssa);

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}
265 
266 static bool
can_convert_load_const(nir_load_const_instr * lc,nir_op op)267 can_convert_load_const(nir_load_const_instr *lc, nir_op op)
268 {
269    nir_alu_type type = op_to_type(op);
270 
271    /* Note that we only handle phi's with bit_size == 32: */
272    assert(lc->def.bit_size == 32);
273 
274    for (unsigned i = 0; i < lc->def.num_components; i++) {
275       switch (type) {
276       case nir_type_int:
277          if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
278             return false;
279          break;
280       case nir_type_uint:
281          if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
282             return false;
283          break;
284       case nir_type_float:
285          if (lc->value[i].f32 != _mesa_half_to_float(
286                _mesa_float_to_half(lc->value[i].f32)))
287             return false;
288          break;
289       default:
290          unreachable("bad type");
291          return false;
292       }
293    }
294 
295    return true;
296 }
297 
298 /* Check all the phi sources to see if they are the same widening op, in
299  * which case we can push the widening op to the other side of the phi
300  */
static nir_op
find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
{
   nir_op op = INVALID_OP;

   bool has_load_const = false;
   *bit_size = 0;

   /* First pass: every src that is not a load_const must be the same
    * widening conversion, and all of them must convert from the same
    * (narrower) src bit size:
    */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type == nir_instr_type_load_const) {
         /* defer load_const srcs to the second pass below: */
         has_load_const = true;
         continue;
      }

      unsigned src_bit_size;
      nir_op src_op = widening_conversion_op(instr, &src_bit_size);

      /* Not a widening conversion: */
      if (src_op == INVALID_OP)
         return INVALID_OP;

      /* If it is a widening conversion, it needs to be the same op as
       * other phi sources:
       */
      if ((op != INVALID_OP) && (op != src_op))
         return INVALID_OP;

      if (*bit_size && (*bit_size != src_bit_size))
         return INVALID_OP;

      op = src_op;
      *bit_size = src_bit_size;
   }

   /* No widening conversion found, or no load_const srcs to check: */
   if ((op == INVALID_OP) || !has_load_const)
      return op;

   /* If we could otherwise move widening sources, but load_const is
    * one of the phi sources (and does not have a widening conversion,
    * but could have a narrowing->widening sequence inserted without
    * loss of precision), then we could insert a narrowing->widening
    * sequence to make the rest of the transformation possible:
    */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type != nir_instr_type_load_const)
         continue;

      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
         return INVALID_OP;
   }

   return op;
}
360 
361 /* Try to move widening conversions into the phi to the phi's output
362  * to reduce the phi's precision:
363  */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   /* All srcs must share a single widening conversion op (load_const
    * srcs excepted, provided they fit in the narrower type):
    */
   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction at the narrower src size: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     bit_size, NULL);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      nir_ssa_def *new_src;

      /* new instructions go in the block of the original src, right
       * after the instruction that produced it:
       */
      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* if the src is a load_const, we've already verified that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* at this point we know the sources source is a conversion: */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = nir_dest_num_components(alu->dest.dest);
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi,
    * and re-write the original phi's uses
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_ssa_def *def = nir_build_alu(b, op, &new_phi->dest.ssa, NULL, NULL, NULL);

   nir_ssa_def_rewrite_uses(&phi->dest.ssa, def);

   return true;
}
435 
436 static bool
lower_phi(nir_builder * b,nir_phi_instr * phi)437 lower_phi(nir_builder *b, nir_phi_instr *phi)
438 {
439    bool progress = try_move_narrowing_dst(b, phi);
440    if (!progress)
441       progress = try_move_widening_src(b, phi);
442    return progress;
443 }
444 
445 bool
nir_opt_phi_precision(nir_shader * shader)446 nir_opt_phi_precision(nir_shader *shader)
447 {
448    bool progress = false;
449 
450    /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
451    unsigned bit_sizes_used = shader->info.bit_sizes_float |
452                              shader->info.bit_sizes_int;
453 
454    if (!bit_sizes_used) {
455       nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
456       bit_sizes_used = shader->info.bit_sizes_float |
457                        shader->info.bit_sizes_int;
458    }
459 
460    if (!(bit_sizes_used & (8 | 16)))
461       return false;
462 
463    nir_foreach_function(function, shader) {
464       if (!function->impl)
465          continue;
466 
467       nir_builder b;
468       nir_builder_init(&b, function->impl);
469 
470       nir_foreach_block (block, function->impl) {
471          nir_foreach_instr_safe (instr, block) {
472             if (instr->type != nir_instr_type_phi)
473                break;
474 
475             progress |= lower_phi(&b, nir_instr_as_phi(instr));
476          }
477       }
478 
479       if (progress) {
480          nir_metadata_preserve(function->impl,
481                                nir_metadata_block_index |
482                                nir_metadata_dominance);
483       } else {
484          nir_metadata_preserve(function->impl, nir_metadata_all);
485       }
486    }
487 
488    return progress;
489 }
490 
491