• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir/nir_builder.h"
25 #include "nir.h"
26 #include "nir_builder.h"
27 #include "nir_control_flow.h"
28 #include "nir_search_helpers.h"
29 
30 /*
31  * Implements a small peephole optimization that looks for
32  *
33  * if (cond) {
34  *    <then SSA defs>
35  * } else {
36  *    <else SSA defs>
37  * }
38  * phi
39  * ...
40  * phi
41  *
42  * and replaces it with:
43  *
44  * <then SSA defs>
45  * <else SSA defs>
46  * bcsel
47  * ...
48  * bcsel
49  *
50  * where the SSA defs are ALU operations or other cheap instructions (not
51  * texturing, for example).
52  *
53  * If the number of ALU operations in the branches is greater than the limit
54  * parameter, then the optimization is skipped.  In limit=0 mode, the SSA defs
55  * must only be MOVs which we expect to get copy-propagated away once they're
56  * out of the inner blocks.
57  */
58 
59 static bool
block_check_for_allowed_instrs(nir_block * block,unsigned * count,unsigned limit,bool indirect_load_ok,bool expensive_alu_ok)60 block_check_for_allowed_instrs(nir_block *block, unsigned *count,
61                                unsigned limit, bool indirect_load_ok,
62                                bool expensive_alu_ok)
63 {
64    bool alu_ok = limit != 0;
65 
66    /* Used on non-control-flow HW to flatten all IFs. */
67    if (limit == ~0) {
68       nir_foreach_instr(instr, block) {
69          switch (instr->type) {
70          case nir_instr_type_alu:
71          case nir_instr_type_deref:
72          case nir_instr_type_load_const:
73          case nir_instr_type_phi:
74          case nir_instr_type_undef:
75          case nir_instr_type_tex:
76             break;
77 
78          case nir_instr_type_intrinsic: {
79             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
80             switch (intr->intrinsic) {
81             case nir_intrinsic_discard:
82             case nir_intrinsic_discard_if:
83               /* For non-CF hardware, we need to be able to move discards up
84                * and flatten, so let them pass.
85                */
86               continue;
87             default:
88                if (!nir_intrinsic_can_reorder(intr))
89                   return false;
90             }
91             break;
92          }
93 
94          case nir_instr_type_call:
95          case nir_instr_type_jump:
96          case nir_instr_type_parallel_copy:
97             return false;
98          }
99       }
100       return true;
101    }
102 
103    nir_foreach_instr(instr, block) {
104       switch (instr->type) {
105       case nir_instr_type_intrinsic: {
106          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
107 
108          switch (intrin->intrinsic) {
109          case nir_intrinsic_load_deref: {
110             nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]);
111 
112             switch (deref->modes) {
113             case nir_var_shader_in:
114             case nir_var_uniform:
115             case nir_var_image:
116                /* Don't try to remove flow control around an indirect load
117                 * because that flow control may be trying to avoid invalid
118                 * loads.
119                 */
120                if (!indirect_load_ok && nir_deref_instr_has_indirect(deref))
121                   return false;
122 
123                break;
124 
125             default:
126                return false;
127             }
128             break;
129          }
130 
131          case nir_intrinsic_load_ubo:
132          case nir_intrinsic_load_ubo_vec4:
133             if (!indirect_load_ok && !nir_src_is_const(intrin->src[1]))
134                return false;
135             if (!(nir_intrinsic_access(intrin) & ACCESS_CAN_SPECULATE))
136                return false;
137             break;
138 
139          case nir_intrinsic_load_uniform:
140          case nir_intrinsic_load_preamble:
141          case nir_intrinsic_load_helper_invocation:
142          case nir_intrinsic_is_helper_invocation:
143          case nir_intrinsic_load_front_face:
144          case nir_intrinsic_load_view_index:
145          case nir_intrinsic_load_layer_id:
146          case nir_intrinsic_load_frag_coord:
147          case nir_intrinsic_load_sample_pos:
148          case nir_intrinsic_load_sample_pos_or_center:
149          case nir_intrinsic_load_sample_id:
150          case nir_intrinsic_load_sample_mask_in:
151          case nir_intrinsic_load_vertex_id_zero_base:
152          case nir_intrinsic_load_first_vertex:
153          case nir_intrinsic_load_base_instance:
154          case nir_intrinsic_load_instance_id:
155          case nir_intrinsic_load_draw_id:
156          case nir_intrinsic_load_num_workgroups:
157          case nir_intrinsic_load_workgroup_id:
158          case nir_intrinsic_load_local_invocation_id:
159          case nir_intrinsic_load_local_invocation_index:
160          case nir_intrinsic_load_subgroup_id:
161          case nir_intrinsic_load_subgroup_invocation:
162          case nir_intrinsic_load_num_subgroups:
163          case nir_intrinsic_load_frag_shading_rate:
164          case nir_intrinsic_is_sparse_texels_resident:
165          case nir_intrinsic_sparse_residency_code_and:
166          case nir_intrinsic_read_invocation:
167          case nir_intrinsic_quad_broadcast:
168          case nir_intrinsic_quad_swap_horizontal:
169          case nir_intrinsic_quad_swap_vertical:
170          case nir_intrinsic_quad_swap_diagonal:
171          case nir_intrinsic_quad_swizzle_amd:
172          case nir_intrinsic_masked_swizzle_amd:
173          case nir_intrinsic_lane_permute_16_amd:
174             if (!alu_ok)
175                return false;
176             break;
177 
178          default:
179             return false;
180          }
181 
182          break;
183       }
184 
185       case nir_instr_type_deref:
186       case nir_instr_type_load_const:
187       case nir_instr_type_undef:
188          break;
189 
190       case nir_instr_type_alu: {
191          nir_alu_instr *mov = nir_instr_as_alu(instr);
192          bool movelike = false;
193 
194          switch (mov->op) {
195          case nir_op_mov:
196          case nir_op_fneg:
197          case nir_op_ineg:
198          case nir_op_fabs:
199          case nir_op_iabs:
200          case nir_op_vec2:
201          case nir_op_vec3:
202          case nir_op_vec4:
203          case nir_op_vec5:
204          case nir_op_vec8:
205          case nir_op_vec16:
206             movelike = true;
207             break;
208 
209          case nir_op_fcos:
210          case nir_op_fdiv:
211          case nir_op_fexp2:
212          case nir_op_flog2:
213          case nir_op_fmod:
214          case nir_op_fpow:
215          case nir_op_frcp:
216          case nir_op_frem:
217          case nir_op_frsq:
218          case nir_op_fsin:
219          case nir_op_idiv:
220          case nir_op_irem:
221          case nir_op_udiv:
222             if (!alu_ok || !expensive_alu_ok)
223                return false;
224 
225             break;
226 
227          default:
228             if (!alu_ok) {
229                /* It must be a move-like operation. */
230                return false;
231             }
232             break;
233          }
234 
235          if (alu_ok) {
236             /* If the ALU operation is an fsat or a move-like operation, do
237              * not count it.  The expectation is that it will eventually be
238              * merged as a destination modifier or source modifier on some
239              * other instruction.
240              */
241             if (mov->op != nir_op_fsat && !movelike)
242                (*count)++;
243          } else {
244             /* The only uses of this definition must be phis in the successor */
245             nir_foreach_use_including_if(use, &mov->def) {
246                if (nir_src_is_if(use) ||
247                    nir_src_parent_instr(use)->type != nir_instr_type_phi ||
248                    nir_src_parent_instr(use)->block != block->successors[0])
249                   return false;
250             }
251          }
252          break;
253       }
254 
255       default:
256          return false;
257       }
258    }
259 
260    return true;
261 }
262 
263 /**
264  * Try to collapse nested ifs:
265  * This optimization turns
266  *
267  * if (cond1) {
268  *   <allowed instruction>
269  *   if (cond2) {
270  *     <any code>
271  *   } else {
272  *   }
273  * } else {
274  * }
275  *
276  * into
277  *
278  * <allowed instruction>
279  * if (cond1 && cond2) {
280  *   <any code>
281  * } else {
282  * }
283  *
284  */
285 static bool
nir_opt_collapse_if(nir_if * if_stmt,nir_shader * shader,unsigned limit,bool indirect_load_ok,bool expensive_alu_ok)286 nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader, unsigned limit,
287                     bool indirect_load_ok, bool expensive_alu_ok)
288 {
289    /* the if has to be nested */
290    if (if_stmt->cf_node.parent->type != nir_cf_node_if)
291       return false;
292 
293    nir_if *parent_if = nir_cf_node_as_if(if_stmt->cf_node.parent);
294    if (parent_if->control == nir_selection_control_dont_flatten)
295       return false;
296 
297    /* check if the else block is empty */
298    if (!nir_cf_list_is_empty_block(&if_stmt->else_list))
299       return false;
300 
301    /* this opt doesn't make much sense if the branch is empty */
302    if (nir_cf_list_is_empty_block(&if_stmt->then_list))
303       return false;
304 
305    /* the nested if has to be the only cf_node:
306     * i.e. <block> <if_stmt> <block> */
307    if (exec_list_length(&parent_if->then_list) != 3)
308       return false;
309 
310    /* check if the else block of the parent if is empty */
311    if (!nir_cf_list_is_empty_block(&parent_if->else_list))
312       return false;
313 
314    /* check if the block after the nested if is empty except for phis */
315    nir_block *last = nir_if_last_then_block(parent_if);
316    nir_instr *last_instr = nir_block_last_instr(last);
317    if (last_instr && last_instr->type != nir_instr_type_phi)
318       return false;
319 
320    /* check if all outer phis become trivial after merging the ifs */
321    nir_foreach_instr(instr, last) {
322       if (parent_if->control == nir_selection_control_flatten)
323          break;
324 
325       nir_phi_instr *phi = nir_instr_as_phi(instr);
326       nir_phi_src *else_src =
327          nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt));
328 
329       nir_foreach_use(src, &phi->def) {
330          assert(nir_src_parent_instr(src)->type == nir_instr_type_phi);
331          nir_phi_src *phi_src =
332             nir_phi_get_src_from_block(nir_instr_as_phi(nir_src_parent_instr(src)),
333                                        nir_if_first_else_block(parent_if));
334          if (phi_src->src.ssa != else_src->src.ssa)
335             return false;
336       }
337    }
338 
339    if (parent_if->control == nir_selection_control_flatten) {
340       /* Override driver defaults */
341       indirect_load_ok = true;
342       expensive_alu_ok = true;
343    }
344 
345    /* check if the block before the nested if matches the requirements */
346    nir_block *first = nir_if_first_then_block(parent_if);
347    unsigned count = 0;
348    if (!block_check_for_allowed_instrs(first, &count, limit != 0,
349                                        indirect_load_ok, expensive_alu_ok))
350       return false;
351 
352    if (count > limit && parent_if->control != nir_selection_control_flatten)
353       return false;
354 
355    /* trivialize succeeding phis */
356    nir_foreach_instr(instr, last) {
357       nir_phi_instr *phi = nir_instr_as_phi(instr);
358       nir_phi_src *else_src =
359          nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt));
360       nir_foreach_use_safe(src, &phi->def) {
361          nir_phi_src *phi_src =
362             nir_phi_get_src_from_block(nir_instr_as_phi(nir_src_parent_instr(src)),
363                                        nir_if_first_else_block(parent_if));
364          if (phi_src->src.ssa == else_src->src.ssa)
365             nir_src_rewrite(&phi_src->src, &phi->def);
366       }
367    }
368 
369    /* combine the conditions */
370    struct nir_builder b = nir_builder_at(nir_before_cf_node(&if_stmt->cf_node));
371    nir_def *cond = nir_iand(&b, if_stmt->condition.ssa,
372                             parent_if->condition.ssa);
373    nir_src_rewrite(&if_stmt->condition, cond);
374 
375    /* move the whole inner if before the parent if */
376    nir_cf_list tmp;
377    nir_cf_extract(&tmp, nir_before_block(first),
378                   nir_after_block(last));
379    nir_cf_reinsert(&tmp, nir_before_cf_node(&parent_if->cf_node));
380 
381    /* The now empty parent if will be cleaned up by other passes */
382    return true;
383 }
384 
385 /* If we're moving discards out of the if for non-CF hardware, we need to add
386  * the if's condition to it
387  */
388 static void
rewrite_discard_conds(nir_instr * instr,nir_def * if_cond,bool is_else)389 rewrite_discard_conds(nir_instr *instr, nir_def *if_cond, bool is_else)
390 {
391    if (instr->type != nir_instr_type_intrinsic)
392       return;
393    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
394 
395    if (intr->intrinsic != nir_intrinsic_discard_if && intr->intrinsic != nir_intrinsic_discard)
396       return;
397 
398    nir_builder b = nir_builder_at(nir_before_instr(instr));
399 
400    if (is_else)
401       if_cond = nir_inot(&b, if_cond);
402 
403    if (intr->intrinsic == nir_intrinsic_discard_if) {
404       nir_src_rewrite(&intr->src[0], nir_iand(&b, intr->src[0].ssa, if_cond));
405    } else {
406       nir_discard_if(&b, if_cond);
407       nir_instr_remove(instr);
408    }
409 }
410 
411 static bool
nir_opt_peephole_select_block(nir_block * block,nir_shader * shader,unsigned limit,bool indirect_load_ok,bool expensive_alu_ok)412 nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
413                               unsigned limit, bool indirect_load_ok,
414                               bool expensive_alu_ok)
415 {
416    if (nir_cf_node_is_first(&block->cf_node))
417       return false;
418 
419    nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
420    if (prev_node->type != nir_cf_node_if)
421       return false;
422 
423    nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));
424 
425    /* If the last instruction before this if/else block is a jump, we can't
426     * append stuff after it because it would break a bunch of assumption about
427     * control flow (nir_validate expects the successor of a return/halt jump
428     * to be the end of the function, which might not match the successor of
429     * the if/else blocks).
430     */
431    if (nir_block_ends_in_return_or_halt(prev_block))
432       return false;
433 
434    nir_if *if_stmt = nir_cf_node_as_if(prev_node);
435 
436    /* first, try to collapse the if */
437    if (nir_opt_collapse_if(if_stmt, shader, limit,
438                            indirect_load_ok, expensive_alu_ok))
439       return true;
440 
441    if (if_stmt->control == nir_selection_control_dont_flatten)
442       return false;
443 
444    nir_block *then_block = nir_if_first_then_block(if_stmt);
445    nir_block *else_block = nir_if_first_else_block(if_stmt);
446 
447    /* We can only have one block in each side ... */
448    if (nir_if_last_then_block(if_stmt) != then_block ||
449        nir_if_last_else_block(if_stmt) != else_block)
450       return false;
451 
452    if (if_stmt->control == nir_selection_control_flatten) {
453       /* Override driver defaults */
454       indirect_load_ok = true;
455       expensive_alu_ok = true;
456    }
457 
458    /* ... and those blocks must only contain "allowed" instructions. */
459    unsigned count = 0;
460    if (!block_check_for_allowed_instrs(then_block, &count, limit,
461                                        indirect_load_ok, expensive_alu_ok) ||
462        !block_check_for_allowed_instrs(else_block, &count, limit,
463                                        indirect_load_ok, expensive_alu_ok))
464       return false;
465 
466    if (count > limit && if_stmt->control != nir_selection_control_flatten)
467       return false;
468 
469    /* At this point, we know that the previous CFG node is an if-then
470     * statement containing only moves to phi nodes in this block.  We can
471     * just remove that entire CF node and replace all of the phi nodes with
472     * selects.
473     */
474 
475    /* First, we move the remaining instructions from the blocks to the
476     * block before.  We have already guaranteed that this is safe by
477     * calling block_check_for_allowed_instrs()
478     */
479    nir_foreach_instr_safe(instr, then_block) {
480       exec_node_remove(&instr->node);
481       instr->block = prev_block;
482       exec_list_push_tail(&prev_block->instr_list, &instr->node);
483       rewrite_discard_conds(instr, if_stmt->condition.ssa, false);
484    }
485 
486    nir_foreach_instr_safe(instr, else_block) {
487       exec_node_remove(&instr->node);
488       instr->block = prev_block;
489       exec_list_push_tail(&prev_block->instr_list, &instr->node);
490       rewrite_discard_conds(instr, if_stmt->condition.ssa, true);
491    }
492 
493    nir_foreach_phi_safe(phi, block) {
494       nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel);
495       sel->src[0].src = nir_src_for_ssa(if_stmt->condition.ssa);
496       /* Splat the condition to all channels */
497       memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
498 
499       assert(exec_list_length(&phi->srcs) == 2);
500       nir_foreach_phi_src(src, phi) {
501          assert(src->pred == then_block || src->pred == else_block);
502 
503          unsigned idx = src->pred == then_block ? 1 : 2;
504          sel->src[idx].src = nir_src_for_ssa(src->src.ssa);
505       }
506 
507       nir_def_init(&sel->instr, &sel->def,
508                    phi->def.num_components, phi->def.bit_size);
509 
510       nir_def_rewrite_uses(&phi->def,
511                            &sel->def);
512 
513       nir_instr_insert_before(&phi->instr, &sel->instr);
514       nir_instr_remove(&phi->instr);
515    }
516 
517    nir_cf_node_remove(&if_stmt->cf_node);
518    return true;
519 }
520 
521 static bool
nir_opt_peephole_select_impl(nir_function_impl * impl,unsigned limit,bool indirect_load_ok,bool expensive_alu_ok)522 nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
523                              bool indirect_load_ok, bool expensive_alu_ok)
524 {
525    nir_shader *shader = impl->function->shader;
526    bool progress = false;
527 
528    nir_foreach_block_safe(block, impl) {
529       progress |= nir_opt_peephole_select_block(block, shader, limit,
530                                                 indirect_load_ok,
531                                                 expensive_alu_ok);
532    }
533 
534    if (progress) {
535       nir_metadata_preserve(impl, nir_metadata_none);
536    } else {
537       nir_metadata_preserve(impl, nir_metadata_all);
538    }
539 
540    return progress;
541 }
542 
543 bool
nir_opt_peephole_select(nir_shader * shader,unsigned limit,bool indirect_load_ok,bool expensive_alu_ok)544 nir_opt_peephole_select(nir_shader *shader, unsigned limit,
545                         bool indirect_load_ok, bool expensive_alu_ok)
546 {
547    bool progress = false;
548 
549    nir_foreach_function_impl(impl, shader) {
550       progress |= nir_opt_peephole_select_impl(impl, limit,
551                                                indirect_load_ok,
552                                                expensive_alu_ok);
553    }
554 
555    return progress;
556 }
557