• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
28 
29 /* Prepare this loop for unrolling by first converting to lcssa and then
30  * converting the phis from the loops first block and the block that follows
31  * the loop into regs.  Partially converting out of SSA allows us to unroll
32  * the loop without having to keep track of and update phis along the way
33  * which gets tricky and doesn't add much value over conveting to regs.
34  *
35  * The loop may have a continue instruction at the end of the loop which does
36  * nothing.  Once we're out of SSA, we can safely delete it so we don't have
37  * to deal with it later.
38  */
39 static void
loop_prepare_for_unroll(nir_loop * loop)40 loop_prepare_for_unroll(nir_loop *loop)
41 {
42    nir_convert_loop_to_lcssa(loop);
43 
44    nir_lower_phis_to_regs_block(nir_loop_first_block(loop));
45 
46    nir_block *block_after_loop =
47       nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
48 
49    nir_lower_phis_to_regs_block(block_after_loop);
50 
51    nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
52    if (last_instr && last_instr->type == nir_instr_type_jump) {
53       assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
54       nir_instr_remove(last_instr);
55    }
56 }
57 
58 static void
get_first_blocks_in_terminator(nir_loop_terminator * term,nir_block ** first_break_block,nir_block ** first_continue_block)59 get_first_blocks_in_terminator(nir_loop_terminator *term,
60                                nir_block **first_break_block,
61                                nir_block **first_continue_block)
62 {
63    if (term->continue_from_then) {
64       *first_continue_block = nir_if_first_then_block(term->nif);
65       *first_break_block = nir_if_first_else_block(term->nif);
66    } else {
67       *first_continue_block = nir_if_first_else_block(term->nif);
68       *first_break_block = nir_if_first_then_block(term->nif);
69    }
70 }
71 
72 /**
73  * Unroll a loop where we know exactly how many iterations there are and there
74  * is only a single exit point.  Note here we can unroll loops with multiple
75  * theoretical exits that only have a single terminating exit that we always
76  * know is the "real" exit.
77  *
78  *     loop {
79  *         ...instrs...
80  *     }
81  *
82  * And the iteration count is 3, the output will be:
83  *
84  *     ...instrs... ...instrs... ...instrs...
85  */
86 static void
simple_unroll(nir_loop * loop)87 simple_unroll(nir_loop *loop)
88 {
89    nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
90    assert(nir_is_trivial_loop_if(limiting_term->nif,
91                                  limiting_term->break_block));
92 
93    loop_prepare_for_unroll(loop);
94 
95    /* Skip over loop terminator and get the loop body. */
96    list_for_each_entry(nir_loop_terminator, terminator,
97                        &loop->info->loop_terminator_list,
98                        loop_terminator_link) {
99 
100       /* Remove all but the limiting terminator as we know the other exit
101        * conditions can never be met. Note we need to extract any instructions
102        * in the continue from branch and insert then into the loop body before
103        * removing it.
104        */
105       if (terminator->nif != limiting_term->nif) {
106          nir_block *first_break_block;
107          nir_block *first_continue_block;
108          get_first_blocks_in_terminator(terminator, &first_break_block,
109                                         &first_continue_block);
110 
111          assert(nir_is_trivial_loop_if(terminator->nif,
112                                        terminator->break_block));
113 
114          nir_cf_list continue_from_lst;
115          nir_cf_extract(&continue_from_lst,
116                         nir_before_block(first_continue_block),
117                         nir_after_block(terminator->continue_from_block));
118          nir_cf_reinsert(&continue_from_lst,
119                          nir_after_cf_node(&terminator->nif->cf_node));
120 
121          nir_cf_node_remove(&terminator->nif->cf_node);
122       }
123    }
124 
125    nir_block *first_break_block;
126    nir_block *first_continue_block;
127    get_first_blocks_in_terminator(limiting_term, &first_break_block,
128                                   &first_continue_block);
129 
130    /* Pluck out the loop header */
131    nir_block *header_blk = nir_loop_first_block(loop);
132    nir_cf_list lp_header;
133    nir_cf_extract(&lp_header, nir_before_block(header_blk),
134                   nir_before_cf_node(&limiting_term->nif->cf_node));
135 
136    /* Add the continue from block of the limiting terminator to the loop body
137     */
138    nir_cf_list continue_from_lst;
139    nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
140                   nir_after_block(limiting_term->continue_from_block));
141    nir_cf_reinsert(&continue_from_lst,
142                    nir_after_cf_node(&limiting_term->nif->cf_node));
143 
144    /* Pluck out the loop body */
145    nir_cf_list loop_body;
146    nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
147                   nir_after_block(nir_loop_last_block(loop)));
148 
149    struct hash_table *remap_table =
150       _mesa_hash_table_create(NULL, _mesa_hash_pointer,
151                               _mesa_key_pointer_equal);
152 
153    /* Clone the loop header */
154    nir_cf_list cloned_header;
155    nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
156                      remap_table);
157 
158    /* Insert cloned loop header before the loop */
159    nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
160 
161    /* Temp list to store the cloned loop body as we unroll */
162    nir_cf_list unrolled_lp_body;
163 
164    /* Clone loop header and append to the loop body */
165    for (unsigned i = 0; i < loop->info->trip_count; i++) {
166       /* Clone loop body */
167       nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
168                         remap_table);
169 
170       /* Insert unrolled loop body before the loop */
171       nir_cf_reinsert(&unrolled_lp_body, nir_before_cf_node(&loop->cf_node));
172 
173       /* Clone loop header */
174       nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
175                         remap_table);
176 
177       /* Insert loop header after loop body */
178       nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
179    }
180 
181    /* Remove the break from the loop terminator and add instructions from
182     * the break block after the unrolled loop.
183     */
184    nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
185    nir_instr_remove(break_instr);
186    nir_cf_list break_list;
187    nir_cf_extract(&break_list, nir_before_block(first_break_block),
188                   nir_after_block(limiting_term->break_block));
189 
190    /* Clone so things get properly remapped */
191    nir_cf_list cloned_break_list;
192    nir_cf_list_clone(&cloned_break_list, &break_list, loop->cf_node.parent,
193                      remap_table);
194 
195    nir_cf_reinsert(&cloned_break_list, nir_before_cf_node(&loop->cf_node));
196 
197    /* Remove the loop */
198    nir_cf_node_remove(&loop->cf_node);
199 
200    /* Delete the original loop body, break block & header */
201    nir_cf_delete(&lp_header);
202    nir_cf_delete(&loop_body);
203    nir_cf_delete(&break_list);
204 
205    _mesa_hash_table_destroy(remap_table, NULL);
206 }
207 
208 static void
move_cf_list_into_loop_term(nir_cf_list * lst,nir_loop_terminator * term)209 move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
210 {
211    /* Move the rest of the loop inside the continue-from-block */
212    nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
213 
214    /* Remove the break */
215    nir_instr_remove(nir_block_last_instr(term->break_block));
216 }
217 
218 static nir_cursor
get_complex_unroll_insert_location(nir_cf_node * node,bool continue_from_then)219 get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
220 {
221    if (node->type == nir_cf_node_loop) {
222       return nir_before_cf_node(node);
223    } else {
224       nir_if *if_stmt = nir_cf_node_as_if(node);
225       if (continue_from_then) {
226          return nir_after_block(nir_if_last_then_block(if_stmt));
227       } else {
228          return nir_after_block(nir_if_last_else_block(if_stmt));
229       }
230    }
231 }
232 
233 /**
234  * Unroll a loop with two exists when the trip count of one of the exits is
235  * unknown.  If continue_from_then is true, the loop is repeated only when the
236  * "then" branch of the if is taken; otherwise it is repeated only
237  * when the "else" branch of the if is taken.
238  *
239  * For example, if the input is:
240  *
241  *      loop {
242  *         ...phis/condition...
243  *         if condition {
244  *            ...then instructions...
245  *         } else {
246  *            ...continue instructions...
247  *            break
248  *         }
249  *         ...body...
250  *      }
251  *
252  * And the iteration count is 3, and unlimit_term->continue_from_then is true,
253  * then the output will be:
254  *
255  *      ...condition...
256  *      if condition {
257  *         ...then instructions...
258  *         ...body...
259  *         if condition {
260  *            ...then instructions...
261  *            ...body...
262  *            if condition {
263  *               ...then instructions...
264  *               ...body...
265  *            } else {
266  *               ...continue instructions...
267  *            }
268  *         } else {
269  *            ...continue instructions...
270  *         }
271  *      } else {
272  *         ...continue instructions...
273  *      }
274  */
275 static void
complex_unroll(nir_loop * loop,nir_loop_terminator * unlimit_term,bool limiting_term_second)276 complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
277                bool limiting_term_second)
278 {
279    assert(nir_is_trivial_loop_if(unlimit_term->nif,
280                                  unlimit_term->break_block));
281 
282    nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
283    assert(nir_is_trivial_loop_if(limiting_term->nif,
284                                  limiting_term->break_block));
285 
286    loop_prepare_for_unroll(loop);
287 
288    nir_block *header_blk = nir_loop_first_block(loop);
289 
290    nir_cf_list lp_header;
291    nir_cf_list limit_break_list;
292    unsigned num_times_to_clone;
293    if (limiting_term_second) {
294       /* Pluck out the loop header */
295       nir_cf_extract(&lp_header, nir_before_block(header_blk),
296                      nir_before_cf_node(&unlimit_term->nif->cf_node));
297 
298       /* We need some special handling when its the second terminator causing
299        * us to exit the loop for example:
300        *
301        *   for (int i = 0; i < uniform_lp_count; i++) {
302        *      colour = vec4(0.0, 1.0, 0.0, 1.0);
303        *
304        *      if (i == 1) {
305        *         break;
306        *      }
307        *      ... any further code is unreachable after i == 1 ...
308        *   }
309        */
310       nir_cf_list after_lt;
311       nir_if *limit_if = limiting_term->nif;
312       nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
313                      nir_after_block(nir_loop_last_block(loop)));
314       move_cf_list_into_loop_term(&after_lt, limiting_term);
315 
316       /* Because the trip count is the number of times we pass over the entire
317        * loop before hitting a break when the second terminator is the
318        * limiting terminator we can actually execute code inside the loop when
319        * trip count == 0 e.g. the code above the break.  So we need to bump
320        * the trip_count in order for the code below to clone anything.  When
321        * trip count == 1 we execute the code above the break twice and the
322        * code below it once so we need clone things twice and so on.
323        */
324       num_times_to_clone = loop->info->trip_count + 1;
325    } else {
326       /* Pluck out the loop header */
327       nir_cf_extract(&lp_header, nir_before_block(header_blk),
328                      nir_before_cf_node(&limiting_term->nif->cf_node));
329 
330       nir_block *first_break_block;
331       nir_block *first_continue_block;
332       get_first_blocks_in_terminator(limiting_term, &first_break_block,
333                                      &first_continue_block);
334 
335       /* Remove the break then extract instructions from the break block so we
336        * can insert them in the innermost else of the unrolled loop.
337        */
338       nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
339       nir_instr_remove(break_instr);
340       nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
341                      nir_after_block(limiting_term->break_block));
342 
343       nir_cf_list continue_list;
344       nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
345                      nir_after_block(limiting_term->continue_from_block));
346 
347       nir_cf_reinsert(&continue_list,
348                       nir_after_cf_node(&limiting_term->nif->cf_node));
349 
350       nir_cf_node_remove(&limiting_term->nif->cf_node);
351 
352       num_times_to_clone = loop->info->trip_count;
353    }
354 
355    /* In the terminator that we have no trip count for move everything after
356     * the terminator into the continue from branch.
357     */
358    nir_cf_list loop_end;
359    nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
360                   nir_after_block(nir_loop_last_block(loop)));
361    move_cf_list_into_loop_term(&loop_end, unlimit_term);
362 
363    /* Pluck out the loop body. */
364    nir_cf_list loop_body;
365    nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
366                   nir_after_block(nir_loop_last_block(loop)));
367 
368    struct hash_table *remap_table =
369       _mesa_hash_table_create(NULL, _mesa_hash_pointer,
370                               _mesa_key_pointer_equal);
371 
372    /* Set unroll_loc to the loop as we will insert the unrolled loop before it
373     */
374    nir_cf_node *unroll_loc = &loop->cf_node;
375 
376    /* Temp lists to store the cloned loop as we unroll */
377    nir_cf_list unrolled_lp_body;
378    nir_cf_list cloned_header;
379 
380    for (unsigned i = 0; i < num_times_to_clone; i++) {
381       /* Clone loop header */
382       nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
383                         remap_table);
384 
385       nir_cursor cursor =
386          get_complex_unroll_insert_location(unroll_loc,
387                                             unlimit_term->continue_from_then);
388 
389       /* Insert cloned loop header */
390       nir_cf_reinsert(&cloned_header, cursor);
391 
392       cursor =
393          get_complex_unroll_insert_location(unroll_loc,
394                                             unlimit_term->continue_from_then);
395 
396       /* Clone loop body */
397       nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
398                         remap_table);
399 
400       unroll_loc = exec_node_data(nir_cf_node,
401                                   exec_list_get_tail(&unrolled_lp_body.list),
402                                   node);
403       assert(unroll_loc->type == nir_cf_node_block &&
404              exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
405 
406       /* Get the unrolled if node */
407       unroll_loc = nir_cf_node_prev(unroll_loc);
408 
409       /* Insert unrolled loop body */
410       nir_cf_reinsert(&unrolled_lp_body, cursor);
411    }
412 
413    if (!limiting_term_second) {
414       assert(unroll_loc->type == nir_cf_node_if);
415 
416       nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
417                         remap_table);
418 
419       nir_cursor cursor =
420          get_complex_unroll_insert_location(unroll_loc,
421                                             unlimit_term->continue_from_then);
422 
423       /* Insert cloned loop header */
424       nir_cf_reinsert(&cloned_header, cursor);
425 
426       /* Clone so things get properly remapped, and insert break block from
427        * the limiting terminator.
428        */
429       nir_cf_list cloned_break_blk;
430       nir_cf_list_clone(&cloned_break_blk, &limit_break_list,
431                         loop->cf_node.parent, remap_table);
432 
433       cursor =
434          get_complex_unroll_insert_location(unroll_loc,
435                                             unlimit_term->continue_from_then);
436 
437       nir_cf_reinsert(&cloned_break_blk, cursor);
438       nir_cf_delete(&limit_break_list);
439    }
440 
441    /* The loop has been unrolled so remove it. */
442    nir_cf_node_remove(&loop->cf_node);
443 
444    /* Delete the original loop header and body */
445    nir_cf_delete(&lp_header);
446    nir_cf_delete(&loop_body);
447 
448    _mesa_hash_table_destroy(remap_table, NULL);
449 }
450 
451 static bool
is_loop_small_enough_to_unroll(nir_shader * shader,nir_loop_info * li)452 is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
453 {
454    unsigned max_iter = shader->options->max_unroll_iterations;
455 
456    if (li->trip_count > max_iter)
457       return false;
458 
459    if (li->force_unroll)
460       return true;
461 
462    bool loop_not_too_large =
463       li->num_instructions * li->trip_count <= max_iter * 25;
464 
465    return loop_not_too_large;
466 }
467 
468 static bool
process_loops(nir_shader * sh,nir_cf_node * cf_node,bool * innermost_loop)469 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop)
470 {
471    bool progress = false;
472    nir_loop *loop;
473 
474    switch (cf_node->type) {
475    case nir_cf_node_block:
476       return progress;
477    case nir_cf_node_if: {
478       nir_if *if_stmt = nir_cf_node_as_if(cf_node);
479       foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
480          progress |= process_loops(sh, nested_node, innermost_loop);
481       foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
482          progress |= process_loops(sh, nested_node, innermost_loop);
483       return progress;
484    }
485    case nir_cf_node_loop: {
486       loop = nir_cf_node_as_loop(cf_node);
487       foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
488          progress |= process_loops(sh, nested_node, innermost_loop);
489       break;
490    }
491    default:
492       unreachable("unknown cf node type");
493    }
494 
495    if (*innermost_loop) {
496       /* Don't attempt to unroll outer loops or a second inner loop in
497        * this pass wait until the next pass as we have altered the cf.
498        */
499       *innermost_loop = false;
500 
501       if (loop->info->limiting_terminator == NULL)
502          return progress;
503 
504       if (!is_loop_small_enough_to_unroll(sh, loop->info))
505          return progress;
506 
507       if (loop->info->is_trip_count_known) {
508          simple_unroll(loop);
509          progress = true;
510       } else {
511          /* Attempt to unroll loops with two terminators. */
512          unsigned num_lt = list_length(&loop->info->loop_terminator_list);
513          if (num_lt == 2) {
514             bool limiting_term_second = true;
515             nir_loop_terminator *terminator =
516                list_last_entry(&loop->info->loop_terminator_list,
517                                 nir_loop_terminator, loop_terminator_link);
518 
519 
520             if (terminator->nif == loop->info->limiting_terminator->nif) {
521                limiting_term_second = false;
522                terminator =
523                   list_first_entry(&loop->info->loop_terminator_list,
524                                   nir_loop_terminator, loop_terminator_link);
525             }
526 
527             /* If the first terminator has a trip count of zero and is the
528              * limiting terminator just do a simple unroll as the second
529              * terminator can never be reached.
530              */
531             if (loop->info->trip_count == 0 && !limiting_term_second) {
532                simple_unroll(loop);
533             } else {
534                complex_unroll(loop, terminator, limiting_term_second);
535             }
536             progress = true;
537          }
538       }
539    }
540 
541    return progress;
542 }
543 
544 static bool
nir_opt_loop_unroll_impl(nir_function_impl * impl,nir_variable_mode indirect_mask)545 nir_opt_loop_unroll_impl(nir_function_impl *impl,
546                          nir_variable_mode indirect_mask)
547 {
548    bool progress = false;
549    nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask);
550    nir_metadata_require(impl, nir_metadata_block_index);
551 
552    foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
553       bool innermost_loop = true;
554       progress |= process_loops(impl->function->shader, node,
555                                 &innermost_loop);
556    }
557 
558    if (progress)
559       nir_lower_regs_to_ssa_impl(impl);
560 
561    return progress;
562 }
563 
564 bool
nir_opt_loop_unroll(nir_shader * shader,nir_variable_mode indirect_mask)565 nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
566 {
567    bool progress = false;
568 
569    nir_foreach_function(function, shader) {
570       if (function->impl) {
571          progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
572       }
573    }
574    return progress;
575 }
576