1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
28
29
30 /* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32
31 * instructions. (Multiply counting nodes and magic number 5.) But there is
32 * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
33 * to give about the same results. Around 5 instructions per node. But some
34 * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
35 * we set it to 26.
36 * This was bumped to 96 because it unrolled more loops with a positive
37 * effect (vulkan ssao demo).
38 */
39 #define LOOP_UNROLL_LIMIT 96
40
41 /* Prepare this loop for unrolling by first converting to lcssa and then
42 * converting the phis from the top level of the loop body to regs.
43 * Partially converting out of SSA allows us to unroll the loop without having
44 * to keep track of and update phis along the way which gets tricky and
45 * doesn't add much value over converting to regs.
46 *
47 * The loop may have a continue instruction at the end of the loop which does
48 * nothing. Once we're out of SSA, we can safely delete it so we don't have
49 * to deal with it later.
50 */
51 static void
loop_prepare_for_unroll(nir_loop * loop)52 loop_prepare_for_unroll(nir_loop *loop)
53 {
54 nir_convert_loop_to_lcssa(loop);
55
56 /* Lower phis at the top level of the loop body */
57 foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
58 if (nir_cf_node_block == node->type) {
59 nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
60 }
61 }
62
63 /* Lower phis after the loop */
64 nir_block *block_after_loop =
65 nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
66
67 nir_lower_phis_to_regs_block(block_after_loop);
68
69 /* Remove continue if its the last instruction in the loop */
70 nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
71 if (last_instr && last_instr->type == nir_instr_type_jump) {
72 assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
73 nir_instr_remove(last_instr);
74 }
75 }
76
77 static void
get_first_blocks_in_terminator(nir_loop_terminator * term,nir_block ** first_break_block,nir_block ** first_continue_block)78 get_first_blocks_in_terminator(nir_loop_terminator *term,
79 nir_block **first_break_block,
80 nir_block **first_continue_block)
81 {
82 if (term->continue_from_then) {
83 *first_continue_block = nir_if_first_then_block(term->nif);
84 *first_break_block = nir_if_first_else_block(term->nif);
85 } else {
86 *first_continue_block = nir_if_first_else_block(term->nif);
87 *first_break_block = nir_if_first_then_block(term->nif);
88 }
89 }
90
91 /**
92 * Unroll a loop where we know exactly how many iterations there are and there
93 * is only a single exit point. Note here we can unroll loops with multiple
94 * theoretical exits that only have a single terminating exit that we always
95 * know is the "real" exit.
96 *
97 * loop {
98 * ...instrs...
99 * }
100 *
101 * And the iteration count is 3, the output will be:
102 *
103 * ...instrs... ...instrs... ...instrs...
104 */
105 static void
simple_unroll(nir_loop * loop)106 simple_unroll(nir_loop *loop)
107 {
108 nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
109 assert(nir_is_trivial_loop_if(limiting_term->nif,
110 limiting_term->break_block));
111
112 loop_prepare_for_unroll(loop);
113
114 /* Skip over loop terminator and get the loop body. */
115 list_for_each_entry(nir_loop_terminator, terminator,
116 &loop->info->loop_terminator_list,
117 loop_terminator_link) {
118
119 /* Remove all but the limiting terminator as we know the other exit
120 * conditions can never be met. Note we need to extract any instructions
121 * in the continue from branch and insert then into the loop body before
122 * removing it.
123 */
124 if (terminator->nif != limiting_term->nif) {
125 nir_block *first_break_block;
126 nir_block *first_continue_block;
127 get_first_blocks_in_terminator(terminator, &first_break_block,
128 &first_continue_block);
129
130 assert(nir_is_trivial_loop_if(terminator->nif,
131 terminator->break_block));
132
133 nir_cf_list continue_from_lst;
134 nir_cf_extract(&continue_from_lst,
135 nir_before_block(first_continue_block),
136 nir_after_block(terminator->continue_from_block));
137 nir_cf_reinsert(&continue_from_lst,
138 nir_after_cf_node(&terminator->nif->cf_node));
139
140 nir_cf_node_remove(&terminator->nif->cf_node);
141 }
142 }
143
144 nir_block *first_break_block;
145 nir_block *first_continue_block;
146 get_first_blocks_in_terminator(limiting_term, &first_break_block,
147 &first_continue_block);
148
149 /* Pluck out the loop header */
150 nir_block *header_blk = nir_loop_first_block(loop);
151 nir_cf_list lp_header;
152 nir_cf_extract(&lp_header, nir_before_block(header_blk),
153 nir_before_cf_node(&limiting_term->nif->cf_node));
154
155 /* Add the continue from block of the limiting terminator to the loop body
156 */
157 nir_cf_list continue_from_lst;
158 nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
159 nir_after_block(limiting_term->continue_from_block));
160 nir_cf_reinsert(&continue_from_lst,
161 nir_after_cf_node(&limiting_term->nif->cf_node));
162
163 /* Pluck out the loop body */
164 nir_cf_list loop_body;
165 nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
166 nir_after_block(nir_loop_last_block(loop)));
167
168 struct hash_table *remap_table =
169 _mesa_hash_table_create(NULL, _mesa_hash_pointer,
170 _mesa_key_pointer_equal);
171
172 /* Clone the loop header */
173 nir_cf_list cloned_header;
174 nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
175 remap_table);
176
177 /* Insert cloned loop header before the loop */
178 nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
179
180 /* Temp list to store the cloned loop body as we unroll */
181 nir_cf_list unrolled_lp_body;
182
183 /* Clone loop header and append to the loop body */
184 for (unsigned i = 0; i < loop->info->trip_count; i++) {
185 /* Clone loop body */
186 nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
187 remap_table);
188
189 /* Insert unrolled loop body before the loop */
190 nir_cf_reinsert(&unrolled_lp_body, nir_before_cf_node(&loop->cf_node));
191
192 /* Clone loop header */
193 nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
194 remap_table);
195
196 /* Insert loop header after loop body */
197 nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
198 }
199
200 /* Remove the break from the loop terminator and add instructions from
201 * the break block after the unrolled loop.
202 */
203 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
204 nir_instr_remove(break_instr);
205 nir_cf_list break_list;
206 nir_cf_extract(&break_list, nir_before_block(first_break_block),
207 nir_after_block(limiting_term->break_block));
208
209 /* Clone so things get properly remapped */
210 nir_cf_list cloned_break_list;
211 nir_cf_list_clone(&cloned_break_list, &break_list, loop->cf_node.parent,
212 remap_table);
213
214 nir_cf_reinsert(&cloned_break_list, nir_before_cf_node(&loop->cf_node));
215
216 /* Remove the loop */
217 nir_cf_node_remove(&loop->cf_node);
218
219 /* Delete the original loop body, break block & header */
220 nir_cf_delete(&lp_header);
221 nir_cf_delete(&loop_body);
222 nir_cf_delete(&break_list);
223
224 _mesa_hash_table_destroy(remap_table, NULL);
225 }
226
227 static void
move_cf_list_into_loop_term(nir_cf_list * lst,nir_loop_terminator * term)228 move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
229 {
230 /* Move the rest of the loop inside the continue-from-block */
231 nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
232
233 /* Remove the break */
234 nir_instr_remove(nir_block_last_instr(term->break_block));
235 }
236
237 static nir_cursor
get_complex_unroll_insert_location(nir_cf_node * node,bool continue_from_then)238 get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
239 {
240 if (node->type == nir_cf_node_loop) {
241 return nir_before_cf_node(node);
242 } else {
243 nir_if *if_stmt = nir_cf_node_as_if(node);
244 if (continue_from_then) {
245 return nir_after_block(nir_if_last_then_block(if_stmt));
246 } else {
247 return nir_after_block(nir_if_last_else_block(if_stmt));
248 }
249 }
250 }
251
252 /**
253 * Unroll a loop with two exists when the trip count of one of the exits is
254 * unknown. If continue_from_then is true, the loop is repeated only when the
255 * "then" branch of the if is taken; otherwise it is repeated only
256 * when the "else" branch of the if is taken.
257 *
258 * For example, if the input is:
259 *
260 * loop {
261 * ...phis/condition...
262 * if condition {
263 * ...then instructions...
264 * } else {
265 * ...continue instructions...
266 * break
267 * }
268 * ...body...
269 * }
270 *
271 * And the iteration count is 3, and unlimit_term->continue_from_then is true,
272 * then the output will be:
273 *
274 * ...condition...
275 * if condition {
276 * ...then instructions...
277 * ...body...
278 * if condition {
279 * ...then instructions...
280 * ...body...
281 * if condition {
282 * ...then instructions...
283 * ...body...
284 * } else {
285 * ...continue instructions...
286 * }
287 * } else {
288 * ...continue instructions...
289 * }
290 * } else {
291 * ...continue instructions...
292 * }
293 */
294 static void
complex_unroll(nir_loop * loop,nir_loop_terminator * unlimit_term,bool limiting_term_second)295 complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
296 bool limiting_term_second)
297 {
298 assert(nir_is_trivial_loop_if(unlimit_term->nif,
299 unlimit_term->break_block));
300
301 nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
302 assert(nir_is_trivial_loop_if(limiting_term->nif,
303 limiting_term->break_block));
304
305 loop_prepare_for_unroll(loop);
306
307 nir_block *header_blk = nir_loop_first_block(loop);
308
309 nir_cf_list lp_header;
310 nir_cf_list limit_break_list;
311 unsigned num_times_to_clone;
312 if (limiting_term_second) {
313 /* Pluck out the loop header */
314 nir_cf_extract(&lp_header, nir_before_block(header_blk),
315 nir_before_cf_node(&unlimit_term->nif->cf_node));
316
317 /* We need some special handling when its the second terminator causing
318 * us to exit the loop for example:
319 *
320 * for (int i = 0; i < uniform_lp_count; i++) {
321 * colour = vec4(0.0, 1.0, 0.0, 1.0);
322 *
323 * if (i == 1) {
324 * break;
325 * }
326 * ... any further code is unreachable after i == 1 ...
327 * }
328 */
329 nir_cf_list after_lt;
330 nir_if *limit_if = limiting_term->nif;
331 nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
332 nir_after_block(nir_loop_last_block(loop)));
333 move_cf_list_into_loop_term(&after_lt, limiting_term);
334
335 /* Because the trip count is the number of times we pass over the entire
336 * loop before hitting a break when the second terminator is the
337 * limiting terminator we can actually execute code inside the loop when
338 * trip count == 0 e.g. the code above the break. So we need to bump
339 * the trip_count in order for the code below to clone anything. When
340 * trip count == 1 we execute the code above the break twice and the
341 * code below it once so we need clone things twice and so on.
342 */
343 num_times_to_clone = loop->info->trip_count + 1;
344 } else {
345 /* Pluck out the loop header */
346 nir_cf_extract(&lp_header, nir_before_block(header_blk),
347 nir_before_cf_node(&limiting_term->nif->cf_node));
348
349 nir_block *first_break_block;
350 nir_block *first_continue_block;
351 get_first_blocks_in_terminator(limiting_term, &first_break_block,
352 &first_continue_block);
353
354 /* Remove the break then extract instructions from the break block so we
355 * can insert them in the innermost else of the unrolled loop.
356 */
357 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
358 nir_instr_remove(break_instr);
359 nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
360 nir_after_block(limiting_term->break_block));
361
362 nir_cf_list continue_list;
363 nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
364 nir_after_block(limiting_term->continue_from_block));
365
366 nir_cf_reinsert(&continue_list,
367 nir_after_cf_node(&limiting_term->nif->cf_node));
368
369 nir_cf_node_remove(&limiting_term->nif->cf_node);
370
371 num_times_to_clone = loop->info->trip_count;
372 }
373
374 /* In the terminator that we have no trip count for move everything after
375 * the terminator into the continue from branch.
376 */
377 nir_cf_list loop_end;
378 nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
379 nir_after_block(nir_loop_last_block(loop)));
380 move_cf_list_into_loop_term(&loop_end, unlimit_term);
381
382 /* Pluck out the loop body. */
383 nir_cf_list loop_body;
384 nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
385 nir_after_block(nir_loop_last_block(loop)));
386
387 struct hash_table *remap_table =
388 _mesa_hash_table_create(NULL, _mesa_hash_pointer,
389 _mesa_key_pointer_equal);
390
391 /* Set unroll_loc to the loop as we will insert the unrolled loop before it
392 */
393 nir_cf_node *unroll_loc = &loop->cf_node;
394
395 /* Temp lists to store the cloned loop as we unroll */
396 nir_cf_list unrolled_lp_body;
397 nir_cf_list cloned_header;
398
399 for (unsigned i = 0; i < num_times_to_clone; i++) {
400 /* Clone loop header */
401 nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
402 remap_table);
403
404 nir_cursor cursor =
405 get_complex_unroll_insert_location(unroll_loc,
406 unlimit_term->continue_from_then);
407
408 /* Insert cloned loop header */
409 nir_cf_reinsert(&cloned_header, cursor);
410
411 cursor =
412 get_complex_unroll_insert_location(unroll_loc,
413 unlimit_term->continue_from_then);
414
415 /* Clone loop body */
416 nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
417 remap_table);
418
419 unroll_loc = exec_node_data(nir_cf_node,
420 exec_list_get_tail(&unrolled_lp_body.list),
421 node);
422 assert(unroll_loc->type == nir_cf_node_block &&
423 exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
424
425 /* Get the unrolled if node */
426 unroll_loc = nir_cf_node_prev(unroll_loc);
427
428 /* Insert unrolled loop body */
429 nir_cf_reinsert(&unrolled_lp_body, cursor);
430 }
431
432 if (!limiting_term_second) {
433 assert(unroll_loc->type == nir_cf_node_if);
434
435 nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
436 remap_table);
437
438 nir_cursor cursor =
439 get_complex_unroll_insert_location(unroll_loc,
440 unlimit_term->continue_from_then);
441
442 /* Insert cloned loop header */
443 nir_cf_reinsert(&cloned_header, cursor);
444
445 /* Clone so things get properly remapped, and insert break block from
446 * the limiting terminator.
447 */
448 nir_cf_list cloned_break_blk;
449 nir_cf_list_clone(&cloned_break_blk, &limit_break_list,
450 loop->cf_node.parent, remap_table);
451
452 cursor =
453 get_complex_unroll_insert_location(unroll_loc,
454 unlimit_term->continue_from_then);
455
456 nir_cf_reinsert(&cloned_break_blk, cursor);
457 nir_cf_delete(&limit_break_list);
458 }
459
460 /* The loop has been unrolled so remove it. */
461 nir_cf_node_remove(&loop->cf_node);
462
463 /* Delete the original loop header and body */
464 nir_cf_delete(&lp_header);
465 nir_cf_delete(&loop_body);
466
467 _mesa_hash_table_destroy(remap_table, NULL);
468 }
469
470 static bool
is_loop_small_enough_to_unroll(nir_shader * shader,nir_loop_info * li)471 is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
472 {
473 unsigned max_iter = shader->options->max_unroll_iterations;
474
475 if (li->trip_count > max_iter)
476 return false;
477
478 if (li->force_unroll)
479 return true;
480
481 bool loop_not_too_large =
482 li->num_instructions * li->trip_count <= max_iter * LOOP_UNROLL_LIMIT;
483
484 return loop_not_too_large;
485 }
486
487 static bool
process_loops(nir_shader * sh,nir_cf_node * cf_node,bool * innermost_loop)488 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop)
489 {
490 bool progress = false;
491 nir_loop *loop;
492
493 switch (cf_node->type) {
494 case nir_cf_node_block:
495 return progress;
496 case nir_cf_node_if: {
497 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
498 foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
499 progress |= process_loops(sh, nested_node, innermost_loop);
500 foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
501 progress |= process_loops(sh, nested_node, innermost_loop);
502 return progress;
503 }
504 case nir_cf_node_loop: {
505 loop = nir_cf_node_as_loop(cf_node);
506 foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
507 progress |= process_loops(sh, nested_node, innermost_loop);
508 break;
509 }
510 default:
511 unreachable("unknown cf node type");
512 }
513
514 if (*innermost_loop) {
515 /* Don't attempt to unroll outer loops or a second inner loop in
516 * this pass wait until the next pass as we have altered the cf.
517 */
518 *innermost_loop = false;
519
520 if (loop->info->limiting_terminator == NULL)
521 return progress;
522
523 if (!is_loop_small_enough_to_unroll(sh, loop->info))
524 return progress;
525
526 if (loop->info->is_trip_count_known) {
527 simple_unroll(loop);
528 progress = true;
529 } else {
530 /* Attempt to unroll loops with two terminators. */
531 unsigned num_lt = list_length(&loop->info->loop_terminator_list);
532 if (num_lt == 2) {
533 bool limiting_term_second = true;
534 nir_loop_terminator *terminator =
535 list_last_entry(&loop->info->loop_terminator_list,
536 nir_loop_terminator, loop_terminator_link);
537
538
539 if (terminator->nif == loop->info->limiting_terminator->nif) {
540 limiting_term_second = false;
541 terminator =
542 list_first_entry(&loop->info->loop_terminator_list,
543 nir_loop_terminator, loop_terminator_link);
544 }
545
546 /* If the first terminator has a trip count of zero and is the
547 * limiting terminator just do a simple unroll as the second
548 * terminator can never be reached.
549 */
550 if (loop->info->trip_count == 0 && !limiting_term_second) {
551 simple_unroll(loop);
552 } else {
553 complex_unroll(loop, terminator, limiting_term_second);
554 }
555 progress = true;
556 }
557 }
558 }
559
560 return progress;
561 }
562
563 static bool
nir_opt_loop_unroll_impl(nir_function_impl * impl,nir_variable_mode indirect_mask)564 nir_opt_loop_unroll_impl(nir_function_impl *impl,
565 nir_variable_mode indirect_mask)
566 {
567 bool progress = false;
568 nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask);
569 nir_metadata_require(impl, nir_metadata_block_index);
570
571 foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
572 bool innermost_loop = true;
573 progress |= process_loops(impl->function->shader, node,
574 &innermost_loop);
575 }
576
577 if (progress)
578 nir_lower_regs_to_ssa_impl(impl);
579
580 return progress;
581 }
582
583 bool
nir_opt_loop_unroll(nir_shader * shader,nir_variable_mode indirect_mask)584 nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
585 {
586 bool progress = false;
587
588 nir_foreach_function(function, shader) {
589 if (function->impl) {
590 progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
591 }
592 }
593 return progress;
594 }
595