• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "compiler/glsl_types.h"
25 #include "loop_analysis.h"
26 #include "ir_hierarchical_visitor.h"
27 
28 #include "main/mtypes.h"
29 
30 namespace {
31 
32 class loop_unroll_visitor : public ir_hierarchical_visitor {
33 public:
loop_unroll_visitor(loop_state * state,const struct gl_shader_compiler_options * options)34    loop_unroll_visitor(loop_state *state,
35                        const struct gl_shader_compiler_options *options)
36    {
37       this->state = state;
38       this->progress = false;
39       this->options = options;
40    }
41 
42    virtual ir_visitor_status visit_leave(ir_loop *ir);
43    void simple_unroll(ir_loop *ir, int iterations);
44    void complex_unroll(ir_loop *ir, int iterations,
45                        bool continue_from_then_branch);
46    void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
47 
48    loop_state *state;
49 
50    bool progress;
51    const struct gl_shader_compiler_options *options;
52 };
53 
54 } /* anonymous namespace */
55 
56 static bool
is_break(ir_instruction * ir)57 is_break(ir_instruction *ir)
58 {
59    return ir != NULL && ir->ir_type == ir_type_loop_jump
60 		     && ((ir_loop_jump *) ir)->is_break();
61 }
62 
63 class loop_unroll_count : public ir_hierarchical_visitor {
64 public:
65    int nodes;
66    bool unsupported_variable_indexing;
67    bool array_indexed_by_induction_var_with_exact_iterations;
68    /* If there are nested loops, the node count will be inaccurate. */
69    bool nested_loop;
70 
loop_unroll_count(exec_list * list,loop_variable_state * ls,const struct gl_shader_compiler_options * options)71    loop_unroll_count(exec_list *list, loop_variable_state *ls,
72                      const struct gl_shader_compiler_options *options)
73       : ls(ls), options(options)
74    {
75       nodes = 0;
76       nested_loop = false;
77       unsupported_variable_indexing = false;
78       array_indexed_by_induction_var_with_exact_iterations = false;
79 
80       run(list);
81    }
82 
visit_enter(ir_assignment *)83    virtual ir_visitor_status visit_enter(ir_assignment *)
84    {
85       nodes++;
86       return visit_continue;
87    }
88 
visit_enter(ir_expression *)89    virtual ir_visitor_status visit_enter(ir_expression *)
90    {
91       nodes++;
92       return visit_continue;
93    }
94 
visit_enter(ir_loop *)95    virtual ir_visitor_status visit_enter(ir_loop *)
96    {
97       nested_loop = true;
98       return visit_continue;
99    }
100 
visit_enter(ir_dereference_array * ir)101    virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
102    {
103       /* Force unroll in case of dynamic indexing with sampler arrays
104        * when EmitNoIndirectSampler is set.
105        */
106       if (options->EmitNoIndirectSampler) {
107          if ((ir->array->type->is_array() &&
108               ir->array->type->contains_sampler()) &&
109              !ir->array_index->constant_expression_value()) {
110             unsupported_variable_indexing = true;
111             return visit_continue;
112          }
113       }
114 
115       /* Check for arrays variably-indexed by a loop induction variable.
116        * Unrolling the loop may convert that access into constant-indexing.
117        *
118        * Many drivers don't support particular kinds of variable indexing,
119        * and have to resort to using lower_variable_index_to_cond_assign to
120        * handle it.  This results in huge amounts of horrible code, so we'd
121        * like to avoid that if possible.  Here, we just note that it will
122        * happen.
123        */
124       if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
125           !ir->array_index->as_constant()) {
126          ir_variable *array = ir->array->variable_referenced();
127          loop_variable *lv = ls->get(ir->array_index->variable_referenced());
128          if (array && lv && lv->is_induction_var()) {
129             /* If an array is indexed by a loop induction variable, and the
130              * array size is exactly the number of loop iterations, this is
131              * probably a simple for-loop trying to access each element in
132              * turn; the application may expect it to be unrolled.
133              */
134             if (int(array->type->length) == ls->limiting_terminator->iterations)
135                array_indexed_by_induction_var_with_exact_iterations = true;
136 
137             switch (array->data.mode) {
138             case ir_var_auto:
139             case ir_var_temporary:
140             case ir_var_const_in:
141             case ir_var_function_in:
142             case ir_var_function_out:
143             case ir_var_function_inout:
144                if (options->EmitNoIndirectTemp)
145                   unsupported_variable_indexing = true;
146                break;
147             case ir_var_uniform:
148             case ir_var_shader_storage:
149                if (options->EmitNoIndirectUniform)
150                   unsupported_variable_indexing = true;
151                break;
152             case ir_var_shader_in:
153                if (options->EmitNoIndirectInput)
154                   unsupported_variable_indexing = true;
155                break;
156             case ir_var_shader_out:
157                if (options->EmitNoIndirectOutput)
158                   unsupported_variable_indexing = true;
159                break;
160             }
161          }
162       }
163       return visit_continue;
164    }
165 
166 private:
167    loop_variable_state *ls;
168    const struct gl_shader_compiler_options *options;
169 };
170 
171 
172 /**
173  * Unroll a loop which does not contain any jumps.  For example, if the input
174  * is:
175  *
176  *     (loop (...) ...instrs...)
177  *
178  * And the iteration count is 3, the output will be:
179  *
180  *     ...instrs... ...instrs... ...instrs...
181  */
182 void
simple_unroll(ir_loop * ir,int iterations)183 loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
184 {
185    void *const mem_ctx = ralloc_parent(ir);
186 
187    for (int i = 0; i < iterations; i++) {
188       exec_list copy_list;
189 
190       copy_list.make_empty();
191       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
192 
193       ir->insert_before(&copy_list);
194    }
195 
196    /* The loop has been replaced by the unrolled copies.  Remove the original
197     * loop from the IR sequence.
198     */
199    ir->remove();
200 
201    this->progress = true;
202 }
203 
204 
205 /**
206  * Unroll a loop whose last statement is an ir_if.  If \c
207  * continue_from_then_branch is true, the loop is repeated only when the
208  * "then" branch of the if is taken; otherwise it is repeated only when the
209  * "else" branch of the if is taken.
210  *
211  * For example, if the input is:
212  *
213  *     (loop (...)
214  *      ...body...
215  *      (if (cond)
216  *          (...then_instrs...)
217  *        (...else_instrs...)))
218  *
219  * And the iteration count is 3, and \c continue_from_then_branch is true,
220  * then the output will be:
221  *
222  *     ...body...
223  *     (if (cond)
224  *         (...then_instrs...
225  *          ...body...
226  *          (if (cond)
227  *              (...then_instrs...
228  *               ...body...
229  *               (if (cond)
230  *                   (...then_instrs...)
231  *                 (...else_instrs...)))
232  *            (...else_instrs...)))
233  *       (...else_instrs))
234  */
235 void
complex_unroll(ir_loop * ir,int iterations,bool continue_from_then_branch)236 loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
237                                     bool continue_from_then_branch)
238 {
239    void *const mem_ctx = ralloc_parent(ir);
240    ir_instruction *ir_to_replace = ir;
241 
242    for (int i = 0; i < iterations; i++) {
243       exec_list copy_list;
244 
245       copy_list.make_empty();
246       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
247 
248       ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
249       assert(ir_if != NULL);
250 
251       ir_to_replace->insert_before(&copy_list);
252       ir_to_replace->remove();
253 
254       /* placeholder that will be removed in the next iteration */
255       ir_to_replace =
256          new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
257 
258       exec_list *const list = (continue_from_then_branch)
259          ? &ir_if->then_instructions : &ir_if->else_instructions;
260 
261       list->push_tail(ir_to_replace);
262    }
263 
264    ir_to_replace->remove();
265 
266    this->progress = true;
267 }
268 
269 
270 /**
271  * Move all of the instructions which follow \c ir_if to the end of
272  * \c splice_dest.
273  *
274  * For example, in the code snippet:
275  *
276  *     (if (cond)
277  *         (...then_instructions...
278  *          break)
279  *       (...else_instructions...))
280  *     ...post_if_instructions...
281  *
282  * If \c ir_if points to the "if" instruction, and \c splice_dest points to
283  * (...else_instructions...), the code snippet is transformed into:
284  *
285  *     (if (cond)
286  *         (...then_instructions...
287  *          break)
288  *       (...else_instructions...
289  *        ...post_if_instructions...))
290  */
291 void
splice_post_if_instructions(ir_if * ir_if,exec_list * splice_dest)292 loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if,
293                                                  exec_list *splice_dest)
294 {
295    while (!ir_if->get_next()->is_tail_sentinel()) {
296       ir_instruction *move_ir = (ir_instruction *) ir_if->get_next();
297 
298       move_ir->remove();
299       splice_dest->push_tail(move_ir);
300    }
301 }
302 
303 
304 ir_visitor_status
visit_leave(ir_loop * ir)305 loop_unroll_visitor::visit_leave(ir_loop *ir)
306 {
307    loop_variable_state *const ls = this->state->get(ir);
308    int iterations;
309 
310    /* If we've entered a loop that hasn't been analyzed, something really,
311     * really bad has happened.
312     */
313    if (ls == NULL) {
314       assert(ls != NULL);
315       return visit_continue;
316    }
317 
318    if (ls->limiting_terminator == NULL) {
319       ir_instruction *last_ir =
320          (ir_instruction *) ir->body_instructions.get_tail();
321 
322       /* If a loop has no induction variable and the last instruction is
323        * a break, unroll the loop with a count of 1.  This is the classic
324        *
325        *    do {
326        *        // ...
327        *    } while (false)
328        *
329        * that is used to wrap multi-line macros.
330        *
331        * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to
332        * be at least num_loop_jumps instructions in the loop.
333        */
334       if (ls->num_loop_jumps == 1 && is_break(last_ir)) {
335          last_ir->remove();
336 
337          simple_unroll(ir, 1);
338       }
339 
340       /* Don't try to unroll loops where the number of iterations is not known
341        * at compile-time.
342        */
343       return visit_continue;
344    }
345 
346    iterations = ls->limiting_terminator->iterations;
347 
348    const int max_iterations = options->MaxUnrollIterations;
349 
350    /* Don't try to unroll loops that have zillions of iterations either.
351     */
352    if (iterations > max_iterations)
353       return visit_continue;
354 
355    /* Don't try to unroll nested loops and loops with a huge body.
356     */
357    loop_unroll_count count(&ir->body_instructions, ls, options);
358 
359    bool loop_too_large =
360       count.nested_loop || count.nodes * iterations > max_iterations * 5;
361 
362    if (loop_too_large && !count.unsupported_variable_indexing &&
363        !count.array_indexed_by_induction_var_with_exact_iterations)
364       return visit_continue;
365 
366    /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
367     * We'll be removing the limiting terminator before we unroll.
368     */
369    assert(ls->num_loop_jumps > 0);
370    unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
371 
372    if (predicted_num_loop_jumps > 1)
373       return visit_continue;
374 
375    if (predicted_num_loop_jumps == 0) {
376       ls->limiting_terminator->ir->remove();
377       simple_unroll(ir, iterations);
378       return visit_continue;
379    }
380 
381    ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail();
382    assert(last_ir != NULL);
383 
384    if (is_break(last_ir)) {
385       /* If the only loop-jump is a break at the end of the loop, the loop
386        * will execute exactly once.  Remove the break and use the simple
387        * unroller with an iteration count of 1.
388        */
389       last_ir->remove();
390 
391       ls->limiting_terminator->ir->remove();
392       simple_unroll(ir, 1);
393       return visit_continue;
394    }
395 
396    /* recognize loops in the form produced by ir_lower_jumps */
397    foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) {
398       /* Skip the limiting terminator, since it will go away when we
399        * unroll.
400        */
401       if (cur_ir == ls->limiting_terminator->ir)
402          continue;
403 
404       ir_if *ir_if = cur_ir->as_if();
405       if (ir_if != NULL) {
406          /* Determine which if-statement branch, if any, ends with a
407           * break.  The branch that did *not* have the break will get a
408           * temporary continue inserted in each iteration of the loop
409           * unroll.
410           *
411           * Note that since ls->num_loop_jumps is <= 1, it is impossible
412           * for both branches to end with a break.
413           */
414          ir_instruction *ir_if_last =
415             (ir_instruction *) ir_if->then_instructions.get_tail();
416 
417          if (is_break(ir_if_last)) {
418             ls->limiting_terminator->ir->remove();
419             splice_post_if_instructions(ir_if, &ir_if->else_instructions);
420             ir_if_last->remove();
421             complex_unroll(ir, iterations, false);
422             return visit_continue;
423          } else {
424             ir_if_last =
425                (ir_instruction *) ir_if->else_instructions.get_tail();
426 
427             if (is_break(ir_if_last)) {
428                ls->limiting_terminator->ir->remove();
429                splice_post_if_instructions(ir_if, &ir_if->then_instructions);
430                ir_if_last->remove();
431                complex_unroll(ir, iterations, true);
432                return visit_continue;
433             }
434          }
435       }
436    }
437 
438    /* Did not find the break statement.  It must be in a complex if-nesting,
439     * so don't try to unroll.
440     */
441    return visit_continue;
442 }
443 
444 
445 bool
unroll_loops(exec_list * instructions,loop_state * ls,const struct gl_shader_compiler_options * options)446 unroll_loops(exec_list *instructions, loop_state *ls,
447              const struct gl_shader_compiler_options *options)
448 {
449    loop_unroll_visitor v(ls, options);
450 
451    v.run(instructions);
452 
453    return v.progress;
454 }
455