• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Thomas Helland
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir_loop_analyze.h"
25 #include "util/hash_table.h"
26 #include "util/ralloc.h"
27 #include "nir.h"
28 #include "nir_constant_expressions.h"
29 
/* Per-loop scratch state threaded through the analysis helpers. */
typedef struct {
   /* The loop we store information for */
   nir_loop *loop;

   /* NOTE(review): presumably a mask of variable modes whose indirect
    * indexing should force unrolling — confirm against the pass entry point.
    */
   nir_variable_mode indirect_mask;

   /* NOTE(review): presumably forces unrolling when a sampler is indirectly
    * indexed by an induction variable — confirm against callers.
    */
   bool force_unroll_sampler_indirect;
} loop_info_state;
38 
39 static nir_loop_induction_variable *
get_loop_var(nir_def * value,loop_info_state * state)40 get_loop_var(nir_def *value, loop_info_state *state)
41 {
42    struct hash_entry *entry = _mesa_hash_table_search(state->loop->info->induction_vars, value);
43    if (entry)
44       return entry->data;
45    else
46       return NULL;
47 }
48 
/** Calculate an estimated cost in number of instructions
 *
 * We do this so that we don't unroll loops which will later get massively
 * inflated due to int64 or fp64 lowering.  The estimates provided here don't
 * have to be massively accurate; they just have to be good enough that loop
 * unrolling doesn't cause things to blow up too much.
 */
static unsigned
instr_cost(loop_info_state *state, nir_instr *instr,
           const nir_shader_compiler_options *options)
{
   /* Intrinsics and texture operations count as one instruction each. */
   if (instr->type == nir_instr_type_intrinsic ||
       instr->type == nir_instr_type_tex)
      return 1;

   /* Everything else that isn't ALU (phis, derefs, jumps, ...) is free. */
   if (instr->type != nir_instr_type_alu)
      return 0;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned cost = 1;

   if (nir_op_is_selection(alu->op)) {
      nir_scalar cond_scalar = { alu->src[0].src.ssa, 0 };
      if (nir_is_terminator_condition_with_two_inputs(cond_scalar)) {
         nir_instr *sel_cond = alu->src[0].src.ssa->parent_instr;
         nir_alu_instr *sel_alu = nir_instr_as_alu(sel_cond);

         nir_scalar rhs, lhs;
         lhs = nir_scalar_chase_alu_src(cond_scalar, 0);
         rhs = nir_scalar_chase_alu_src(cond_scalar, 1);

         /* If the select's condition is a comparison between a constant and
          * a basic induction variable we know that it will be eliminated once
          * the loop is unrolled so here we assign it a cost of 0.
          */
         if ((nir_src_is_const(sel_alu->src[0].src) &&
              get_loop_var(rhs.def, state)) ||
             (nir_src_is_const(sel_alu->src[1].src) &&
              get_loop_var(lhs.def, state))) {
            /* Also if the select's condition is only used by the select then
             * remove that alu instruction's cost from the cost total also.
             * NOTE(review): returning -1 from an unsigned function wraps to
             * UINT_MAX; this relies on the caller accumulating costs with
             * wrapping unsigned addition so the net effect is subtracting 1
             * — confirm against the caller.
             */
            if (!list_is_singular(&sel_alu->def.uses) ||
                nir_def_used_by_if(&sel_alu->def))
               return 0;
            else
               return -1;
         }
      }
   }

   if (alu->op == nir_op_flrp) {
      /* Lowered flrp expands to roughly three ALU instructions. */
      if ((options->lower_flrp16 && alu->def.bit_size == 16) ||
          (options->lower_flrp32 && alu->def.bit_size == 32) ||
          (options->lower_flrp64 && alu->def.bit_size == 64))
         cost *= 3;
   }

   /* Assume everything 16 or 32-bit is cheap.
    *
    * There are no 64-bit ops that don't have a 64-bit thing as their
    * destination or first source.
    */
   if (alu->def.bit_size < 64 &&
       nir_src_bit_size(alu->src[0].src) < 64)
      return cost;

   bool is_fp64 = alu->def.bit_size == 64 &&
                  nir_alu_type_get_base_type(info->output_type) == nir_type_float;
   for (unsigned i = 0; i < info->num_inputs; i++) {
      if (nir_src_bit_size(alu->src[i].src) == 64 &&
          nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)
         is_fp64 = true;
   }

   if (is_fp64) {
      /* If it's something lowered normally, it's expensive. */
      if (options->lower_doubles_options &
          nir_lower_doubles_op_to_options_mask(alu->op))
         cost *= 20;

      /* If it's full software, it's even more expensive */
      if (options->lower_doubles_options & nir_lower_fp64_full_software) {
         cost *= 100;
         state->loop->info->has_soft_fp64 = true;
      }

      return cost;
   } else {
      if (options->lower_int64_options &
          nir_lower_int64_op_to_options_mask(alu->op)) {
         /* These require doing the full division algorithm. */
         if (alu->op == nir_op_idiv || alu->op == nir_op_udiv ||
             alu->op == nir_op_imod || alu->op == nir_op_umod ||
             alu->op == nir_op_irem)
            return cost * 100;

         /* Other int64 lowering isn't usually all that expensive */
         return cost * 5;
      }

      return cost;
   }
}
154 
155 /* If all of the instruction sources point to identical ALU instructions (as
156  * per nir_instrs_equal), return one of the ALU instructions.  Otherwise,
157  * return NULL.
158  */
159 static nir_alu_instr *
phi_instr_as_alu(nir_phi_instr * phi)160 phi_instr_as_alu(nir_phi_instr *phi)
161 {
162    nir_alu_instr *first = NULL;
163    nir_foreach_phi_src(src, phi) {
164       if (src->src.ssa->parent_instr->type != nir_instr_type_alu)
165          return NULL;
166 
167       nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr);
168       if (first == NULL) {
169          first = alu;
170       } else {
171          if (!nir_instrs_equal(&first->instr, &alu->instr))
172             return NULL;
173       }
174    }
175 
176    return first;
177 }
178 
179 static bool
alu_src_has_identity_swizzle(nir_alu_instr * alu,unsigned src_idx)180 alu_src_has_identity_swizzle(nir_alu_instr *alu, unsigned src_idx)
181 {
182    assert(nir_op_infos[alu->op].input_sizes[src_idx] == 0);
183    for (unsigned i = 0; i < alu->def.num_components; i++) {
184       if (alu->src[src_idx].swizzle[i] != i)
185          return false;
186    }
187 
188    return true;
189 }
190 
191 static bool
is_only_uniform_src(nir_src * src)192 is_only_uniform_src(nir_src *src)
193 {
194    nir_instr *instr = src->ssa->parent_instr;
195 
196    switch (instr->type) {
197    case nir_instr_type_alu: {
198       /* Return true if all sources return true. */
199       nir_alu_instr *alu = nir_instr_as_alu(instr);
200       for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
201          if (!is_only_uniform_src(&alu->src[i].src))
202             return false;
203       }
204       return true;
205    }
206 
207    case nir_instr_type_intrinsic: {
208       nir_intrinsic_instr *inst = nir_instr_as_intrinsic(instr);
209       /* current uniform inline only support load ubo */
210       return inst->intrinsic == nir_intrinsic_load_ubo;
211    }
212 
213    case nir_instr_type_load_const:
214       /* Always return true for constants. */
215       return true;
216 
217    default:
218       return false;
219    }
220 }
221 
/* Detect basic induction variables of the loop and record them in the
 * loop-info hash table.  Returns true if at least one was found.
 */
static bool
compute_induction_information(loop_info_state *state)
{
   bool progress = false;

   /* We are only interested in checking phis for the basic induction
    * variable case as it's simple to detect. All basic induction variables
    * have a phi node.
    */
   nir_block *header = nir_loop_first_block(state->loop);
   nir_block *preheader = nir_block_cf_tree_prev(header);

   nir_foreach_phi(phi, header) {
      nir_loop_induction_variable var = { .basis = &phi->def };

      nir_foreach_phi_src(phi_src, phi) {
         nir_def *src = phi_src->src.ssa;

         /* The source coming from the preheader carries the initial value. */
         if (phi_src->pred == preheader) {
            var.init_src = &phi_src->src;
            continue;
         }

         /* If one of the sources is in an if branch or nested loop then don't
          * attempt to go any further.
          */
         if (src->parent_instr->block->cf_node.parent != &state->loop->cf_node)
            break;

         /* Detect induction variables that are incremented in both branches
          * of an unnested if rather than in a loop block.
          */
         if (src->parent_instr->type == nir_instr_type_phi) {
            nir_phi_instr *src_phi = nir_instr_as_phi(src->parent_instr);
            nir_alu_instr *src_phi_alu = phi_instr_as_alu(src_phi);
            if (src_phi_alu) {
               src = &src_phi_alu->def;
            }
         }

         if (src->parent_instr->type == nir_instr_type_alu && !var.update_src) {
            var.def = src;
            nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);

            /* Check for unsupported alu operations */
            if (alu->op != nir_op_iadd && alu->op != nir_op_fadd &&
                alu->op != nir_op_imul && alu->op != nir_op_fmul &&
                alu->op != nir_op_ishl && alu->op != nir_op_ishr &&
                alu->op != nir_op_ushr)
               break;

            if (nir_op_infos[alu->op].num_inputs == 2) {
               for (unsigned i = 0; i < 2; i++) {
                  /* Is one of the operands const or uniform, and the other the phi.
                   * The phi source can't be swizzled in any way.
                   */
                  if (alu->src[1 - i].src.ssa == &phi->def &&
                      alu_src_has_identity_swizzle(alu, 1 - i)) {
                     if (is_only_uniform_src(&alu->src[i].src))
                        var.update_src = alu->src + i;
                  }
               }
            }

            if (!var.update_src)
               break;
         } else {
            var.update_src = NULL;
            break;
         }
      }

      /* Only record variables with a uniform initial value and a single
       * recognized update.
       */
      if (var.update_src && var.init_src &&
          is_only_uniform_src(var.init_src)) {
         /* Insert induction variable into hash table. */
         struct hash_table *vars = state->loop->info->induction_vars;
         nir_loop_induction_variable *induction_var = ralloc(vars, nir_loop_induction_variable);
         *induction_var = var;
         /* Keyed by both the update def and the phi so either can be used
          * for lookup via get_loop_var().
          */
         _mesa_hash_table_insert(vars, induction_var->def, induction_var);
         _mesa_hash_table_insert(vars, induction_var->basis, induction_var);
         progress = true;
      }
   }

   return progress;
}
308 
/* Collect the loop's terminators (ifs whose then- or else-branch ends in a
 * break) into info->loop_terminator_list.  Returns true if at least one
 * terminator was found; marks the loop complex and returns false when a
 * break exists that does not fit the trivial-if shape.
 */
static bool
find_loop_terminators(loop_info_state *state)
{
   bool success = false;
   foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) {
      if (node->type == nir_cf_node_if) {
         nir_if *nif = nir_cf_node_as_if(node);

         nir_block *break_blk = NULL;
         nir_block *continue_from_blk = NULL;
         bool continue_from_then = true;

         nir_block *last_then = nir_if_last_then_block(nif);
         nir_block *last_else = nir_if_last_else_block(nif);
         if (nir_block_ends_in_break(last_then)) {
            break_blk = last_then;
            continue_from_blk = last_else;
            continue_from_then = false;
         } else if (nir_block_ends_in_break(last_else)) {
            break_blk = last_else;
            continue_from_blk = last_then;
         }

         /* If there is a break then we should find a terminator. If we can
          * not find a loop terminator, but there is a break-statement then
          * we should return false so that we do not try to find trip-count.
          */
         if (!nir_is_trivial_loop_if(nif, break_blk)) {
            state->loop->info->complex_loop = true;
            return false;
         }

         /* Continue if the if contained no jumps at all */
         if (!break_blk)
            continue;

         /* A phi-sourced condition can't be analyzed as a simple
          * terminator condition.
          */
         if (nif->condition.ssa->parent_instr->type == nir_instr_type_phi) {
            state->loop->info->complex_loop = true;
            return false;
         }

         nir_loop_terminator *terminator =
            rzalloc(state->loop->info, nir_loop_terminator);

         list_addtail(&terminator->loop_terminator_link,
                      &state->loop->info->loop_terminator_list);

         terminator->nif = nif;
         terminator->break_block = break_blk;
         terminator->continue_from_block = continue_from_blk;
         terminator->continue_from_then = continue_from_then;
         terminator->conditional_instr = nif->condition.ssa->parent_instr;

         success = true;
      }
   }

   return success;
}
368 
369 /* This function looks for an array access within a loop that uses an
370  * induction variable for the array index. If found it returns the size of the
371  * array, otherwise 0 is returned. If we find an induction var we pass it back
372  * to the caller via array_index_out.
373  */
374 static unsigned
find_array_access_via_induction(loop_info_state * state,nir_deref_instr * deref,nir_loop_induction_variable ** array_index_out)375 find_array_access_via_induction(loop_info_state *state,
376                                 nir_deref_instr *deref,
377                                 nir_loop_induction_variable **array_index_out)
378 {
379    for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
380       if (d->deref_type != nir_deref_type_array)
381          continue;
382 
383       nir_loop_induction_variable *array_index = get_loop_var(d->arr.index.ssa, state);
384 
385       if (!array_index)
386          continue;
387 
388       if (array_index_out)
389          *array_index_out = array_index;
390 
391       nir_deref_instr *parent = nir_deref_instr_parent(d);
392 
393       if (glsl_type_is_array_or_matrix(parent->type)) {
394          return glsl_get_length(parent->type);
395       } else {
396          assert(glsl_type_is_vector(parent->type));
397          return glsl_get_vector_elements(parent->type);
398       }
399    }
400 
401    return 0;
402 }
403 
404 static unsigned
guess_loop_limit(loop_info_state * state)405 guess_loop_limit(loop_info_state *state)
406 {
407    unsigned min_array_size = UINT_MAX;
408 
409    nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
410       nir_foreach_instr(instr, block) {
411          if (instr->type != nir_instr_type_intrinsic)
412             continue;
413 
414          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
415 
416          /* Check for arrays variably-indexed by a loop induction variable. */
417          if (intrin->intrinsic == nir_intrinsic_load_deref ||
418              intrin->intrinsic == nir_intrinsic_store_deref ||
419              intrin->intrinsic == nir_intrinsic_copy_deref) {
420 
421             nir_loop_induction_variable *array_idx = NULL;
422             unsigned array_size =
423                find_array_access_via_induction(state,
424                                                nir_src_as_deref(intrin->src[0]),
425                                                &array_idx);
426             if (array_idx)
427                min_array_size = MIN2(min_array_size, array_size);
428 
429             if (intrin->intrinsic != nir_intrinsic_copy_deref)
430                continue;
431 
432             array_size =
433                find_array_access_via_induction(state,
434                                                nir_src_as_deref(intrin->src[1]),
435                                                &array_idx);
436             if (array_idx)
437                min_array_size = MIN2(min_array_size, array_size);
438          }
439       }
440    }
441 
442    if (min_array_size != UINT_MAX)
443       return min_array_size;
444    else
445       return 0;
446 }
447 
448 static nir_op invert_comparison_if_needed(nir_op alu_op, bool invert);
449 
450 /* Returns whether "limit_op(a, b) alu_op c" is equivalent to "(a alu_op c) || (b alu_op c)". */
451 static bool
is_minmax_compatible(nir_op limit_op,nir_op alu_op,bool limit_rhs,bool invert_cond)452 is_minmax_compatible(nir_op limit_op, nir_op alu_op, bool limit_rhs, bool invert_cond)
453 {
454    bool is_max;
455    switch (limit_op) {
456    case nir_op_imin:
457    case nir_op_fmin:
458    case nir_op_umin:
459       is_max = false;
460       break;
461    case nir_op_imax:
462    case nir_op_fmax:
463    case nir_op_umax:
464       is_max = true;
465       break;
466    default:
467       return false;
468    }
469 
470    if (nir_op_infos[limit_op].input_types[0] != nir_op_infos[alu_op].input_types[0])
471       return false;
472 
473    /* Comparisons we can split are:
474     * - min(a, b) < c
475     * - c < max(a, b)
476     * - max(a, b) >= c
477     * - c >= min(a, b)
478     */
479    switch (invert_comparison_if_needed(alu_op, invert_cond)) {
480    case nir_op_ilt:
481    case nir_op_flt:
482    case nir_op_ult:
483       return (!limit_rhs && !is_max) || (limit_rhs && is_max);
484    case nir_op_ige:
485    case nir_op_fge:
486    case nir_op_uge:
487       return (!limit_rhs && is_max) || (limit_rhs && !is_max);
488    default:
489       return false;
490    }
491 }
492 
493 static bool
try_find_limit_of_alu(nir_scalar limit,nir_const_value * limit_val,nir_op alu_op,bool invert_cond,nir_loop_terminator * terminator,loop_info_state * state)494 try_find_limit_of_alu(nir_scalar limit, nir_const_value *limit_val, nir_op alu_op,
495                       bool invert_cond, nir_loop_terminator *terminator,
496                       loop_info_state *state)
497 {
498    if (!nir_scalar_is_alu(limit))
499       return false;
500 
501    nir_op limit_op = nir_scalar_alu_op(limit);
502    if (is_minmax_compatible(limit_op, alu_op, !terminator->induction_rhs, invert_cond)) {
503       for (unsigned i = 0; i < 2; i++) {
504          nir_scalar src = nir_scalar_chase_alu_src(limit, i);
505          if (nir_scalar_is_const(src)) {
506             *limit_val = nir_scalar_as_const_value(src);
507             terminator->exact_trip_count_unknown = true;
508             return true;
509          }
510       }
511    }
512 
513    return false;
514 }
515 
516 static nir_const_value
eval_const_unop(nir_op op,unsigned bit_size,nir_const_value src0,unsigned execution_mode)517 eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0,
518                 unsigned execution_mode)
519 {
520    assert(nir_op_infos[op].num_inputs == 1);
521    nir_const_value dest;
522    nir_const_value *src[1] = { &src0 };
523    nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode);
524    return dest;
525 }
526 
527 static nir_const_value
eval_const_binop(nir_op op,unsigned bit_size,nir_const_value src0,nir_const_value src1,unsigned execution_mode)528 eval_const_binop(nir_op op, unsigned bit_size,
529                  nir_const_value src0, nir_const_value src1,
530                  unsigned execution_mode)
531 {
532    assert(nir_op_infos[op].num_inputs == 2);
533    nir_const_value dest;
534    nir_const_value *src[2] = { &src0, &src1 };
535    nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode);
536    return dest;
537 }
538 
539 static int
find_replacement(const nir_scalar * originals,nir_scalar key,unsigned num_replacements)540 find_replacement(const nir_scalar *originals, nir_scalar key,
541                  unsigned num_replacements)
542 {
543    for (int i = 0; i < num_replacements; i++) {
544       if (nir_scalar_equal(originals[i], key))
545          return i;
546    }
547 
548    return -1;
549 }
550 
/**
 * Try to evaluate an ALU instruction as a constant with a replacement
 *
 * Much like \c nir_opt_constant_folding.c:try_fold_alu, this method attempts
 * to evaluate an ALU instruction as a constant. There are two significant
 * differences.
 *
 * First, this method performs the evaluation recursively. If any source of
 * the ALU instruction is not itself a constant, it is first evaluated.
 *
 * Second, if the SSA value \c original is encountered as a source of the ALU
 * instruction, the value \c replacement is substituted.
 *
 * The intended purpose of this function is to evaluate an arbitrary
 * expression involving a loop induction variable. In this case, \c original
 * would be the phi node associated with the induction variable, and
 * \c replacement is the initial value of the induction variable.
 *
 * \returns true if the ALU instruction can be evaluated as constant (after
 * applying the previously described substitution) or false otherwise.
 */
static bool
try_eval_const_alu(nir_const_value *dest, nir_scalar alu_s, const nir_scalar *originals,
                   const nir_const_value *replacements,
                   unsigned num_replacements, unsigned execution_mode)
{
   nir_alu_instr *alu = nir_instr_as_alu(alu_s.def->parent_instr);

   /* Ops with a fixed output size (vector-producing ops) are not handled. */
   if (nir_op_infos[alu->op].output_size)
      return false;

   /* In the case that any outputs/inputs have unsized types, then we need to
    * guess the bit-size. In this case, the validator ensures that all
    * bit-sizes match so we can just take the bit-size from first
    * output/input with an unsized type. If all the outputs/inputs are sized
    * then we don't need to guess the bit-size at all because the code we
    * generate for constant opcodes in this case already knows the sizes of
    * the types involved and does not need the provided bit-size for anything
    * (although it still requires to receive a valid bit-size).
    */
   unsigned bit_size = 0;
   if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type)) {
      bit_size = alu->def.bit_size;
   } else {
      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i]))
            bit_size = alu->src[i].src.ssa->bit_size;
      }

      /* All sized: any valid bit-size will do; 32 is the conventional pick. */
      if (bit_size == 0)
         bit_size = 32;
   }

   nir_const_value src[NIR_MAX_VEC_COMPONENTS];
   nir_const_value *src_ptrs[NIR_MAX_VEC_COMPONENTS];

   /* Resolve each source: constant, replacement, or recursive evaluation. */
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      nir_scalar src_s = nir_scalar_chase_alu_src(alu_s, i);

      src_ptrs[i] = &src[i];
      if (nir_scalar_is_const(src_s)) {
         src[i] = nir_scalar_as_const_value(src_s);
         continue;
      }

      int r = find_replacement(originals, src_s, num_replacements);
      if (r >= 0) {
         src[i] = replacements[r];
      } else if (!nir_scalar_is_alu(src_s) ||
                 !try_eval_const_alu(&src[i], src_s,
                                     originals, replacements,
                                     num_replacements, execution_mode)) {
         return false;
      }
   }

   nir_eval_const_opcode(alu->op, dest, 1, bit_size, src_ptrs, execution_mode);

   return true;
}
631 
632 static nir_op
invert_comparison_if_needed(nir_op alu_op,bool invert)633 invert_comparison_if_needed(nir_op alu_op, bool invert)
634 {
635    if (!invert)
636       return alu_op;
637 
638    switch (alu_op) {
639       case nir_op_fge:
640          return nir_op_flt;
641       case nir_op_ige:
642          return nir_op_ilt;
643       case nir_op_uge:
644          return nir_op_ult;
645       case nir_op_flt:
646          return nir_op_fge;
647       case nir_op_ilt:
648          return nir_op_ige;
649       case nir_op_ult:
650          return nir_op_uge;
651       case nir_op_feq:
652          return nir_op_fneu;
653       case nir_op_ieq:
654          return nir_op_ine;
655       case nir_op_fneu:
656          return nir_op_feq;
657       case nir_op_ine:
658          return nir_op_ieq;
659       default:
660          unreachable("Unsuported comparison!");
661    }
662 }
663 
/* Analytically compute the iteration count for a loop of the form
 * "for (i = initial; i cond_op limit; i += step)", or -1 when it cannot be
 * determined (or the loop is infinite).
 */
static int32_t
get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step,
              nir_const_value limit, bool invert_cond, unsigned bit_size,
              unsigned execution_mode)
{
   nir_const_value span, iter;
   unsigned iter_bit_size = bit_size;

   switch (invert_comparison_if_needed(cond_op, invert_cond)) {
   case nir_op_ine:
      /* In order for execution to be here, limit must be the same as initial.
       * Otherwise will_break_on_first_iteration would have returned false.
       * If step is zero, the loop is infinite.  Otherwise the loop will
       * execute once.
       */
      return step.u64 == 0 ? -1 : 1;

   case nir_op_ige:
   case nir_op_ilt:
   case nir_op_ieq:
      /* Signed integer: iterations = (limit - initial) / step. */
      span = eval_const_binop(nir_op_isub, bit_size, limit, initial,
                              execution_mode);
      iter = eval_const_binop(nir_op_idiv, bit_size, span, step,
                              execution_mode);
      break;

   case nir_op_uge:
   case nir_op_ult:
      /* Unsigned integer: same span, but unsigned division. */
      span = eval_const_binop(nir_op_isub, bit_size, limit, initial,
                              execution_mode);
      iter = eval_const_binop(nir_op_udiv, bit_size, span, step,
                              execution_mode);
      break;

   case nir_op_fneu:
      /* In order for execution to be here, limit must be the same as initial.
       * Otherwise will_break_on_first_iteration would have returned false.
       * If step is zero, the loop is infinite.  Otherwise the loop will
       * execute once.
       *
       * This is a little more tricky for floating point since X-Y might still
       * be X even if Y is not zero.  Instead check that (initial + step) !=
       * initial.
       */
      span = eval_const_binop(nir_op_fadd, bit_size, initial, step,
                              execution_mode);
      iter = eval_const_binop(nir_op_feq, bit_size, initial,
                              span, execution_mode);

      /* return (initial + step) == initial ? -1 : 1 */
      return iter.b ? -1 : 1;

   case nir_op_fge:
   case nir_op_flt:
   case nir_op_feq:
      /* Float: compute (limit - initial) / step and truncate to int64. */
      span = eval_const_binop(nir_op_fsub, bit_size, limit, initial,
                              execution_mode);
      iter = eval_const_binop(nir_op_fdiv, bit_size, span,
                              step, execution_mode);
      iter = eval_const_unop(nir_op_f2i64, bit_size, iter, execution_mode);
      iter_bit_size = 64;
      break;

   default:
      return -1;
   }

   /* Reject counts that overflow the signed range of the iteration type. */
   uint64_t iter_u64 = nir_const_value_as_uint(iter, iter_bit_size);
   return iter_u64 > u_intN_max(iter_bit_size) ? -1 : (int)iter_u64;
}
734 
/* Determine the trip count by simulation: starting from the initial value,
 * repeatedly constant-fold the terminator condition and then the increment,
 * up to max_unroll_iterations times.  Returns the iteration at which the
 * break condition first holds, or -1 if it never does (within the budget)
 * or the condition cannot be constant-evaluated.
 */
static int32_t
get_iteration_empirical(nir_scalar cond, nir_alu_instr *incr_alu,
                        nir_scalar basis, nir_const_value initial,
                        nir_scalar limit_basis, nir_const_value limit,
                        bool invert_cond, unsigned execution_mode,
                        unsigned max_unroll_iterations)
{
   int iter_count = 0;
   nir_const_value result;

   const nir_scalar incr = nir_get_scalar(&incr_alu->def, basis.comp);

   /* Substitute the induction phi and the limit basis with concrete values. */
   const nir_scalar original[] = {basis, limit_basis};
   nir_const_value replacement[] = {initial, limit};

   while (iter_count <= max_unroll_iterations) {
      bool success;

      success = try_eval_const_alu(&result, cond, original, replacement,
                                   2, execution_mode);
      if (!success)
         return -1;

      /* Loop breaks once the (possibly inverted) condition is satisfied. */
      const bool cond_succ = invert_cond ? !result.b : result.b;
      if (cond_succ)
         return iter_count;

      iter_count++;

      /* Advance the induction variable for the next simulated iteration.
       * This must succeed since the condition above already evaluated.
       */
      success = try_eval_const_alu(&result, incr, original, replacement,
                                   2, execution_mode);
      assert(success);

      replacement[0] = result;
   }

   return -1;
}
773 
774 static bool
will_break_on_first_iteration(nir_scalar cond,nir_scalar basis,nir_scalar limit_basis,nir_const_value initial,nir_const_value limit,bool invert_cond,unsigned execution_mode)775 will_break_on_first_iteration(nir_scalar cond, nir_scalar basis,
776                               nir_scalar limit_basis,
777                               nir_const_value initial, nir_const_value limit,
778                               bool invert_cond, unsigned execution_mode)
779 {
780    nir_const_value result;
781 
782    const nir_scalar originals[2] = { basis, limit_basis };
783    const nir_const_value replacements[2] = { initial, limit };
784 
785    ASSERTED bool success = try_eval_const_alu(&result, cond, originals,
786                                               replacements, 2, execution_mode);
787 
788    assert(success);
789 
790    return invert_cond ? !result.b : result.b;
791 }
792 
/* Check whether the loop exit condition holds after iter_int iterations,
 * i.e. evaluate "cond_op(initial + iter_int * step, limit)" (operand order
 * per limit_rhs) and apply the optional inversion.
 */
static bool
test_iterations(int32_t iter_int, nir_const_value step,
                nir_const_value limit, nir_op cond_op, unsigned bit_size,
                nir_alu_type induction_base_type,
                nir_const_value initial, bool limit_rhs, bool invert_cond,
                unsigned execution_mode)
{
   assert(nir_op_infos[cond_op].num_inputs == 2);

   nir_const_value iter_src;
   nir_op mul_op;
   nir_op add_op;
   /* Pick float or integer arithmetic to match the induction variable. */
   switch (induction_base_type) {
   case nir_type_float:
      iter_src = nir_const_value_for_float(iter_int, bit_size);
      mul_op = nir_op_fmul;
      add_op = nir_op_fadd;
      break;
   case nir_type_int:
   case nir_type_uint:
      iter_src = nir_const_value_for_int(iter_int, bit_size);
      mul_op = nir_op_imul;
      add_op = nir_op_iadd;
      break;
   default:
      unreachable("Unhandled induction variable base type!");
   }

   /* Multiply the iteration count we are testing by the number of times we
    * step the induction variable each iteration.
    */
   nir_const_value mul_result =
      eval_const_binop(mul_op, bit_size, iter_src, step, execution_mode);

   /* Add the initial value to the accumulated induction variable total */
   nir_const_value add_result =
      eval_const_binop(add_op, bit_size, mul_result, initial, execution_mode);

   /* Place the limit on the side of the comparison it occupied originally. */
   nir_const_value *src[2];
   src[limit_rhs ? 0 : 1] = &add_result;
   src[limit_rhs ? 1 : 0] = &limit;

   /* Evaluate the loop exit condition */
   nir_const_value result;
   nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode);

   return invert_cond ? !result.b : result.b;
}
841 
/* Compute the trip count for a loop whose basic induction variable (basis)
 * starts at the constant "initial", is updated each iteration by "alu" with
 * the constant "step", and is compared against the constant "limit" by the
 * terminator condition "cond" using comparison opcode "alu_op".
 *
 * Returns the number of iterations, or -1 if it cannot be determined.
 */
static int
calculate_iterations(nir_scalar basis, nir_scalar limit_basis,
                     nir_const_value initial, nir_const_value step,
                     nir_const_value limit, nir_alu_instr *alu,
                     nir_scalar cond, nir_op alu_op, bool limit_rhs,
                     bool invert_cond, unsigned execution_mode,
                     unsigned max_unroll_iterations)
{
   /* nir_op_isub should have been lowered away by this point */
   assert(alu->op != nir_op_isub);

   /* Make sure the alu type for our induction variable is compatible with
    * the conditional alu's input type. If it's not, something has gone
    * really wrong.
    */
   nir_alu_type induction_base_type =
      nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
   if (induction_base_type == nir_type_int || induction_base_type == nir_type_uint) {
      assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_int ||
             nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_uint);
   } else {
      assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[0]) ==
             induction_base_type);
   }

   /* do-while loops can increment the starting value before the condition is
    * checked. e.g.
    *
    *    do {
    *        ndx++;
    *     } while (ndx < 3);
    *
    * Here we check if the induction variable is used directly by the loop
    * condition and if so we assume we need to step the initial value.
    */
   unsigned trip_offset = 0;
   nir_alu_instr *cond_alu = nir_instr_as_alu(cond.def->parent_instr);
   if (cond_alu->src[0].src.ssa == &alu->def ||
       cond_alu->src[1].src.ssa == &alu->def) {
      trip_offset = 1;
   }

   unsigned bit_size = nir_src_bit_size(alu->src[0].src);

   /* get_iteration works under assumption that iterator will be
    * incremented or decremented until it hits the limit,
    * however if the loop condition is false on the first iteration
    * get_iteration's assumption is broken. Handle such loops first.
    */
   if (will_break_on_first_iteration(cond, basis, limit_basis, initial,
                                     limit, invert_cond, execution_mode)) {
      return 0;
   }

   /* For loops incremented with addition operation, it's easy to
    * calculate the number of iterations theoretically. Even though it
    * is possible for other operations as well, it is much more error
    * prone, and doesn't cover all possible cases. So, we try to
    * emulate the loop.
    */
   int iter_int;
   switch (alu->op) {
   case nir_op_iadd:
   case nir_op_fadd:
      assert(nir_src_bit_size(alu->src[0].src) ==
             nir_src_bit_size(alu->src[1].src));

      iter_int = get_iteration(alu_op, initial, step, limit, invert_cond,
                               bit_size, execution_mode);
      break;
   case nir_op_fmul:
      /* Detecting non-zero loop counts when the loop increment is floating
       * point multiplication triggers a preexisting problem in
       * glsl-fs-loop-unroll-mul-fp64.shader_test. See
       * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3445#note_1779438.
       */
      return -1;
   case nir_op_imul:
   case nir_op_ishl:
   case nir_op_ishr:
   case nir_op_ushr:
      /* Multiplicative/shift updates are emulated iteration-by-iteration
       * rather than solved in closed form.
       */
      return get_iteration_empirical(cond, alu, basis, initial,
                                     limit_basis, limit, invert_cond,
                                     execution_mode, max_unroll_iterations);
   default:
      unreachable("Invalid induction variable increment operation.");
   }

   /* If iter_int is negative the loop is ill-formed, or the conditional is
    * unsigned with a huge iteration count, so don't bother going any
    * further.
    */
   if (iter_int < 0)
      return -1;

   /* Inequality exits terminate exactly when the limit value is reached, so
    * the computed count needs no off-by-one correction below.
    */
   nir_op actual_alu_op = invert_comparison_if_needed(alu_op, invert_cond);
   if (actual_alu_op == nir_op_ine || actual_alu_op == nir_op_fneu)
      return iter_int;

   /* An explanation from the GLSL unrolling pass:
    *
    * Make sure that the calculated number of iterations satisfies the exit
    * condition.  This is needed to catch off-by-one errors and some types of
    * ill-formed loops.  For example, we need to detect that the following
    * loop does not have a maximum iteration count.
    *
    *    for (float x = 0.0; x != 0.9; x += 0.2);
    */
   for (int bias = -1; bias <= 1; bias++) {
      const int iter_bias = iter_int + bias;
      if (iter_bias < 1)
         continue;

      if (test_iterations(iter_bias, step, limit, alu_op, bit_size,
                          induction_base_type, initial,
                          limit_rhs, invert_cond, execution_mode)) {
         return iter_bias - trip_offset;
      }
   }

   return -1;
}
962 
963 static bool
get_induction_and_limit_vars(nir_scalar cond,nir_scalar * ind,nir_scalar * limit,bool * limit_rhs,loop_info_state * state)964 get_induction_and_limit_vars(nir_scalar cond,
965                              nir_scalar *ind,
966                              nir_scalar *limit,
967                              bool *limit_rhs,
968                              loop_info_state *state)
969 {
970    nir_scalar rhs, lhs;
971    lhs = nir_scalar_chase_alu_src(cond, 0);
972    rhs = nir_scalar_chase_alu_src(cond, 1);
973 
974    nir_loop_induction_variable *src0_lv = get_loop_var(lhs.def, state);
975    nir_loop_induction_variable *src1_lv = get_loop_var(rhs.def, state);
976 
977    if (src0_lv) {
978       *ind = lhs;
979       *limit = rhs;
980       *limit_rhs = true;
981       return true;
982    } else if (src1_lv) {
983       *ind = rhs;
984       *limit = lhs;
985       *limit_rhs = false;
986       return true;
987    } else {
988       return false;
989    }
990 }
991 
/* Handle terminator conditions that wrap a logical combination of
 * comparisons: (x && y) == 0, !(x && y), or (x || y).  Look through the
 * logical op for a comparison containing a known induction variable; on
 * success *cond is rewritten to that inner comparison and *ind / *limit /
 * *limit_rhs are filled in.
 */
static bool
try_find_trip_count_vars_in_logical_op(nir_scalar *cond,
                                       nir_scalar *ind,
                                       nir_scalar *limit,
                                       bool *limit_rhs,
                                       loop_info_state *state)
{
   const nir_op alu_op = nir_scalar_alu_op(*cond);
   /* Both "== 0" and inot invert the wrapped expression, i.e. the loop
    * exits when the inner logical expression is false.
    */
   bool exit_loop_on_false = alu_op == nir_op_ieq || alu_op == nir_op_inot;
   nir_scalar logical_op = exit_loop_on_false ?
      nir_scalar_chase_alu_src(*cond, 0) : *cond;

   if (alu_op == nir_op_ieq) {
      nir_scalar zero = nir_scalar_chase_alu_src(*cond, 1);

      if (!nir_scalar_is_alu(logical_op) || !nir_scalar_is_const(zero)) {
         /* Maybe we had it the wrong way, flip things around */
         nir_scalar tmp = zero;
         zero = logical_op;
         logical_op = tmp;

         /* If we still didn't find what we need then return */
         if (!nir_scalar_is_const(zero))
            return false;
      }

      /* If the loop is not breaking on (x && y) == 0 then return */
      if (nir_scalar_as_uint(zero) != 0)
         return false;
   }

   if (!nir_scalar_is_alu(logical_op))
      return false;

   /* Exit-on-false must wrap an AND, while a bare condition must be an OR. */
   if ((exit_loop_on_false && (nir_scalar_alu_op(logical_op) != nir_op_iand)) ||
       (!exit_loop_on_false && (nir_scalar_alu_op(logical_op) != nir_op_ior)))
      return false;

   /* Check if iand src is a terminator condition and try get induction var
    * and trip limit var.
    */
   bool found_induction_var = false;
   for (unsigned i = 0; i < 2; i++) {
      nir_scalar src = nir_scalar_chase_alu_src(logical_op, i);
      if (nir_is_terminator_condition_with_two_inputs(src) &&
          get_induction_and_limit_vars(src, ind, limit, limit_rhs, state)) {
         *cond = src;
         found_induction_var = true;

         /* If we've found one with a constant limit, stop. */
         if (nir_scalar_is_const(*limit))
            return true;
      }
   }

   return found_induction_var;
}
1049 
1050 /* Run through each of the terminators of the loop and try to infer a possible
1051  * trip-count. We need to check them all, and set the lowest trip-count as the
1052  * trip-count of our loop. If one of the terminators has an undecidable
1053  * trip-count we can not safely assume anything about the duration of the
1054  * loop.
1055  */
static void
find_trip_count(loop_info_state *state, unsigned execution_mode,
                unsigned max_unroll_iterations)
{
   bool trip_count_known = true;
   bool guessed_trip_count = false;
   nir_loop_terminator *limiting_terminator = NULL;
   int max_trip_count = -1;

   list_for_each_entry(nir_loop_terminator, terminator,
                       &state->loop->info->loop_terminator_list,
                       loop_terminator_link) {
      nir_scalar cond = { terminator->nif->condition.ssa, 0 };

      if (!nir_scalar_is_alu(cond)) {
         /* If we get here the loop is dead and will get cleaned up by the
          * nir_opt_dead_cf pass.
          */
         trip_count_known = false;
         terminator->exact_trip_count_unknown = true;
         continue;
      }

      nir_op alu_op = nir_scalar_alu_op(cond);

      /* Exiting from the then-branch means the loop continues while the
       * condition is false, so the comparison must be inverted.
       */
      bool invert_cond = terminator->continue_from_then;

      bool limit_rhs;
      nir_scalar basic_ind = { NULL, 0 };
      nir_scalar limit;

      if ((alu_op == nir_op_inot || alu_op == nir_op_ieq || alu_op == nir_op_ior) &&
          try_find_trip_count_vars_in_logical_op(&cond, &basic_ind, &limit,
                                                 &limit_rhs, state)) {

         /* The loop is exiting on (x && y) == 0 so we need to get the
          * inverse of x or y (i.e. which ever contained the induction var) in
          * order to compute the trip count.
          */
         if (alu_op == nir_op_inot || alu_op == nir_op_ieq)
            invert_cond = !invert_cond;

         alu_op = nir_scalar_alu_op(cond);
         /* Only one of possibly several combined conditions was analyzed,
          * so the resulting count is an upper bound, not an exact value.
          */
         trip_count_known = false;
         terminator->conditional_instr = cond.def->parent_instr;
         terminator->exact_trip_count_unknown = true;
      }

      if (!basic_ind.def) {
         if (nir_is_supported_terminator_condition(cond)) {
            /* Extract and invert the comparison if it is wrapped in an inot
             */
            if (alu_op == nir_op_inot) {
               cond = nir_scalar_chase_alu_src(cond, 0);
               alu_op = nir_scalar_alu_op(cond);
               invert_cond = !invert_cond;
            }

            get_induction_and_limit_vars(cond, &basic_ind,
                                         &limit, &limit_rhs, state);
         }
      }

      /* The comparison has to have a basic induction variable for us to be
       * able to find trip counts.
       */
      if (!basic_ind.def) {
         trip_count_known = false;
         terminator->exact_trip_count_unknown = true;
         continue;
      }

      terminator->induction_rhs = !limit_rhs;

      /* Attempt to find a constant limit for the loop */
      nir_const_value limit_val;
      if (nir_scalar_is_const(limit)) {
         limit_val = nir_scalar_as_const_value(limit);
      } else {
         trip_count_known = false;

         if (!try_find_limit_of_alu(limit, &limit_val, alu_op, invert_cond, terminator, state)) {
            /* Guess loop limit based on array access */
            unsigned guessed_loop_limit = guess_loop_limit(state);
            if (guessed_loop_limit) {
               limit_val = nir_const_value_for_uint(guessed_loop_limit,
                                                    basic_ind.def->bit_size);
            } else {
               terminator->exact_trip_count_unknown = true;
               continue;
            }

            guessed_trip_count = true;
         }
      }

      /* We have determined that we have the following constants:
       * (With the typical int i = 0; i < x; i++; as an example)
       *    - Upper limit.
       *    - Starting value
       *    - Step / iteration size
       * That's all that's needed to calculate the trip-count
       */

      nir_loop_induction_variable *lv = get_loop_var(basic_ind.def, state);

      /* The basic induction var might be a vector but, because we guarantee
       * earlier that the phi source has a scalar swizzle, we can take the
       * component from basic_ind.
       */
      nir_scalar initial_s = { lv->init_src->ssa, basic_ind.comp };
      nir_scalar alu_s = {
         lv->update_src->src.ssa,
         lv->update_src->swizzle[basic_ind.comp]
      };

      nir_alu_instr *step_alu =
         nir_instr_as_alu(nir_src_parent_instr(&lv->update_src->src));

      /* If the comparison is of unsigned type we don't necessarily need to
       * know the initial value to be able to calculate the max number of
       * iterations
       */
      bool can_find_max_trip_count = step_alu->op == nir_op_iadd &&
         ((alu_op == nir_op_uge && !invert_cond && limit_rhs) ||
          (alu_op == nir_op_ult && !invert_cond && !limit_rhs));

      /* nir_op_isub should have been lowered away by this point */
      assert(step_alu->op != nir_op_isub);

      /* For nir_op_uge as alu_op, the induction variable is [0,limit). For
       * nir_op_ult, it's [0,limit]. It must always be step_val larger in the
       * next iteration to use the can_find_max_trip_count=true path. This
       * check ensures that no unsigned overflow happens.
       * TODO: support for overflow could be added if a non-zero initial_val
       * is chosen.
       */
      if (can_find_max_trip_count && nir_scalar_is_const(alu_s)) {
         uint64_t uint_max = u_uintN_max(alu_s.def->bit_size);
         uint64_t max_step_val =
            uint_max - nir_const_value_as_uint(limit_val, alu_s.def->bit_size) +
            (alu_op == nir_op_uge ? 1 : 0);
         can_find_max_trip_count &= nir_scalar_as_uint(alu_s) <= max_step_val;
      }

      /* We are not guaranteed that one of these sources is a constant.
       * Try to find one.
       */
      if ((!nir_scalar_is_const(initial_s) && !can_find_max_trip_count) ||
          !nir_scalar_is_const(alu_s))
         continue;

      nir_const_value initial_val;
      if (nir_scalar_is_const(initial_s))
         initial_val = nir_scalar_as_const_value(initial_s);
      else {
         /* Unsigned comparison: assume a worst-case start of 0 and report
          * only an upper bound on the trip count.
          */
         trip_count_known = false;
         terminator->exact_trip_count_unknown = true;
         initial_val = nir_const_value_for_uint(0, 32);
         assert(can_find_max_trip_count);
      }
      nir_const_value step_val = nir_scalar_as_const_value(alu_s);

      int iterations = calculate_iterations(nir_get_scalar(lv->basis, basic_ind.comp), limit,
                                            initial_val, step_val, limit_val,
                                            step_alu, cond,
                                            alu_op, limit_rhs,
                                            invert_cond,
                                            execution_mode,
                                            max_unroll_iterations);

      /* Were we not able to calculate the iteration count */
      if (iterations == -1) {
         trip_count_known = false;
         guessed_trip_count = false;
         terminator->exact_trip_count_unknown = true;
         continue;
      }

      if (guessed_trip_count) {
         guessed_trip_count = false;
         terminator->exact_trip_count_unknown = true;
         /* Keep the smallest guess across all terminators. */
         if (state->loop->info->guessed_trip_count == 0 ||
             state->loop->info->guessed_trip_count > iterations)
            state->loop->info->guessed_trip_count = iterations;

         continue;
      }

      /* If this is the first run or we have found a smaller amount of
       * iterations than previously (we have identified a more limiting
       * terminator) set the trip count and limiting terminator.
       */
      if (max_trip_count == -1 || iterations < max_trip_count) {
         max_trip_count = iterations;
         limiting_terminator = terminator;
      }
   }

   state->loop->info->exact_trip_count_known = trip_count_known;
   if (max_trip_count > -1)
      state->loop->info->max_trip_count = max_trip_count;
   state->loop->info->limiting_terminator = limiting_terminator;
}
1260 
1261 static bool
force_unroll_array_access(loop_info_state * state,nir_deref_instr * deref,bool contains_sampler)1262 force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref,
1263                           bool contains_sampler)
1264 {
1265    unsigned array_size = find_array_access_via_induction(state, deref, NULL);
1266    if (array_size) {
1267       if ((array_size == state->loop->info->max_trip_count) &&
1268           nir_deref_mode_must_be(deref, nir_var_shader_in |
1269                                            nir_var_shader_out |
1270                                            nir_var_shader_temp |
1271                                            nir_var_function_temp))
1272          return true;
1273 
1274       if (nir_deref_mode_must_be(deref, state->indirect_mask))
1275          return true;
1276 
1277       if (contains_sampler && state->force_unroll_sampler_indirect)
1278          return true;
1279    }
1280 
1281    return false;
1282 }
1283 
1284 static bool
force_unroll_heuristics(loop_info_state * state,nir_block * block)1285 force_unroll_heuristics(loop_info_state *state, nir_block *block)
1286 {
1287    nir_foreach_instr(instr, block) {
1288       if (instr->type == nir_instr_type_tex) {
1289          nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
1290          int sampler_idx =
1291             nir_tex_instr_src_index(tex_instr,
1292                                     nir_tex_src_sampler_deref);
1293 
1294          if (sampler_idx >= 0) {
1295             nir_deref_instr *deref =
1296                nir_instr_as_deref(tex_instr->src[sampler_idx].src.ssa->parent_instr);
1297             if (force_unroll_array_access(state, deref, true))
1298                return true;
1299          }
1300       }
1301 
1302       if (instr->type != nir_instr_type_intrinsic)
1303          continue;
1304 
1305       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1306 
1307       /* Check for arrays variably-indexed by a loop induction variable.
1308        * Unrolling the loop may convert that access into constant-indexing.
1309        */
1310       if (intrin->intrinsic == nir_intrinsic_load_deref ||
1311           intrin->intrinsic == nir_intrinsic_store_deref ||
1312           intrin->intrinsic == nir_intrinsic_copy_deref) {
1313          if (force_unroll_array_access(state,
1314                                        nir_src_as_deref(intrin->src[0]),
1315                                        false))
1316             return true;
1317 
1318          if (intrin->intrinsic == nir_intrinsic_copy_deref &&
1319              force_unroll_array_access(state,
1320                                        nir_src_as_deref(intrin->src[1]),
1321                                        false))
1322             return true;
1323       }
1324    }
1325 
1326    return false;
1327 }
1328 
1329 static void
get_loop_info(loop_info_state * state,nir_function_impl * impl)1330 get_loop_info(loop_info_state *state, nir_function_impl *impl)
1331 {
1332    nir_shader *shader = impl->function->shader;
1333    const nir_shader_compiler_options *options = shader->options;
1334 
1335    /* Try to find all simple terminators of the loop. If we can't find any,
1336     * or we find possible terminators that have side effects then bail.
1337     */
1338    if (!find_loop_terminators(state)) {
1339       list_for_each_entry_safe(nir_loop_terminator, terminator,
1340                                &state->loop->info->loop_terminator_list,
1341                                loop_terminator_link) {
1342          list_del(&terminator->loop_terminator_link);
1343          ralloc_free(terminator);
1344       }
1345       return;
1346    }
1347 
1348    if (!compute_induction_information(state))
1349       return;
1350 
1351    /* Run through each of the terminators and try to compute a trip-count */
1352    find_trip_count(state,
1353                    impl->function->shader->info.float_controls_execution_mode,
1354                    impl->function->shader->options->max_unroll_iterations);
1355 
1356    nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
1357       nir_foreach_instr(instr, block) {
1358          state->loop->info->instr_cost += instr_cost(state, instr, options);
1359       }
1360 
1361       if (state->loop->info->force_unroll)
1362          continue;
1363 
1364       if (force_unroll_heuristics(state, block)) {
1365          state->loop->info->force_unroll = true;
1366       }
1367    }
1368 }
1369 
1370 static void
initialize_loop_info(nir_loop * loop)1371 initialize_loop_info(nir_loop *loop)
1372 {
1373    if (loop->info)
1374       ralloc_free(loop->info);
1375 
1376    loop->info = rzalloc(loop, nir_loop_info);
1377    loop->info->induction_vars = _mesa_pointer_hash_table_create(loop->info);
1378 
1379    list_inithead(&loop->info->loop_terminator_list);
1380 }
1381 
1382 static void
process_loops(nir_cf_node * cf_node,nir_variable_mode indirect_mask,bool force_unroll_sampler_indirect)1383 process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask,
1384               bool force_unroll_sampler_indirect)
1385 {
1386    switch (cf_node->type) {
1387    case nir_cf_node_block:
1388       return;
1389    case nir_cf_node_if: {
1390       nir_if *if_stmt = nir_cf_node_as_if(cf_node);
1391       foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
1392          process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
1393       foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
1394          process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
1395       return;
1396    }
1397    case nir_cf_node_loop: {
1398       nir_loop *loop = nir_cf_node_as_loop(cf_node);
1399       assert(!nir_loop_has_continue_construct(loop));
1400 
1401       foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
1402          process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
1403       break;
1404    }
1405    default:
1406       unreachable("unknown cf node type");
1407    }
1408 
1409    nir_loop *loop = nir_cf_node_as_loop(cf_node);
1410    nir_function_impl *impl = nir_cf_node_get_function(cf_node);
1411    loop_info_state state = {
1412       .loop = loop,
1413       .indirect_mask = indirect_mask,
1414       .force_unroll_sampler_indirect = force_unroll_sampler_indirect,
1415    };
1416 
1417    initialize_loop_info(loop);
1418    get_loop_info(&state, impl);
1419 }
1420 
1421 void
nir_loop_analyze_impl(nir_function_impl * impl,nir_variable_mode indirect_mask,bool force_unroll_sampler_indirect)1422 nir_loop_analyze_impl(nir_function_impl *impl,
1423                       nir_variable_mode indirect_mask,
1424                       bool force_unroll_sampler_indirect)
1425 {
1426    foreach_list_typed(nir_cf_node, node, node, &impl->body)
1427       process_loops(node, indirect_mask, force_unroll_sampler_indirect);
1428 
1429    impl->loop_analysis_indirect_mask = indirect_mask;
1430    impl->loop_analysis_force_unroll_sampler_indirect = force_unroll_sampler_indirect;
1431 }
1432