1 /*
2 * Copyright © 2015 Thomas Helland
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_loop_analyze.h"
25 #include "util/hash_table.h"
26 #include "util/ralloc.h"
27 #include "nir.h"
28 #include "nir_constant_expressions.h"
29
/* Per-analysis scratch state threaded through the helpers below. */
typedef struct {
   /* The loop we store information for */
   nir_loop *loop;

   /* Mask of variable modes accessed indirectly; presumably set by the
    * caller to influence unroll decisions — not used in this chunk, TODO
    * confirm against the rest of the file.
    */
   nir_variable_mode indirect_mask;

   /* NOTE(review): appears to request unrolling when samplers are indexed
    * indirectly — confirm against callers outside this chunk.
    */
   bool force_unroll_sampler_indirect;
} loop_info_state;
38
39 static nir_loop_induction_variable *
get_loop_var(nir_def * value,loop_info_state * state)40 get_loop_var(nir_def *value, loop_info_state *state)
41 {
42 struct hash_entry *entry = _mesa_hash_table_search(state->loop->info->induction_vars, value);
43 if (entry)
44 return entry->data;
45 else
46 return NULL;
47 }
48
/** Calculate an estimated cost in number of instructions
 *
 * We do this so that we don't unroll loops which will later get massively
 * inflated due to int64 or fp64 lowering. The estimates provided here don't
 * have to be massively accurate; they just have to be good enough that loop
 * unrolling doesn't cause things to blow up too much.
 */
static unsigned
instr_cost(loop_info_state *state, nir_instr *instr,
           const nir_shader_compiler_options *options)
{
   /* Intrinsics and texture fetches each count as one instruction. */
   if (instr->type == nir_instr_type_intrinsic ||
       instr->type == nir_instr_type_tex)
      return 1;

   /* Everything else that isn't ALU (phis, load_const, derefs, ...) is
    * considered free.
    */
   if (instr->type != nir_instr_type_alu)
      return 0;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned cost = 1;

   if (nir_op_is_selection(alu->op)) {
      nir_scalar cond_scalar = { alu->src[0].src.ssa, 0 };
      if (nir_is_terminator_condition_with_two_inputs(cond_scalar)) {
         nir_instr *sel_cond = alu->src[0].src.ssa->parent_instr;
         nir_alu_instr *sel_alu = nir_instr_as_alu(sel_cond);

         nir_scalar rhs, lhs;
         lhs = nir_scalar_chase_alu_src(cond_scalar, 0);
         rhs = nir_scalar_chase_alu_src(cond_scalar, 1);

         /* If the selects condition is a comparison between a constant and
          * a basic induction variable we know that it will be eliminated once
          * the loop is unrolled so here we assign it a cost of 0.
          */
         if ((nir_src_is_const(sel_alu->src[0].src) &&
              get_loop_var(rhs.def, state)) ||
             (nir_src_is_const(sel_alu->src[1].src) &&
              get_loop_var(lhs.def, state))) {
            /* Also if the selects condition is only used by the select then
             * remove that alu instructions cost from the cost total also.
             */
            if (!list_is_singular(&sel_alu->def.uses) ||
                nir_def_used_by_if(&sel_alu->def))
               return 0;
            else
               /* (unsigned)-1 wraps on purpose: when summed by the caller it
                * subtracts one, cancelling out the comparison's own cost as
                * described in the comment above.
                */
               return -1;
         }
      }
   }

   if (alu->op == nir_op_flrp) {
      /* flrp lowering expands to roughly three ALU operations. */
      if ((options->lower_flrp16 && alu->def.bit_size == 16) ||
          (options->lower_flrp32 && alu->def.bit_size == 32) ||
          (options->lower_flrp64 && alu->def.bit_size == 64))
         cost *= 3;
   }

   /* Assume everything 16 or 32-bit is cheap.
    *
    * There are no 64-bit ops that don't have a 64-bit thing as their
    * destination or first source.
    */
   if (alu->def.bit_size < 64 &&
       nir_src_bit_size(alu->src[0].src) < 64)
      return cost;

   /* Detect fp64 through either the destination or any float source. */
   bool is_fp64 = alu->def.bit_size == 64 &&
                  nir_alu_type_get_base_type(info->output_type) == nir_type_float;
   for (unsigned i = 0; i < info->num_inputs; i++) {
      if (nir_src_bit_size(alu->src[i].src) == 64 &&
          nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)
         is_fp64 = true;
   }

   if (is_fp64) {
      /* If it's something lowered normally, it's expensive. */
      if (options->lower_doubles_options &
          nir_lower_doubles_op_to_options_mask(alu->op))
         cost *= 20;

      /* If it's full software, it's even more expensive */
      if (options->lower_doubles_options & nir_lower_fp64_full_software) {
         cost *= 100;
         state->loop->info->has_soft_fp64 = true;
      }

      return cost;
   } else {
      if (options->lower_int64_options &
          nir_lower_int64_op_to_options_mask(alu->op)) {
         /* These require a doing the division algorithm. */
         if (alu->op == nir_op_idiv || alu->op == nir_op_udiv ||
             alu->op == nir_op_imod || alu->op == nir_op_umod ||
             alu->op == nir_op_irem)
            return cost * 100;

         /* Other int64 lowering isn't usually all that expensive */
         return cost * 5;
      }

      return cost;
   }
}
154
155 /* If all of the instruction sources point to identical ALU instructions (as
156 * per nir_instrs_equal), return one of the ALU instructions. Otherwise,
157 * return NULL.
158 */
159 static nir_alu_instr *
phi_instr_as_alu(nir_phi_instr * phi)160 phi_instr_as_alu(nir_phi_instr *phi)
161 {
162 nir_alu_instr *first = NULL;
163 nir_foreach_phi_src(src, phi) {
164 if (src->src.ssa->parent_instr->type != nir_instr_type_alu)
165 return NULL;
166
167 nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr);
168 if (first == NULL) {
169 first = alu;
170 } else {
171 if (!nir_instrs_equal(&first->instr, &alu->instr))
172 return NULL;
173 }
174 }
175
176 return first;
177 }
178
179 static bool
alu_src_has_identity_swizzle(nir_alu_instr * alu,unsigned src_idx)180 alu_src_has_identity_swizzle(nir_alu_instr *alu, unsigned src_idx)
181 {
182 assert(nir_op_infos[alu->op].input_sizes[src_idx] == 0);
183 for (unsigned i = 0; i < alu->def.num_components; i++) {
184 if (alu->src[src_idx].swizzle[i] != i)
185 return false;
186 }
187
188 return true;
189 }
190
191 static bool
is_only_uniform_src(nir_src * src)192 is_only_uniform_src(nir_src *src)
193 {
194 nir_instr *instr = src->ssa->parent_instr;
195
196 switch (instr->type) {
197 case nir_instr_type_alu: {
198 /* Return true if all sources return true. */
199 nir_alu_instr *alu = nir_instr_as_alu(instr);
200 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
201 if (!is_only_uniform_src(&alu->src[i].src))
202 return false;
203 }
204 return true;
205 }
206
207 case nir_instr_type_intrinsic: {
208 nir_intrinsic_instr *inst = nir_instr_as_intrinsic(instr);
209 /* current uniform inline only support load ubo */
210 return inst->intrinsic == nir_intrinsic_load_ubo;
211 }
212
213 case nir_instr_type_load_const:
214 /* Always return true for constants. */
215 return true;
216
217 default:
218 return false;
219 }
220 }
221
/* Scan the phis in the loop header for basic induction variables: values of
 * the form "phi = op(phi, loop-invariant)" whose initial value comes from
 * the preheader and is itself loop-invariant. Each one found is inserted in
 * loop->info->induction_vars keyed by BOTH the phi def (basis) and the
 * updated def.  Returns true if at least one induction variable was found.
 */
static bool
compute_induction_information(loop_info_state *state)
{
   bool progress = false;

   /* We are only interested in checking phis for the basic induction
    * variable case as its simple to detect. All basic induction variables
    * have a phi node
    */
   nir_block *header = nir_loop_first_block(state->loop);
   nir_block *preheader = nir_block_cf_tree_prev(header);

   nir_foreach_phi(phi, header) {
      nir_loop_induction_variable var = { .basis = &phi->def };

      nir_foreach_phi_src(phi_src, phi) {
         nir_def *src = phi_src->src.ssa;

         /* The phi source coming from the preheader is the initial value. */
         if (phi_src->pred == preheader) {
            var.init_src = &phi_src->src;
            continue;
         }

         /* If one of the sources is in an if branch or nested loop then don't
          * attempt to go any further.
          */
         if (src->parent_instr->block->cf_node.parent != &state->loop->cf_node)
            break;

         /* Detect inductions variables that are incremented in both branches
          * of an unnested if rather than in a loop block.
          */
         if (src->parent_instr->type == nir_instr_type_phi) {
            nir_phi_instr *src_phi = nir_instr_as_phi(src->parent_instr);
            nir_alu_instr *src_phi_alu = phi_instr_as_alu(src_phi);
            if (src_phi_alu) {
               src = &src_phi_alu->def;
            }
         }

         /* Only consider the first ALU update; a second one means the phi is
          * not a simple induction variable.
          */
         if (src->parent_instr->type == nir_instr_type_alu && !var.update_src) {
            var.def = src;
            nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);

            /* Check for unsupported alu operations */
            if (alu->op != nir_op_iadd && alu->op != nir_op_fadd &&
                alu->op != nir_op_imul && alu->op != nir_op_fmul &&
                alu->op != nir_op_ishl && alu->op != nir_op_ishr &&
                alu->op != nir_op_ushr)
               break;

            if (nir_op_infos[alu->op].num_inputs == 2) {
               for (unsigned i = 0; i < 2; i++) {
                  /* Is one of the operands const or uniform, and the other the phi.
                   * The phi source can't be swizzled in any way.
                   */
                  if (alu->src[1 - i].src.ssa == &phi->def &&
                      alu_src_has_identity_swizzle(alu, 1 - i)) {
                     if (is_only_uniform_src(&alu->src[i].src))
                        var.update_src = alu->src + i;
                  }
               }
            }

            if (!var.update_src)
               break;
         } else {
            var.update_src = NULL;
            break;
         }
      }

      /* Accept the variable only when both the step (update_src) and the
       * initial value are loop-invariant.
       */
      if (var.update_src && var.init_src &&
          is_only_uniform_src(var.init_src)) {
         /* Insert induction variable into hash table. */
         struct hash_table *vars = state->loop->info->induction_vars;
         nir_loop_induction_variable *induction_var = ralloc(vars, nir_loop_induction_variable);
         *induction_var = var;
         _mesa_hash_table_insert(vars, induction_var->def, induction_var);
         _mesa_hash_table_insert(vars, induction_var->basis, induction_var);
         progress = true;
      }
   }

   return progress;
}
308
/* Walk the top-level CF nodes of the loop body looking for "trivial"
 * terminators: ifs where exactly one side ends in a break.  Each terminator
 * found is appended to loop->info->loop_terminator_list.  Returns true if at
 * least one terminator was recorded; returns false (and sets complex_loop)
 * when the loop contains control flow the analysis can't handle.
 */
static bool
find_loop_terminators(loop_info_state *state)
{
   bool success = false;
   foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) {
      if (node->type == nir_cf_node_if) {
         nir_if *nif = nir_cf_node_as_if(node);

         nir_block *break_blk = NULL;
         nir_block *continue_from_blk = NULL;
         bool continue_from_then = true;

         /* Determine which branch (if any) breaks out of the loop; the other
          * branch is the one execution continues from.
          */
         nir_block *last_then = nir_if_last_then_block(nif);
         nir_block *last_else = nir_if_last_else_block(nif);
         if (nir_block_ends_in_break(last_then)) {
            break_blk = last_then;
            continue_from_blk = last_else;
            continue_from_then = false;
         } else if (nir_block_ends_in_break(last_else)) {
            break_blk = last_else;
            continue_from_blk = last_then;
         }

         /* If there is a break then we should find a terminator. If we can
          * not find a loop terminator, but there is a break-statement then
          * we should return false so that we do not try to find trip-count
          */
         if (!nir_is_trivial_loop_if(nif, break_blk)) {
            state->loop->info->complex_loop = true;
            return false;
         }

         /* Continue if the if contained no jumps at all */
         if (!break_blk)
            continue;

         /* A phi condition means the break decision merges several values;
          * treat that as too complex to analyze.
          */
         if (nif->condition.ssa->parent_instr->type == nir_instr_type_phi) {
            state->loop->info->complex_loop = true;
            return false;
         }

         nir_loop_terminator *terminator =
            rzalloc(state->loop->info, nir_loop_terminator);

         list_addtail(&terminator->loop_terminator_link,
                      &state->loop->info->loop_terminator_list);

         terminator->nif = nif;
         terminator->break_block = break_blk;
         terminator->continue_from_block = continue_from_blk;
         terminator->continue_from_then = continue_from_then;
         terminator->conditional_instr = nif->condition.ssa->parent_instr;

         success = true;
      }
   }

   return success;
}
368
369 /* This function looks for an array access within a loop that uses an
370 * induction variable for the array index. If found it returns the size of the
371 * array, otherwise 0 is returned. If we find an induction var we pass it back
372 * to the caller via array_index_out.
373 */
374 static unsigned
find_array_access_via_induction(loop_info_state * state,nir_deref_instr * deref,nir_loop_induction_variable ** array_index_out)375 find_array_access_via_induction(loop_info_state *state,
376 nir_deref_instr *deref,
377 nir_loop_induction_variable **array_index_out)
378 {
379 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
380 if (d->deref_type != nir_deref_type_array)
381 continue;
382
383 nir_loop_induction_variable *array_index = get_loop_var(d->arr.index.ssa, state);
384
385 if (!array_index)
386 continue;
387
388 if (array_index_out)
389 *array_index_out = array_index;
390
391 nir_deref_instr *parent = nir_deref_instr_parent(d);
392
393 if (glsl_type_is_array_or_matrix(parent->type)) {
394 return glsl_get_length(parent->type);
395 } else {
396 assert(glsl_type_is_vector(parent->type));
397 return glsl_get_vector_elements(parent->type);
398 }
399 }
400
401 return 0;
402 }
403
/* Guess an upper bound on the loop trip count by finding arrays indexed by
 * an induction variable: the smallest such array size is a plausible limit.
 * Returns 0 when no induction-indexed array access is found.
 */
static unsigned
guess_loop_limit(loop_info_state *state)
{
   unsigned min_array_size = UINT_MAX;

   nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         /* Check for arrays variably-indexed by a loop induction variable. */
         if (intrin->intrinsic == nir_intrinsic_load_deref ||
             intrin->intrinsic == nir_intrinsic_store_deref ||
             intrin->intrinsic == nir_intrinsic_copy_deref) {

            nir_loop_induction_variable *array_idx = NULL;
            unsigned array_size =
               find_array_access_via_induction(state,
                                               nir_src_as_deref(intrin->src[0]),
                                               &array_idx);
            if (array_idx)
               min_array_size = MIN2(min_array_size, array_size);

            /* copy_deref has a second deref source (the copy source); check
             * it as well.  Loads and stores only have the one deref.
             */
            if (intrin->intrinsic != nir_intrinsic_copy_deref)
               continue;

            array_size =
               find_array_access_via_induction(state,
                                               nir_src_as_deref(intrin->src[1]),
                                               &array_idx);
            if (array_idx)
               min_array_size = MIN2(min_array_size, array_size);
         }
      }
   }

   if (min_array_size != UINT_MAX)
      return min_array_size;
   else
      return 0;
}
447
448 static nir_op invert_comparison_if_needed(nir_op alu_op, bool invert);
449
450 /* Returns whether "limit_op(a, b) alu_op c" is equivalent to "(a alu_op c) || (b alu_op c)". */
451 static bool
is_minmax_compatible(nir_op limit_op,nir_op alu_op,bool limit_rhs,bool invert_cond)452 is_minmax_compatible(nir_op limit_op, nir_op alu_op, bool limit_rhs, bool invert_cond)
453 {
454 bool is_max;
455 switch (limit_op) {
456 case nir_op_imin:
457 case nir_op_fmin:
458 case nir_op_umin:
459 is_max = false;
460 break;
461 case nir_op_imax:
462 case nir_op_fmax:
463 case nir_op_umax:
464 is_max = true;
465 break;
466 default:
467 return false;
468 }
469
470 if (nir_op_infos[limit_op].input_types[0] != nir_op_infos[alu_op].input_types[0])
471 return false;
472
473 /* Comparisons we can split are:
474 * - min(a, b) < c
475 * - c < max(a, b)
476 * - max(a, b) >= c
477 * - c >= min(a, b)
478 */
479 switch (invert_comparison_if_needed(alu_op, invert_cond)) {
480 case nir_op_ilt:
481 case nir_op_flt:
482 case nir_op_ult:
483 return (!limit_rhs && !is_max) || (limit_rhs && is_max);
484 case nir_op_ige:
485 case nir_op_fge:
486 case nir_op_uge:
487 return (!limit_rhs && is_max) || (limit_rhs && !is_max);
488 default:
489 return false;
490 }
491 }
492
493 static bool
try_find_limit_of_alu(nir_scalar limit,nir_const_value * limit_val,nir_op alu_op,bool invert_cond,nir_loop_terminator * terminator,loop_info_state * state)494 try_find_limit_of_alu(nir_scalar limit, nir_const_value *limit_val, nir_op alu_op,
495 bool invert_cond, nir_loop_terminator *terminator,
496 loop_info_state *state)
497 {
498 if (!nir_scalar_is_alu(limit))
499 return false;
500
501 nir_op limit_op = nir_scalar_alu_op(limit);
502 if (is_minmax_compatible(limit_op, alu_op, !terminator->induction_rhs, invert_cond)) {
503 for (unsigned i = 0; i < 2; i++) {
504 nir_scalar src = nir_scalar_chase_alu_src(limit, i);
505 if (nir_scalar_is_const(src)) {
506 *limit_val = nir_scalar_as_const_value(src);
507 terminator->exact_trip_count_unknown = true;
508 return true;
509 }
510 }
511 }
512
513 return false;
514 }
515
516 static nir_const_value
eval_const_unop(nir_op op,unsigned bit_size,nir_const_value src0,unsigned execution_mode)517 eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0,
518 unsigned execution_mode)
519 {
520 assert(nir_op_infos[op].num_inputs == 1);
521 nir_const_value dest;
522 nir_const_value *src[1] = { &src0 };
523 nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode);
524 return dest;
525 }
526
527 static nir_const_value
eval_const_binop(nir_op op,unsigned bit_size,nir_const_value src0,nir_const_value src1,unsigned execution_mode)528 eval_const_binop(nir_op op, unsigned bit_size,
529 nir_const_value src0, nir_const_value src1,
530 unsigned execution_mode)
531 {
532 assert(nir_op_infos[op].num_inputs == 2);
533 nir_const_value dest;
534 nir_const_value *src[2] = { &src0, &src1 };
535 nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode);
536 return dest;
537 }
538
539 static int
find_replacement(const nir_scalar * originals,nir_scalar key,unsigned num_replacements)540 find_replacement(const nir_scalar *originals, nir_scalar key,
541 unsigned num_replacements)
542 {
543 for (int i = 0; i < num_replacements; i++) {
544 if (nir_scalar_equal(originals[i], key))
545 return i;
546 }
547
548 return -1;
549 }
550
/**
 * Try to evaluate an ALU instruction as a constant with a replacement
 *
 * Much like \c nir_opt_constant_folding.c:try_fold_alu, this method attempts
 * to evaluate an ALU instruction as a constant. There are two significant
 * differences.
 *
 * First, this method performs the evaluation recursively. If any source of
 * the ALU instruction is not itself a constant, it is first evaluated.
 *
 * Second, if the SSA value \c original is encountered as a source of the ALU
 * instruction, the value \c replacement is substituted.
 *
 * The intended purpose of this function is to evaluate an arbitrary
 * expression involving a loop induction variable. In this case, \c original
 * would be the phi node associated with the induction variable, and
 * \c replacement is the initial value of the induction variable.
 *
 * \returns true if the ALU instruction can be evaluated as constant (after
 * applying the previously described substitution) or false otherwise.
 */
static bool
try_eval_const_alu(nir_const_value *dest, nir_scalar alu_s, const nir_scalar *originals,
                   const nir_const_value *replacements,
                   unsigned num_replacements, unsigned execution_mode)
{
   nir_alu_instr *alu = nir_instr_as_alu(alu_s.def->parent_instr);

   /* Only per-component (scalarizable) opcodes are handled. */
   if (nir_op_infos[alu->op].output_size)
      return false;

   /* In the case that any outputs/inputs have unsized types, then we need to
    * guess the bit-size. In this case, the validator ensures that all
    * bit-sizes match so we can just take the bit-size from first
    * output/input with an unsized type. If all the outputs/inputs are sized
    * then we don't need to guess the bit-size at all because the code we
    * generate for constant opcodes in this case already knows the sizes of
    * the types involved and does not need the provided bit-size for anything
    * (although it still requires to receive a valid bit-size).
    */
   unsigned bit_size = 0;
   if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type)) {
      bit_size = alu->def.bit_size;
   } else {
      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i]))
            bit_size = alu->src[i].src.ssa->bit_size;
      }

      /* All sized: any valid bit-size will do; 32 is the conventional pick. */
      if (bit_size == 0)
         bit_size = 32;
   }

   nir_const_value src[NIR_MAX_VEC_COMPONENTS];
   nir_const_value *src_ptrs[NIR_MAX_VEC_COMPONENTS];

   /* Resolve each source to a constant: directly, via the substitution
    * table, or by recursing into its defining ALU instruction.
    */
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      nir_scalar src_s = nir_scalar_chase_alu_src(alu_s, i);

      src_ptrs[i] = &src[i];
      if (nir_scalar_is_const(src_s)) {
         src[i] = nir_scalar_as_const_value(src_s);
         continue;
      }

      int r = find_replacement(originals, src_s, num_replacements);
      if (r >= 0) {
         src[i] = replacements[r];
      } else if (!nir_scalar_is_alu(src_s) ||
                 !try_eval_const_alu(&src[i], src_s,
                                     originals, replacements,
                                     num_replacements, execution_mode)) {
         return false;
      }
   }

   nir_eval_const_opcode(alu->op, dest, 1, bit_size, src_ptrs, execution_mode);

   return true;
}
631
632 static nir_op
invert_comparison_if_needed(nir_op alu_op,bool invert)633 invert_comparison_if_needed(nir_op alu_op, bool invert)
634 {
635 if (!invert)
636 return alu_op;
637
638 switch (alu_op) {
639 case nir_op_fge:
640 return nir_op_flt;
641 case nir_op_ige:
642 return nir_op_ilt;
643 case nir_op_uge:
644 return nir_op_ult;
645 case nir_op_flt:
646 return nir_op_fge;
647 case nir_op_ilt:
648 return nir_op_ige;
649 case nir_op_ult:
650 return nir_op_uge;
651 case nir_op_feq:
652 return nir_op_fneu;
653 case nir_op_ieq:
654 return nir_op_ine;
655 case nir_op_fneu:
656 return nir_op_feq;
657 case nir_op_ine:
658 return nir_op_ieq;
659 default:
660 unreachable("Unsuported comparison!");
661 }
662 }
663
/* Analytically compute the trip count of an additive induction loop:
 * essentially (limit - initial) / step, specialized per comparison type.
 * Returns -1 when the count cannot be determined (ill-formed or infinite
 * loop, or a count too large to represent).
 */
static int32_t
get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step,
              nir_const_value limit, bool invert_cond, unsigned bit_size,
              unsigned execution_mode)
{
   nir_const_value span, iter;
   unsigned iter_bit_size = bit_size;

   switch (invert_comparison_if_needed(cond_op, invert_cond)) {
   case nir_op_ine:
      /* In order for execution to be here, limit must be the same as initial.
       * Otherwise will_break_on_first_iteration would have returned false.
       * If step is zero, the loop is infinite. Otherwise the loop will
       * execute once.
       */
      return step.u64 == 0 ? -1 : 1;

   case nir_op_ige:
   case nir_op_ilt:
   case nir_op_ieq:
      /* Signed: iterations = (limit - initial) / step. */
      span = eval_const_binop(nir_op_isub, bit_size, limit, initial,
                              execution_mode);
      iter = eval_const_binop(nir_op_idiv, bit_size, span, step,
                              execution_mode);
      break;

   case nir_op_uge:
   case nir_op_ult:
      /* Unsigned: same formula with unsigned division. */
      span = eval_const_binop(nir_op_isub, bit_size, limit, initial,
                              execution_mode);
      iter = eval_const_binop(nir_op_udiv, bit_size, span, step,
                              execution_mode);
      break;

   case nir_op_fneu:
      /* In order for execution to be here, limit must be the same as initial.
       * Otherwise will_break_on_first_iteration would have returned false.
       * If step is zero, the loop is infinite. Otherwise the loop will
       * execute once.
       *
       * This is a little more tricky for floating point since X-Y might still
       * be X even if Y is not zero. Instead check that (initial + step) !=
       * initial.
       */
      span = eval_const_binop(nir_op_fadd, bit_size, initial, step,
                              execution_mode);
      iter = eval_const_binop(nir_op_feq, bit_size, initial,
                              span, execution_mode);

      /* return (initial + step) == initial ? -1 : 1 */
      return iter.b ? -1 : 1;

   case nir_op_fge:
   case nir_op_flt:
   case nir_op_feq:
      /* Float: divide then truncate to a 64-bit integer count. */
      span = eval_const_binop(nir_op_fsub, bit_size, limit, initial,
                              execution_mode);
      iter = eval_const_binop(nir_op_fdiv, bit_size, span,
                              step, execution_mode);
      iter = eval_const_unop(nir_op_f2i64, bit_size, iter, execution_mode);
      iter_bit_size = 64;
      break;

   default:
      return -1;
   }

   /* Reject counts that don't fit in a signed value of iter_bit_size
    * (this also catches negative results viewed as huge unsigned values).
    */
   uint64_t iter_u64 = nir_const_value_as_uint(iter, iter_bit_size);
   return iter_u64 > u_intN_max(iter_bit_size) ? -1 : (int)iter_u64;
}
734
/* Determine the trip count by emulating the loop: repeatedly constant-fold
 * the exit condition and then the increment ALU, substituting the current
 * induction value for the basis and the constant limit for limit_basis.
 * Returns the iteration on which the condition fires, or -1 if it doesn't
 * within max_unroll_iterations (or folding fails).
 */
static int32_t
get_iteration_empirical(nir_scalar cond, nir_alu_instr *incr_alu,
                        nir_scalar basis, nir_const_value initial,
                        nir_scalar limit_basis, nir_const_value limit,
                        bool invert_cond, unsigned execution_mode,
                        unsigned max_unroll_iterations)
{
   int iter_count = 0;
   nir_const_value result;

   const nir_scalar incr = nir_get_scalar(&incr_alu->def, basis.comp);

   const nir_scalar original[] = {basis, limit_basis};
   nir_const_value replacement[] = {initial, limit};

   while (iter_count <= max_unroll_iterations) {
      bool success;

      /* Evaluate the exit condition with the current induction value. */
      success = try_eval_const_alu(&result, cond, original, replacement,
                                   2, execution_mode);
      if (!success)
         return -1;

      const bool cond_succ = invert_cond ? !result.b : result.b;
      if (cond_succ)
         return iter_count;

      iter_count++;

      /* Step the induction variable for the next round.  The condition fold
       * above already succeeded over the same expression tree, so this fold
       * is expected to succeed too.
       */
      success = try_eval_const_alu(&result, incr, original, replacement,
                                   2, execution_mode);
      assert(success);

      replacement[0] = result;
   }

   return -1;
}
773
774 static bool
will_break_on_first_iteration(nir_scalar cond,nir_scalar basis,nir_scalar limit_basis,nir_const_value initial,nir_const_value limit,bool invert_cond,unsigned execution_mode)775 will_break_on_first_iteration(nir_scalar cond, nir_scalar basis,
776 nir_scalar limit_basis,
777 nir_const_value initial, nir_const_value limit,
778 bool invert_cond, unsigned execution_mode)
779 {
780 nir_const_value result;
781
782 const nir_scalar originals[2] = { basis, limit_basis };
783 const nir_const_value replacements[2] = { initial, limit };
784
785 ASSERTED bool success = try_eval_const_alu(&result, cond, originals,
786 replacements, 2, execution_mode);
787
788 assert(success);
789
790 return invert_cond ? !result.b : result.b;
791 }
792
/* Check whether the exit condition holds after exactly `iter_int`
 * iterations: compute initial + iter_int * step, plug it into the comparison
 * against `limit` (on the side indicated by limit_rhs), and return the
 * (possibly inverted) result.
 */
static bool
test_iterations(int32_t iter_int, nir_const_value step,
                nir_const_value limit, nir_op cond_op, unsigned bit_size,
                nir_alu_type induction_base_type,
                nir_const_value initial, bool limit_rhs, bool invert_cond,
                unsigned execution_mode)
{
   assert(nir_op_infos[cond_op].num_inputs == 2);

   /* Pick float or integer arithmetic to match the induction variable. */
   nir_const_value iter_src;
   nir_op mul_op;
   nir_op add_op;
   switch (induction_base_type) {
   case nir_type_float:
      iter_src = nir_const_value_for_float(iter_int, bit_size);
      mul_op = nir_op_fmul;
      add_op = nir_op_fadd;
      break;
   case nir_type_int:
   case nir_type_uint:
      iter_src = nir_const_value_for_int(iter_int, bit_size);
      mul_op = nir_op_imul;
      add_op = nir_op_iadd;
      break;
   default:
      unreachable("Unhandled induction variable base type!");
   }

   /* Multiple the iteration count we are testing by the number of times we
    * step the induction variable each iteration.
    */
   nir_const_value mul_result =
      eval_const_binop(mul_op, bit_size, iter_src, step, execution_mode);

   /* Add the initial value to the accumulated induction variable total */
   nir_const_value add_result =
      eval_const_binop(add_op, bit_size, mul_result, initial, execution_mode);

   /* Place the induction value and the limit on the sides of the comparison
    * they occupied in the original condition.
    */
   nir_const_value *src[2];
   src[limit_rhs ? 0 : 1] = &add_result;
   src[limit_rhs ? 1 : 0] = &limit;

   /* Evaluate the loop exit condition */
   nir_const_value result;
   nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode);

   return invert_cond ? !result.b : result.b;
}
841
/* Top-level trip-count computation for one terminator.  Dispatches between
 * the analytic path (additive inductions), loop emulation (mul/shift
 * inductions), and gives up on fmul.  Returns the trip count or -1 when it
 * cannot be determined.
 */
static int
calculate_iterations(nir_scalar basis, nir_scalar limit_basis,
                     nir_const_value initial, nir_const_value step,
                     nir_const_value limit, nir_alu_instr *alu,
                     nir_scalar cond, nir_op alu_op, bool limit_rhs,
                     bool invert_cond, unsigned execution_mode,
                     unsigned max_unroll_iterations)
{
   /* nir_op_isub should have been lowered away by this point */
   assert(alu->op != nir_op_isub);

   /* Make sure the alu type for our induction variable is compatible with the
    * conditional alus input type. If its not something has gone really wrong.
    */
   nir_alu_type induction_base_type =
      nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
   if (induction_base_type == nir_type_int || induction_base_type == nir_type_uint) {
      assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_int ||
             nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_uint);
   } else {
      assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[0]) ==
             induction_base_type);
   }

   /* do-while loops can increment the starting value before the condition is
    * checked. e.g.
    *
    *    do {
    *        ndx++;
    *     } while (ndx < 3);
    *
    * Here we check if the induction variable is used directly by the loop
    * condition and if so we assume we need to step the initial value.
    */
   unsigned trip_offset = 0;
   nir_alu_instr *cond_alu = nir_instr_as_alu(cond.def->parent_instr);
   if (cond_alu->src[0].src.ssa == &alu->def ||
       cond_alu->src[1].src.ssa == &alu->def) {
      trip_offset = 1;
   }

   unsigned bit_size = nir_src_bit_size(alu->src[0].src);

   /* get_iteration works under assumption that iterator will be
    * incremented or decremented until it hits the limit,
    * however if the loop condition is false on the first iteration
    * get_iteration's assumption is broken. Handle such loops first.
    */
   if (will_break_on_first_iteration(cond, basis, limit_basis, initial,
                                     limit, invert_cond, execution_mode)) {
      return 0;
   }

   /* For loops incremented with addition operation, it's easy to
    * calculate the number of iterations theoretically. Even though it
    * is possible for other operations as well, it is much more error
    * prone, and doesn't cover all possible cases. So, we try to
    * emulate the loop.
    */
   int iter_int;
   switch (alu->op) {
   case nir_op_iadd:
   case nir_op_fadd:
      assert(nir_src_bit_size(alu->src[0].src) ==
             nir_src_bit_size(alu->src[1].src));

      iter_int = get_iteration(alu_op, initial, step, limit, invert_cond,
                               bit_size, execution_mode);
      break;
   case nir_op_fmul:
      /* Detecting non-zero loop counts when the loop increment is floating
       * point multiplication triggers a preexisting problem in
       * glsl-fs-loop-unroll-mul-fp64.shader_test. See
       * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3445#note_1779438.
       */
      return -1;
   case nir_op_imul:
   case nir_op_ishl:
   case nir_op_ishr:
   case nir_op_ushr:
      return get_iteration_empirical(cond, alu, basis, initial,
                                     limit_basis, limit, invert_cond,
                                     execution_mode, max_unroll_iterations);
   default:
      unreachable("Invalid induction variable increment operation.");
   }

   /* If iter_int is negative the loop is ill-formed or is the conditional is
    * unsigned with a huge iteration count so don't bother going any further.
    */
   if (iter_int < 0)
      return -1;

   /* For != / fneu loops the analytic count is already exact; the bias
    * verification below only applies to ordered comparisons.
    */
   nir_op actual_alu_op = invert_comparison_if_needed(alu_op, invert_cond);
   if (actual_alu_op == nir_op_ine || actual_alu_op == nir_op_fneu)
      return iter_int;

   /* An explanation from the GLSL unrolling pass:
    *
    * Make sure that the calculated number of iterations satisfies the exit
    * condition. This is needed to catch off-by-one errors and some types of
    * ill-formed loops. For example, we need to detect that the following
    * loop does not have a maximum iteration count.
    *
    *    for (float x = 0.0; x != 0.9; x += 0.2);
    */
   for (int bias = -1; bias <= 1; bias++) {
      const int iter_bias = iter_int + bias;
      if (iter_bias < 1)
         continue;

      if (test_iterations(iter_bias, step, limit, alu_op, bit_size,
                          induction_base_type, initial,
                          limit_rhs, invert_cond, execution_mode)) {
         return iter_bias - trip_offset;
      }
   }

   return -1;
}
962
963 static bool
get_induction_and_limit_vars(nir_scalar cond,nir_scalar * ind,nir_scalar * limit,bool * limit_rhs,loop_info_state * state)964 get_induction_and_limit_vars(nir_scalar cond,
965 nir_scalar *ind,
966 nir_scalar *limit,
967 bool *limit_rhs,
968 loop_info_state *state)
969 {
970 nir_scalar rhs, lhs;
971 lhs = nir_scalar_chase_alu_src(cond, 0);
972 rhs = nir_scalar_chase_alu_src(cond, 1);
973
974 nir_loop_induction_variable *src0_lv = get_loop_var(lhs.def, state);
975 nir_loop_induction_variable *src1_lv = get_loop_var(rhs.def, state);
976
977 if (src0_lv) {
978 *ind = lhs;
979 *limit = rhs;
980 *limit_rhs = true;
981 return true;
982 } else if (src1_lv) {
983 *ind = rhs;
984 *limit = lhs;
985 *limit_rhs = false;
986 return true;
987 } else {
988 return false;
989 }
990 }
991
992 static bool
try_find_trip_count_vars_in_logical_op(nir_scalar * cond,nir_scalar * ind,nir_scalar * limit,bool * limit_rhs,loop_info_state * state)993 try_find_trip_count_vars_in_logical_op(nir_scalar *cond,
994 nir_scalar *ind,
995 nir_scalar *limit,
996 bool *limit_rhs,
997 loop_info_state *state)
998 {
999 const nir_op alu_op = nir_scalar_alu_op(*cond);
1000 bool exit_loop_on_false = alu_op == nir_op_ieq || alu_op == nir_op_inot;
1001 nir_scalar logical_op = exit_loop_on_false ?
1002 nir_scalar_chase_alu_src(*cond, 0) : *cond;
1003
1004 if (alu_op == nir_op_ieq) {
1005 nir_scalar zero = nir_scalar_chase_alu_src(*cond, 1);
1006
1007 if (!nir_scalar_is_alu(logical_op) || !nir_scalar_is_const(zero)) {
1008 /* Maybe we had it the wrong way, flip things around */
1009 nir_scalar tmp = zero;
1010 zero = logical_op;
1011 logical_op = tmp;
1012
1013 /* If we still didn't find what we need then return */
1014 if (!nir_scalar_is_const(zero))
1015 return false;
1016 }
1017
1018 /* If the loop is not breaking on (x && y) == 0 then return */
1019 if (nir_scalar_as_uint(zero) != 0)
1020 return false;
1021 }
1022
1023 if (!nir_scalar_is_alu(logical_op))
1024 return false;
1025
1026 if ((exit_loop_on_false && (nir_scalar_alu_op(logical_op) != nir_op_iand)) ||
1027 (!exit_loop_on_false && (nir_scalar_alu_op(logical_op) != nir_op_ior)))
1028 return false;
1029
1030 /* Check if iand src is a terminator condition and try get induction var
1031 * and trip limit var.
1032 */
1033 bool found_induction_var = false;
1034 for (unsigned i = 0; i < 2; i++) {
1035 nir_scalar src = nir_scalar_chase_alu_src(logical_op, i);
1036 if (nir_is_terminator_condition_with_two_inputs(src) &&
1037 get_induction_and_limit_vars(src, ind, limit, limit_rhs, state)) {
1038 *cond = src;
1039 found_induction_var = true;
1040
1041 /* If we've found one with a constant limit, stop. */
1042 if (nir_scalar_is_const(*limit))
1043 return true;
1044 }
1045 }
1046
1047 return found_induction_var;
1048 }
1049
1050 /* Run through each of the terminators of the loop and try to infer a possible
1051 * trip-count. We need to check them all, and set the lowest trip-count as the
1052 * trip-count of our loop. If one of the terminators has an undecidable
1053 * trip-count we can not safely assume anything about the duration of the
1054 * loop.
1055 */
1056 static void
find_trip_count(loop_info_state * state,unsigned execution_mode,unsigned max_unroll_iterations)1057 find_trip_count(loop_info_state *state, unsigned execution_mode,
1058 unsigned max_unroll_iterations)
1059 {
1060 bool trip_count_known = true;
1061 bool guessed_trip_count = false;
1062 nir_loop_terminator *limiting_terminator = NULL;
1063 int max_trip_count = -1;
1064
1065 list_for_each_entry(nir_loop_terminator, terminator,
1066 &state->loop->info->loop_terminator_list,
1067 loop_terminator_link) {
1068 nir_scalar cond = { terminator->nif->condition.ssa, 0 };
1069
1070 if (!nir_scalar_is_alu(cond)) {
1071 /* If we get here the loop is dead and will get cleaned up by the
1072 * nir_opt_dead_cf pass.
1073 */
1074 trip_count_known = false;
1075 terminator->exact_trip_count_unknown = true;
1076 continue;
1077 }
1078
1079 nir_op alu_op = nir_scalar_alu_op(cond);
1080
1081 bool invert_cond = terminator->continue_from_then;
1082
1083 bool limit_rhs;
1084 nir_scalar basic_ind = { NULL, 0 };
1085 nir_scalar limit;
1086
1087 if ((alu_op == nir_op_inot || alu_op == nir_op_ieq || alu_op == nir_op_ior) &&
1088 try_find_trip_count_vars_in_logical_op(&cond, &basic_ind, &limit,
1089 &limit_rhs, state)) {
1090
1091 /* The loop is exiting on (x && y) == 0 so we need to get the
1092 * inverse of x or y (i.e. which ever contained the induction var) in
1093 * order to compute the trip count.
1094 */
1095 if (alu_op == nir_op_inot || alu_op == nir_op_ieq)
1096 invert_cond = !invert_cond;
1097
1098 alu_op = nir_scalar_alu_op(cond);
1099 trip_count_known = false;
1100 terminator->conditional_instr = cond.def->parent_instr;
1101 terminator->exact_trip_count_unknown = true;
1102 }
1103
1104 if (!basic_ind.def) {
1105 if (nir_is_supported_terminator_condition(cond)) {
1106 /* Extract and inverse the comparision if it is wrapped in an inot
1107 */
1108 if (alu_op == nir_op_inot) {
1109 cond = nir_scalar_chase_alu_src(cond, 0);
1110 alu_op = nir_scalar_alu_op(cond);
1111 invert_cond = !invert_cond;
1112 }
1113
1114 get_induction_and_limit_vars(cond, &basic_ind,
1115 &limit, &limit_rhs, state);
1116 }
1117 }
1118
1119 /* The comparison has to have a basic induction variable for us to be
1120 * able to find trip counts.
1121 */
1122 if (!basic_ind.def) {
1123 trip_count_known = false;
1124 terminator->exact_trip_count_unknown = true;
1125 continue;
1126 }
1127
1128 terminator->induction_rhs = !limit_rhs;
1129
1130 /* Attempt to find a constant limit for the loop */
1131 nir_const_value limit_val;
1132 if (nir_scalar_is_const(limit)) {
1133 limit_val = nir_scalar_as_const_value(limit);
1134 } else {
1135 trip_count_known = false;
1136
1137 if (!try_find_limit_of_alu(limit, &limit_val, alu_op, invert_cond, terminator, state)) {
1138 /* Guess loop limit based on array access */
1139 unsigned guessed_loop_limit = guess_loop_limit(state);
1140 if (guessed_loop_limit) {
1141 limit_val = nir_const_value_for_uint(guessed_loop_limit,
1142 basic_ind.def->bit_size);
1143 } else {
1144 terminator->exact_trip_count_unknown = true;
1145 continue;
1146 }
1147
1148 guessed_trip_count = true;
1149 }
1150 }
1151
1152 /* We have determined that we have the following constants:
1153 * (With the typical int i = 0; i < x; i++; as an example)
1154 * - Upper limit.
1155 * - Starting value
1156 * - Step / iteration size
1157 * Thats all thats needed to calculate the trip-count
1158 */
1159
1160 nir_loop_induction_variable *lv = get_loop_var(basic_ind.def, state);
1161
1162 /* The basic induction var might be a vector but, because we guarantee
1163 * earlier that the phi source has a scalar swizzle, we can take the
1164 * component from basic_ind.
1165 */
1166 nir_scalar initial_s = { lv->init_src->ssa, basic_ind.comp };
1167 nir_scalar alu_s = {
1168 lv->update_src->src.ssa,
1169 lv->update_src->swizzle[basic_ind.comp]
1170 };
1171
1172 nir_alu_instr *step_alu =
1173 nir_instr_as_alu(nir_src_parent_instr(&lv->update_src->src));
1174
1175 /* If the comparision is of unsigned type we don't necessarily need to
1176 * know the initial value to be able to calculate the max number of
1177 * iterations
1178 */
1179 bool can_find_max_trip_count = step_alu->op == nir_op_iadd &&
1180 ((alu_op == nir_op_uge && !invert_cond && limit_rhs) ||
1181 (alu_op == nir_op_ult && !invert_cond && !limit_rhs));
1182
1183 /* nir_op_isub should have been lowered away by this point */
1184 assert(step_alu->op != nir_op_isub);
1185
1186 /* For nir_op_uge as alu_op, the induction variable is [0,limit). For
1187 * nir_op_ult, it's [0,limit]. It must always be step_val larger in the
1188 * next iteration to use the can_find_max_trip_count=true path. This
1189 * check ensures that no unsigned overflow happens.
1190 * TODO: support for overflow could be added if a non-zero initial_val
1191 * is chosen.
1192 */
1193 if (can_find_max_trip_count && nir_scalar_is_const(alu_s)) {
1194 uint64_t uint_max = u_uintN_max(alu_s.def->bit_size);
1195 uint64_t max_step_val =
1196 uint_max - nir_const_value_as_uint(limit_val, alu_s.def->bit_size) +
1197 (alu_op == nir_op_uge ? 1 : 0);
1198 can_find_max_trip_count &= nir_scalar_as_uint(alu_s) <= max_step_val;
1199 }
1200
1201 /* We are not guaranteed by that at one of these sources is a constant.
1202 * Try to find one.
1203 */
1204 if ((!nir_scalar_is_const(initial_s) && !can_find_max_trip_count) ||
1205 !nir_scalar_is_const(alu_s))
1206 continue;
1207
1208 nir_const_value initial_val;
1209 if (nir_scalar_is_const(initial_s))
1210 initial_val = nir_scalar_as_const_value(initial_s);
1211 else {
1212 trip_count_known = false;
1213 terminator->exact_trip_count_unknown = true;
1214 initial_val = nir_const_value_for_uint(0, 32);
1215 assert(can_find_max_trip_count);
1216 }
1217 nir_const_value step_val = nir_scalar_as_const_value(alu_s);
1218
1219 int iterations = calculate_iterations(nir_get_scalar(lv->basis, basic_ind.comp), limit,
1220 initial_val, step_val, limit_val,
1221 step_alu, cond,
1222 alu_op, limit_rhs,
1223 invert_cond,
1224 execution_mode,
1225 max_unroll_iterations);
1226
1227 /* Where we not able to calculate the iteration count */
1228 if (iterations == -1) {
1229 trip_count_known = false;
1230 guessed_trip_count = false;
1231 terminator->exact_trip_count_unknown = true;
1232 continue;
1233 }
1234
1235 if (guessed_trip_count) {
1236 guessed_trip_count = false;
1237 terminator->exact_trip_count_unknown = true;
1238 if (state->loop->info->guessed_trip_count == 0 ||
1239 state->loop->info->guessed_trip_count > iterations)
1240 state->loop->info->guessed_trip_count = iterations;
1241
1242 continue;
1243 }
1244
1245 /* If this is the first run or we have found a smaller amount of
1246 * iterations than previously (we have identified a more limiting
1247 * terminator) set the trip count and limiting terminator.
1248 */
1249 if (max_trip_count == -1 || iterations < max_trip_count) {
1250 max_trip_count = iterations;
1251 limiting_terminator = terminator;
1252 }
1253 }
1254
1255 state->loop->info->exact_trip_count_known = trip_count_known;
1256 if (max_trip_count > -1)
1257 state->loop->info->max_trip_count = max_trip_count;
1258 state->loop->info->limiting_terminator = limiting_terminator;
1259 }
1260
1261 static bool
force_unroll_array_access(loop_info_state * state,nir_deref_instr * deref,bool contains_sampler)1262 force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref,
1263 bool contains_sampler)
1264 {
1265 unsigned array_size = find_array_access_via_induction(state, deref, NULL);
1266 if (array_size) {
1267 if ((array_size == state->loop->info->max_trip_count) &&
1268 nir_deref_mode_must_be(deref, nir_var_shader_in |
1269 nir_var_shader_out |
1270 nir_var_shader_temp |
1271 nir_var_function_temp))
1272 return true;
1273
1274 if (nir_deref_mode_must_be(deref, state->indirect_mask))
1275 return true;
1276
1277 if (contains_sampler && state->force_unroll_sampler_indirect)
1278 return true;
1279 }
1280
1281 return false;
1282 }
1283
1284 static bool
force_unroll_heuristics(loop_info_state * state,nir_block * block)1285 force_unroll_heuristics(loop_info_state *state, nir_block *block)
1286 {
1287 nir_foreach_instr(instr, block) {
1288 if (instr->type == nir_instr_type_tex) {
1289 nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
1290 int sampler_idx =
1291 nir_tex_instr_src_index(tex_instr,
1292 nir_tex_src_sampler_deref);
1293
1294 if (sampler_idx >= 0) {
1295 nir_deref_instr *deref =
1296 nir_instr_as_deref(tex_instr->src[sampler_idx].src.ssa->parent_instr);
1297 if (force_unroll_array_access(state, deref, true))
1298 return true;
1299 }
1300 }
1301
1302 if (instr->type != nir_instr_type_intrinsic)
1303 continue;
1304
1305 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1306
1307 /* Check for arrays variably-indexed by a loop induction variable.
1308 * Unrolling the loop may convert that access into constant-indexing.
1309 */
1310 if (intrin->intrinsic == nir_intrinsic_load_deref ||
1311 intrin->intrinsic == nir_intrinsic_store_deref ||
1312 intrin->intrinsic == nir_intrinsic_copy_deref) {
1313 if (force_unroll_array_access(state,
1314 nir_src_as_deref(intrin->src[0]),
1315 false))
1316 return true;
1317
1318 if (intrin->intrinsic == nir_intrinsic_copy_deref &&
1319 force_unroll_array_access(state,
1320 nir_src_as_deref(intrin->src[1]),
1321 false))
1322 return true;
1323 }
1324 }
1325
1326 return false;
1327 }
1328
1329 static void
get_loop_info(loop_info_state * state,nir_function_impl * impl)1330 get_loop_info(loop_info_state *state, nir_function_impl *impl)
1331 {
1332 nir_shader *shader = impl->function->shader;
1333 const nir_shader_compiler_options *options = shader->options;
1334
1335 /* Try to find all simple terminators of the loop. If we can't find any,
1336 * or we find possible terminators that have side effects then bail.
1337 */
1338 if (!find_loop_terminators(state)) {
1339 list_for_each_entry_safe(nir_loop_terminator, terminator,
1340 &state->loop->info->loop_terminator_list,
1341 loop_terminator_link) {
1342 list_del(&terminator->loop_terminator_link);
1343 ralloc_free(terminator);
1344 }
1345 return;
1346 }
1347
1348 if (!compute_induction_information(state))
1349 return;
1350
1351 /* Run through each of the terminators and try to compute a trip-count */
1352 find_trip_count(state,
1353 impl->function->shader->info.float_controls_execution_mode,
1354 impl->function->shader->options->max_unroll_iterations);
1355
1356 nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
1357 nir_foreach_instr(instr, block) {
1358 state->loop->info->instr_cost += instr_cost(state, instr, options);
1359 }
1360
1361 if (state->loop->info->force_unroll)
1362 continue;
1363
1364 if (force_unroll_heuristics(state, block)) {
1365 state->loop->info->force_unroll = true;
1366 }
1367 }
1368 }
1369
1370 static void
initialize_loop_info(nir_loop * loop)1371 initialize_loop_info(nir_loop *loop)
1372 {
1373 if (loop->info)
1374 ralloc_free(loop->info);
1375
1376 loop->info = rzalloc(loop, nir_loop_info);
1377 loop->info->induction_vars = _mesa_pointer_hash_table_create(loop->info);
1378
1379 list_inithead(&loop->info->loop_terminator_list);
1380 }
1381
1382 static void
process_loops(nir_cf_node * cf_node,nir_variable_mode indirect_mask,bool force_unroll_sampler_indirect)1383 process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask,
1384 bool force_unroll_sampler_indirect)
1385 {
1386 switch (cf_node->type) {
1387 case nir_cf_node_block:
1388 return;
1389 case nir_cf_node_if: {
1390 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
1391 foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
1392 process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
1393 foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
1394 process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
1395 return;
1396 }
1397 case nir_cf_node_loop: {
1398 nir_loop *loop = nir_cf_node_as_loop(cf_node);
1399 assert(!nir_loop_has_continue_construct(loop));
1400
1401 foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
1402 process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
1403 break;
1404 }
1405 default:
1406 unreachable("unknown cf node type");
1407 }
1408
1409 nir_loop *loop = nir_cf_node_as_loop(cf_node);
1410 nir_function_impl *impl = nir_cf_node_get_function(cf_node);
1411 loop_info_state state = {
1412 .loop = loop,
1413 .indirect_mask = indirect_mask,
1414 .force_unroll_sampler_indirect = force_unroll_sampler_indirect,
1415 };
1416
1417 initialize_loop_info(loop);
1418 get_loop_info(&state, impl);
1419 }
1420
1421 void
nir_loop_analyze_impl(nir_function_impl * impl,nir_variable_mode indirect_mask,bool force_unroll_sampler_indirect)1422 nir_loop_analyze_impl(nir_function_impl *impl,
1423 nir_variable_mode indirect_mask,
1424 bool force_unroll_sampler_indirect)
1425 {
1426 foreach_list_typed(nir_cf_node, node, node, &impl->body)
1427 process_loops(node, indirect_mask, force_unroll_sampler_indirect);
1428
1429 impl->loop_analysis_indirect_mask = indirect_mask;
1430 impl->loop_analysis_force_unroll_sampler_indirect = force_unroll_sampler_indirect;
1431 }
1432