/*
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/set.h"
#include "nir.h"
#include "nir_builder.h"

/* This pass provides a way to move computations that are always the same for
 * an entire draw/compute dispatch into a "preamble" that runs before the main
 * entrypoint.
 *
 * We also expose a separate API to get or construct the preamble of a shader
 * in case backends want to insert their own code.
 */
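
/* As a rough, illustrative sketch (offsets and values below are placeholders,
 * not produced by any particular backend): after this pass, a uniform
 * computation such as
 *
 *    x = fmul(load_ubo(0, 0), load_ubo(0, 4))
 *
 * is cloned into @preamble followed by store_preamble(x, base=N), and its
 * uses in the main entrypoint are rewritten to load_preamble(base=N).
 *
 * A backend that wants to emit its own preamble code can do so directly:
 *
 *    nir_function_impl *pre = nir_shader_get_preamble(shader);
 *    nir_builder b = nir_builder_at(nir_before_impl(pre));
 *    nir_store_preamble(&b, value, .base = offset);
 */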

nir_function_impl *
nir_shader_get_preamble(nir_shader *shader)
{
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
   if (entrypoint->preamble) {
      return entrypoint->preamble->impl;
   } else {
      nir_function *preamble = nir_function_create(shader, "@preamble");
      preamble->is_preamble = true;
      nir_function_impl *impl = nir_function_impl_create(preamble);
      entrypoint->preamble = preamble;
      return impl;
   }
}

typedef struct {
   bool can_move;
   bool candidate;
   bool must_stay;
   bool replace;

   unsigned can_move_users;

   unsigned size, align;

   unsigned offset;

   /* Average the cost of a value among its users, to try to account for
    * values that have multiple can_move uses.
    */
   float value;

   /* Overall benefit, i.e. the value minus any cost of inserting
    * load_preamble.
    */
   float benefit;
} def_state;

typedef struct {
   /* Per-definition array of states */
   def_state *states;

   /* Number of levels of non-uniform control flow we're in. We don't
    * reconstruct loops, so loops count as non-uniform conservatively. If-else
    * is counted if the condition is not marked can_move.
    */
   unsigned nonuniform_cf_nesting;

   /* Set of nir_if's that must be reconstructed in the preamble. Note an if may
    * need reconstruction even when not entirely moved. This does not account
    * for nesting: the parent CF nodes of ifs in this set must be reconstructed
    * but may not be in this set, even if the parent is another if.
    */
   struct set *reconstructed_ifs;

   /* Set of definitions that must be reconstructed in the preamble. This is a
    * subset of can_move instructions, determined after replacement.
    */
   BITSET_WORD *reconstructed_defs;

   nir_def *def;

   const nir_opt_preamble_options *options;
} opt_preamble_ctx;

static bool
instr_can_speculate(nir_instr *instr)
{
   /* Intrinsics with an ACCESS index can only be speculated if they are
    * explicitly CAN_SPECULATE.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (nir_intrinsic_has_access(intr))
         return nir_intrinsic_access(intr) & ACCESS_CAN_SPECULATE;
   }

   /* For now, everything else can be speculated. TODO: Bindless textures. */
   return true;
}

static float
get_instr_cost(nir_instr *instr, const nir_opt_preamble_options *options)
{
   /* No backend will want to hoist load_const or undef by itself, so handle
    * this for them.
    */
   if (instr->type == nir_instr_type_load_const ||
       instr->type == nir_instr_type_undef)
      return 0;

   return options->instr_cost_cb(instr, options->cb_data);
}

static bool
can_move_src(nir_src *src, void *state)
{
   opt_preamble_ctx *ctx = state;

   return ctx->states[src->ssa->index].can_move;
}

static bool
can_move_srcs(nir_instr *instr, opt_preamble_ctx *ctx)
{
   return nir_foreach_src(instr, can_move_src, ctx);
}

static bool
can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
{
   switch (instr->intrinsic) {
   /* Intrinsics which can always be moved */
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_fep_w_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
   case nir_intrinsic_load_clamp_vertex_color_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_triangles_enabled_amd:
   case nir_intrinsic_load_cull_small_lines_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_triangle_precision_amd:
   case nir_intrinsic_load_vbo_base_agx:
      return true;

   /* Intrinsics which can be moved depending on hardware */
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
      return ctx->options->drawid_uniform;

   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_num_subgroups:
      return ctx->options->subgroup_size_uniform;

   case nir_intrinsic_load_workgroup_size:
      return ctx->options->load_workgroup_size_allowed;

   /* Intrinsics which can be moved if the sources can */
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_preamble:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_image_levels:
   case nir_intrinsic_image_deref_levels:
   case nir_intrinsic_bindless_image_levels:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_bindless_resource_ir3:
   case nir_intrinsic_load_const_ir3:
   case nir_intrinsic_load_constant_agx:
      return can_move_srcs(&instr->instr, ctx);

   /* Image/SSBO loads can be moved if they are CAN_REORDER and their
    * sources can be moved.
    */
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ssbo_ir3:
      return (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER) &&
             can_move_srcs(&instr->instr, ctx);

   default:
      return false;
   }
}

static bool
can_move_instr(nir_instr *instr, opt_preamble_ctx *ctx)
{
   /* If we are only contained within uniform control flow, no speculation is
    * needed since the control flow will be reconstructed in the preamble. But
    * if we are not, we must be able to speculate instructions to move them.
    */
   if (ctx->nonuniform_cf_nesting > 0 && !instr_can_speculate(instr))
      return false;

   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      /* See note below about derivatives. We have special code to convert tex
       * to txd, though, because it's a common case.
       */
      if (nir_tex_instr_has_implicit_derivative(tex) &&
          tex->op != nir_texop_tex) {
         return false;
      }
      return can_move_srcs(instr, ctx);
   }
   case nir_instr_type_alu:
      return can_move_srcs(instr, ctx);

   case nir_instr_type_intrinsic:
      return can_move_intrinsic(nir_instr_as_intrinsic(instr), ctx);

   case nir_instr_type_load_const:
   case nir_instr_type_undef:
      return true;

   case nir_instr_type_deref: {
      nir_deref_instr *deref = nir_instr_as_deref(instr);
      if (deref->deref_type == nir_deref_type_var) {
         switch (deref->modes) {
         case nir_var_uniform:
         case nir_var_mem_ubo:
            return true;
         default:
            return false;
         }
      } else {
         return can_move_srcs(instr, ctx);
      }
   }

   /* We can only move phis if all of their sources are movable, and it is a phi
    * for an if-else that is itself movable.
    */
   case nir_instr_type_phi: {
      nir_cf_node *prev_node = nir_cf_node_prev(&instr->block->cf_node);
      if (!prev_node)
         return false;

      if (prev_node->type != nir_cf_node_if) {
         assert(prev_node->type == nir_cf_node_loop);
         return false;
      }

      nir_if *nif = nir_cf_node_as_if(prev_node);
      if (!can_move_src(&nif->condition, ctx))
         return false;

      return can_move_srcs(instr, ctx);
   }

   default:
      return false;
   }
}

/* True if we should avoid making this a candidate. This is only called on
 * instructions we already determined we can move; it just makes it so that
 * uses of this instruction cannot be rewritten. Typically this happens
 * because of static constraints on the IR, for example some deref chains
 * cannot be broken.
 */
static bool
avoid_instr(nir_instr *instr, const nir_opt_preamble_options *options)
{
   if (instr->type == nir_instr_type_deref)
      return true;

   return options->avoid_instr_cb(instr, options->cb_data);
}

static bool
update_src_value(nir_src *src, void *data)
{
   opt_preamble_ctx *ctx = data;

   def_state *state = &ctx->states[ctx->def->index];
   def_state *src_state = &ctx->states[src->ssa->index];

   assert(src_state->can_move);

   /* If an instruction has can_move and non-can_move users, it becomes a
    * candidate and its value shouldn't propagate downwards. For example,
    * imagine a chain like this:
    *
    *         -- F (cannot move)
    *        /
    *  A <-- B <-- C <-- D <-- E (cannot move)
    *
    * B and D are marked candidates. Picking B removes A and B, picking D
    * removes C and D, and picking both removes all 4. Therefore B and D are
    * independent and B's value shouldn't flow into D.
    *
    * A similar argument holds for must_stay values.
    */
   if (!src_state->must_stay && !src_state->candidate)
      state->value += src_state->value;
   return true;
}

static int
candidate_sort(const void *data1, const void *data2)
{
   const def_state *state1 = *(def_state **)data1;
   const def_state *state2 = *(def_state **)data2;

   float value1 = state1->value / state1->size;
   float value2 = state2->value / state2->size;
   if (value1 < value2)
      return 1;
   else if (value1 > value2)
      return -1;
   else
      return 0;
}

static bool
calculate_can_move_for_block(opt_preamble_ctx *ctx, nir_block *block)
{
   bool all_can_move = true;

   nir_foreach_instr(instr, block) {
      nir_def *def = nir_instr_def(instr);
      if (!def)
         continue;

      def_state *state = &ctx->states[def->index];
      state->can_move = can_move_instr(instr, ctx);
      all_can_move &= state->can_move;
   }

   return all_can_move;
}

static bool
calculate_can_move_for_cf_list(opt_preamble_ctx *ctx, struct exec_list *list)
{
   bool all_can_move = true;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         all_can_move &=
            calculate_can_move_for_block(ctx, nir_cf_node_as_block(node));
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);
         bool uniform = can_move_src(&nif->condition, ctx);

         if (!uniform)
            ctx->nonuniform_cf_nesting++;

         bool if_can_move = uniform;
         if_can_move &= calculate_can_move_for_cf_list(ctx, &nif->then_list);
         if_can_move &= calculate_can_move_for_cf_list(ctx, &nif->else_list);

         if (!uniform)
            ctx->nonuniform_cf_nesting--;

         all_can_move &= if_can_move;
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(node);

         /* Conservatively treat loops like conditional control flow, since an
          * instruction might be conditionally unreachable due to an earlier
          * break in a loop that executes only one iteration.
          */
         ctx->nonuniform_cf_nesting++;
         calculate_can_move_for_cf_list(ctx, &loop->body);
         ctx->nonuniform_cf_nesting--;
         all_can_move = false;
         break;
      }

      default:
         unreachable("Unexpected CF node type");
      }
   }

   return all_can_move;
}

static void
replace_for_block(nir_builder *b, opt_preamble_ctx *ctx,
                  struct hash_table *remap_table, nir_block *block)
{
   nir_foreach_instr(instr, block) {
      nir_def *def = nir_instr_def(instr);
      if (!def)
         continue;

      /* Only replace what we actually need. This is a micro-optimization for
       * compile-time performance of regular instructions, but it's required for
       * correctness with phi nodes, since we might not reconstruct the
       * corresponding if.
       */
      if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
         continue;

      def_state *state = &ctx->states[def->index];
      assert(state->can_move && "reconstructed => can_move");

      nir_instr *clone;

      if (instr->type == nir_instr_type_phi) {
         /* Phis are special since they can't be cloned with nir_instr_clone */
         nir_phi_instr *phi = nir_instr_as_phi(instr);

         nir_cf_node *nif_cf = nir_cf_node_prev(&block->cf_node);
         assert(nif_cf->type == nir_cf_node_if && "only if's are moveable");
         nir_if *nif = nir_cf_node_as_if(nif_cf);

         nir_block *then_block = nir_if_last_then_block(nif);
         nir_block *else_block = nir_if_last_else_block(nif);

         nir_def *then_def = NULL, *else_def = NULL;

         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == then_block) {
               assert(then_def == NULL);
               then_def = phi_src->src.ssa;
            } else if (phi_src->pred == else_block) {
               assert(else_def == NULL);
               else_def = phi_src->src.ssa;
            } else {
               unreachable("Invalid predecessor for phi of if");
            }
         }

         assert(exec_list_length(&phi->srcs) == 2 && "only if's are movable");
         assert(then_def && else_def && "all sources seen");

         /* Remap */
         then_def = _mesa_hash_table_search(remap_table, then_def)->data;
         else_def = _mesa_hash_table_search(remap_table, else_def)->data;

         b->cursor =
            nir_before_block_after_phis(nir_cursor_current_block(b->cursor));

         nir_def *repl = nir_if_phi(b, then_def, else_def);
         clone = repl->parent_instr;

         _mesa_hash_table_insert(remap_table, &phi->def, repl);
      } else {
         clone = nir_instr_clone_deep(b->shader, instr, remap_table);
         nir_builder_instr_insert(b, clone);
      }

      if (clone->type == nir_instr_type_tex) {
         nir_tex_instr *tex = nir_instr_as_tex(clone);
         if (tex->op == nir_texop_tex) {
            /* For maximum compatibility, replace normal textures with
             * textureGrad with a gradient of 0.
             * TODO: Handle txb somehow.
             */
            b->cursor = nir_before_instr(clone);

            nir_def *zero =
               nir_imm_zero(b, tex->coord_components - tex->is_array, 32);
            nir_tex_instr_add_src(tex, nir_tex_src_ddx, zero);
            nir_tex_instr_add_src(tex, nir_tex_src_ddy, zero);
            tex->op = nir_texop_txd;

            b->cursor = nir_after_instr(clone);
         }
      }

      if (state->replace) {
         nir_def *clone_def = nir_instr_def(clone);
         nir_store_preamble(b, clone_def, .base = state->offset);
      }
   }
}

static void
replace_for_cf_list(nir_builder *b, opt_preamble_ctx *ctx,
                    struct hash_table *remap_table, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block: {
         replace_for_block(b, ctx, remap_table, nir_cf_node_as_block(node));
         break;
      }

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);

         /* If we moved something that requires reconstructing the if, do so */
         if (_mesa_set_search(ctx->reconstructed_ifs, nif)) {
            assert(can_move_src(&nif->condition, ctx));

            struct hash_entry *entry =
               _mesa_hash_table_search(remap_table, nif->condition.ssa);
            assert(entry != NULL && "can_move condition, def dominates use");
            nir_def *remap_cond = entry->data;

            nir_if *reconstructed_nif = NULL;
            reconstructed_nif = nir_push_if(b, remap_cond);

            b->cursor = nir_before_cf_list(&reconstructed_nif->then_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->then_list);

            b->cursor = nir_before_cf_list(&reconstructed_nif->else_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->else_list);

            nir_pop_if(b, reconstructed_nif);
            b->cursor = nir_after_cf_node(&reconstructed_nif->cf_node);
         } else {
            replace_for_cf_list(b, ctx, remap_table, &nif->then_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->else_list);
         }

         break;
      }

      case nir_cf_node_loop: {
         /* We don't try to reconstruct loops */
         nir_loop *loop = nir_cf_node_as_loop(node);
         replace_for_cf_list(b, ctx, remap_table, &loop->body);
         break;
      }

      default:
         unreachable("Unexpected CF node type");
      }
   }
}

/*
 * If an if-statement contains an instruction that cannot be speculated, the
 * if-statement must be reconstructed so we avoid the speculation. This applies
 * even for nested if-statements. Determine which if-statements must be
 * reconstructed for this reason by walking the program forward and looking
 * inside uniform if's.
 *
 * Returns whether the CF list contains a reconstructed instruction that would
 * otherwise be speculated, updating the reconstructed_ifs set. This depends on
 * reconstructed_defs being correctly set by analyze_reconstructed.
 */
static bool
analyze_speculation_for_cf_list(opt_preamble_ctx *ctx, struct exec_list *list)
{
   bool reconstruct_cf_list = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block: {
         nir_foreach_instr(instr, nir_cf_node_as_block(node)) {
            nir_def *def = nir_instr_def(instr);
            if (!def)
               continue;

            if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
               continue;

            if (!instr_can_speculate(instr)) {
               reconstruct_cf_list = true;
               break;
            }
         }

         break;
      }

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);

         /* If we can move the if, we might need to reconstruct */
         if (can_move_src(&nif->condition, ctx)) {
            bool any = false;
            any |= analyze_speculation_for_cf_list(ctx, &nif->then_list);
            any |= analyze_speculation_for_cf_list(ctx, &nif->else_list);

            if (any)
               _mesa_set_add(ctx->reconstructed_ifs, nif);

            reconstruct_cf_list |= any;
         }

         break;
      }

      /* We don't reconstruct loops */
      default:
         break;
      }
   }

   return reconstruct_cf_list;
}

static bool
mark_reconstructed(nir_src *src, void *state)
{
   BITSET_WORD *reconstructed_defs = state;
   BITSET_SET(reconstructed_defs, src->ssa->index);
   return true;
}

/*
 * If a phi is moved into the preamble, then the if it depends on must also be
 * moved. However, it is not necessary to consider any nested control flow. As
 * an example, if we have a shader:
 *
 *    if (not moveable condition) {
 *       if (moveable condition) {
 *          x = moveable
 *       }
 *       y = phi x, moveable
 *       z = floor y
 *    }
 *
 * Then if 'z' is in the replace set, we need to reconstruct the inner if, but
 * not the outer if, unless there's also speculation to worry about.
 *
 * We do this by marking defs that need to be reconstructed, with a backwards
 * sweep of the program (compatible with reverse dominance), and marking the
 * if's preceding reconstructed phis.
 */
static void
analyze_reconstructed(opt_preamble_ctx *ctx, nir_function_impl *impl)
{
   nir_foreach_block_reverse(block, impl) {
      /* If an if-statement is reconstructed, its condition must be as well */
      nir_if *nif = nir_block_get_following_if(block);
      if (nif && _mesa_set_search(ctx->reconstructed_ifs, nif))
         BITSET_SET(ctx->reconstructed_defs, nif->condition.ssa->index);

      nir_foreach_instr_reverse(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx->states[def->index];

         /* Anything that's replaced must be reconstructed */
         if (state->replace)
            BITSET_SET(ctx->reconstructed_defs, def->index);
         else if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
            continue;

         /* If it must be reconstructed, it better be moveable */
         assert(state->can_move);

         /* Anything that depends on something reconstructed is reconstructed */
         nir_foreach_src(instr, mark_reconstructed, ctx->reconstructed_defs);

         /* Reconstructed phis need their ifs reconstructed */
         if (instr->type == nir_instr_type_phi) {
            nir_cf_node *prev_node = nir_cf_node_prev(&instr->block->cf_node);

            /* Invariants guaranteed by can_move_instr */
            assert(prev_node != NULL);
            assert(prev_node->type == nir_cf_node_if);

            nir_if *nif = nir_cf_node_as_if(prev_node);
            assert(can_move_src(&nif->condition, ctx));

            /* Mark the if for reconstruction */
            _mesa_set_add(ctx->reconstructed_ifs, nif);
         }
      }
   }
}

bool
nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
                 unsigned *size)
{
   opt_preamble_ctx ctx = {
      .options = options,
   };

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   ctx.states = calloc(impl->ssa_alloc, sizeof(*ctx.states));

   /* Step 1: Calculate can_move */
   calculate_can_move_for_cf_list(&ctx, &impl->body);

   /* Step 2: Calculate is_candidate. This is complicated by the presence of
    * non-candidate instructions like derefs whose users cannot be rewritten.
    * If a deref chain is used at all by a non-can_move thing, then any offset
    * sources anywhere along the chain should be considered candidates because
    * the entire deref chain will never be deleted, but if it's only used by
    * can_move things then it becomes subsumed by its users and none of the
    * offset sources should be considered candidates as they will be removed
    * when the users of the deref chain are moved. We need to replace "are
    * there any non-can_move users" with "are there any non-can_move users,
    * *recursing through non-candidate users*". We do this by walking backward
    * and marking when a non-candidate instruction must stay in the final
    * program because it has a non-can_move user, including recursively.
    */
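   /* As an illustrative (hypothetical) fragment:
    *
    *    idx = load_ubo(...)           -- can_move
    *    d   = deref_array ..., idx    -- can_move deref, never a candidate
    *    x   = load_deref d            -- can_move
    *    store_output(x, ...)          -- cannot move
    *
    * Here x is the candidate; if x is hoisted, d and idx move along with it,
    * so idx must not become a separate candidate. If x were instead not
    * can_move, d would be marked must_stay and the reverse walk would then
    * make idx a candidate in its own right.
    */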
   unsigned num_candidates = 0;
   nir_foreach_block_reverse(block, impl) {
      nir_foreach_instr_reverse(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->can_move)
            continue;

         state->value = get_instr_cost(instr, options);
         bool is_candidate = !avoid_instr(instr, options);
         state->candidate = false;
         state->must_stay = false;
         nir_foreach_use_including_if(use, def) {
            bool is_can_move_user;

            if (nir_src_is_if(use)) {
               is_can_move_user = false;
            } else {
               nir_def *use_def = nir_instr_def(nir_src_parent_instr(use));
               is_can_move_user = use_def != NULL &&
                                  ctx.states[use_def->index].can_move &&
                                  !ctx.states[use_def->index].must_stay;
            }

            if (is_can_move_user) {
               state->can_move_users++;
            } else {
               if (is_candidate)
                  state->candidate = true;
               else
                  state->must_stay = true;
            }
         }

         if (state->candidate)
            num_candidates++;
      }
   }

   if (num_candidates == 0) {
      free(ctx.states);
      return false;
   }

   def_state **candidates = malloc(sizeof(*candidates) * num_candidates);
   unsigned candidate_idx = 0;
   unsigned total_size = 0;

   /* Step 3: Calculate value of candidates by propagating downwards. We try
    * to share the value amongst can_move uses, in case there are multiple.
    * This won't always find the most optimal solution, but is hopefully a
    * good heuristic.
    *
    * Note that we use the can_move adjusted in the last pass, because if a
    * can_move instruction cannot be moved because it's not a candidate and it
    * has a non-can_move source then we don't want to count it as a use.
    *
    * While we're here, also collect an array of candidates.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->can_move || state->must_stay)
            continue;

         ctx.def = def;
         nir_foreach_src(instr, update_src_value, &ctx);

         /* If this instruction is a candidate, its value shouldn't be
          * propagated so we skip dividing it.
          *
          * Note: if it's can_move but not a candidate, then all its users
          * must be can_move, so if there are no users then it must be dead.
          */
         if (!state->candidate && !state->must_stay) {
            if (state->can_move_users > 0)
               state->value /= state->can_move_users;
            else
               state->value = 0;
         }

         if (state->candidate) {
            state->benefit = state->value -
                             options->rewrite_cost_cb(def, options->cb_data);

            if (state->benefit > 0) {
               options->def_size(def, &state->size, &state->align);
               total_size = ALIGN_POT(total_size, state->align);
               total_size += state->size;
               candidates[candidate_idx++] = state;
            }
         }
      }
   }

   assert(candidate_idx <= num_candidates);
   num_candidates = candidate_idx;

   if (num_candidates == 0) {
      free(ctx.states);
      free(candidates);
      return false;
   }

   /* Step 4: Figure out which candidates we're going to replace and assign an
    * offset. Assuming there is no expression sharing, this is similar to the
    * 0-1 knapsack problem, except when there is a gap introduced by
    * alignment. We use a well-known greedy approximation, sorting by value
    * divided by size.
    */
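   /* For instance (made-up numbers): a vec4 with value 6.0 and size 4 scores
    * 1.5, while a scalar with value 2.0 and size 1 scores 2.0, so the scalar
    * is packed first when storage is tight.
    */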

   if (((*size) + total_size) > options->preamble_storage_size) {
      qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort);
   }

   unsigned offset = *size;
   for (unsigned i = 0; i < num_candidates; i++) {
      def_state *state = candidates[i];
      offset = ALIGN_POT(offset, state->align);

      if (offset + state->size > options->preamble_storage_size)
         break;

      state->replace = true;
      state->offset = offset;

      offset += state->size;
   }

   *size = offset;

   free(candidates);

   /* Determine which if's need to be reconstructed, based on the replacements
    * we did.
    */
   ctx.reconstructed_ifs = _mesa_pointer_set_create(NULL);
   ctx.reconstructed_defs = calloc(BITSET_WORDS(impl->ssa_alloc),
                                   sizeof(BITSET_WORD));
   analyze_reconstructed(&ctx, impl);

   /* If we make progress analyzing speculation, we need to re-analyze
    * reconstructed defs to get the if-conditions in there.
    */
   if (analyze_speculation_for_cf_list(&ctx, &impl->body))
      analyze_reconstructed(&ctx, impl);

   /* Step 5: Actually do the replacement. */
   struct hash_table *remap_table =
      _mesa_pointer_hash_table_create(NULL);
   nir_function_impl *preamble =
      nir_shader_get_preamble(impl->function->shader);
   nir_builder preamble_builder = nir_builder_at(nir_before_impl(preamble));
   nir_builder *b = &preamble_builder;

   replace_for_cf_list(b, &ctx, remap_table, &impl->body);

   nir_builder builder = nir_builder_create(impl);
   b = &builder;

   unsigned max_index = impl->ssa_alloc;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         /* Ignore new load_preamble instructions */
         if (def->index >= max_index)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->replace)
            continue;

         b->cursor = nir_after_instr_and_phis(instr);

         nir_def *new_def =
            nir_load_preamble(b, def->num_components, def->bit_size,
                              .base = state->offset);

         nir_def_rewrite_uses(def, new_def);
         nir_instr_free_and_dce(instr);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_control_flow);

   ralloc_free(remap_table);
   free(ctx.states);
   free(ctx.reconstructed_defs);
   _mesa_set_destroy(ctx.reconstructed_ifs, NULL);
   return true;
}