• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2025 Advanced Micro Devices, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 /* This pass moves output stores to the end of the shader.
7  * (only those that can be moved trivially)
8  */
9 
10 #include "nir.h"
11 #include "nir_builder.h"
12 
/* Put the position in the last slot to make its store last.
 *
 * LAST_SLOT is an extra location index one past the valid varying locations
 * (gather_output_stores asserts location < NUM_TOTAL_VARYING_SLOTS);
 * VARYING_SLOT_POS is remapped to it.
 *
 * NUM_SLOTS is the size of the per-component slot space: every location,
 * including the extra one, has 4 entries (slot = location * 4 + component).
 */
#define LAST_SLOT    NUM_TOTAL_VARYING_SLOTS
#define NUM_SLOTS    ((LAST_SLOT + 1) * 4)
16 
/* State shared between the gathering callback and the moving loop.
 * All members are indexed by slot = location * 4 + component.
 */
typedef struct {
   /* Candidate store instruction, kept at the slot of the first component
    * the store writes. NULL where no candidate starts.
    */
   nir_instr *stores[NUM_SLOTS];
   /* Whether the output component is written only once or multiple times. */
   BITSET_DECLARE(single, NUM_SLOTS);
   BITSET_DECLARE(multiple, NUM_SLOTS);
} output_stores_state;
23 
24 static bool
gather_output_stores(struct nir_builder * b,nir_intrinsic_instr * intr,void * opaque)25 gather_output_stores(struct nir_builder *b, nir_intrinsic_instr *intr,
26                      void *opaque)
27 {
28    output_stores_state *state = (output_stores_state *)opaque;
29 
30    if (intr->intrinsic != nir_intrinsic_store_output)
31       return false;
32 
33    unsigned location = nir_intrinsic_io_semantics(intr).location;
34    unsigned component = nir_intrinsic_component(intr);
35    assert(location < NUM_TOTAL_VARYING_SLOTS);
36    assert(component < 4);
37    assert(!nir_intrinsic_io_semantics(intr).high_16bits);
38    /* Stores must be in the top level block. */
39    assert(intr->instr.block->cf_node.parent->type == nir_cf_node_function);
40 
41    /* Put the position in the last slot to make its store last. */
42    if (location == VARYING_SLOT_POS)
43       location = LAST_SLOT;
44 
45    unsigned slot = location * 4 + component;
46    unsigned num_components = intr->src[0].ssa->num_components;
47 
48    /* Each component must be written only once. */
49    bool multiple = false;
50    for (unsigned i = 0; i < num_components; i++) {
51       if (BITSET_TEST(state->multiple, slot)) {
52          multiple = true;
53       } else if (BITSET_TEST(state->single, slot)) {
54          BITSET_CLEAR(state->single, slot);
55          BITSET_SET(state->multiple, slot);
56          multiple = true;
57       }
58    }
59 
60    if (!multiple) {
61       state->stores[slot] = &intr->instr;
62       BITSET_SET_RANGE_INSIDE_WORD(state->single, slot,
63                                    slot + num_components - 1);
64    }
65    return false;
66 }
67 
68 bool
nir_move_output_stores_to_end(nir_shader * nir)69 nir_move_output_stores_to_end(nir_shader *nir)
70 {
71    assert(nir->info.stage == MESA_SHADER_VERTEX ||
72           nir->info.stage == MESA_SHADER_TESS_EVAL);
73 
74    output_stores_state state;
75    memset(&state, 0, sizeof(state));
76 
77    /* Gather output stores. */
78    nir_shader_intrinsics_pass(nir, gather_output_stores, nir_metadata_all,
79                               &state);
80 
81    /* Move output stores to the end (only those that we can move). */
82    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
83    bool progress = false;
84    unsigned i;
85 
86    BITSET_FOREACH_SET(i, state.single, NUM_SLOTS) {
87       if (!state.stores[i])
88          continue;
89 
90       nir_instr_remove(state.stores[i]);
91       nir_instr_insert(nir_after_impl(impl), state.stores[i]);
92       progress = true;
93    }
94 
95    nir_metadata_preserve(impl, progress ? nir_metadata_control_flow :
96                                           nir_metadata_all);
97    return progress;
98 }
99