1 /*
2 * Copyright © 2025 Advanced Micro Devices, Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 /* This pass moves output stores to the end of the shader.
7 * (only those that can be moved trivially)
8 */
9
10 #include "nir.h"
11 #include "nir_builder.h"
12
/* Put the position in the last slot to make its store last: position stores
 * are tracked under this extra slot index, which is one past every real
 * varying slot, and the tracked stores are re-inserted in increasing slot
 * order.
 */
#define LAST_SLOT NUM_TOTAL_VARYING_SLOTS
/* Number of tracked output components: 4 components for every slot,
 * including the extra LAST_SLOT used for the position.
 */
#define NUM_SLOTS ((LAST_SLOT + 1) * 4)
16
/* Bookkeeping shared by the gathering callback and the pass entry point.
 * All members are indexed by "slot", i.e. location * 4 + component.
 */
typedef struct {
   /* Candidate store, recorded at the slot of the first component it writes;
    * NULL where nothing is recorded. A candidate is only moved if its bit in
    * "single" is set as well.
    */
   nir_instr *stores[NUM_SLOTS];
   /* Whether the output component is written only once or multiple times. */
   BITSET_DECLARE(single, NUM_SLOTS);
   BITSET_DECLARE(multiple, NUM_SLOTS);
} output_stores_state;
23
24 static bool
gather_output_stores(struct nir_builder * b,nir_intrinsic_instr * intr,void * opaque)25 gather_output_stores(struct nir_builder *b, nir_intrinsic_instr *intr,
26 void *opaque)
27 {
28 output_stores_state *state = (output_stores_state *)opaque;
29
30 if (intr->intrinsic != nir_intrinsic_store_output)
31 return false;
32
33 unsigned location = nir_intrinsic_io_semantics(intr).location;
34 unsigned component = nir_intrinsic_component(intr);
35 assert(location < NUM_TOTAL_VARYING_SLOTS);
36 assert(component < 4);
37 assert(!nir_intrinsic_io_semantics(intr).high_16bits);
38 /* Stores must be in the top level block. */
39 assert(intr->instr.block->cf_node.parent->type == nir_cf_node_function);
40
41 /* Put the position in the last slot to make its store last. */
42 if (location == VARYING_SLOT_POS)
43 location = LAST_SLOT;
44
45 unsigned slot = location * 4 + component;
46 unsigned num_components = intr->src[0].ssa->num_components;
47
48 /* Each component must be written only once. */
49 bool multiple = false;
50 for (unsigned i = 0; i < num_components; i++) {
51 if (BITSET_TEST(state->multiple, slot)) {
52 multiple = true;
53 } else if (BITSET_TEST(state->single, slot)) {
54 BITSET_CLEAR(state->single, slot);
55 BITSET_SET(state->multiple, slot);
56 multiple = true;
57 }
58 }
59
60 if (!multiple) {
61 state->stores[slot] = &intr->instr;
62 BITSET_SET_RANGE_INSIDE_WORD(state->single, slot,
63 slot + num_components - 1);
64 }
65 return false;
66 }
67
68 bool
nir_move_output_stores_to_end(nir_shader * nir)69 nir_move_output_stores_to_end(nir_shader *nir)
70 {
71 assert(nir->info.stage == MESA_SHADER_VERTEX ||
72 nir->info.stage == MESA_SHADER_TESS_EVAL);
73
74 output_stores_state state;
75 memset(&state, 0, sizeof(state));
76
77 /* Gather output stores. */
78 nir_shader_intrinsics_pass(nir, gather_output_stores, nir_metadata_all,
79 &state);
80
81 /* Move output stores to the end (only those that we can move). */
82 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
83 bool progress = false;
84 unsigned i;
85
86 BITSET_FOREACH_SET(i, state.single, NUM_SLOTS) {
87 if (!state.stores[i])
88 continue;
89
90 nir_instr_remove(state.stores[i]);
91 nir_instr_insert(nir_after_impl(impl), state.stores[i]);
92 progress = true;
93 }
94
95 nir_metadata_preserve(impl, progress ? nir_metadata_control_flow :
96 nir_metadata_all);
97 return progress;
98 }
99