1 /*
2 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "radeon_remove_constants.h"
7 #include <stdbool.h>
8 #include <stdlib.h>
9 #include "util/bitscan.h"
10 #include "radeon_dataflow.h"
11
/* Working state shared by the helpers of the constant removal pass. */
struct const_remap_state {
   /* Used when emitting shader constants. */
   struct const_remap *remap_table;
   /* Used when rewriting registers. */
   struct const_remap *inv_remap_table;
   /* Old constant layout. */
   struct rc_constant *constants;
   /* New constant layout. */
   struct rc_constant_list new_constants;
   /* Marks immediates that are used as a vector. Those will be just copied. */
   uint8_t *is_used_as_vector;
   /* Set when any constant source operand is read with relative addressing. */
   bool has_rel_addr;
   /* Set when an external constant ends up in a different slot or channel. */
   bool are_externals_remapped;
   /* Starts out true; cleared as soon as any constant is moved. */
   bool is_identity;
};
27
28 static void
remap_regs(struct rc_instruction * inst,struct const_remap * inv_remap_table)29 remap_regs(struct rc_instruction *inst, struct const_remap *inv_remap_table)
30 {
31 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
32 for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
33 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT)
34 continue;
35 unsigned old_index = inst->U.I.SrcReg[src].Index;
36 for (unsigned chan = 0; chan < 4; chan++) {
37 unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
38 if (old_swz <= RC_SWIZZLE_W) {
39 inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz];
40 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
41 inv_remap_table[old_index].swizzle[old_swz]);
42 }
43 }
44 }
45 }
46
47 static void
mark_used(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)48 mark_used(void *userdata, struct rc_instruction *inst, struct rc_src_register *src)
49 {
50 struct const_remap_state *d = userdata;
51
52 if (src->File == RC_FILE_CONSTANT) {
53 uint8_t mask = 0;
54 if (src->RelAddr) {
55 d->has_rel_addr = true;
56 } else {
57 for (unsigned chan = 0; chan < 4; chan++) {
58 char swz = GET_SWZ(src->Swizzle, chan);
59 if (swz > RC_SWIZZLE_W)
60 continue;
61 mask |= 1 << swz;
62 }
63 }
64 d->constants[src->Index].UseMask |= mask;
65 if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE && util_bitcount(mask) > 1) {
66 d->is_used_as_vector[src->Index] |= mask;
67 }
68 }
69 }
70
71 static void
place_constant_in_free_slot(struct const_remap_state * s,unsigned i)72 place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
73 {
74 unsigned count = s->new_constants.Count;
75 for (unsigned chan = 0; chan < 4; chan++) {
76 s->inv_remap_table[i].index[chan] = count;
77 s->inv_remap_table[i].swizzle[chan] = chan;
78 if (s->constants[i].UseMask & (1 << chan)) {
79 s->remap_table[count].index[chan] = i;
80 s->remap_table[count].swizzle[chan] = chan;
81 }
82 }
83 s->new_constants.Constants[count] = s->constants[i];
84
85 if (count != i) {
86 if (s->constants[i].Type == RC_CONSTANT_EXTERNAL)
87 s->are_externals_remapped = true;
88 s->is_identity = false;
89 }
90 s->new_constants.Count++;
91 }
92
93 static void
place_immediate_in_free_slot(struct const_remap_state * s,unsigned i)94 place_immediate_in_free_slot(struct const_remap_state *s, unsigned i)
95 {
96 assert(util_bitcount(s->is_used_as_vector[i]) > 1);
97
98 unsigned count = s->new_constants.Count;
99
100 s->new_constants.Constants[count] = s->constants[i];
101 s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i];
102 for (unsigned chan = 0; chan < 4; chan++) {
103 if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) {
104 s->inv_remap_table[i].index[chan] = count;
105 s->inv_remap_table[i].swizzle[chan] = chan;
106 }
107 }
108 if (count != i) {
109 s->is_identity = false;
110 }
111 s->new_constants.Count++;
112 }
113
114 static void
try_merge_constants_external(struct const_remap_state * s,unsigned i)115 try_merge_constants_external(struct const_remap_state *s, unsigned i)
116 {
117 assert(util_bitcount(s->constants[i].UseMask) == 1);
118 for (unsigned j = 0; j < s->new_constants.Count; j++) {
119 for (unsigned chan = 0; chan < 4; chan++) {
120 if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) {
121 /* Writemask to swizzle */
122 unsigned swizzle = 0;
123 for (; swizzle < 4; swizzle++)
124 if (s->constants[i].UseMask >> swizzle == 1)
125 break;
126 /* Update the remap tables. */
127 s->remap_table[j].index[chan] = i;
128 s->remap_table[j].swizzle[chan] = swizzle;
129 s->inv_remap_table[i].index[swizzle] = j;
130 s->inv_remap_table[i].swizzle[swizzle] = chan;
131 s->are_externals_remapped = true;
132 s->is_identity = false;
133 return;
134 }
135 }
136 }
137 place_constant_in_free_slot(s, i);
138 }
139
140 static void
init_constant_remap_state(struct radeon_compiler * c,struct const_remap_state * s)141 init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
142 {
143 s->is_identity = true;
144 s->is_used_as_vector = malloc(c->Program.Constants.Count);
145 s->new_constants.Constants = malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
146 s->new_constants._Reserved = c->Program.Constants.Count;
147 s->constants = c->Program.Constants.Constants;
148 memset(s->is_used_as_vector, 0, c->Program.Constants.Count);
149
150 s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
151 s->inv_remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
152 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
153 /* Clear the UseMask, we will update it later. */
154 s->constants[i].UseMask = 0;
155 for (unsigned swz = 0; swz < 4; swz++) {
156 s->remap_table[i].index[swz] = -1;
157 s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED;
158 }
159 }
160 }
161
162 void
rc_remove_unused_constants(struct radeon_compiler * c,void * user)163 rc_remove_unused_constants(struct radeon_compiler *c, void *user)
164 {
165 struct const_remap **out_remap_table = (struct const_remap **)user;
166 struct rc_constant *constants = c->Program.Constants.Constants;
167 struct const_remap_state remap_state = {};
168 struct const_remap_state *s = &remap_state;
169
170 if (!c->Program.Constants.Count) {
171 *out_remap_table = NULL;
172 return;
173 }
174
175 init_constant_remap_state(c, s);
176
177 /* Pass 1: Mark used constants. */
178 for (struct rc_instruction *inst = c->Program.Instructions.Next;
179 inst != &c->Program.Instructions; inst = inst->Next) {
180 rc_for_all_reads_src(inst, mark_used, s);
181 }
182
183 /* Pass 2: If there is relative addressing or dead constant elimination
184 * is disabled, mark all externals as used. */
185 if (s->has_rel_addr || !c->remove_unused_constants) {
186 for (unsigned i = 0; i < c->Program.Constants.Count; i++)
187 if (constants[i].Type == RC_CONSTANT_EXTERNAL)
188 s->constants[i].UseMask = RC_MASK_XYZW;
189 }
190
191 /* Pass 3: Make the remapping table and remap constants.
192 * First iterate over used vec2, vec3 and vec4 externals and place them in a free
193 * slots. While we could in theory merge 2 vec2 together, its not worth it
194 * as we would have to a) check that the swizzle is valid, b) transforming
195 * xy to zw would mean we need rgb and alpha source slot, thus it would hurt
196 * us potentially during pair scheduling. */
197 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
198 if (constants[i].Type != RC_CONSTANT_EXTERNAL)
199 continue;
200 if (util_bitcount(s->constants[i].UseMask) > 1) {
201 place_constant_in_free_slot(s, i);
202 }
203 }
204
205 /* Now iterate over scalarar externals and put them into empty slots. */
206 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
207 if (constants[i].Type != RC_CONSTANT_EXTERNAL)
208 continue;
209 if (util_bitcount(s->constants[i].UseMask) == 1)
210 try_merge_constants_external(s, i);
211 }
212
213 /* Now put immediates which are used as vectors. */
214 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
215 if (constants[i].Type == RC_CONSTANT_IMMEDIATE &&
216 util_bitcount(s->constants[i].UseMask) > 0 &&
217 util_bitcount(s->is_used_as_vector[i]) > 0) {
218 place_immediate_in_free_slot(s, i);
219 }
220 }
221
222 /* Now walk over scalar immediates and try to:
223 * a) check for duplicates,
224 * b) find free slot.
225 * All of this is already done by rc_constants_add_immediate_scalar,
226 * so just use it.
227 */
228 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
229 if (constants[i].Type != RC_CONSTANT_IMMEDIATE)
230 continue;
231 for (unsigned chan = 0; chan < 4; chan++) {
232 if ((s->constants[i].UseMask) & (1 << chan) &&
233 (~(s->is_used_as_vector[i]) & (1 << chan))) {
234 unsigned swz;
235 s->inv_remap_table[i].index[chan] = rc_constants_add_immediate_scalar(
236 &s->new_constants, constants[i].u.Immediate[chan], &swz);
237 s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0);
238 s->is_identity = false;
239 }
240 }
241 }
242
243 /* Finally place state constants. */
244 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
245 if (constants[i].Type != RC_CONSTANT_STATE)
246 continue;
247 if (util_bitcount(s->constants[i].UseMask) > 0) {
248 place_constant_in_free_slot(s, i);
249 }
250 }
251
252 /* is_identity ==> new_count == old_count
253 * !is_identity ==> new_count < old_count */
254 assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
255
256 /* Pass 4: Redirect reads of all constants to their new locations. */
257 if (!s->is_identity) {
258 for (struct rc_instruction *inst = c->Program.Instructions.Next;
259 inst != &c->Program.Instructions; inst = inst->Next) {
260 remap_regs(inst, s->inv_remap_table);
261 }
262 }
263
264 /* Set the new constant count. Note that new_count may be less than
265 * Count even though the remapping function is identity. In that case,
266 * the constants have been removed at the end of the array. */
267 rc_constants_destroy(&c->Program.Constants);
268 c->Program.Constants = s->new_constants;
269
270 if (s->are_externals_remapped) {
271 *out_remap_table = s->remap_table;
272 } else {
273 *out_remap_table = NULL;
274 free(s->remap_table);
275 }
276
277 free(s->inv_remap_table);
278
279 if (c->Debug & RC_DBG_LOG)
280 rc_constants_print(&c->Program.Constants, s->remap_table);
281 }
282