• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2010 Marek Olšák <maraeo@gmail.com>
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_remove_constants.h"
7 #include <stdbool.h>
8 #include <stdlib.h>
9 #include "util/bitscan.h"
10 #include "radeon_dataflow.h"
11 
12 struct const_remap_state {
13    /* Used when emiting shaders constants. */
14    struct const_remap *remap_table;
15    /* Used when rewritign registers */
16    struct const_remap *inv_remap_table;
17    /* Old costant layout. */
18    struct rc_constant *constants;
19    /* New constant layout. */
20    struct rc_constant_list new_constants;
21    /* Marks immediates that are used as a vector. Those will be just copied. */
22    uint8_t *is_used_as_vector;
23    bool has_rel_addr;
24    bool are_externals_remapped;
25    bool is_identity;
26 };
27 
28 static void
remap_regs(struct rc_instruction * inst,struct const_remap * inv_remap_table)29 remap_regs(struct rc_instruction *inst, struct const_remap *inv_remap_table)
30 {
31    const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
32    for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
33       if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT)
34          continue;
35       unsigned old_index = inst->U.I.SrcReg[src].Index;
36       for (unsigned chan = 0; chan < 4; chan++) {
37          unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
38          if (old_swz <= RC_SWIZZLE_W) {
39             inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz];
40             SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
41                     inv_remap_table[old_index].swizzle[old_swz]);
42          }
43       }
44    }
45 }
46 
47 static void
mark_used(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)48 mark_used(void *userdata, struct rc_instruction *inst, struct rc_src_register *src)
49 {
50    struct const_remap_state *d = userdata;
51 
52    if (src->File == RC_FILE_CONSTANT) {
53       uint8_t mask = 0;
54       if (src->RelAddr) {
55          d->has_rel_addr = true;
56       } else {
57          for (unsigned chan = 0; chan < 4; chan++) {
58             char swz = GET_SWZ(src->Swizzle, chan);
59             if (swz > RC_SWIZZLE_W)
60                continue;
61             mask |= 1 << swz;
62          }
63       }
64       d->constants[src->Index].UseMask |= mask;
65       if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE && util_bitcount(mask) > 1) {
66          d->is_used_as_vector[src->Index] |= mask;
67       }
68    }
69 }
70 
71 static void
place_constant_in_free_slot(struct const_remap_state * s,unsigned i)72 place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
73 {
74    unsigned count = s->new_constants.Count;
75    for (unsigned chan = 0; chan < 4; chan++) {
76       s->inv_remap_table[i].index[chan] = count;
77       s->inv_remap_table[i].swizzle[chan] = chan;
78       if (s->constants[i].UseMask & (1 << chan)) {
79          s->remap_table[count].index[chan] = i;
80          s->remap_table[count].swizzle[chan] = chan;
81       }
82    }
83    s->new_constants.Constants[count] = s->constants[i];
84 
85    if (count != i) {
86       if (s->constants[i].Type == RC_CONSTANT_EXTERNAL)
87          s->are_externals_remapped = true;
88       s->is_identity = false;
89    }
90    s->new_constants.Count++;
91 }
92 
93 static void
place_immediate_in_free_slot(struct const_remap_state * s,unsigned i)94 place_immediate_in_free_slot(struct const_remap_state *s, unsigned i)
95 {
96    assert(util_bitcount(s->is_used_as_vector[i]) > 1);
97 
98    unsigned count = s->new_constants.Count;
99 
100    s->new_constants.Constants[count] = s->constants[i];
101    s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i];
102    for (unsigned chan = 0; chan < 4; chan++) {
103       if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) {
104          s->inv_remap_table[i].index[chan] = count;
105          s->inv_remap_table[i].swizzle[chan] = chan;
106       }
107    }
108    if (count != i) {
109       s->is_identity = false;
110    }
111    s->new_constants.Count++;
112 }
113 
114 static void
try_merge_constants_external(struct const_remap_state * s,unsigned i)115 try_merge_constants_external(struct const_remap_state *s, unsigned i)
116 {
117    assert(util_bitcount(s->constants[i].UseMask) == 1);
118    for (unsigned j = 0; j < s->new_constants.Count; j++) {
119       for (unsigned chan = 0; chan < 4; chan++) {
120          if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) {
121             /* Writemask to swizzle */
122             unsigned swizzle = 0;
123             for (; swizzle < 4; swizzle++)
124                if (s->constants[i].UseMask >> swizzle == 1)
125                   break;
126             /* Update the remap tables. */
127             s->remap_table[j].index[chan] = i;
128             s->remap_table[j].swizzle[chan] = swizzle;
129             s->inv_remap_table[i].index[swizzle] = j;
130             s->inv_remap_table[i].swizzle[swizzle] = chan;
131             s->are_externals_remapped = true;
132             s->is_identity = false;
133             return;
134          }
135       }
136    }
137    place_constant_in_free_slot(s, i);
138 }
139 
140 static void
init_constant_remap_state(struct radeon_compiler * c,struct const_remap_state * s)141 init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
142 {
143    s->is_identity = true;
144    s->is_used_as_vector = malloc(c->Program.Constants.Count);
145    s->new_constants.Constants = malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
146    s->new_constants._Reserved = c->Program.Constants.Count;
147    s->constants = c->Program.Constants.Constants;
148    memset(s->is_used_as_vector, 0, c->Program.Constants.Count);
149 
150    s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
151    s->inv_remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
152    for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
153       /* Clear the UseMask, we will update it later. */
154       s->constants[i].UseMask = 0;
155       for (unsigned swz = 0; swz < 4; swz++) {
156          s->remap_table[i].index[swz] = -1;
157          s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED;
158       }
159    }
160 }
161 
162 void
rc_remove_unused_constants(struct radeon_compiler * c,void * user)163 rc_remove_unused_constants(struct radeon_compiler *c, void *user)
164 {
165    struct const_remap **out_remap_table = (struct const_remap **)user;
166    struct rc_constant *constants = c->Program.Constants.Constants;
167    struct const_remap_state remap_state = {};
168    struct const_remap_state *s = &remap_state;
169 
170    if (!c->Program.Constants.Count) {
171       *out_remap_table = NULL;
172       return;
173    }
174 
175    init_constant_remap_state(c, s);
176 
177    /* Pass 1: Mark used constants. */
178    for (struct rc_instruction *inst = c->Program.Instructions.Next;
179         inst != &c->Program.Instructions; inst = inst->Next) {
180       rc_for_all_reads_src(inst, mark_used, s);
181    }
182 
183    /* Pass 2: If there is relative addressing or dead constant elimination
184     * is disabled, mark all externals as used. */
185    if (s->has_rel_addr || !c->remove_unused_constants) {
186       for (unsigned i = 0; i < c->Program.Constants.Count; i++)
187          if (constants[i].Type == RC_CONSTANT_EXTERNAL)
188             s->constants[i].UseMask = RC_MASK_XYZW;
189    }
190 
191    /* Pass 3: Make the remapping table and remap constants.
192     * First iterate over used vec2, vec3 and vec4 externals and place them in a free
193     * slots. While we could in theory merge 2 vec2 together, its not worth it
194     * as we would have to a) check that the swizzle is valid, b) transforming
195     * xy to zw would mean we need rgb and alpha source slot, thus it would hurt
196     * us potentially during pair scheduling. */
197    for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
198       if (constants[i].Type != RC_CONSTANT_EXTERNAL)
199          continue;
200       if (util_bitcount(s->constants[i].UseMask) > 1) {
201          place_constant_in_free_slot(s, i);
202       }
203    }
204 
205    /* Now iterate over scalarar externals and put them into empty slots. */
206    for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
207       if (constants[i].Type != RC_CONSTANT_EXTERNAL)
208          continue;
209       if (util_bitcount(s->constants[i].UseMask) == 1)
210          try_merge_constants_external(s, i);
211    }
212 
213    /* Now put immediates which are used as vectors. */
214    for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
215       if (constants[i].Type == RC_CONSTANT_IMMEDIATE &&
216           util_bitcount(s->constants[i].UseMask) > 0 &&
217           util_bitcount(s->is_used_as_vector[i]) > 0) {
218          place_immediate_in_free_slot(s, i);
219       }
220    }
221 
222    /* Now walk over scalar immediates and try to:
223     *  a) check for duplicates,
224     *  b) find free slot.
225     *  All of this is already done by rc_constants_add_immediate_scalar,
226     *  so just use it.
227     */
228    for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
229       if (constants[i].Type != RC_CONSTANT_IMMEDIATE)
230          continue;
231       for (unsigned chan = 0; chan < 4; chan++) {
232          if ((s->constants[i].UseMask) & (1 << chan) &&
233              (~(s->is_used_as_vector[i]) & (1 << chan))) {
234             unsigned swz;
235             s->inv_remap_table[i].index[chan] = rc_constants_add_immediate_scalar(
236                &s->new_constants, constants[i].u.Immediate[chan], &swz);
237             s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0);
238             s->is_identity = false;
239          }
240       }
241    }
242 
243    /* Finally place state constants. */
244    for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
245       if (constants[i].Type != RC_CONSTANT_STATE)
246          continue;
247       if (util_bitcount(s->constants[i].UseMask) > 0) {
248          place_constant_in_free_slot(s, i);
249       }
250    }
251 
252    /*  is_identity ==> new_count == old_count
253     * !is_identity ==> new_count <  old_count */
254    assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
255 
256    /* Pass 4: Redirect reads of all constants to their new locations. */
257    if (!s->is_identity) {
258       for (struct rc_instruction *inst = c->Program.Instructions.Next;
259            inst != &c->Program.Instructions; inst = inst->Next) {
260          remap_regs(inst, s->inv_remap_table);
261       }
262    }
263 
264    /* Set the new constant count. Note that new_count may be less than
265     * Count even though the remapping function is identity. In that case,
266     * the constants have been removed at the end of the array. */
267    rc_constants_destroy(&c->Program.Constants);
268    c->Program.Constants = s->new_constants;
269 
270    if (s->are_externals_remapped) {
271       *out_remap_table = s->remap_table;
272    } else {
273       *out_remap_table = NULL;
274       free(s->remap_table);
275    }
276 
277    free(s->inv_remap_table);
278 
279    if (c->Debug & RC_DBG_LOG)
280       rc_constants_print(&c->Program.Constants, s->remap_table);
281 }
282