1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "radeon_dataflow.h"
7
8 #include "radeon_compiler.h"
9
10 struct updatemask_state {
11 unsigned char Output[RC_REGISTER_MAX_INDEX];
12 unsigned char Temporary[RC_REGISTER_MAX_INDEX];
13 unsigned char Address;
14 unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
15 };
16
/**
 * Result of the analysis for a single instruction; the array of these is
 * indexed by the instruction's IP.
 */
struct instruction_state {
   /* Destination channels whose value was found to be used. */
   unsigned char WriteMask : 4;
   /* Set when the instruction's ALU result write was found to be used. */
   unsigned char WriteALUResult : 1;
   /* Per source operand: operand channels that have already been
    * propagated into the register use masks. */
   unsigned char SrcReg[3];
};
22
23 struct loopinfo {
24 struct updatemask_state StoreEndloop;
25 unsigned int BreakCount;
26 unsigned int BreaksReserved;
27 };
28
29 struct branchinfo {
30 unsigned int HaveElse : 1;
31
32 struct updatemask_state StoreEndif;
33 struct updatemask_state StoreElse;
34 };
35
36 struct deadcode_state {
37 struct radeon_compiler *C;
38 struct instruction_state *Instructions;
39
40 struct updatemask_state R;
41
42 struct branchinfo *BranchStack;
43 unsigned int BranchStackSize;
44 unsigned int BranchStackReserved;
45
46 struct loopinfo *LoopStack;
47 unsigned int LoopStackSize;
48 unsigned int LoopStackReserved;
49 };
50
51 static void
or_updatemasks(struct updatemask_state * dst,struct updatemask_state * a,struct updatemask_state * b)52 or_updatemasks(struct updatemask_state *dst, struct updatemask_state *a, struct updatemask_state *b)
53 {
54 for (unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
55 dst->Output[i] = a->Output[i] | b->Output[i];
56 dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
57 }
58
59 for (unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
60 dst->Special[i] = a->Special[i] | b->Special[i];
61
62 dst->Address = a->Address | b->Address;
63 }
64
65 static void
push_loop(struct deadcode_state * s)66 push_loop(struct deadcode_state *s)
67 {
68 memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, s->LoopStackSize,
69 s->LoopStackReserved, 1);
70 memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
71 memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R));
72 }
73
74 static void
push_branch(struct deadcode_state * s)75 push_branch(struct deadcode_state *s)
76 {
77 struct branchinfo *branch;
78
79 memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, s->BranchStackSize,
80 s->BranchStackReserved, 1);
81
82 branch = &s->BranchStack[s->BranchStackSize++];
83 branch->HaveElse = 0;
84 memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
85 }
86
87 static unsigned char *
get_used_ptr(struct deadcode_state * s,rc_register_file file,unsigned int index)88 get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
89 {
90 if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
91 if (index >= RC_REGISTER_MAX_INDEX) {
92 rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __func__, index, file);
93 return NULL;
94 }
95
96 if (file == RC_FILE_OUTPUT)
97 return &s->R.Output[index];
98 else
99 return &s->R.Temporary[index];
100 } else if (file == RC_FILE_ADDRESS) {
101 return &s->R.Address;
102 } else if (file == RC_FILE_SPECIAL) {
103 if (index >= RC_NUM_SPECIAL_REGISTERS) {
104 rc_error(s->C, "%s: special file index %i out of bounds\n", __func__, index);
105 return NULL;
106 }
107
108 return &s->R.Special[index];
109 }
110
111 return NULL;
112 }
113
114 static void
mark_used(struct deadcode_state * s,rc_register_file file,unsigned int index,unsigned int mask)115 mark_used(struct deadcode_state *s, rc_register_file file, unsigned int index, unsigned int mask)
116 {
117 unsigned char *pused = get_used_ptr(s, file, index);
118 if (pused)
119 *pused |= mask;
120 }
121
122 static void
update_instruction(struct deadcode_state * s,struct rc_instruction * inst)123 update_instruction(struct deadcode_state *s, struct rc_instruction *inst)
124 {
125 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
126 struct instruction_state *insts = &s->Instructions[inst->IP];
127 unsigned int usedmask = 0;
128 unsigned int srcmasks[3];
129
130 if (opcode->HasDstReg) {
131 unsigned char *pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
132 if (pused) {
133 usedmask = *pused & inst->U.I.DstReg.WriteMask;
134 *pused &= ~usedmask;
135 }
136 }
137
138 insts->WriteMask |= usedmask;
139
140 if (inst->U.I.WriteALUResult) {
141 unsigned char *pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
142 if (pused && *pused) {
143 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
144 usedmask |= RC_MASK_X;
145 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
146 usedmask |= RC_MASK_W;
147
148 *pused = 0;
149 insts->WriteALUResult = 1;
150 }
151 }
152
153 rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
154
155 for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
156 unsigned int refmask = 0;
157 unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
158 insts->SrcReg[src] |= newsrcmask;
159
160 for (unsigned int chan = 0; chan < 4; ++chan) {
161 if (GET_BIT(newsrcmask, chan))
162 refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
163 }
164
165 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
166 refmask &= RC_MASK_XYZW;
167
168 if (!refmask)
169 continue;
170
171 mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
172
173 if (inst->U.I.SrcReg[src].RelAddr)
174 mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
175 }
176 }
177
178 void
rc_dataflow_deadcode(struct radeon_compiler * c,void * user)179 rc_dataflow_deadcode(struct radeon_compiler *c, void *user)
180 {
181 struct deadcode_state s;
182 unsigned int nr_instructions;
183 unsigned int ip;
184
185 memset(&s, 0, sizeof(s));
186 s.C = c;
187
188 nr_instructions = rc_recompute_ips(c);
189 s.Instructions =
190 memory_pool_malloc(&c->Pool, sizeof(struct instruction_state) * nr_instructions);
191 memset(s.Instructions, 0, sizeof(struct instruction_state) * nr_instructions);
192
193 for (struct rc_instruction *inst = c->Program.Instructions.Prev;
194 inst != &c->Program.Instructions; inst = inst->Prev) {
195 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
196
197 /* Assume all output regs are live. Anything else should have been
198 * eliminated before it got to us.
199 */
200 if (opcode->HasDstReg)
201 mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask);
202
203 switch (opcode->Opcode) {
204 /* Mark all sources in the loop body as used before doing
205 * normal deadcode analysis. This is probably not optimal.
206 * Save this pessimistic deadcode state and restore it anytime
207 * we see a break just to be extra sure.
208 */
209 case RC_OPCODE_ENDLOOP: {
210 int endloops = 1;
211 struct rc_instruction *ptr;
212 for (ptr = inst->Prev; endloops > 0; ptr = ptr->Prev) {
213 opcode = rc_get_opcode_info(ptr->U.I.Opcode);
214 if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
215 endloops--;
216 continue;
217 }
218 if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
219 endloops++;
220 continue;
221 }
222 if (opcode->HasDstReg) {
223 int src = 0;
224 unsigned int srcmasks[3];
225 unsigned int writemask = ptr->U.I.DstReg.WriteMask;
226 if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
227 writemask |= RC_MASK_X;
228 else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
229 writemask |= RC_MASK_W;
230
231 rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
232 for (src = 0; src < opcode->NumSrcRegs; src++) {
233 mark_used(&s, ptr->U.I.SrcReg[src].File, ptr->U.I.SrcReg[src].Index,
234 srcmasks[src]);
235 }
236 }
237 }
238 push_loop(&s);
239 break;
240 }
241 case RC_OPCODE_BRK: {
242 struct loopinfo *loop = &s.LoopStack[s.LoopStackSize - 1];
243 memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R));
244 break;
245 }
246 case RC_OPCODE_BGNLOOP:
247 s.LoopStackSize--;
248 break;
249 case RC_OPCODE_CONT:
250 break;
251 case RC_OPCODE_ENDIF:
252 push_branch(&s);
253 break;
254 default:
255 if (opcode->IsFlowControl && s.BranchStackSize) {
256 struct branchinfo *branch = &s.BranchStack[s.BranchStackSize - 1];
257 if (opcode->Opcode == RC_OPCODE_IF) {
258 or_updatemasks(&s.R, &s.R,
259 branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
260
261 s.BranchStackSize--;
262 } else if (opcode->Opcode == RC_OPCODE_ELSE) {
263 if (branch->HaveElse) {
264 rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __func__);
265 } else {
266 memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
267 memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
268 branch->HaveElse = 1;
269 }
270 } else {
271 rc_error(c, "%s: Unhandled control flow instruction %s\n", __func__, opcode->Name);
272 }
273 }
274 }
275
276 update_instruction(&s, inst);
277 }
278
279 ip = 0;
280 for (struct rc_instruction *inst = c->Program.Instructions.Next;
281 inst != &c->Program.Instructions; inst = inst->Next, ++ip) {
282 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
283 int dead = 1;
284 unsigned int srcmasks[3];
285 unsigned int usemask;
286
287 if (!opcode->HasDstReg) {
288 dead = 0;
289 } else {
290 inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
291 if (s.Instructions[ip].WriteMask)
292 dead = 0;
293
294 if (s.Instructions[ip].WriteALUResult)
295 dead = 0;
296 else
297 inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
298 }
299
300 if (dead) {
301 struct rc_instruction *todelete = inst;
302 inst = inst->Prev;
303 rc_remove_instruction(todelete);
304 continue;
305 }
306
307 usemask = s.Instructions[ip].WriteMask;
308
309 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
310 usemask |= RC_MASK_X;
311 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
312 usemask |= RC_MASK_W;
313
314 rc_compute_sources_for_writemask(inst, usemask, srcmasks);
315
316 for (unsigned int src = 0; src < 3; ++src) {
317 for (unsigned int chan = 0; chan < 4; ++chan) {
318 if (!GET_BIT(srcmasks[src], chan))
319 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
320 }
321 }
322 }
323
324 rc_calculate_inputs_outputs(c);
325 }
326