1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31
32
33 struct updatemask_state {
34 unsigned char Output[RC_REGISTER_MAX_INDEX];
35 unsigned char Temporary[RC_REGISTER_MAX_INDEX];
36 unsigned char Address;
37 unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
38 };
39
/**
 * Per-instruction result of the analysis, indexed by instruction IP.
 */
struct instruction_state {
	/* Destination channels that were found to be used. */
	unsigned char WriteMask:4;
	/* Set once the instruction's ALU result write was found to be used. */
	unsigned char WriteALUResult:1;
	/* Per-source channel masks that have already been propagated. */
	unsigned char SrcReg[3];
};
45
46 struct loopinfo {
47 struct updatemask_state StoreEndloop;
48 unsigned int BreakCount;
49 unsigned int BreaksReserved;
50 };
51
52 struct branchinfo {
53 unsigned int HaveElse:1;
54
55 struct updatemask_state StoreEndif;
56 struct updatemask_state StoreElse;
57 };
58
59 struct deadcode_state {
60 struct radeon_compiler * C;
61 struct instruction_state * Instructions;
62
63 struct updatemask_state R;
64
65 struct branchinfo * BranchStack;
66 unsigned int BranchStackSize;
67 unsigned int BranchStackReserved;
68
69 struct loopinfo * LoopStack;
70 unsigned int LoopStackSize;
71 unsigned int LoopStackReserved;
72 };
73
74
or_updatemasks(struct updatemask_state * dst,struct updatemask_state * a,struct updatemask_state * b)75 static void or_updatemasks(
76 struct updatemask_state * dst,
77 struct updatemask_state * a,
78 struct updatemask_state * b)
79 {
80 for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
81 dst->Output[i] = a->Output[i] | b->Output[i];
82 dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
83 }
84
85 for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
86 dst->Special[i] = a->Special[i] | b->Special[i];
87
88 dst->Address = a->Address | b->Address;
89 }
90
push_loop(struct deadcode_state * s)91 static void push_loop(struct deadcode_state * s)
92 {
93 memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
94 s->LoopStackSize, s->LoopStackReserved, 1);
95 memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
96 memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R));
97 }
98
push_branch(struct deadcode_state * s)99 static void push_branch(struct deadcode_state * s)
100 {
101 struct branchinfo * branch;
102
103 memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
104 s->BranchStackSize, s->BranchStackReserved, 1);
105
106 branch = &s->BranchStack[s->BranchStackSize++];
107 branch->HaveElse = 0;
108 memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
109 }
110
/**
 * Look up the usage mask that tracks the given register.
 *
 * \return pointer to the mask inside s->R, or NULL when the register file is
 * not tracked or the index is out of range (the latter also reports an
 * error through rc_error()).
 */
static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	switch (file) {
	case RC_FILE_OUTPUT:
	case RC_FILE_TEMPORARY:
		if (index >= RC_REGISTER_MAX_INDEX) {
			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
			return NULL;
		}

		return (file == RC_FILE_OUTPUT) ? &s->R.Output[index]
						: &s->R.Temporary[index];

	case RC_FILE_ADDRESS:
		/* A single mask covers the address file; the index is ignored. */
		return &s->R.Address;

	case RC_FILE_SPECIAL:
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
			return NULL;
		}

		return &s->R.Special[index];

	default:
		/* Every other register file is not tracked. */
		return NULL;
	}
}
136
/**
 * OR mask into the usage mask of the given register.
 *
 * Does nothing when get_used_ptr() has no mask for that register.
 */
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char * slot = get_used_ptr(s, file, index);

	if (slot == NULL)
		return;

	*slot |= mask;
}
143
update_instruction(struct deadcode_state * s,struct rc_instruction * inst)144 static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
145 {
146 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
147 struct instruction_state * insts = &s->Instructions[inst->IP];
148 unsigned int usedmask = 0;
149 unsigned int srcmasks[3];
150
151 if (opcode->HasDstReg) {
152 unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
153 if (pused) {
154 usedmask = *pused & inst->U.I.DstReg.WriteMask;
155 *pused &= ~usedmask;
156 }
157 }
158
159 insts->WriteMask |= usedmask;
160
161 if (inst->U.I.WriteALUResult) {
162 unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
163 if (pused && *pused) {
164 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
165 usedmask |= RC_MASK_X;
166 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
167 usedmask |= RC_MASK_W;
168
169 *pused = 0;
170 insts->WriteALUResult = 1;
171 }
172 }
173
174 rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
175
176 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
177 unsigned int refmask = 0;
178 unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
179 insts->SrcReg[src] |= newsrcmask;
180
181 for(unsigned int chan = 0; chan < 4; ++chan) {
182 if (GET_BIT(newsrcmask, chan))
183 refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
184 }
185
186 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
187 refmask &= RC_MASK_XYZW;
188
189 if (!refmask)
190 continue;
191
192 mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
193
194 if (inst->U.I.SrcReg[src].RelAddr)
195 mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
196 }
197 }
198
rc_dataflow_deadcode(struct radeon_compiler * c,void * user)199 void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
200 {
201 struct deadcode_state s;
202 unsigned int nr_instructions;
203 unsigned int ip;
204
205 memset(&s, 0, sizeof(s));
206 s.C = c;
207
208 nr_instructions = rc_recompute_ips(c);
209 s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
210 memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
211
212 for(struct rc_instruction * inst = c->Program.Instructions.Prev;
213 inst != &c->Program.Instructions;
214 inst = inst->Prev) {
215 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
216
217 /* Assume all output regs are live. Anything else should have been
218 * eliminated before it got to us.
219 */
220 if (opcode->HasDstReg)
221 mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask);
222
223 switch(opcode->Opcode){
224 /* Mark all sources in the loop body as used before doing
225 * normal deadcode analysis. This is probably not optimal.
226 * Save this pessimistic deadcode state and restore it anytime
227 * we see a break just to be extra sure.
228 */
229 case RC_OPCODE_ENDLOOP:
230 {
231 int endloops = 1;
232 struct rc_instruction *ptr;
233 for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
234 opcode = rc_get_opcode_info(ptr->U.I.Opcode);
235 if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
236 endloops--;
237 continue;
238 }
239 if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
240 endloops++;
241 continue;
242 }
243 if(opcode->HasDstReg){
244 int src = 0;
245 unsigned int srcmasks[3];
246 unsigned int writemask = ptr->U.I.DstReg.WriteMask;
247 if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
248 writemask |= RC_MASK_X;
249 else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
250 writemask |= RC_MASK_W;
251
252 rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
253 for(src=0; src < opcode->NumSrcRegs; src++){
254 mark_used(&s,
255 ptr->U.I.SrcReg[src].File,
256 ptr->U.I.SrcReg[src].Index,
257 srcmasks[src]);
258 }
259 }
260 }
261 push_loop(&s);
262 break;
263 }
264 case RC_OPCODE_BRK:
265 {
266 struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
267 memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R));
268 break;
269 }
270 case RC_OPCODE_BGNLOOP:
271 s.LoopStackSize--;
272 break;
273 case RC_OPCODE_CONT:
274 break;
275 case RC_OPCODE_ENDIF:
276 push_branch(&s);
277 break;
278 default:
279 if (opcode->IsFlowControl && s.BranchStackSize) {
280 struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
281 if (opcode->Opcode == RC_OPCODE_IF) {
282 or_updatemasks(&s.R,
283 &s.R,
284 branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
285
286 s.BranchStackSize--;
287 } else if (opcode->Opcode == RC_OPCODE_ELSE) {
288 if (branch->HaveElse) {
289 rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
290 } else {
291 memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
292 memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
293 branch->HaveElse = 1;
294 }
295 } else {
296 rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
297 }
298 }
299 }
300
301 update_instruction(&s, inst);
302 }
303
304 ip = 0;
305 for(struct rc_instruction * inst = c->Program.Instructions.Next;
306 inst != &c->Program.Instructions;
307 inst = inst->Next, ++ip) {
308 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
309 int dead = 1;
310 unsigned int srcmasks[3];
311 unsigned int usemask;
312
313 if (!opcode->HasDstReg) {
314 dead = 0;
315 } else {
316 inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
317 if (s.Instructions[ip].WriteMask)
318 dead = 0;
319
320 if (s.Instructions[ip].WriteALUResult)
321 dead = 0;
322 else
323 inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
324 }
325
326 if (dead) {
327 struct rc_instruction * todelete = inst;
328 inst = inst->Prev;
329 rc_remove_instruction(todelete);
330 continue;
331 }
332
333 usemask = s.Instructions[ip].WriteMask;
334
335 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
336 usemask |= RC_MASK_X;
337 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
338 usemask |= RC_MASK_W;
339
340 rc_compute_sources_for_writemask(inst, usemask, srcmasks);
341
342 for(unsigned int src = 0; src < 3; ++src) {
343 for(unsigned int chan = 0; chan < 4; ++chan) {
344 if (!GET_BIT(srcmasks[src], chan))
345 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
346 }
347 }
348 }
349
350 rc_calculate_inputs_outputs(c);
351 }
352