• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27 
28 #include "radeon_dataflow.h"
29 
30 #include "radeon_compiler.h"
31 
32 
33 struct updatemask_state {
34 	unsigned char Output[RC_REGISTER_MAX_INDEX];
35 	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
36 	unsigned char Address;
37 	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
38 };
39 
40 struct instruction_state {
41 	unsigned char WriteMask:4;
42 	unsigned char WriteALUResult:1;
43 	unsigned char SrcReg[3];
44 };
45 
46 struct loopinfo {
47 	struct updatemask_state * Breaks;
48 	unsigned int BreakCount;
49 	unsigned int BreaksReserved;
50 };
51 
52 struct branchinfo {
53 	unsigned int HaveElse:1;
54 
55 	struct updatemask_state StoreEndif;
56 	struct updatemask_state StoreElse;
57 };
58 
59 struct deadcode_state {
60 	struct radeon_compiler * C;
61 	struct instruction_state * Instructions;
62 
63 	struct updatemask_state R;
64 
65 	struct branchinfo * BranchStack;
66 	unsigned int BranchStackSize;
67 	unsigned int BranchStackReserved;
68 
69 	struct loopinfo * LoopStack;
70 	unsigned int LoopStackSize;
71 	unsigned int LoopStackReserved;
72 };
73 
74 
or_updatemasks(struct updatemask_state * dst,struct updatemask_state * a,struct updatemask_state * b)75 static void or_updatemasks(
76 	struct updatemask_state * dst,
77 	struct updatemask_state * a,
78 	struct updatemask_state * b)
79 {
80 	for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
81 		dst->Output[i] = a->Output[i] | b->Output[i];
82 		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
83 	}
84 
85 	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
86 		dst->Special[i] = a->Special[i] | b->Special[i];
87 
88 	dst->Address = a->Address | b->Address;
89 }
90 
push_break(struct deadcode_state * s)91 static void push_break(struct deadcode_state *s)
92 {
93 	struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
94 	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
95 		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
96 
97 	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
98 }
99 
push_loop(struct deadcode_state * s)100 static void push_loop(struct deadcode_state * s)
101 {
102 	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
103 			s->LoopStackSize, s->LoopStackReserved, 1);
104 	memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
105 }
106 
push_branch(struct deadcode_state * s)107 static void push_branch(struct deadcode_state * s)
108 {
109 	struct branchinfo * branch;
110 
111 	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
112 			s->BranchStackSize, s->BranchStackReserved, 1);
113 
114 	branch = &s->BranchStack[s->BranchStackSize++];
115 	branch->HaveElse = 0;
116 	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
117 }
118 
get_used_ptr(struct deadcode_state * s,rc_register_file file,unsigned int index)119 static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
120 {
121 	if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
122 		if (index >= RC_REGISTER_MAX_INDEX) {
123 			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
124 			return 0;
125 		}
126 
127 		if (file == RC_FILE_OUTPUT)
128 			return &s->R.Output[index];
129 		else
130 			return &s->R.Temporary[index];
131 	} else if (file == RC_FILE_ADDRESS) {
132 		return &s->R.Address;
133 	} else if (file == RC_FILE_SPECIAL) {
134 		if (index >= RC_NUM_SPECIAL_REGISTERS) {
135 			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
136 			return 0;
137 		}
138 
139 		return &s->R.Special[index];
140 	}
141 
142 	return 0;
143 }
144 
mark_used(struct deadcode_state * s,rc_register_file file,unsigned int index,unsigned int mask)145 static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
146 {
147 	unsigned char * pused = get_used_ptr(s, file, index);
148 	if (pused)
149 		*pused |= mask;
150 }
151 
update_instruction(struct deadcode_state * s,struct rc_instruction * inst)152 static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
153 {
154 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
155 	struct instruction_state * insts = &s->Instructions[inst->IP];
156 	unsigned int usedmask = 0;
157 	unsigned int srcmasks[3];
158 
159 	if (opcode->HasDstReg) {
160 		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
161 		if (pused) {
162 			usedmask = *pused & inst->U.I.DstReg.WriteMask;
163 			*pused &= ~usedmask;
164 		}
165 	}
166 
167 	insts->WriteMask |= usedmask;
168 
169 	if (inst->U.I.WriteALUResult) {
170 		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
171 		if (pused && *pused) {
172 			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
173 				usedmask |= RC_MASK_X;
174 			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
175 				usedmask |= RC_MASK_W;
176 
177 			*pused = 0;
178 			insts->WriteALUResult = 1;
179 		}
180 	}
181 
182 	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
183 
184 	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
185 		unsigned int refmask = 0;
186 		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
187 		insts->SrcReg[src] |= newsrcmask;
188 
189 		for(unsigned int chan = 0; chan < 4; ++chan) {
190 			if (GET_BIT(newsrcmask, chan))
191 				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
192 		}
193 
194 		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
195 		refmask &= RC_MASK_XYZW;
196 
197 		if (!refmask)
198 			continue;
199 
200 		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
201 
202 		if (inst->U.I.SrcReg[src].RelAddr)
203 			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
204 	}
205 }
206 
mark_output_use(void * data,unsigned int index,unsigned int mask)207 static void mark_output_use(void * data, unsigned int index, unsigned int mask)
208 {
209 	struct deadcode_state * s = data;
210 
211 	mark_used(s, RC_FILE_OUTPUT, index, mask);
212 }
213 
rc_dataflow_deadcode(struct radeon_compiler * c,void * user)214 void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
215 {
216 	struct deadcode_state s;
217 	unsigned int nr_instructions;
218 	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
219 	unsigned int ip;
220 
221 	memset(&s, 0, sizeof(s));
222 	s.C = c;
223 
224 	nr_instructions = rc_recompute_ips(c);
225 	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
226 	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
227 
228 	dce(c, &s, &mark_output_use);
229 
230 	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
231 	    inst != &c->Program.Instructions;
232 	    inst = inst->Prev) {
233 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
234 
235 		switch(opcode->Opcode){
236 		/* Mark all sources in the loop body as used before doing
237 		 * normal deadcode analysis.  This is probably not optimal.
238 		 */
239 		case RC_OPCODE_ENDLOOP:
240 		{
241 			int endloops = 1;
242 			struct rc_instruction *ptr;
243 			for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
244 				opcode = rc_get_opcode_info(ptr->U.I.Opcode);
245 				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
246 					endloops--;
247 					continue;
248 				}
249 				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
250 					endloops++;
251 					continue;
252 				}
253 				if(opcode->HasDstReg){
254 					int src = 0;
255 					unsigned int srcmasks[3];
256 					unsigned int writemask = ptr->U.I.DstReg.WriteMask;
257 					if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
258 						writemask |= RC_MASK_X;
259 					else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
260 						writemask |= RC_MASK_W;
261 
262 					rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
263 					for(src=0; src < opcode->NumSrcRegs; src++){
264 						mark_used(&s,
265 							ptr->U.I.SrcReg[src].File,
266 							ptr->U.I.SrcReg[src].Index,
267 							srcmasks[src]);
268 					}
269 				}
270 			}
271 			push_loop(&s);
272 			break;
273 		}
274 		case RC_OPCODE_BRK:
275 			push_break(&s);
276 			break;
277 		case RC_OPCODE_BGNLOOP:
278 		{
279 			unsigned int i;
280 			struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
281 			for(i = 0; i < loop->BreakCount; i++) {
282 				or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
283 			}
284 			break;
285 		}
286 		case RC_OPCODE_CONT:
287 			break;
288 		case RC_OPCODE_ENDIF:
289 			push_branch(&s);
290 			break;
291 		default:
292 			if (opcode->IsFlowControl && s.BranchStackSize) {
293 				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
294 				if (opcode->Opcode == RC_OPCODE_IF) {
295 					or_updatemasks(&s.R,
296 							&s.R,
297 							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
298 
299 					s.BranchStackSize--;
300 				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
301 					if (branch->HaveElse) {
302 						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
303 					} else {
304 						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
305 						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
306 						branch->HaveElse = 1;
307 					}
308 				} else {
309 					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
310 				}
311 			}
312 		}
313 
314 		update_instruction(&s, inst);
315 	}
316 
317 	ip = 0;
318 	for(struct rc_instruction * inst = c->Program.Instructions.Next;
319 	    inst != &c->Program.Instructions;
320 	    inst = inst->Next, ++ip) {
321 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
322 		int dead = 1;
323 		unsigned int srcmasks[3];
324 		unsigned int usemask;
325 
326 		if (!opcode->HasDstReg) {
327 			dead = 0;
328 		} else {
329 			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
330 			if (s.Instructions[ip].WriteMask)
331 				dead = 0;
332 
333 			if (s.Instructions[ip].WriteALUResult)
334 				dead = 0;
335 			else
336 				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
337 		}
338 
339 		if (dead) {
340 			struct rc_instruction * todelete = inst;
341 			inst = inst->Prev;
342 			rc_remove_instruction(todelete);
343 			continue;
344 		}
345 
346 		usemask = s.Instructions[ip].WriteMask;
347 
348 		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
349 			usemask |= RC_MASK_X;
350 		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
351 			usemask |= RC_MASK_W;
352 
353 		rc_compute_sources_for_writemask(inst, usemask, srcmasks);
354 
355 		for(unsigned int src = 0; src < 3; ++src) {
356 			for(unsigned int chan = 0; chan < 4; ++chan) {
357 				if (!GET_BIT(srcmasks[src], chan))
358 					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
359 			}
360 		}
361 	}
362 
363 	rc_calculate_inputs_outputs(c);
364 }
365