• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Author: Tom Stellard <thomas.stellard@amd.com>
24  */
25 
26 #include "radeon_compiler.h"
27 #include "radeon_compiler_util.h"
28 #include "radeon_dataflow.h"
29 #include "radeon_program.h"
30 #include "radeon_program_constants.h"
31 
32 struct vert_fc_state {
33 	struct radeon_compiler *C;
34 	unsigned BranchDepth;
35 	unsigned LoopDepth;
36 	unsigned LoopsReserved;
37 	int PredStack[R500_PVS_MAX_LOOP_DEPTH];
38 	int PredicateReg;
39 };
40 
build_pred_src(struct rc_src_register * src,struct vert_fc_state * fc_state)41 static void build_pred_src(
42 	struct rc_src_register * src,
43 	struct vert_fc_state * fc_state)
44 {
45 	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
46 					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
47 	src->File = RC_FILE_TEMPORARY;
48 	src->Index = fc_state->PredicateReg;
49 }
50 
build_pred_dst(struct rc_dst_register * dst,struct vert_fc_state * fc_state)51 static void build_pred_dst(
52 	struct rc_dst_register * dst,
53 	struct vert_fc_state * fc_state)
54 {
55 	dst->WriteMask = RC_MASK_W;
56 	dst->File = RC_FILE_TEMPORARY;
57 	dst->Index = fc_state->PredicateReg;
58 }
59 
/**
 * Write callback handed to rc_for_all_writes_mask(): accumulates the
 * components written to each temporary register.
 *
 * \param userdata array of unsigned write masks, indexed by register index.
 * \param inst     instruction performing the write (not used here).
 * \param file     register file being written.
 * \param index    register index being written.
 * \param mask     component mask of the write.
 *
 * Writes to non-temporary files and to indices at or above
 * R300_VS_MAX_TEMPS are ignored.
 *
 * NOTE(review): the caller scans up to max_temp_regs entries; if that can
 * exceed R300_VS_MAX_TEMPS, writes to the higher temporaries are never
 * recorded here -- confirm this is intended.
 */
static void mark_write(void * userdata,	struct rc_instruction * inst,
		rc_register_file file,	unsigned int index, unsigned int mask)
{
	unsigned int * masks_by_reg = userdata;

	if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS) {
		masks_by_reg[index] |= mask;
	}
}
73 
reserve_predicate_reg(struct vert_fc_state * fc_state)74 static int reserve_predicate_reg(struct vert_fc_state * fc_state)
75 {
76 	int i;
77 	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
78 	struct rc_instruction * inst;
79 	memset(writemasks, 0, sizeof(writemasks));
80 	for(inst = fc_state->C->Program.Instructions.Next;
81 				inst != &fc_state->C->Program.Instructions;
82 				inst = inst->Next) {
83 		rc_for_all_writes_mask(inst, mark_write, writemasks);
84 	}
85 
86 	for(i = 0; i < fc_state->C->max_temp_regs; i++) {
87 		/* Most of the control flow instructions only write the
88 		 * W component of the Predicate Register, but
89 		 * the docs say that ME_PRED_SET_CLR and
90 		 * ME_PRED_SET_RESTORE write all components of the
91 		 * register, so we must reserve a register that has
92 		 * all its components free. */
93 		if (!writemasks[i]) {
94 			fc_state->PredicateReg = i;
95 			break;
96 		}
97 	}
98 	if (i == fc_state->C->max_temp_regs) {
99 		rc_error(fc_state->C, "No free temporary to use for"
100 				" predicate stack counter.\n");
101 		return -1;
102 	}
103 	return 1;
104 }
105 
lower_bgnloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)106 static void lower_bgnloop(
107 	struct rc_instruction * inst,
108 	struct vert_fc_state * fc_state)
109 {
110 	struct rc_instruction * new_inst =
111 			rc_insert_new_instruction(fc_state->C, inst->Prev);
112 
113 	if ((!fc_state->C->is_r500
114 		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
115 	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
116 		rc_error(fc_state->C, "Loops are nested too deep.");
117 		return;
118 	}
119 
120 	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
121 		if (fc_state->PredicateReg == -1) {
122 			if (reserve_predicate_reg(fc_state) == -1) {
123 				return;
124 			}
125 		}
126 
127 		/* Initialize the predicate bit to true. */
128 		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
129 		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
130 		new_inst->U.I.SrcReg[0].Index = 0;
131 		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
132 		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
133 	} else {
134 		fc_state->PredStack[fc_state->LoopDepth] =
135 						fc_state->PredicateReg;
136 		/* Copy the current predicate value to this loop's
137 		 * predicate register */
138 
139 		/* Use the old predicate value for src0 */
140 		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
141 
142 		/* Reserve this loop's predicate register */
143 		if (reserve_predicate_reg(fc_state) == -1) {
144 			return;
145 		}
146 
147 		/* Copy the old predicate value to the new register */
148 		new_inst->U.I.Opcode = RC_OPCODE_ADD;
149 		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
150 		new_inst->U.I.SrcReg[1].Index = 0;
151 		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
152 		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
153 	}
154 
155 }
156 
lower_brk(struct rc_instruction * inst,struct vert_fc_state * fc_state)157 static void lower_brk(
158 	struct rc_instruction * inst,
159 	struct vert_fc_state * fc_state)
160 {
161 	if (fc_state->LoopDepth == 1) {
162 		inst->U.I.Opcode = RC_OPCODE_RCP;
163 		inst->U.I.DstReg.Pred = RC_PRED_SET;
164 		inst->U.I.SrcReg[0].Index = 0;
165 		inst->U.I.SrcReg[0].File = RC_FILE_NONE;
166 		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
167 	} else {
168 		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
169 		inst->U.I.DstReg.Pred = RC_PRED_SET;
170 	}
171 
172 	build_pred_dst(&inst->U.I.DstReg, fc_state);
173 }
174 
lower_endloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)175 static void lower_endloop(
176 	struct rc_instruction * inst,
177 	struct vert_fc_state * fc_state)
178 {
179 	struct rc_instruction * new_inst =
180 			rc_insert_new_instruction(fc_state->C, inst);
181 
182 	new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
183 	build_pred_dst(&new_inst->U.I.DstReg, fc_state);
184 	/* Restore the previous predicate register. */
185 	fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
186 	build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
187 }
188 
lower_if(struct rc_instruction * inst,struct vert_fc_state * fc_state)189 static void lower_if(
190 	struct rc_instruction * inst,
191 	struct vert_fc_state * fc_state)
192 {
193 	/* Reserve a temporary to use as our predicate stack counter, if we
194 	 * don't already have one. */
195 	if (fc_state->PredicateReg == -1) {
196 		/* If we are inside a loop, the Predicate Register should
197 		 * have already been defined. */
198 		assert(fc_state->LoopDepth == 0);
199 
200 		if (reserve_predicate_reg(fc_state) == -1) {
201 			return;
202 		}
203 	}
204 
205 	if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
206 		inst->U.I.Opcode = RC_ME_PRED_SNEQ;
207 	} else {
208 		unsigned swz;
209 		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
210 		memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
211 						sizeof(inst->U.I.SrcReg[1]));
212 		swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
213 		/* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
214 		 * w component */
215 		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
216 				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
217 		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
218 	}
219 	build_pred_dst(&inst->U.I.DstReg, fc_state);
220 }
221 
rc_vert_fc(struct radeon_compiler * c,void * user)222 void rc_vert_fc(struct radeon_compiler *c, void *user)
223 {
224 	struct rc_instruction * inst;
225 	struct vert_fc_state fc_state;
226 
227 	memset(&fc_state, 0, sizeof(fc_state));
228 	fc_state.PredicateReg = -1;
229 	fc_state.C = c;
230 
231 	for(inst = c->Program.Instructions.Next;
232 					inst != &c->Program.Instructions;
233 					inst = inst->Next) {
234 
235 		switch (inst->U.I.Opcode) {
236 
237 		case RC_OPCODE_BGNLOOP:
238 			lower_bgnloop(inst, &fc_state);
239 			fc_state.LoopDepth++;
240 			break;
241 
242 		case RC_OPCODE_BRK:
243 			lower_brk(inst, &fc_state);
244 			break;
245 
246 		case RC_OPCODE_ENDLOOP:
247 			if (fc_state.BranchDepth != 0
248 					|| fc_state.LoopDepth != 1) {
249 				lower_endloop(inst, &fc_state);
250 				/* Skip the new PRED_RESTORE */
251 				inst = inst->Next;
252 			}
253 			fc_state.LoopDepth--;
254 			break;
255 		case RC_OPCODE_IF:
256 			lower_if(inst, &fc_state);
257 			fc_state.BranchDepth++;
258 			break;
259 
260 		case RC_OPCODE_ELSE:
261 			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
262 			build_pred_dst(&inst->U.I.DstReg, &fc_state);
263 			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
264 			break;
265 
266 		case RC_OPCODE_ENDIF:
267 			/* TODO: If LoopDepth == 1 and there is only a single break
268 			 * we can optimize out the endif just after the break. However
269 			 * previous attempts were buggy, so keep it simple for now.
270 			 */
271 			inst->U.I.Opcode = RC_ME_PRED_SET_POP;
272 			build_pred_dst(&inst->U.I.DstReg, &fc_state);
273 			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
274 			fc_state.BranchDepth--;
275 			break;
276 
277 		default:
278 			if (fc_state.BranchDepth || fc_state.LoopDepth) {
279 				inst->U.I.DstReg.Pred = RC_PRED_SET;
280 			}
281 			break;
282 		}
283 
284 		if (c->Error) {
285 			return;
286 		}
287 	}
288 }
289