1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 * Author: Tom Stellard <thomas.stellard@amd.com>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radeon_compiler.h"
8 #include "radeon_compiler_util.h"
9 #include "radeon_dataflow.h"
10 #include "radeon_program.h"
11 #include "radeon_program_constants.h"
12
13 struct vert_fc_state {
14 struct radeon_compiler *C;
15 unsigned BranchDepth;
16 unsigned LoopDepth;
17 unsigned LoopsReserved;
18 int PredStack[R500_PVS_MAX_LOOP_DEPTH];
19 int PredicateReg;
20 };
21
22 static void
build_pred_src(struct rc_src_register * src,struct vert_fc_state * fc_state)23 build_pred_src(struct rc_src_register *src, struct vert_fc_state *fc_state)
24 {
25 src->Swizzle =
26 RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
27 src->File = RC_FILE_TEMPORARY;
28 src->Index = fc_state->PredicateReg;
29 }
30
31 static void
build_pred_dst(struct rc_dst_register * dst,struct vert_fc_state * fc_state)32 build_pred_dst(struct rc_dst_register *dst, struct vert_fc_state *fc_state)
33 {
34 dst->WriteMask = RC_MASK_W;
35 dst->File = RC_FILE_TEMPORARY;
36 dst->Index = fc_state->PredicateReg;
37 }
38
39 static void
mark_write(void * userdata,struct rc_instruction * inst,rc_register_file file,unsigned int index,unsigned int mask)40 mark_write(void *userdata, struct rc_instruction *inst, rc_register_file file, unsigned int index,
41 unsigned int mask)
42 {
43 unsigned int *writemasks = userdata;
44
45 if (file != RC_FILE_TEMPORARY)
46 return;
47
48 if (index >= R300_VS_MAX_TEMPS)
49 return;
50
51 writemasks[index] |= mask;
52 }
53
54 static int
reserve_predicate_reg(struct vert_fc_state * fc_state)55 reserve_predicate_reg(struct vert_fc_state *fc_state)
56 {
57 int i;
58 unsigned int writemasks[RC_REGISTER_MAX_INDEX];
59 struct rc_instruction *inst;
60 memset(writemasks, 0, sizeof(writemasks));
61 for (inst = fc_state->C->Program.Instructions.Next; inst != &fc_state->C->Program.Instructions;
62 inst = inst->Next) {
63 rc_for_all_writes_mask(inst, mark_write, writemasks);
64 }
65
66 for (i = 0; i < fc_state->C->max_temp_regs; i++) {
67 /* Most of the control flow instructions only write the
68 * W component of the Predicate Register, but
69 * the docs say that ME_PRED_SET_CLR and
70 * ME_PRED_SET_RESTORE write all components of the
71 * register, so we must reserve a register that has
72 * all its components free. */
73 if (!writemasks[i]) {
74 fc_state->PredicateReg = i;
75 break;
76 }
77 }
78 if (i == fc_state->C->max_temp_regs) {
79 rc_error(fc_state->C, "No free temporary to use for"
80 " predicate stack counter.\n");
81 return -1;
82 }
83 return 1;
84 }
85
86 static void
lower_bgnloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)87 lower_bgnloop(struct rc_instruction *inst, struct vert_fc_state *fc_state)
88 {
89 struct rc_instruction *new_inst = rc_insert_new_instruction(fc_state->C, inst->Prev);
90
91 if ((!fc_state->C->is_r500 && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH) ||
92 fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
93 rc_error(fc_state->C, "Loops are nested too deep.");
94 return;
95 }
96
97 if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
98 if (fc_state->PredicateReg == -1) {
99 if (reserve_predicate_reg(fc_state) == -1) {
100 return;
101 }
102 }
103
104 /* Initialize the predicate bit to true. */
105 new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
106 build_pred_dst(&new_inst->U.I.DstReg, fc_state);
107 new_inst->U.I.SrcReg[0].Index = 0;
108 new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
109 new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
110 } else {
111 fc_state->PredStack[fc_state->LoopDepth] = fc_state->PredicateReg;
112 /* Copy the current predicate value to this loop's
113 * predicate register */
114
115 /* Use the old predicate value for src0 */
116 build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
117
118 /* Reserve this loop's predicate register */
119 if (reserve_predicate_reg(fc_state) == -1) {
120 return;
121 }
122
123 /* Copy the old predicate value to the new register */
124 new_inst->U.I.Opcode = RC_OPCODE_ADD;
125 build_pred_dst(&new_inst->U.I.DstReg, fc_state);
126 new_inst->U.I.SrcReg[1].Index = 0;
127 new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
128 new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
129 }
130 }
131
132 static void
lower_brk(struct rc_instruction * inst,struct vert_fc_state * fc_state)133 lower_brk(struct rc_instruction *inst, struct vert_fc_state *fc_state)
134 {
135 if (fc_state->LoopDepth == 1) {
136 inst->U.I.Opcode = RC_OPCODE_RCP;
137 inst->U.I.DstReg.Pred = RC_PRED_SET;
138 inst->U.I.SrcReg[0].Index = 0;
139 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
140 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
141 } else {
142 inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
143 inst->U.I.DstReg.Pred = RC_PRED_SET;
144 }
145
146 build_pred_dst(&inst->U.I.DstReg, fc_state);
147 }
148
149 static void
lower_endloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)150 lower_endloop(struct rc_instruction *inst, struct vert_fc_state *fc_state)
151 {
152 struct rc_instruction *new_inst = rc_insert_new_instruction(fc_state->C, inst);
153
154 new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
155 build_pred_dst(&new_inst->U.I.DstReg, fc_state);
156 /* Restore the previous predicate register. */
157 fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
158 build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
159 }
160
161 static void
lower_if(struct rc_instruction * inst,struct vert_fc_state * fc_state)162 lower_if(struct rc_instruction *inst, struct vert_fc_state *fc_state)
163 {
164 /* Reserve a temporary to use as our predicate stack counter, if we
165 * don't already have one. */
166 if (fc_state->PredicateReg == -1) {
167 /* If we are inside a loop, the Predicate Register should
168 * have already been defined. */
169 assert(fc_state->LoopDepth == 0);
170
171 if (reserve_predicate_reg(fc_state) == -1) {
172 return;
173 }
174 }
175
176 if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
177 inst->U.I.Opcode = RC_ME_PRED_SNEQ;
178 } else {
179 unsigned swz;
180 inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
181 memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0], sizeof(inst->U.I.SrcReg[1]));
182 swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
183 /* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
184 * w component */
185 inst->U.I.SrcReg[1].Swizzle =
186 RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
187 build_pred_src(&inst->U.I.SrcReg[0], fc_state);
188 }
189 build_pred_dst(&inst->U.I.DstReg, fc_state);
190 }
191
192 void
rc_vert_fc(struct radeon_compiler * c,void * user)193 rc_vert_fc(struct radeon_compiler *c, void *user)
194 {
195 struct rc_instruction *inst;
196 struct vert_fc_state fc_state;
197
198 memset(&fc_state, 0, sizeof(fc_state));
199 fc_state.PredicateReg = -1;
200 fc_state.C = c;
201
202 for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
203
204 switch (inst->U.I.Opcode) {
205
206 case RC_OPCODE_BGNLOOP:
207 lower_bgnloop(inst, &fc_state);
208 fc_state.LoopDepth++;
209 break;
210
211 case RC_OPCODE_BRK:
212 lower_brk(inst, &fc_state);
213 break;
214
215 case RC_OPCODE_ENDLOOP:
216 if (fc_state.BranchDepth != 0 || fc_state.LoopDepth != 1) {
217 lower_endloop(inst, &fc_state);
218 /* Skip the new PRED_RESTORE */
219 inst = inst->Next;
220 }
221 fc_state.LoopDepth--;
222 break;
223 case RC_OPCODE_IF:
224 lower_if(inst, &fc_state);
225 fc_state.BranchDepth++;
226 break;
227
228 case RC_OPCODE_ELSE:
229 inst->U.I.Opcode = RC_ME_PRED_SET_INV;
230 build_pred_dst(&inst->U.I.DstReg, &fc_state);
231 build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
232 break;
233
234 case RC_OPCODE_ENDIF:
235 /* TODO: If LoopDepth == 1 and there is only a single break
236 * we can optimize out the endif just after the break. However
237 * previous attempts were buggy, so keep it simple for now.
238 */
239 inst->U.I.Opcode = RC_ME_PRED_SET_POP;
240 build_pred_dst(&inst->U.I.DstReg, &fc_state);
241 build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
242 fc_state.BranchDepth--;
243 break;
244
245 default:
246 if (fc_state.BranchDepth || fc_state.LoopDepth) {
247 inst->U.I.DstReg.Pred = RC_PRED_SET;
248 }
249 break;
250 }
251
252 if (c->Error) {
253 return;
254 }
255 }
256 }
257