• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  * Author: Tom Stellard <thomas.stellard@amd.com>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radeon_compiler.h"
8 #include "radeon_compiler_util.h"
9 #include "radeon_dataflow.h"
10 #include "radeon_program.h"
11 #include "radeon_program_constants.h"
12 
13 struct vert_fc_state {
14    struct radeon_compiler *C;
15    unsigned BranchDepth;
16    unsigned LoopDepth;
17    unsigned LoopsReserved;
18    int PredStack[R500_PVS_MAX_LOOP_DEPTH];
19    int PredicateReg;
20 };
21 
22 static void
build_pred_src(struct rc_src_register * src,struct vert_fc_state * fc_state)23 build_pred_src(struct rc_src_register *src, struct vert_fc_state *fc_state)
24 {
25    src->Swizzle =
26       RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
27    src->File = RC_FILE_TEMPORARY;
28    src->Index = fc_state->PredicateReg;
29 }
30 
31 static void
build_pred_dst(struct rc_dst_register * dst,struct vert_fc_state * fc_state)32 build_pred_dst(struct rc_dst_register *dst, struct vert_fc_state *fc_state)
33 {
34    dst->WriteMask = RC_MASK_W;
35    dst->File = RC_FILE_TEMPORARY;
36    dst->Index = fc_state->PredicateReg;
37 }
38 
39 static void
mark_write(void * userdata,struct rc_instruction * inst,rc_register_file file,unsigned int index,unsigned int mask)40 mark_write(void *userdata, struct rc_instruction *inst, rc_register_file file, unsigned int index,
41            unsigned int mask)
42 {
43    unsigned int *writemasks = userdata;
44 
45    if (file != RC_FILE_TEMPORARY)
46       return;
47 
48    if (index >= R300_VS_MAX_TEMPS)
49       return;
50 
51    writemasks[index] |= mask;
52 }
53 
54 static int
reserve_predicate_reg(struct vert_fc_state * fc_state)55 reserve_predicate_reg(struct vert_fc_state *fc_state)
56 {
57    int i;
58    unsigned int writemasks[RC_REGISTER_MAX_INDEX];
59    struct rc_instruction *inst;
60    memset(writemasks, 0, sizeof(writemasks));
61    for (inst = fc_state->C->Program.Instructions.Next; inst != &fc_state->C->Program.Instructions;
62         inst = inst->Next) {
63       rc_for_all_writes_mask(inst, mark_write, writemasks);
64    }
65 
66    for (i = 0; i < fc_state->C->max_temp_regs; i++) {
67       /* Most of the control flow instructions only write the
68        * W component of the Predicate Register, but
69        * the docs say that ME_PRED_SET_CLR and
70        * ME_PRED_SET_RESTORE write all components of the
71        * register, so we must reserve a register that has
72        * all its components free. */
73       if (!writemasks[i]) {
74          fc_state->PredicateReg = i;
75          break;
76       }
77    }
78    if (i == fc_state->C->max_temp_regs) {
79       rc_error(fc_state->C, "No free temporary to use for"
80                             " predicate stack counter.\n");
81       return -1;
82    }
83    return 1;
84 }
85 
86 static void
lower_bgnloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)87 lower_bgnloop(struct rc_instruction *inst, struct vert_fc_state *fc_state)
88 {
89    struct rc_instruction *new_inst = rc_insert_new_instruction(fc_state->C, inst->Prev);
90 
91    if ((!fc_state->C->is_r500 && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH) ||
92        fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
93       rc_error(fc_state->C, "Loops are nested too deep.");
94       return;
95    }
96 
97    if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
98       if (fc_state->PredicateReg == -1) {
99          if (reserve_predicate_reg(fc_state) == -1) {
100             return;
101          }
102       }
103 
104       /* Initialize the predicate bit to true. */
105       new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
106       build_pred_dst(&new_inst->U.I.DstReg, fc_state);
107       new_inst->U.I.SrcReg[0].Index = 0;
108       new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
109       new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
110    } else {
111       fc_state->PredStack[fc_state->LoopDepth] = fc_state->PredicateReg;
112       /* Copy the current predicate value to this loop's
113        * predicate register */
114 
115       /* Use the old predicate value for src0 */
116       build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
117 
118       /* Reserve this loop's predicate register */
119       if (reserve_predicate_reg(fc_state) == -1) {
120          return;
121       }
122 
123       /* Copy the old predicate value to the new register */
124       new_inst->U.I.Opcode = RC_OPCODE_ADD;
125       build_pred_dst(&new_inst->U.I.DstReg, fc_state);
126       new_inst->U.I.SrcReg[1].Index = 0;
127       new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
128       new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
129    }
130 }
131 
132 static void
lower_brk(struct rc_instruction * inst,struct vert_fc_state * fc_state)133 lower_brk(struct rc_instruction *inst, struct vert_fc_state *fc_state)
134 {
135    if (fc_state->LoopDepth == 1) {
136       inst->U.I.Opcode = RC_OPCODE_RCP;
137       inst->U.I.DstReg.Pred = RC_PRED_SET;
138       inst->U.I.SrcReg[0].Index = 0;
139       inst->U.I.SrcReg[0].File = RC_FILE_NONE;
140       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
141    } else {
142       inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
143       inst->U.I.DstReg.Pred = RC_PRED_SET;
144    }
145 
146    build_pred_dst(&inst->U.I.DstReg, fc_state);
147 }
148 
149 static void
lower_endloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)150 lower_endloop(struct rc_instruction *inst, struct vert_fc_state *fc_state)
151 {
152    struct rc_instruction *new_inst = rc_insert_new_instruction(fc_state->C, inst);
153 
154    new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
155    build_pred_dst(&new_inst->U.I.DstReg, fc_state);
156    /* Restore the previous predicate register. */
157    fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
158    build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
159 }
160 
161 static void
lower_if(struct rc_instruction * inst,struct vert_fc_state * fc_state)162 lower_if(struct rc_instruction *inst, struct vert_fc_state *fc_state)
163 {
164    /* Reserve a temporary to use as our predicate stack counter, if we
165     * don't already have one. */
166    if (fc_state->PredicateReg == -1) {
167       /* If we are inside a loop, the Predicate Register should
168        * have already been defined. */
169       assert(fc_state->LoopDepth == 0);
170 
171       if (reserve_predicate_reg(fc_state) == -1) {
172          return;
173       }
174    }
175 
176    if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
177       inst->U.I.Opcode = RC_ME_PRED_SNEQ;
178    } else {
179       unsigned swz;
180       inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
181       memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0], sizeof(inst->U.I.SrcReg[1]));
182       swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
183       /* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
184        * w component */
185       inst->U.I.SrcReg[1].Swizzle =
186          RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
187       build_pred_src(&inst->U.I.SrcReg[0], fc_state);
188    }
189    build_pred_dst(&inst->U.I.DstReg, fc_state);
190 }
191 
192 void
rc_vert_fc(struct radeon_compiler * c,void * user)193 rc_vert_fc(struct radeon_compiler *c, void *user)
194 {
195    struct rc_instruction *inst;
196    struct vert_fc_state fc_state;
197 
198    memset(&fc_state, 0, sizeof(fc_state));
199    fc_state.PredicateReg = -1;
200    fc_state.C = c;
201 
202    for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
203 
204       switch (inst->U.I.Opcode) {
205 
206       case RC_OPCODE_BGNLOOP:
207          lower_bgnloop(inst, &fc_state);
208          fc_state.LoopDepth++;
209          break;
210 
211       case RC_OPCODE_BRK:
212          lower_brk(inst, &fc_state);
213          break;
214 
215       case RC_OPCODE_ENDLOOP:
216          if (fc_state.BranchDepth != 0 || fc_state.LoopDepth != 1) {
217             lower_endloop(inst, &fc_state);
218             /* Skip the new PRED_RESTORE */
219             inst = inst->Next;
220          }
221          fc_state.LoopDepth--;
222          break;
223       case RC_OPCODE_IF:
224          lower_if(inst, &fc_state);
225          fc_state.BranchDepth++;
226          break;
227 
228       case RC_OPCODE_ELSE:
229          inst->U.I.Opcode = RC_ME_PRED_SET_INV;
230          build_pred_dst(&inst->U.I.DstReg, &fc_state);
231          build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
232          break;
233 
234       case RC_OPCODE_ENDIF:
235          /* TODO: If LoopDepth == 1 and there is only a single break
236           * we can optimize out the endif just after the break. However
237           * previous attempts were buggy, so keep it simple for now.
238           */
239          inst->U.I.Opcode = RC_ME_PRED_SET_POP;
240          build_pred_dst(&inst->U.I.DstReg, &fc_state);
241          build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
242          fc_state.BranchDepth--;
243          break;
244 
245       default:
246          if (fc_state.BranchDepth || fc_state.LoopDepth) {
247             inst->U.I.DstReg.Pred = RC_PRED_SET;
248          }
249          break;
250       }
251 
252       if (c->Error) {
253          return;
254       }
255    }
256 }
257