• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_dataflow.h"
7 
8 #include "radeon_compiler.h"
9 
10 struct updatemask_state {
11    unsigned char Output[RC_REGISTER_MAX_INDEX];
12    unsigned char Temporary[RC_REGISTER_MAX_INDEX];
13    unsigned char Address;
14    unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
15 };
16 
/*
 * What the backward pass has learned about a single instruction.
 * Entries live in deadcode_state.Instructions and are indexed by the
 * instruction's IP.
 */
struct instruction_state {
   /* Destination channels that turned out to be read later on. */
   unsigned char WriteMask : 4;

   /* Set once the instruction's ALU result write is known to be needed. */
   unsigned char WriteALUResult : 1;

   /* Per-source channel masks that have already been propagated. */
   unsigned char SrcReg[3];
};
22 
23 struct loopinfo {
24    struct updatemask_state StoreEndloop;
25    unsigned int BreakCount;
26    unsigned int BreaksReserved;
27 };
28 
29 struct branchinfo {
30    unsigned int HaveElse : 1;
31 
32    struct updatemask_state StoreEndif;
33    struct updatemask_state StoreElse;
34 };
35 
36 struct deadcode_state {
37    struct radeon_compiler *C;
38    struct instruction_state *Instructions;
39 
40    struct updatemask_state R;
41 
42    struct branchinfo *BranchStack;
43    unsigned int BranchStackSize;
44    unsigned int BranchStackReserved;
45 
46    struct loopinfo *LoopStack;
47    unsigned int LoopStackSize;
48    unsigned int LoopStackReserved;
49 };
50 
51 static void
or_updatemasks(struct updatemask_state * dst,struct updatemask_state * a,struct updatemask_state * b)52 or_updatemasks(struct updatemask_state *dst, struct updatemask_state *a, struct updatemask_state *b)
53 {
54    for (unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
55       dst->Output[i] = a->Output[i] | b->Output[i];
56       dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
57    }
58 
59    for (unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
60       dst->Special[i] = a->Special[i] | b->Special[i];
61 
62    dst->Address = a->Address | b->Address;
63 }
64 
65 static void
push_loop(struct deadcode_state * s)66 push_loop(struct deadcode_state *s)
67 {
68    memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, s->LoopStackSize,
69                              s->LoopStackReserved, 1);
70    memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
71    memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R));
72 }
73 
74 static void
push_branch(struct deadcode_state * s)75 push_branch(struct deadcode_state *s)
76 {
77    struct branchinfo *branch;
78 
79    memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, s->BranchStackSize,
80                              s->BranchStackReserved, 1);
81 
82    branch = &s->BranchStack[s->BranchStackSize++];
83    branch->HaveElse = 0;
84    memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
85 }
86 
87 static unsigned char *
get_used_ptr(struct deadcode_state * s,rc_register_file file,unsigned int index)88 get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
89 {
90    if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
91       if (index >= RC_REGISTER_MAX_INDEX) {
92          rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __func__, index, file);
93          return NULL;
94       }
95 
96       if (file == RC_FILE_OUTPUT)
97          return &s->R.Output[index];
98       else
99          return &s->R.Temporary[index];
100    } else if (file == RC_FILE_ADDRESS) {
101       return &s->R.Address;
102    } else if (file == RC_FILE_SPECIAL) {
103       if (index >= RC_NUM_SPECIAL_REGISTERS) {
104          rc_error(s->C, "%s: special file index %i out of bounds\n", __func__, index);
105          return NULL;
106       }
107 
108       return &s->R.Special[index];
109    }
110 
111    return NULL;
112 }
113 
114 static void
mark_used(struct deadcode_state * s,rc_register_file file,unsigned int index,unsigned int mask)115 mark_used(struct deadcode_state *s, rc_register_file file, unsigned int index, unsigned int mask)
116 {
117    unsigned char *pused = get_used_ptr(s, file, index);
118    if (pused)
119       *pused |= mask;
120 }
121 
122 static void
update_instruction(struct deadcode_state * s,struct rc_instruction * inst)123 update_instruction(struct deadcode_state *s, struct rc_instruction *inst)
124 {
125    const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
126    struct instruction_state *insts = &s->Instructions[inst->IP];
127    unsigned int usedmask = 0;
128    unsigned int srcmasks[3];
129 
130    if (opcode->HasDstReg) {
131       unsigned char *pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
132       if (pused) {
133          usedmask = *pused & inst->U.I.DstReg.WriteMask;
134          *pused &= ~usedmask;
135       }
136    }
137 
138    insts->WriteMask |= usedmask;
139 
140    if (inst->U.I.WriteALUResult) {
141       unsigned char *pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
142       if (pused && *pused) {
143          if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
144             usedmask |= RC_MASK_X;
145          else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
146             usedmask |= RC_MASK_W;
147 
148          *pused = 0;
149          insts->WriteALUResult = 1;
150       }
151    }
152 
153    rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
154 
155    for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
156       unsigned int refmask = 0;
157       unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
158       insts->SrcReg[src] |= newsrcmask;
159 
160       for (unsigned int chan = 0; chan < 4; ++chan) {
161          if (GET_BIT(newsrcmask, chan))
162             refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
163       }
164 
165       /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
166       refmask &= RC_MASK_XYZW;
167 
168       if (!refmask)
169          continue;
170 
171       mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
172 
173       if (inst->U.I.SrcReg[src].RelAddr)
174          mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
175    }
176 }
177 
178 void
rc_dataflow_deadcode(struct radeon_compiler * c,void * user)179 rc_dataflow_deadcode(struct radeon_compiler *c, void *user)
180 {
181    struct deadcode_state s;
182    unsigned int nr_instructions;
183    unsigned int ip;
184 
185    memset(&s, 0, sizeof(s));
186    s.C = c;
187 
188    nr_instructions = rc_recompute_ips(c);
189    s.Instructions =
190       memory_pool_malloc(&c->Pool, sizeof(struct instruction_state) * nr_instructions);
191    memset(s.Instructions, 0, sizeof(struct instruction_state) * nr_instructions);
192 
193    for (struct rc_instruction *inst = c->Program.Instructions.Prev;
194         inst != &c->Program.Instructions; inst = inst->Prev) {
195       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
196 
197       /* Assume all output regs are live.  Anything else should have been
198        * eliminated before it got to us.
199        */
200       if (opcode->HasDstReg)
201          mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask);
202 
203       switch (opcode->Opcode) {
204       /* Mark all sources in the loop body as used before doing
205        * normal deadcode analysis. This is probably not optimal.
206        * Save this pessimistic deadcode state and restore it anytime
207        * we see a break just to be extra sure.
208        */
209       case RC_OPCODE_ENDLOOP: {
210          int endloops = 1;
211          struct rc_instruction *ptr;
212          for (ptr = inst->Prev; endloops > 0; ptr = ptr->Prev) {
213             opcode = rc_get_opcode_info(ptr->U.I.Opcode);
214             if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
215                endloops--;
216                continue;
217             }
218             if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
219                endloops++;
220                continue;
221             }
222             if (opcode->HasDstReg) {
223                int src = 0;
224                unsigned int srcmasks[3];
225                unsigned int writemask = ptr->U.I.DstReg.WriteMask;
226                if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
227                   writemask |= RC_MASK_X;
228                else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
229                   writemask |= RC_MASK_W;
230 
231                rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
232                for (src = 0; src < opcode->NumSrcRegs; src++) {
233                   mark_used(&s, ptr->U.I.SrcReg[src].File, ptr->U.I.SrcReg[src].Index,
234                             srcmasks[src]);
235                }
236             }
237          }
238          push_loop(&s);
239          break;
240       }
241       case RC_OPCODE_BRK: {
242          struct loopinfo *loop = &s.LoopStack[s.LoopStackSize - 1];
243          memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R));
244          break;
245       }
246       case RC_OPCODE_BGNLOOP:
247          s.LoopStackSize--;
248          break;
249       case RC_OPCODE_CONT:
250          break;
251       case RC_OPCODE_ENDIF:
252          push_branch(&s);
253          break;
254       default:
255          if (opcode->IsFlowControl && s.BranchStackSize) {
256             struct branchinfo *branch = &s.BranchStack[s.BranchStackSize - 1];
257             if (opcode->Opcode == RC_OPCODE_IF) {
258                or_updatemasks(&s.R, &s.R,
259                               branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
260 
261                s.BranchStackSize--;
262             } else if (opcode->Opcode == RC_OPCODE_ELSE) {
263                if (branch->HaveElse) {
264                   rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __func__);
265                } else {
266                   memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
267                   memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
268                   branch->HaveElse = 1;
269                }
270             } else {
271                rc_error(c, "%s: Unhandled control flow instruction %s\n", __func__, opcode->Name);
272             }
273          }
274       }
275 
276       update_instruction(&s, inst);
277    }
278 
279    ip = 0;
280    for (struct rc_instruction *inst = c->Program.Instructions.Next;
281         inst != &c->Program.Instructions; inst = inst->Next, ++ip) {
282       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
283       int dead = 1;
284       unsigned int srcmasks[3];
285       unsigned int usemask;
286 
287       if (!opcode->HasDstReg) {
288          dead = 0;
289       } else {
290          inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
291          if (s.Instructions[ip].WriteMask)
292             dead = 0;
293 
294          if (s.Instructions[ip].WriteALUResult)
295             dead = 0;
296          else
297             inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
298       }
299 
300       if (dead) {
301          struct rc_instruction *todelete = inst;
302          inst = inst->Prev;
303          rc_remove_instruction(todelete);
304          continue;
305       }
306 
307       usemask = s.Instructions[ip].WriteMask;
308 
309       if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
310          usemask |= RC_MASK_X;
311       else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
312          usemask |= RC_MASK_W;
313 
314       rc_compute_sources_for_writemask(inst, usemask, srcmasks);
315 
316       for (unsigned int src = 0; src < 3; ++src) {
317          for (unsigned int chan = 0; chan < 4; ++chan) {
318             if (!GET_BIT(srcmasks[src], chan))
319                SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
320          }
321       }
322    }
323 
324    rc_calculate_inputs_outputs(c);
325 }
326