• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radeon_program_pair.h"
8 
9 #include <stdio.h>
10 
11 #include "util/glheader.h"
12 #include "util/ralloc.h"
13 #include "util/register_allocate.h"
14 #include "util/u_memory.h"
15 
16 #include "r300_fragprog_swizzle.h"
17 #include "radeon_compiler.h"
18 #include "radeon_compiler_util.h"
19 #include "radeon_dataflow.h"
20 #include "radeon_list.h"
21 #include "radeon_regalloc.h"
22 #include "radeon_variable.h"
23 
24 static void
scan_read_callback(void * data,struct rc_instruction * inst,rc_register_file file,unsigned int index,unsigned int mask)25 scan_read_callback(void *data, struct rc_instruction *inst, rc_register_file file,
26                    unsigned int index, unsigned int mask)
27 {
28    struct regalloc_state *s = data;
29    struct register_info *reg;
30    unsigned int i;
31 
32    if (file != RC_FILE_INPUT)
33       return;
34 
35    s->Input[index].Used = 1;
36    reg = &s->Input[index];
37 
38    for (i = 0; i < 4; i++) {
39       if (!((mask >> i) & 0x1)) {
40          continue;
41       }
42       reg->Live[i].Used = 1;
43       reg->Live[i].Start = 0;
44       reg->Live[i].End = s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
45    }
46 }
47 
48 static void
remap_register(void * data,struct rc_instruction * inst,rc_register_file * file,unsigned int * index)49 remap_register(void *data, struct rc_instruction *inst, rc_register_file *file, unsigned int *index)
50 {
51    struct regalloc_state *s = data;
52    const struct register_info *reg;
53 
54    if (*file == RC_FILE_TEMPORARY && s->Simple)
55       reg = &s->Temporary[*index];
56    else if (*file == RC_FILE_INPUT)
57       reg = &s->Input[*index];
58    else
59       return;
60 
61    if (reg->Allocated) {
62       *index = reg->Index;
63    }
64 }
65 
66 static void
alloc_input_simple(void * data,unsigned int input,unsigned int hwreg)67 alloc_input_simple(void *data, unsigned int input, unsigned int hwreg)
68 {
69    struct regalloc_state *s = data;
70 
71    if (input >= s->NumInputs)
72       return;
73 
74    s->Input[input].Allocated = 1;
75    s->Input[input].File = RC_FILE_TEMPORARY;
76    s->Input[input].Index = hwreg;
77 }
78 
79 /* This functions offsets the temporary register indices by the number
80  * of input registers, because input registers are actually temporaries and
81  * should not occupy the same space.
82  *
83  * This pass is supposed to be used to maintain correct allocation of inputs
84  * if the standard register allocation is disabled. */
85 static void
do_regalloc_inputs_only(struct regalloc_state * s)86 do_regalloc_inputs_only(struct regalloc_state *s)
87 {
88    for (unsigned i = 0; i < s->NumTemporaries; i++) {
89       s->Temporary[i].Allocated = 1;
90       s->Temporary[i].File = RC_FILE_TEMPORARY;
91       s->Temporary[i].Index = i + s->NumInputs;
92    }
93 }
94 
95 static unsigned int
is_derivative(rc_opcode op)96 is_derivative(rc_opcode op)
97 {
98    return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
99 }
100 
/* User data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
   /* Caller-owned flag; the callback stores 0 through it when a converted
    * source swizzle is rejected by the compiler's IsNative check. */
   unsigned *can_change_writemask;
   /* Swizzle passed to rc_adjust_channels() together with each argument's
    * current swizzle. */
   unsigned conversion_swizzle;
   /* Compiler whose SwizzleCaps are consulted. */
   struct radeon_compiler *c;
};
106 
107 static void
variable_get_class_read_cb(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)108 variable_get_class_read_cb(void *userdata, struct rc_instruction *inst,
109                            struct rc_pair_instruction_arg *arg,
110                            struct rc_pair_instruction_source *src)
111 {
112    struct variable_get_class_cb_data *d = userdata;
113    unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, d->conversion_swizzle);
114    /* We can't just call r300_swizzle_is_native basic here, because it ignores the
115     * extra requirements for presubtract. However, after pair translation we no longer
116     * have the rc_src_register required for the native swizzle, so we have to
117     * reconstruct it. */
118    struct rc_src_register reg = {};
119    reg.Swizzle = new_swizzle;
120    reg.File = src->File;
121 
122    assert(inst->Type == RC_INSTRUCTION_PAIR);
123    /* The opcode is unimportant, we can't have TEX here. */
124    if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) {
125       *d->can_change_writemask = 0;
126    }
127 }
128 
129 static unsigned
variable_get_class(struct rc_variable * variable,const struct rc_class * classes)130 variable_get_class(struct rc_variable *variable, const struct rc_class *classes)
131 {
132    unsigned int i;
133    unsigned int can_change_writemask = 1;
134    unsigned int writemask = rc_variable_writemask_sum(variable);
135    struct rc_list *readers = rc_variable_readers_union(variable);
136    int class_index;
137 
138    if (!variable->C->is_r500) {
139       struct rc_class c;
140       struct rc_variable *var_ptr;
141       /* The assumption here is that if an instruction has type
142        * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
143        * r300 and r400 can't swizzle the result of a TEX lookup. */
144       for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
145          if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
146             writemask = RC_MASK_XYZW;
147          }
148       }
149 
150       /* Check if it is possible to do swizzle packing for r300/r400
151        * without creating non-native swizzles. */
152       class_index = rc_find_class(classes, writemask, 3);
153       if (class_index < 0) {
154          goto error;
155       }
156       c = classes[class_index];
157       if (c.WritemaskCount == 1) {
158          goto done;
159       }
160       for (i = 0; i < c.WritemaskCount; i++) {
161          struct rc_variable *var_ptr;
162          for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
163             int j;
164             unsigned int conversion_swizzle =
165                rc_make_conversion_swizzle(writemask, c.Writemasks[i]);
166             struct variable_get_class_cb_data d;
167             d.can_change_writemask = &can_change_writemask;
168             d.conversion_swizzle = conversion_swizzle;
169             d.c = variable->C;
170             /* If we get this far var_ptr->Inst has to
171              * be a pair instruction.  If variable or any
172              * of its friends are normal instructions,
173              * then the writemask will be set to RC_MASK_XYZW
174              * and the function will return before it gets
175              * here. */
176             rc_pair_for_all_reads_arg(var_ptr->Inst, variable_get_class_read_cb, &d);
177 
178             for (j = 0; j < var_ptr->ReaderCount; j++) {
179                unsigned int old_swizzle;
180                unsigned int new_swizzle;
181                struct rc_reader r = var_ptr->Readers[j];
182                if (r.Inst->Type == RC_INSTRUCTION_PAIR) {
183                   old_swizzle = r.U.P.Arg->Swizzle;
184                } else {
185                   /* Source operands of TEX
186                    * instructions can't be
187                    * swizzle on r300/r400 GPUs.
188                    */
189                   can_change_writemask = 0;
190                   break;
191                }
192                new_swizzle = rc_rewrite_swizzle(old_swizzle, conversion_swizzle);
193                if (!r300_swizzle_is_native_basic(new_swizzle)) {
194                   can_change_writemask = 0;
195                   break;
196                }
197             }
198             if (!can_change_writemask) {
199                break;
200             }
201          }
202          if (!can_change_writemask) {
203             break;
204          }
205       }
206    }
207 
208    if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
209       /* DDX/DDY seem to always fail when their writemasks are
210        * changed.*/
211       if (is_derivative(variable->Inst->U.P.RGB.Opcode) ||
212           is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
213          can_change_writemask = 0;
214       }
215    }
216    for (; readers; readers = readers->Next) {
217       struct rc_reader *r = readers->Item;
218       if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
219          if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
220             can_change_writemask = 0;
221             break;
222          }
223          /* DDX/DDY also fail when their swizzles are changed. */
224          if (is_derivative(r->Inst->U.P.RGB.Opcode) || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
225             can_change_writemask = 0;
226             break;
227          }
228       }
229    }
230 
231    class_index = rc_find_class(classes, writemask, can_change_writemask ? 3 : 1);
232 done:
233    if (class_index > -1) {
234       return classes[class_index].ID;
235    } else {
236    error:
237       rc_error(variable->C, "Could not find class for index=%u mask=%u\n", variable->Dst.Index,
238                writemask);
239       return 0;
240    }
241 }
242 
243 static void
do_advanced_regalloc(struct regalloc_state * s)244 do_advanced_regalloc(struct regalloc_state *s)
245 {
246 
247    unsigned int i, input_node, node_count, node_index;
248    struct ra_class **node_classes;
249    struct rc_instruction *inst;
250    struct rc_list *var_ptr;
251    struct rc_list *variables;
252    struct ra_graph *graph;
253    const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
254 
255    /* Get list of program variables */
256    variables = rc_get_variables(s->C);
257    node_count = rc_list_count(variables);
258    node_classes = memory_pool_malloc(&s->C->Pool, node_count * sizeof(struct ra_class *));
259 
260    for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
261       unsigned int class_index;
262       /* Compute the live intervals */
263       rc_variable_compute_live_intervals(var_ptr->Item);
264 
265       class_index = variable_get_class(var_ptr->Item, ra_state->class_list);
266       node_classes[node_index] = ra_state->classes[class_index];
267    }
268 
269    /* Calculate live intervals for input registers */
270    for (inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions;
271         inst = inst->Next) {
272       rc_opcode op = rc_get_flow_control_inst(inst);
273       if (op == RC_OPCODE_BGNLOOP) {
274          struct rc_instruction *endloop = rc_match_bgnloop(inst);
275          if (endloop->IP > s->LoopEnd) {
276             s->LoopEnd = endloop->IP;
277          }
278       }
279       rc_for_all_reads_mask(inst, scan_read_callback, s);
280    }
281 
282    /* Compute the writemask for inputs. */
283    for (i = 0; i < s->NumInputs; i++) {
284       unsigned int chan, writemask = 0;
285       for (chan = 0; chan < 4; chan++) {
286          if (s->Input[i].Live[chan].Used) {
287             writemask |= (1 << chan);
288          }
289       }
290       s->Input[i].Writemask = writemask;
291    }
292 
293    graph = ra_alloc_interference_graph(ra_state->regs, node_count + s->NumInputs);
294 
295    for (node_index = 0; node_index < node_count; node_index++) {
296       ra_set_node_class(graph, node_index, node_classes[node_index]);
297    }
298 
299    rc_build_interference_graph(graph, variables);
300 
301    /* Add input registers to the interference graph */
302    for (i = 0, input_node = 0; i < s->NumInputs; i++) {
303       if (!s->Input[i].Writemask) {
304          continue;
305       }
306       for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
307          struct rc_variable *var = var_ptr->Item;
308          if (rc_overlap_live_intervals_array(s->Input[i].Live, var->Live)) {
309             ra_add_node_interference(graph, node_index, node_count + input_node);
310          }
311       }
312       /* Manually allocate a register for this input */
313       ra_set_node_reg(graph, node_count + input_node,
314                       get_reg_id(s->Input[i].Index, s->Input[i].Writemask));
315       input_node++;
316    }
317 
318    if (!ra_allocate(graph)) {
319       rc_error(s->C, "Ran out of hardware temporaries\n");
320       ralloc_free(graph);
321       return;
322    }
323 
324    /* Rewrite the registers */
325    for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
326       int reg = ra_get_node_reg(graph, node_index);
327       unsigned int writemask = reg_get_writemask(reg);
328       unsigned int index = reg_get_index(reg);
329       struct rc_variable *var = var_ptr->Item;
330 
331       if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
332          writemask = rc_variable_writemask_sum(var);
333       }
334 
335       if (var->Dst.File == RC_FILE_INPUT) {
336          continue;
337       }
338       rc_variable_change_dst(var, index, writemask);
339    }
340 
341    ralloc_free(graph);
342 }
343 
344 /**
345  * @param user This parameter should be a pointer to an integer value.  If this
346  * integer value is zero, then a simple register allocator will be used that
347  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
348  * user is non-zero, then the regular register allocator will be used
349  * (\sa do_regalloc).
350  */
351 void
rc_pair_regalloc(struct radeon_compiler * cc,void * user)352 rc_pair_regalloc(struct radeon_compiler *cc, void *user)
353 {
354    struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc;
355    struct regalloc_state s;
356    int *do_full_regalloc = (int *)user;
357 
358    memset(&s, 0, sizeof(s));
359    s.C = cc;
360    s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
361    s.Input = memory_pool_malloc(&cc->Pool, s.NumInputs * sizeof(struct register_info));
362    memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
363 
364    s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
365    s.Temporary = memory_pool_malloc(&cc->Pool, s.NumTemporaries * sizeof(struct register_info));
366    memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
367 
368    rc_recompute_ips(s.C);
369 
370    c->AllocateHwInputs(c, &alloc_input_simple, &s);
371    if (*do_full_regalloc) {
372       do_advanced_regalloc(&s);
373    } else {
374       s.Simple = 1;
375       do_regalloc_inputs_only(&s);
376    }
377 
378    /* Rewrite inputs and if we are doing the simple allocation, rewrite
379     * temporaries too. */
380    for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
381         inst != &s.C->Program.Instructions; inst = inst->Next) {
382       rc_remap_registers(inst, &remap_register, &s);
383    }
384 }
385