• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_compiler.h"
7 
8 #include <stdarg.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 
13 #include "pipe/p_state.h"
14 #include "util/u_debug.h"
15 #include "radeon_compiler_util.h"
16 #include "radeon_dataflow.h"
17 #include "radeon_program.h"
18 #include "radeon_program_pair.h"
19 #include "radeon_regalloc.h"
20 
21 void
rc_init(struct radeon_compiler * c,const struct rc_regalloc_state * rs)22 rc_init(struct radeon_compiler *c, const struct rc_regalloc_state *rs)
23 {
24    memset(c, 0, sizeof(*c));
25 
26    memory_pool_init(&c->Pool);
27    c->Program.Instructions.Prev = &c->Program.Instructions;
28    c->Program.Instructions.Next = &c->Program.Instructions;
29    c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
30    c->regalloc_state = rs;
31    c->max_temp_index = -1;
32 }
33 
34 void
rc_destroy(struct radeon_compiler * c)35 rc_destroy(struct radeon_compiler *c)
36 {
37    rc_constants_destroy(&c->Program.Constants);
38    memory_pool_destroy(&c->Pool);
39    free(c->ErrorMsg);
40 }
41 
42 void
rc_debug(struct radeon_compiler * c,const char * fmt,...)43 rc_debug(struct radeon_compiler *c, const char *fmt, ...)
44 {
45    va_list ap;
46 
47    if (!(c->Debug & RC_DBG_LOG))
48       return;
49 
50    va_start(ap, fmt);
51    vfprintf(stderr, fmt, ap);
52    va_end(ap);
53 }
54 
55 void
rc_error(struct radeon_compiler * c,const char * fmt,...)56 rc_error(struct radeon_compiler *c, const char *fmt, ...)
57 {
58    va_list ap;
59 
60    c->Error = 1;
61 
62    if (!c->ErrorMsg) {
63       /* Only remember the first error */
64       char buf[1024];
65       int written;
66 
67       va_start(ap, fmt);
68       written = vsnprintf(buf, sizeof(buf), fmt, ap);
69       va_end(ap);
70 
71       if (written < sizeof(buf)) {
72          c->ErrorMsg = strdup(buf);
73       } else {
74          c->ErrorMsg = malloc(written + 1);
75 
76          va_start(ap, fmt);
77          vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
78          va_end(ap);
79       }
80    }
81 
82    if (c->Debug & RC_DBG_LOG) {
83       fprintf(stderr, "r300compiler error: ");
84 
85       va_start(ap, fmt);
86       vfprintf(stderr, fmt, ap);
87       va_end(ap);
88    }
89 }
90 
/**
 * Report a failed internal assertion through rc_error().
 *
 * \param c          compiler context that receives the error
 * \param file       source file name included in the message
 * \param line       source line number included in the message
 * \param assertion  text of the assertion that failed
 * \return always 1
 */
int
rc_if_fail_helper(struct radeon_compiler *c, const char *file, int line, const char *assertion)
{
   /* "ICE" = internal compiler error. */
   rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);

   return 1;
}
97 
98 void
rc_mark_unused_channels(struct radeon_compiler * c,void * user)99 rc_mark_unused_channels(struct radeon_compiler *c, void *user)
100 {
101    unsigned int srcmasks[3];
102 
103    for (struct rc_instruction *inst = c->Program.Instructions.Next;
104         inst != &c->Program.Instructions; inst = inst->Next) {
105 
106       rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks);
107 
108       for (unsigned int src = 0; src < 3; ++src) {
109          for (unsigned int chan = 0; chan < 4; ++chan) {
110             if (!GET_BIT(srcmasks[src], chan))
111                SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
112          }
113       }
114    }
115 }
116 
117 /**
118  * Recompute c->Program.InputsRead and c->Program.OutputsWritten
119  * based on which inputs and outputs are actually referenced
120  * in program instructions.
121  */
122 void
rc_calculate_inputs_outputs(struct radeon_compiler * c)123 rc_calculate_inputs_outputs(struct radeon_compiler *c)
124 {
125    struct rc_instruction *inst;
126 
127    c->Program.InputsRead = 0;
128    c->Program.OutputsWritten = 0;
129 
130    for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
131       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
132       int i;
133 
134       for (i = 0; i < opcode->NumSrcRegs; ++i) {
135          if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
136             c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
137       }
138 
139       if (opcode->HasDstReg) {
140          if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
141             c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
142       }
143    }
144 }
145 
146 /**
147  * Rewrite the program such that a given output is duplicated.
148  */
149 void
rc_copy_output(struct radeon_compiler * c,unsigned output,unsigned dup_output)150 rc_copy_output(struct radeon_compiler *c, unsigned output, unsigned dup_output)
151 {
152    unsigned tempreg = rc_find_free_temporary(c);
153    struct rc_instruction *inst;
154    struct rc_instruction *insert_pos = c->Program.Instructions.Prev;
155    struct rc_instruction *last_write_inst = NULL;
156    unsigned branch_depth = 0;
157    unsigned loop_depth = 0;
158    bool emit_after_control_flow = false;
159    unsigned num_writes = 0;
160 
161    for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
162       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
163 
164       if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
165          loop_depth++;
166       if (inst->U.I.Opcode == RC_OPCODE_IF)
167          branch_depth++;
168       if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
169           (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
170          if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
171             insert_pos = inst;
172             emit_after_control_flow = false;
173          }
174 
175       if (opcode->HasDstReg) {
176          if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
177             num_writes++;
178             inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
179             inst->U.I.DstReg.Index = tempreg;
180             insert_pos = inst;
181             last_write_inst = inst;
182             if (loop_depth != 0 && branch_depth != 0)
183                emit_after_control_flow = true;
184          }
185       }
186    }
187 
188    /* If there is only a single write, just duplicate the whole instruction instead.
189     * We can do this even when the single write was is a control flow.
190     */
191    if (num_writes == 1) {
192       last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
193       last_write_inst->U.I.DstReg.Index = output;
194 
195       inst = rc_insert_new_instruction(c, last_write_inst);
196       struct rc_instruction *prev = inst->Prev;
197       struct rc_instruction *next = inst->Next;
198       memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
199       inst->Prev = prev;
200       inst->Next = next;
201       inst->U.I.DstReg.Index = dup_output;
202    } else {
203       inst = rc_insert_new_instruction(c, insert_pos);
204       inst->U.I.Opcode = RC_OPCODE_MOV;
205       inst->U.I.DstReg.File = RC_FILE_OUTPUT;
206       inst->U.I.DstReg.Index = output;
207 
208       inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
209       inst->U.I.SrcReg[0].Index = tempreg;
210       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
211 
212       inst = rc_insert_new_instruction(c, inst);
213       inst->U.I.Opcode = RC_OPCODE_MOV;
214       inst->U.I.DstReg.File = RC_FILE_OUTPUT;
215       inst->U.I.DstReg.Index = dup_output;
216 
217       inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
218       inst->U.I.SrcReg[0].Index = tempreg;
219       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
220    }
221 
222    c->Program.OutputsWritten |= 1U << dup_output;
223 }
224 
225 /**
226  * Introduce standard code fragment to deal with fragment.position.
227  */
228 void
rc_transform_fragment_wpos(struct radeon_compiler * c,unsigned wpos,unsigned new_input,int full_vtransform)229 rc_transform_fragment_wpos(struct radeon_compiler *c, unsigned wpos, unsigned new_input,
230                            int full_vtransform)
231 {
232    unsigned tempregi = rc_find_free_temporary(c);
233    struct rc_instruction *inst_rcp;
234    struct rc_instruction *inst_mul;
235    struct rc_instruction *inst_mad;
236    struct rc_instruction *inst;
237 
238    c->Program.InputsRead &= ~(1U << wpos);
239    c->Program.InputsRead |= 1U << new_input;
240 
241    /* perspective divide */
242    inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
243    inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
244 
245    inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
246    inst_rcp->U.I.DstReg.Index = tempregi;
247    inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
248 
249    inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
250    inst_rcp->U.I.SrcReg[0].Index = new_input;
251    inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
252 
253    inst_mul = rc_insert_new_instruction(c, inst_rcp);
254    inst_mul->U.I.Opcode = RC_OPCODE_MUL;
255 
256    inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
257    inst_mul->U.I.DstReg.Index = tempregi;
258    inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
259 
260    inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
261    inst_mul->U.I.SrcReg[0].Index = new_input;
262 
263    inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
264    inst_mul->U.I.SrcReg[1].Index = tempregi;
265    inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
266 
267    /* viewport transformation */
268    inst_mad = rc_insert_new_instruction(c, inst_mul);
269    inst_mad->U.I.Opcode = RC_OPCODE_MAD;
270 
271    inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
272    inst_mad->U.I.DstReg.Index = tempregi;
273    inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
274 
275    inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
276    inst_mad->U.I.SrcReg[0].Index = tempregi;
277    inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
278 
279    inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
280    inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
281 
282    inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
283    inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
284 
285    if (full_vtransform) {
286       inst_mad->U.I.SrcReg[1].Index =
287          rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
288       inst_mad->U.I.SrcReg[2].Index =
289          rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
290    } else {
291       inst_mad->U.I.SrcReg[1].Index = inst_mad->U.I.SrcReg[2].Index =
292          rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
293    }
294 
295    for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
296       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
297       unsigned i;
298 
299       for (i = 0; i < opcode->NumSrcRegs; i++) {
300          if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == wpos) {
301             inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
302             inst->U.I.SrcReg[i].Index = tempregi;
303          }
304       }
305    }
306 }
307 
308 /**
309  * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
310  * Gallium and OpenGL define it the other way around.
311  *
312  * So let's just negate FACE at the beginning of the shader and rewrite the rest
313  * of the shader to read from the newly allocated temporary.
314  */
315 void
rc_transform_fragment_face(struct radeon_compiler * c,unsigned face)316 rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
317 {
318    unsigned tempregi = rc_find_free_temporary(c);
319    struct rc_instruction *inst_add;
320    struct rc_instruction *inst;
321 
322    /* perspective divide */
323    inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
324    inst_add->U.I.Opcode = RC_OPCODE_ADD;
325 
326    inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
327    inst_add->U.I.DstReg.Index = tempregi;
328    inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
329 
330    inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
331    inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
332 
333    inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
334    inst_add->U.I.SrcReg[1].Index = face;
335    inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
336    inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
337 
338    for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
339       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
340       unsigned i;
341 
342       for (i = 0; i < opcode->NumSrcRegs; i++) {
343          if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == face) {
344             inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
345             inst->U.I.SrcReg[i].Index = tempregi;
346          }
347       }
348    }
349 }
350 
351 static void
reg_count_callback(void * userdata,struct rc_instruction * inst,rc_register_file file,unsigned int index,unsigned int mask)352 reg_count_callback(void *userdata, struct rc_instruction *inst, rc_register_file file,
353                    unsigned int index, unsigned int mask)
354 {
355    struct rc_program_stats *s = userdata;
356    if (file == RC_FILE_TEMPORARY)
357       (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
358    if (file == RC_FILE_INLINE)
359       s->num_inline_literals++;
360    if (file == RC_FILE_CONSTANT)
361       s->num_consts = MAX2(s->num_consts, index + 1);
362 }
363 
364 void
rc_get_stats(struct radeon_compiler * c,struct rc_program_stats * s)365 rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
366 {
367    struct rc_instruction *tmp;
368    memset(s, 0, sizeof(*s));
369    unsigned ip = 0;
370    int last_begintex = -1;
371 
372    for (tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
373         tmp = tmp->Next, ip++) {
374       const struct rc_opcode_info *info;
375       rc_for_all_reads_mask(tmp, reg_count_callback, s);
376       if (tmp->Type == RC_INSTRUCTION_NORMAL) {
377          info = rc_get_opcode_info(tmp->U.I.Opcode);
378          if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
379             /* The R5xx docs mention ~30 cycles in section 8.3.1
380              * The only case when we don't want to add the cycles
381              * penalty is when the texblock contains only kil.
382              */
383             const struct rc_opcode_info *next_op = rc_get_opcode_info(tmp->Next->U.I.Opcode);
384             struct rc_instruction *second_next_instr = tmp->Next->Next;
385             const struct rc_opcode_info *second_next_op;
386             if (second_next_instr->Type == RC_INSTRUCTION_NORMAL) {
387                second_next_op = rc_get_opcode_info(second_next_instr->U.I.Opcode);
388             } else {
389                second_next_op = rc_get_opcode_info(second_next_instr->U.P.RGB.Opcode);
390             }
391             if (next_op->Opcode != RC_OPCODE_KIL ||
392                 (second_next_instr->Type == RC_INSTRUCTION_NORMAL && second_next_op->HasTexture)) {
393                s->num_cycles += 30;
394                last_begintex = ip;
395             }
396             continue;
397          }
398          if (info->Opcode == RC_OPCODE_MAD && rc_inst_has_three_diff_temp_srcs(tmp))
399             s->num_cycles++;
400       } else {
401          if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
402             s->num_presub_ops++;
403          if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
404             s->num_presub_ops++;
405          /* Assuming alpha will never be a flow control or
406           * a tex instruction. */
407          if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
408             s->num_alpha_insts++;
409          if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
410             s->num_rgb_insts++;
411          if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
412             s->num_omod_ops++;
413          }
414          if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
415             s->num_omod_ops++;
416          }
417          if (tmp->U.P.Nop)
418             s->num_cycles++;
419          /* SemWait has effect only on R500, the more instructions we can put
420           * between the tex block and the first texture semaphore, the better.
421           */
422          if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) {
423             s->num_cycles -= MIN2(30, ip - last_begintex);
424             last_begintex = -1;
425          }
426          info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
427       }
428       if (info->IsFlowControl) {
429          s->num_fc_insts++;
430          if (info->Opcode == RC_OPCODE_BGNLOOP)
431             s->num_loops++;
432       }
433       /* VS flow control was already translated to the predicate instructions */
434       if (c->type == RC_VERTEX_PROGRAM)
435          if (strstr(info->Name, "PRED") != NULL)
436             s->num_pred_insts++;
437 
438       if (info->HasTexture)
439          s->num_tex_insts++;
440       s->num_insts++;
441       s->num_cycles++;
442    }
443    /* Increment here because the reg_count_callback store the max
444     * temporary reg index in s->nun_temp_regs. */
445    s->num_temp_regs++;
446 }
447 
448 static void
print_stats(struct radeon_compiler * c)449 print_stats(struct radeon_compiler *c)
450 {
451    struct rc_program_stats s;
452 
453    rc_get_stats(c, &s);
454 
455    /* Note that we print some dummy values for instruction categories that
456     * only the FS has, because shader-db's report.py wants all shaders to
457     * have the same set.
458     */
459    util_debug_message(
460       c->debug, SHADER_INFO,
461       "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, "
462       "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
463       c->type == RC_VERTEX_PROGRAM ? "VS" : "FS", s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
464       s.num_pred_insts, s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
465       s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals, s.num_cycles);
466 }
467 
468 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {"Vertex Program", "Fragment Program"};
469 
470 bool
rc_run_compiler_passes(struct radeon_compiler * c,struct radeon_compiler_pass * list)471 rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
472 {
473    for (unsigned i = 0; list[i].name; i++) {
474       if (list[i].predicate) {
475          list[i].run(c, list[i].user);
476 
477          if (c->Error)
478             return false;
479 
480          if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
481             fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
482             rc_print_program(&c->Program);
483          }
484       }
485    }
486    return true;
487 }
488 
489 /* Executes a list of compiler passes given in the parameter 'list'. */
490 void
rc_run_compiler(struct radeon_compiler * c,struct radeon_compiler_pass * list)491 rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
492 {
493    if (c->Debug & RC_DBG_LOG) {
494       fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
495       rc_print_program(&c->Program);
496    }
497 
498    if (rc_run_compiler_passes(c, list)) {
499       print_stats(c);
500    }
501 }
502 
503 void
rc_validate_final_shader(struct radeon_compiler * c,void * user)504 rc_validate_final_shader(struct radeon_compiler *c, void *user)
505 {
506    /* Check the number of constants. */
507    if (c->Program.Constants.Count > c->max_constants) {
508       rc_error(c, "Too many constants. Max: %i, Got: %i\n", c->max_constants,
509                c->Program.Constants.Count);
510    }
511 }
512