/*
 * Copyright 2009 Nicolai Hähnle
 * SPDX-License-Identifier: MIT
 */

#include "radeon_compiler.h"

#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_pair.h"
#include "radeon_regalloc.h"

/**
 * Put a compiler object into its initial state.
 *
 * The whole structure is zeroed, the memory pool is initialized and the
 * instruction list is set up as an empty circular list whose sentinel node
 * points at itself and carries RC_OPCODE_ILLEGAL_OPCODE.
 *
 * \param c   compiler to initialize
 * \param rs  register allocator state, stored in c->regalloc_state
 */
void
rc_init(struct radeon_compiler *c, const struct rc_regalloc_state *rs)
{
   memset(c, 0, sizeof(*c));

   memory_pool_init(&c->Pool);
   c->Program.Instructions.Prev = &c->Program.Instructions;
   c->Program.Instructions.Next = &c->Program.Instructions;
   c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
   c->regalloc_state = rs;
   c->max_temp_index = -1;
}

/**
 * Release the resources owned by a compiler object: the constant list,
 * the memory pool and the stored error message.
 */
void
rc_destroy(struct radeon_compiler *c)
{
   rc_constants_destroy(&c->Program.Constants);
   memory_pool_destroy(&c->Pool);
   free(c->ErrorMsg);
}

/**
 * printf-style debug output to stderr.
 *
 * Nothing is printed unless RC_DBG_LOG is set in c->Debug.
 */
void
rc_debug(struct radeon_compiler *c, const char *fmt, ...)
{
   va_list ap;

   if (!(c->Debug & RC_DBG_LOG))
      return;

   va_start(ap, fmt);
   vfprintf(stderr, fmt, ap);
   va_end(ap);
}

/**
 * Flag a compilation error.
 *
 * Sets c->Error and, if no message has been recorded yet, stores a
 * heap-allocated copy of the formatted message in c->ErrorMsg (released by
 * rc_destroy). When RC_DBG_LOG is set the message is also written to stderr.
 *
 * c->ErrorMsg may remain NULL if memory allocation fails.
 */
void
rc_error(struct radeon_compiler *c, const char *fmt, ...)
{
   va_list ap;

   c->Error = 1;

   if (!c->ErrorMsg) {
      /* Only remember the first error */
      char buf[1024];
      int written;

      va_start(ap, fmt);
      written = vsnprintf(buf, sizeof(buf), fmt, ap);
      va_end(ap);

      if (written < 0) {
         /* vsnprintf reported an error: the contents of buf are not
          * reliable, so keep the unformatted text rather than nothing.
          */
         c->ErrorMsg = strdup(fmt);
      } else if ((size_t)written < sizeof(buf)) {
         c->ErrorMsg = strdup(buf);
      } else {
         /* Message did not fit into the stack buffer; format it again
          * into an exactly sized heap buffer.
          */
         c->ErrorMsg = malloc((size_t)written + 1);
         if (c->ErrorMsg) {
            va_start(ap, fmt);
            vsnprintf(c->ErrorMsg, (size_t)written + 1, fmt, ap);
            va_end(ap);
         }
      }
   }

   if (c->Debug & RC_DBG_LOG) {
      fprintf(stderr, "r300compiler error: ");

      va_start(ap, fmt);
      vfprintf(stderr, fmt, ap);
      va_end(ap);
   }
}

/**
 * Report a failed internal assertion through rc_error.
 *
 * \return always 1
 */
int
rc_if_fail_helper(struct radeon_compiler *c, const char *file, int line, const char *assertion)
{
   rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
   return 1;
}

/**
 * For every instruction, mark the source channels that do not contribute to
 * the written destination channels as RC_SWIZZLE_UNUSED.
 *
 * \param user  unused
 */
void
rc_mark_unused_channels(struct radeon_compiler *c, void *user)
{
   unsigned int srcmasks[3];

   for (struct rc_instruction *inst = c->Program.Instructions.Next;
        inst != &c->Program.Instructions; inst = inst->Next) {
      rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks);

      for (unsigned int src = 0; src < 3; ++src) {
         for (unsigned int chan = 0; chan < 4; ++chan) {
            if (!GET_BIT(srcmasks[src], chan))
               SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
         }
      }
   }
}

/**
 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
 * based on which inputs and outputs are actually referenced
 * in program instructions.
 */
void
rc_calculate_inputs_outputs(struct radeon_compiler *c)
{
   struct rc_instruction *inst;

   c->Program.InputsRead = 0;
   c->Program.OutputsWritten = 0;

   for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions;
        inst = inst->Next) {
      const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
      int i;

      for (i = 0; i < opcode->NumSrcRegs; ++i) {
         if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
            c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
      }

      if (opcode->HasDstReg) {
         if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
            c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
      }
   }
}

/**
 * Rewrite the program such that a given output is duplicated.
 *
 * Every write to \p output is redirected to a fresh temporary. If there was
 * exactly one such write, that instruction is restored and cloned with
 * \p dup_output as destination. Otherwise two MOVs from the temporary (to
 * \p output and to \p dup_output) are inserted after the last write, or,
 * when that write is inside a loop or branch, after the instruction that
 * closes the outermost enclosing control flow construct.
 */
void
rc_copy_output(struct radeon_compiler *c, unsigned output, unsigned dup_output)
{
   unsigned tempreg = rc_find_free_temporary(c);
   struct rc_instruction *inst;
   struct rc_instruction *insert_pos = c->Program.Instructions.Prev;
   struct rc_instruction *last_write_inst = NULL;
   unsigned branch_depth = 0;
   unsigned loop_depth = 0;
   bool emit_after_control_flow = false;
   unsigned num_writes = 0;

   for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions;
        inst = inst->Next) {
      const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
      bool closed_scope = false;

      if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
         loop_depth++;
      if (inst->U.I.Opcode == RC_OPCODE_IF)
         branch_depth++;

      /* Only count a closing instruction that matches an open scope, so
       * the unsigned depth counters can never wrap around.
       */
      if (inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth > 0) {
         loop_depth--;
         closed_scope = true;
      } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth > 0) {
         branch_depth--;
         closed_scope = true;
      }

      if (closed_scope && emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
         insert_pos = inst;
         emit_after_control_flow = false;
      }

      if (opcode->HasDstReg) {
         if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
            num_writes++;
            inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
            inst->U.I.DstReg.Index = tempreg;
            insert_pos = inst;
            last_write_inst = inst;
            /* A write inside *any* control flow (loop or branch) must
             * have its copies emitted once we are back at top level,
             * otherwise they would only execute conditionally.
             */
            if (loop_depth != 0 || branch_depth != 0)
               emit_after_control_flow = true;
         }
      }
   }

   /* If there is only a single write, just duplicate the whole instruction
    * instead. We can do this even when the single write was in control flow.
    */
   if (num_writes == 1) {
      last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
      last_write_inst->U.I.DstReg.Index = output;

      inst = rc_insert_new_instruction(c, last_write_inst);
      /* Copy everything except the list links of the new node. */
      struct rc_instruction *prev = inst->Prev;
      struct rc_instruction *next = inst->Next;
      memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
      inst->Prev = prev;
      inst->Next = next;
      inst->U.I.DstReg.Index = dup_output;
   } else {
      inst = rc_insert_new_instruction(c, insert_pos);
      inst->U.I.Opcode = RC_OPCODE_MOV;
      inst->U.I.DstReg.File = RC_FILE_OUTPUT;
      inst->U.I.DstReg.Index = output;

      inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
      inst->U.I.SrcReg[0].Index = tempreg;
      inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;

      inst = rc_insert_new_instruction(c, inst);
      inst->U.I.Opcode = RC_OPCODE_MOV;
      inst->U.I.DstReg.File = RC_FILE_OUTPUT;
      inst->U.I.DstReg.Index = dup_output;

      inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
      inst->U.I.SrcReg[0].Index = tempreg;
      inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
   }

   c->Program.OutputsWritten |= 1U << dup_output;
}

/**
 * Introduce standard code fragment to deal with fragment.position.
 *
 * Three instructions (RCP, MUL, MAD) are inserted at the start of the
 * program to compute the window position from input \p new_input into a
 * temporary; all later reads of input \p wpos are redirected to it.
 *
 * \param wpos             input index currently used for the position
 * \param new_input        input index the position is read from instead
 * \param full_vtransform  non-zero: scale/offset come from the
 *                         VIEWPORT_SCALE / VIEWPORT_OFFSET state constants;
 *                         zero: both come from WINDOW_DIMENSION
 */
void
rc_transform_fragment_wpos(struct radeon_compiler *c, unsigned wpos, unsigned new_input,
                           int full_vtransform)
{
   unsigned tempregi = rc_find_free_temporary(c);
   struct rc_instruction *inst_rcp;
   struct rc_instruction *inst_mul;
   struct rc_instruction *inst_mad;
   struct rc_instruction *inst;

   c->Program.InputsRead &= ~(1U << wpos);
   c->Program.InputsRead |= 1U << new_input;

   /* perspective divide */
   inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
   inst_rcp->U.I.Opcode = RC_OPCODE_RCP;

   inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
   inst_rcp->U.I.DstReg.Index = tempregi;
   inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;

   inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
   inst_rcp->U.I.SrcReg[0].Index = new_input;
   inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;

   inst_mul = rc_insert_new_instruction(c, inst_rcp);
   inst_mul->U.I.Opcode = RC_OPCODE_MUL;

   inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
   inst_mul->U.I.DstReg.Index = tempregi;
   inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;

   /* NOTE(review): SrcReg[0].Swizzle is not set here; presumably
    * rc_insert_new_instruction supplies a suitable default -- confirm.
    */
   inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
   inst_mul->U.I.SrcReg[0].Index = new_input;

   inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
   inst_mul->U.I.SrcReg[1].Index = tempregi;
   inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;

   /* viewport transformation */
   inst_mad = rc_insert_new_instruction(c, inst_mul);
   inst_mad->U.I.Opcode = RC_OPCODE_MAD;

   inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
   inst_mad->U.I.DstReg.Index = tempregi;
   inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;

   inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
   inst_mad->U.I.SrcReg[0].Index = tempregi;
   inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

   inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
   inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;

   inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
   inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;

   if (full_vtransform) {
      inst_mad->U.I.SrcReg[1].Index =
         rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
      inst_mad->U.I.SrcReg[2].Index =
         rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
   } else {
      inst_mad->U.I.SrcReg[1].Index = inst_mad->U.I.SrcReg[2].Index =
         rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
   }

   /* Redirect every remaining read of the old input to the temporary. */
   for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
      const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
      unsigned i;

      for (i = 0; i < opcode->NumSrcRegs; i++) {
         if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == wpos) {
            inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
            inst->U.I.SrcReg[i].Index = tempregi;
         }
      }
   }
}

/**
 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
 * Gallium and OpenGL define it the other way around.
 *
 * So let's just negate FACE at the beginning of the shader and rewrite the rest
 * of the shader to read from the newly allocated temporary.
 *
 * \param face  input index of the FACE value
 */
void
rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
{
   unsigned tempregi = rc_find_free_temporary(c);
   struct rc_instruction *inst_add;
   struct rc_instruction *inst;

   /* temp.x = 1 + (-FACE.x), i.e. flip the hardware value */
   inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
   inst_add->U.I.Opcode = RC_OPCODE_ADD;

   inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
   inst_add->U.I.DstReg.Index = tempregi;
   inst_add->U.I.DstReg.WriteMask = RC_MASK_X;

   inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
   inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;

   inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
   inst_add->U.I.SrcReg[1].Index = face;
   inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
   inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;

   /* Redirect every remaining read of FACE to the temporary. */
   for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
      const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
      unsigned i;

      for (i = 0; i < opcode->NumSrcRegs; i++) {
         if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == face) {
            inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
            inst->U.I.SrcReg[i].Index = tempregi;
         }
      }
   }
}

/**
 * Callback for rc_for_all_reads_mask: accumulate register usage into the
 * struct rc_program_stats passed as \p userdata.
 *
 * Tracks the highest temporary index seen, the number of inline literal
 * reads and the number of constants (highest constant index + 1).
 */
static void
reg_count_callback(void *userdata, struct rc_instruction *inst, rc_register_file file,
                   unsigned int index, unsigned int mask)
{
   struct rc_program_stats *s = userdata;

   if (file == RC_FILE_TEMPORARY && (int)index > s->num_temp_regs)
      s->num_temp_regs = index;
   if (file == RC_FILE_INLINE)
      s->num_inline_literals++;
   if (file == RC_FILE_CONSTANT)
      s->num_consts = MAX2(s->num_consts, index + 1);
}

/**
 * Gather statistics about the current program into \p s.
 *
 * \p s is zeroed first; the instruction list is then walked once, handling
 * both normal and paired instructions.
 */
void
rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
   struct rc_instruction *tmp;

   memset(s, 0, sizeof(*s));
   unsigned ip = 0;
   int last_begintex = -1;

   for (tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
        tmp = tmp->Next, ip++) {
      const struct rc_opcode_info *info;

      rc_for_all_reads_mask(tmp, reg_count_callback, s);

      if (tmp->Type == RC_INSTRUCTION_NORMAL) {
         info = rc_get_opcode_info(tmp->U.I.Opcode);

         if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
            /* The R5xx docs mention ~30 cycles in section 8.3.1
             * The only case when we don't want to add the cycles
             * penalty is when the texblock contains only kil.
             */
            const struct rc_opcode_info *next_op = rc_get_opcode_info(tmp->Next->U.I.Opcode);
            struct rc_instruction *second_next_instr = tmp->Next->Next;
            const struct rc_opcode_info *second_next_op;

            if (second_next_instr->Type == RC_INSTRUCTION_NORMAL) {
               second_next_op = rc_get_opcode_info(second_next_instr->U.I.Opcode);
            } else {
               second_next_op = rc_get_opcode_info(second_next_instr->U.P.RGB.Opcode);
            }

            if (next_op->Opcode != RC_OPCODE_KIL ||
                (second_next_instr->Type == RC_INSTRUCTION_NORMAL &&
                 second_next_op->HasTexture)) {
               s->num_cycles += 30;
               last_begintex = ip;
            }
            /* BEGIN_TEX itself is not counted as an instruction. */
            continue;
         }

         if (info->Opcode == RC_OPCODE_MAD && rc_inst_has_three_diff_temp_srcs(tmp))
            s->num_cycles++;
      } else {
         if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
            s->num_presub_ops++;
         if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
            s->num_presub_ops++;

         /* Assuming alpha will never be a flow control or
          * a tex instruction.
          */
         if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
            s->num_alpha_insts++;
         if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
            s->num_rgb_insts++;

         if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
            s->num_omod_ops++;
         }
         if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
            s->num_omod_ops++;
         }

         if (tmp->U.P.Nop)
            s->num_cycles++;

         /* SemWait has effect only on R500, the more instructions we can put
          * between the tex block and the first texture semaphore, the better.
          */
         if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) {
            s->num_cycles -= MIN2(30, ip - last_begintex);
            last_begintex = -1;
         }

         info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
      }

      if (info->IsFlowControl) {
         s->num_fc_insts++;
         if (info->Opcode == RC_OPCODE_BGNLOOP)
            s->num_loops++;
      }

      /* VS flow control was already translated to the predicate instructions */
      if (c->type == RC_VERTEX_PROGRAM)
         if (strstr(info->Name, "PRED") != NULL)
            s->num_pred_insts++;

      if (info->HasTexture)
         s->num_tex_insts++;

      s->num_insts++;
      s->num_cycles++;
   }

   /* Increment here because the reg_count_callback stores the max
    * temporary reg index in s->num_temp_regs.
    */
   s->num_temp_regs++;
}

/**
 * Collect the program statistics and report them through
 * util_debug_message as a SHADER_INFO message.
 */
static void
print_stats(struct radeon_compiler *c)
{
   struct rc_program_stats s;

   rc_get_stats(c, &s);

   /* Note that we print some dummy values for instruction categories that
    * only the FS has, because shader-db's report.py wants all shaders to
    * have the same set.
    */
   util_debug_message(
      c->debug, SHADER_INFO,
      "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, "
      "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
      c->type == RC_VERTEX_PROGRAM ? "VS" : "FS", s.num_insts, s.num_rgb_insts,
      s.num_alpha_insts, s.num_pred_insts, s.num_fc_insts, s.num_loops, s.num_tex_insts,
      s.num_presub_ops, s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals,
      s.num_cycles);
}

/* Human-readable program type names, indexed by c->type. */
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {"Vertex Program", "Fragment Program"};

/**
 * Run every pass in \p list (terminated by an entry with a NULL name) whose
 * predicate is set.
 *
 * After each executed pass the program is dumped to stderr when RC_DBG_LOG
 * is enabled and the pass requests a dump.
 *
 * \return false as soon as a pass leaves c->Error set, true otherwise
 */
bool
rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
{
   for (unsigned i = 0; list[i].name; i++) {
      if (list[i].predicate) {
         list[i].run(c, list[i].user);

         if (c->Error)
            return false;

         if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
            fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
            rc_print_program(&c->Program);
         }
      }
   }
   return true;
}

/* Executes a list of compiler passes given in the parameter 'list'.
 * Statistics are only printed when all passes completed without error.
 */
void
rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
{
   if (c->Debug & RC_DBG_LOG) {
      fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
      rc_print_program(&c->Program);
   }

   if (rc_run_compiler_passes(c, list)) {
      print_stats(c);
   }
}

/**
 * Final validation pass: raise a compiler error if the program uses more
 * constants than c->max_constants.
 *
 * \param user  unused
 */
void
rc_validate_final_shader(struct radeon_compiler *c, void *user)
{
   /* Check the number of constants. */
   if (c->Program.Constants.Count > c->max_constants) {
      rc_error(c, "Too many constants. Max: %i, Got: %i\n", c->max_constants,
               c->Program.Constants.Count);
   }
}