/*
 * Copyright (C) 2014 Rob Clark
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark
 */

#include "util/u_math.h"
#include "util/register_allocate.h"
#include "util/ralloc.h"
#include "util/bitset.h"

#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_ra.h"

static void
setup_conflicts(struct ir3_ra_reg_set *set)
{
   unsigned reg;

   reg = 0;
   for (unsigned i = 0; i < class_count; i++) {
      for (unsigned j = 0; j < CLASS_REGS(i); j++) {
         for (unsigned br = j; br < j + class_sizes[i]; br++) {
            ra_add_transitive_reg_conflict(set->regs, br, reg);
         }

         reg++;
      }
   }

   for (unsigned i = 0; i < half_class_count; i++) {
      for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
         for (unsigned br = j; br < j + half_class_sizes[i]; br++) {
            ra_add_transitive_reg_conflict(set->regs,
                  br + set->first_half_reg, reg);
         }

         reg++;
      }
   }

   for (unsigned i = 0; i < high_class_count; i++) {
      for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
         for (unsigned br = j; br < j + high_class_sizes[i]; br++) {
            ra_add_transitive_reg_conflict(set->regs,
                  br + set->first_high_reg, reg);
         }

         reg++;
      }
   }

   /*
    * Setup conflicts with registers over 0x3f for the special vreg
    * that exists to use as interference for tex-prefetch:
    */
   for (unsigned i = 0x40; i < CLASS_REGS(0); i++) {
      ra_add_transitive_reg_conflict(set->regs, i,
            set->prefetch_exclude_reg);
   }

   for (unsigned i = 0x40; i < HALF_CLASS_REGS(0); i++) {
      ra_add_transitive_reg_conflict(set->regs, i + set->first_half_reg,
            set->prefetch_exclude_reg);
   }
}

/* One-time setup of RA register-set, which describes all the possible
 * "virtual" registers and their interferences.  Ie. a double register
 * occupies (and conflicts with) two single registers, and so forth.
 *
 * Since registers do not need to be aligned to their class size, they
 * can conflict with other registers in the same class too.  Ie:
 *
 *    Single (base) |  Double
 *    --------------+---------------
 *       R0         |  D0
 *       R1         |  D0 D1
 *       R2         |  D1 D2
 *       R3         |  D2
 *           .. and so on..
 *
 * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
 * really just four scalar registers.  Don't let that confuse you.)
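 *
 * For example, reading the table above: D1 occupies singles R1 and R2,
 * so it conflicts with both of them, and thereby (via the transitive
 * conflicts built up in setup_conflicts() above) also with the
 * overlapping doubles D0 and D2.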
 */
struct ir3_ra_reg_set *
ir3_ra_alloc_reg_set(struct ir3_compiler *compiler, bool mergedregs)
{
   struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
   unsigned ra_reg_count, reg, base;

   /* calculate # of regs across all classes: */
   ra_reg_count = 0;
   for (unsigned i = 0; i < class_count; i++)
      ra_reg_count += CLASS_REGS(i);
   for (unsigned i = 0; i < half_class_count; i++)
      ra_reg_count += HALF_CLASS_REGS(i);
   for (unsigned i = 0; i < high_class_count; i++)
      ra_reg_count += HIGH_CLASS_REGS(i);

   ra_reg_count += 1;   /* for tex-prefetch excludes */

   /* allocate the reg-set.. */
   set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
   set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
   set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);

   /* .. and classes */
   reg = 0;
   for (unsigned i = 0; i < class_count; i++) {
      set->classes[i] = ra_alloc_reg_class(set->regs);

      set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));

      for (unsigned j = 0; j < CLASS_REGS(i); j++) {
         ra_class_add_reg(set->regs, set->classes[i], reg);

         set->ra_reg_to_gpr[reg] = j;
         set->gpr_to_ra_reg[i][j] = reg;

         reg++;
      }
   }

   set->first_half_reg = reg;
   base = HALF_OFFSET;

   for (unsigned i = 0; i < half_class_count; i++) {
      set->half_classes[i] = ra_alloc_reg_class(set->regs);

      set->gpr_to_ra_reg[base + i] =
            ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));

      for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
         ra_class_add_reg(set->regs, set->half_classes[i], reg);

         set->ra_reg_to_gpr[reg] = j;
         set->gpr_to_ra_reg[base + i][j] = reg;

         reg++;
      }
   }

   set->first_high_reg = reg;
   base = HIGH_OFFSET;

   for (unsigned i = 0; i < high_class_count; i++) {
      set->high_classes[i] = ra_alloc_reg_class(set->regs);

      set->gpr_to_ra_reg[base + i] =
            ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));

      for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
         ra_class_add_reg(set->regs, set->high_classes[i], reg);

         set->ra_reg_to_gpr[reg] = j;
         set->gpr_to_ra_reg[base + i][j] = reg;

         reg++;
      }
   }

   /*
    * Setup an additional class, with one vreg, to simply conflict
    * with registers that are too high to encode tex-prefetch.  This
    * vreg is only used to setup additional conflicts so that RA
    * knows to allocate prefetch dst regs below the limit:
    */
   set->prefetch_exclude_class = ra_alloc_reg_class(set->regs);
   ra_class_add_reg(set->regs, set->prefetch_exclude_class, reg);
   set->prefetch_exclude_reg = reg++;

   /*
    * And finally setup conflicts.  Starting a6xx, half precision regs
    * conflict w/ full precision regs (when using MERGEDREGS):
    */
   if (mergedregs) {
      for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
         unsigned freg  = set->gpr_to_ra_reg[0][i];
         unsigned hreg0 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 0];
         unsigned hreg1 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 1];

         ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
      }
   }

   setup_conflicts(set);

   ra_set_finalize(set->regs, NULL);

   return set;
}
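
/* Map a value size (number of consecutive scalar regs) to the smallest
 * RA class that can hold it, in the full, half, or "high" register file.
 * The returned index is offset by HALF_OFFSET/HIGH_OFFSET as appropriate,
 * so it can be handed back to ra_class_to_size() below.
 */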
int
ra_size_to_class(unsigned sz, bool half, bool high)
{
   if (high) {
      for (unsigned i = 0; i < high_class_count; i++)
         if (high_class_sizes[i] >= sz)
            return i + HIGH_OFFSET;
   } else if (half) {
      for (unsigned i = 0; i < half_class_count; i++)
         if (half_class_sizes[i] >= sz)
            return i + HALF_OFFSET;
   } else {
      for (unsigned i = 0; i < class_count; i++)
         if (class_sizes[i] >= sz)
            return i;
   }
   debug_assert(0);
   return -1;
}

/* Companion to ra_size_to_class(): return the size of the given class,
 * and whether it is a half or high class.
 */
int
ra_class_to_size(unsigned class, bool *half, bool *high)
{
   *half = *high = false;

   if (class >= HIGH_OFFSET) {
      *high = true;
      return high_class_sizes[class - HIGH_OFFSET];
   } else if (class >= HALF_OFFSET) {
      *half = true;
      return half_class_sizes[class - HALF_OFFSET];
   } else {
      return class_sizes[class];
   }
}
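
/*
 * A minimal sketch of how the two helpers above relate (the guard macro
 * and function name here are hypothetical and not part of the driver
 * build): ra_class_to_size() recovers the register file and an
 * allocation size at least as large as the one that was passed to
 * ra_size_to_class().
 */
#ifdef IR3_RA_CLASS_SELFTEST
static void
ir3_ra_class_selftest(void)
{
   bool half, high;

   /* smallest full-precision class that can hold two scalar regs: */
   int cls = ra_size_to_class(2, false, false);
   debug_assert(cls >= 0);

   /* mapping back gives the same register file and a size >= 2: */
   debug_assert(ra_class_to_size(cls, &half, &high) >= 2);
   debug_assert(!half && !high);
}
#endif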