/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_math.h"
#include "util/register_allocate.h"
#include "util/ralloc.h"
#include "util/bitset.h"

#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_ra.h"

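/* Mark each register of every class as conflicting with the base (scalar)
 * registers it covers.  Since the conflicts are transitive, any two
 * registers that overlap the same scalar end up conflicting with each
 * other as well.  The last two loops additionally mark scalar registers
 * above 0x3f as conflicting with the special vreg used to constrain
 * tex-prefetch destinations (see below):
 */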
static void
setup_conflicts(struct ir3_ra_reg_set *set)
{
	unsigned reg;

	reg = 0;
	for (unsigned i = 0; i < class_count; i++) {
		for (unsigned j = 0; j < CLASS_REGS(i); j++) {
			for (unsigned br = j; br < j + class_sizes[i]; br++) {
				ra_add_transitive_reg_conflict(set->regs, br, reg);
			}

			reg++;
		}
	}

	for (unsigned i = 0; i < half_class_count; i++) {
		for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
			for (unsigned br = j; br < j + half_class_sizes[i]; br++) {
				ra_add_transitive_reg_conflict(set->regs,
						br + set->first_half_reg, reg);
			}

			reg++;
		}
	}

	for (unsigned i = 0; i < high_class_count; i++) {
		for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
			for (unsigned br = j; br < j + high_class_sizes[i]; br++) {
				ra_add_transitive_reg_conflict(set->regs,
						br + set->first_high_reg, reg);
			}

			reg++;
		}
	}

	/*
	 * Setup conflicts with registers over 0x3f for the special vreg
	 * that exists only to add interference for tex-prefetch:
	 */

	for (unsigned i = 0x40; i < CLASS_REGS(0); i++) {
		ra_add_transitive_reg_conflict(set->regs, i,
				set->prefetch_exclude_reg);
	}

	for (unsigned i = 0x40; i < HALF_CLASS_REGS(0); i++) {
		ra_add_transitive_reg_conflict(set->regs, i + set->first_half_reg,
				set->prefetch_exclude_reg);
	}
}

/* One-time setup of RA register-set, which describes all the possible
 * "virtual" registers and their interferences.  I.e. a double register
 * occupies (and conflicts with) two single registers, and so forth.
 * Since registers do not need to be aligned to their class size, they
 * can also conflict with other registers in the same class.  I.e.:
 *
 *    Single (base) |  Double
 *    --------------+---------------
 *       R0         |  D0
 *       R1         |  D0 D1
 *       R2         |     D1 D2
 *       R3         |        D2
 *           .. and so on..
 *
 * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
 * really just four scalar registers.  Don't let that confuse you.)
 */
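/* A minimal usage sketch (the compiler-struct field names below are
 * illustrative assumptions, not necessarily the actual ir3_compiler
 * members): the set is built once at compiler creation and then shared
 * by every shader that goes through RA:
 *
 *    compiler->set = ir3_ra_alloc_reg_set(compiler, false);
 *    compiler->mergedregs_set = ir3_ra_alloc_reg_set(compiler, true);
 */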
struct ir3_ra_reg_set *
ir3_ra_alloc_reg_set(struct ir3_compiler *compiler, bool mergedregs)
{
	struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
	unsigned ra_reg_count, reg, base;

	/* calculate # of regs across all classes: */
	ra_reg_count = 0;
	for (unsigned i = 0; i < class_count; i++)
		ra_reg_count += CLASS_REGS(i);
	for (unsigned i = 0; i < half_class_count; i++)
		ra_reg_count += HALF_CLASS_REGS(i);
	for (unsigned i = 0; i < high_class_count; i++)
		ra_reg_count += HIGH_CLASS_REGS(i);

	ra_reg_count += 1;   /* for tex-prefetch excludes */

	/* allocate the reg-set.. */
	set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
	set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
	set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);

	/* .. and classes */
	reg = 0;
	for (unsigned i = 0; i < class_count; i++) {
		set->classes[i] = ra_alloc_reg_class(set->regs);

		set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));

		for (unsigned j = 0; j < CLASS_REGS(i); j++) {
			ra_class_add_reg(set->regs, set->classes[i], reg);

			set->ra_reg_to_gpr[reg] = j;
			set->gpr_to_ra_reg[i][j] = reg;

			reg++;
		}
	}

	set->first_half_reg = reg;
	base = HALF_OFFSET;

	for (unsigned i = 0; i < half_class_count; i++) {
		set->half_classes[i] = ra_alloc_reg_class(set->regs);

		set->gpr_to_ra_reg[base + i] =
				ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));

		for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
			ra_class_add_reg(set->regs, set->half_classes[i], reg);

			set->ra_reg_to_gpr[reg] = j;
			set->gpr_to_ra_reg[base + i][j] = reg;

			reg++;
		}
	}

	set->first_high_reg = reg;
	base = HIGH_OFFSET;

	for (unsigned i = 0; i < high_class_count; i++) {
		set->high_classes[i] = ra_alloc_reg_class(set->regs);

		set->gpr_to_ra_reg[base + i] =
				ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));

		for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
			ra_class_add_reg(set->regs, set->high_classes[i], reg);

			set->ra_reg_to_gpr[reg] = j;
			set->gpr_to_ra_reg[base + i][j] = reg;

			reg++;
		}
	}

	/*
	 * Setup an additional class, with one vreg, to simply conflict
	 * with registers that are too high to encode tex-prefetch.  This
	 * vreg is only used to setup additional conflicts so that RA
	 * knows to allocate prefetch dst regs below the limit:
	 */
	set->prefetch_exclude_class = ra_alloc_reg_class(set->regs);
	ra_class_add_reg(set->regs, set->prefetch_exclude_class, reg);
	set->prefetch_exclude_reg = reg++;

	/*
	 * And finally setup conflicts.  Starting with a6xx, half precision
	 * regs conflict w/ full precision regs (when using MERGEDREGS):
	 */
	if (mergedregs) {
		for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
			unsigned freg  = set->gpr_to_ra_reg[0][i];
			unsigned hreg0 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 0];
			unsigned hreg1 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 1];

			ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
		}
	}

	setup_conflicts(set);

	ra_set_finalize(set->regs, NULL);

	return set;
}

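/* Map a value size (in scalar components) to the smallest register class
 * that can hold it, within the requested register file (full, half, or
 * "high" registers).  The returned index is offset by HALF_OFFSET /
 * HIGH_OFFSET, so it can be used directly with gpr_to_ra_reg[] and
 * ra_class_to_size():
 */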
int
ra_size_to_class(unsigned sz, bool half, bool high)
{
	if (high) {
		for (unsigned i = 0; i < high_class_count; i++)
			if (high_class_sizes[i] >= sz)
				return i + HIGH_OFFSET;
	} else if (half) {
		for (unsigned i = 0; i < half_class_count; i++)
			if (half_class_sizes[i] >= sz)
				return i + HALF_OFFSET;
	} else {
		for (unsigned i = 0; i < class_count; i++)
			if (class_sizes[i] >= sz)
				return i;
	}
	debug_assert(0);
	return -1;
}

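/* Inverse of ra_size_to_class(): given a global class index, return the
 * class size, and report via *half / *high which register file the class
 * belongs to:
 */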
int
ra_class_to_size(unsigned class, bool *half, bool *high)
{
	*half = *high = false;

	if (class >= HIGH_OFFSET) {
		*high = true;
		return high_class_sizes[class - HIGH_OFFSET];
	} else if (class >= HALF_OFFSET) {
		*half = true;
		return half_class_sizes[class - HALF_OFFSET];
	} else {
		return class_sizes[class];
	}
}