1 /*
2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "agx_compiler.h"
25 #include "agx_builder.h"
26
27 /* Trivial register allocator that never frees anything.
28 *
29 * TODO: Write a real register allocator.
30 * TODO: Handle phi nodes.
31 */
32
33 /** Returns number of registers written by an instruction */
34 static unsigned
agx_write_registers(agx_instr * I,unsigned d)35 agx_write_registers(agx_instr *I, unsigned d)
36 {
37 unsigned size = I->dest[d].size == AGX_SIZE_32 ? 2 : 1;
38
39 switch (I->op) {
40 case AGX_OPCODE_LD_VARY:
41 case AGX_OPCODE_DEVICE_LOAD:
42 case AGX_OPCODE_TEXTURE_SAMPLE:
43 case AGX_OPCODE_LD_TILE:
44 return 8;
45 case AGX_OPCODE_LD_VARY_FLAT:
46 return 6;
47 case AGX_OPCODE_P_COMBINE:
48 {
49 unsigned components = 0;
50
51 for (unsigned i = 0; i < 4; ++i) {
52 if (!agx_is_null(I->src[i]))
53 components = i + 1;
54 }
55
56 return components * size;
57 }
58 default:
59 return size;
60 }
61 }
62
63 static unsigned
agx_assign_regs(BITSET_WORD * used_regs,unsigned count,unsigned align,unsigned max)64 agx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max)
65 {
66 for (unsigned reg = 0; reg < max; reg += align) {
67 bool conflict = false;
68
69 for (unsigned j = 0; j < count; ++j)
70 conflict |= BITSET_TEST(used_regs, reg + j);
71
72 if (!conflict) {
73 for (unsigned j = 0; j < count; ++j)
74 BITSET_SET(used_regs, reg + j);
75
76 return reg;
77 }
78 }
79
80 /* Couldn't find a free register, dump the state of the register file */
81 fprintf(stderr, "Failed to find register of size %u aligned %u max %u.\n",
82 count, align, max);
83
84 fprintf(stderr, "Register file:\n");
85 for (unsigned i = 0; i < BITSET_WORDS(max); ++i)
86 fprintf(stderr, " %08X\n", used_regs[i]);
87
88 unreachable("Could not find a free register");
89 }
90
91 /** Assign registers to SSA values in a block. */
92
93 static void
agx_ra_assign_local(agx_block * block,uint8_t * ssa_to_reg,uint8_t * ncomps,unsigned max_reg)94 agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps, unsigned max_reg)
95 {
96 BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };
97
98 agx_foreach_predecessor(block, pred) {
99 for (unsigned i = 0; i < BITSET_WORDS(AGX_NUM_REGS); ++i)
100 used_regs[i] |= pred->regs_out[i];
101 }
102
103 BITSET_SET(used_regs, 0); // control flow writes r0l
104 BITSET_SET(used_regs, 5*2); // TODO: precolouring, don't overwrite vertex ID
105 BITSET_SET(used_regs, (5*2 + 1));
106 BITSET_SET(used_regs, (6*2 + 0));
107 BITSET_SET(used_regs, (6*2 + 1));
108
109 agx_foreach_instr_in_block(block, I) {
110 /* First, free killed sources */
111 agx_foreach_src(I, s) {
112 if (I->src[s].type == AGX_INDEX_NORMAL && I->src[s].kill) {
113 unsigned reg = ssa_to_reg[I->src[s].value];
114 unsigned count = ncomps[I->src[s].value];
115
116 for (unsigned i = 0; i < count; ++i)
117 BITSET_CLEAR(used_regs, reg + i);
118 }
119 }
120
121 /* Next, assign destinations. Always legal in SSA form. */
122 agx_foreach_dest(I, d) {
123 if (I->dest[d].type == AGX_INDEX_NORMAL) {
124 unsigned count = agx_write_registers(I, d);
125 unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2;
126 unsigned reg = agx_assign_regs(used_regs, count, align, max_reg);
127
128 ssa_to_reg[I->dest[d].value] = reg;
129 }
130 }
131 }
132
133 STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));
134 memcpy(block->regs_out, used_regs, sizeof(used_regs));
135 }
136
137 void
agx_ra(agx_context * ctx)138 agx_ra(agx_context *ctx)
139 {
140 unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned));
141
142 agx_compute_liveness(ctx);
143 uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
144 uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t));
145
146 agx_foreach_instr_global(ctx, I) {
147 agx_foreach_dest(I, d) {
148 if (I->dest[d].type != AGX_INDEX_NORMAL) continue;
149
150 unsigned v = I->dest[d].value;
151 assert(ncomps[v] == 0 && "broken SSA");
152 ncomps[v] = agx_write_registers(I, d);
153 }
154 }
155
156 agx_foreach_block(ctx, block)
157 agx_ra_assign_local(block, ssa_to_reg, ncomps, ctx->max_register);
158
159 /* TODO: Coalesce combines */
160
161 agx_foreach_instr_global_safe(ctx, ins) {
162 /* Lower away RA pseudo-instructions */
163 if (ins->op == AGX_OPCODE_P_COMBINE) {
164 /* TODO: Optimize out the moves! */
165 assert(ins->dest[0].type == AGX_INDEX_NORMAL);
166 enum agx_size common_size = ins->dest[0].size;
167 unsigned base = ssa_to_reg[ins->dest[0].value];
168 unsigned size = common_size == AGX_SIZE_32 ? 2 : 1;
169
170 /* Move the sources */
171 agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
172
173 /* TODO: Eliminate the intermediate copy by handling parallel copies */
174 for (unsigned i = 0; i < 4; ++i) {
175 if (agx_is_null(ins->src[i])) continue;
176 unsigned base = ins->src[i].value;
177 if (ins->src[i].type == AGX_INDEX_NORMAL)
178 base = ssa_to_reg[base];
179 else
180 assert(ins->src[i].type == AGX_INDEX_REGISTER);
181
182 assert(ins->src[i].size == common_size);
183
184 agx_mov_to(&b, agx_register(124*2 + (i * size), common_size),
185 agx_register(base, common_size));
186 }
187
188 for (unsigned i = 0; i < 4; ++i) {
189 if (agx_is_null(ins->src[i])) continue;
190 agx_index src = ins->src[i];
191
192 if (src.type == AGX_INDEX_NORMAL)
193 src = agx_register(alloc[src.value], src.size);
194
195 agx_mov_to(&b, agx_register(base + (i * size), common_size),
196 agx_register(124*2 + (i * size), common_size));
197 }
198
199 /* We've lowered away, delete the old */
200 agx_remove_instruction(ins);
201 continue;
202 } else if (ins->op == AGX_OPCODE_P_EXTRACT) {
203 /* Uses the destination size */
204 assert(ins->dest[0].type == AGX_INDEX_NORMAL);
205 unsigned base = ins->src[0].value;
206
207 if (ins->src[0].type != AGX_INDEX_REGISTER) {
208 assert(ins->src[0].type == AGX_INDEX_NORMAL);
209 base = alloc[base];
210 }
211
212 unsigned size = ins->dest[0].size == AGX_SIZE_64 ? 4 : ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;
213 unsigned left = ssa_to_reg[ins->dest[0].value];
214 unsigned right = ssa_to_reg[ins->src[0].value] + (size * ins->imm);
215
216 if (left != right) {
217 agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
218 agx_mov_to(&b, agx_register(left, ins->dest[0].size),
219 agx_register(right, ins->src[0].size));
220 }
221
222 agx_remove_instruction(ins);
223 continue;
224 }
225
226 agx_foreach_src(ins, s) {
227 if (ins->src[s].type == AGX_INDEX_NORMAL) {
228 unsigned v = ssa_to_reg[ins->src[s].value];
229 ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size));
230 }
231 }
232
233 agx_foreach_dest(ins, d) {
234 if (ins->dest[d].type == AGX_INDEX_NORMAL) {
235 unsigned v = ssa_to_reg[ins->dest[d].value];
236 ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
237 }
238 }
239 }
240
241 free(ssa_to_reg);
242 free(ncomps);
243 free(alloc);
244 }
245