/*
 * Copyright (C) 2022 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"
#include "agx_builder.h"

/*
 * Emits code for
 *
 *    for (int i = 0; i < n; ++i)
 *       registers[dests[i]] = registers[srcs[i]];
 *
 * ...with all copies happening in parallel.
 *
 * That is, emit machine instructions equivalent to a parallel copy. This is
 * used to lower not only parallel copies but also collects and splits, which
 * also have parallel copy semantics.
 *
 * We only handle register-register copies, not general agx_index sources. This
 * suffices for the pass's internal use in register allocation.
 */
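
/*
 * For illustration (an example added here, not part of the lowering itself):
 * the parallel semantics matter when sources and destinations overlap. Given
 * the two copies
 *
 *    registers[0] = registers[1];
 *    registers[1] = registers[0];
 *
 * emitting the moves in order would overwrite register 0 before it is read,
 * leaving both registers holding the old value of register 1. The parallel
 * interpretation instead swaps the two registers, which we realize below with
 * an XOR swap.
 */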

static void
do_copy(agx_builder *b, const struct agx_copy *copy)
{
   agx_mov_to(b, agx_register(copy->dest, copy->size),
              agx_register(copy->src, copy->size));
}

static void
do_swap(agx_builder *b, const struct agx_copy *copy)
{
   if (copy->dest == copy->src)
      return;

   agx_index x = agx_register(copy->dest, copy->size);
   agx_index y = agx_register(copy->src, copy->size);

   /* Swap x and y with the three-XOR trick, which needs no scratch register:
    * after these instructions, x holds the old y and vice versa.
    */
   agx_xor_to(b, x, x, y);
   agx_xor_to(b, y, x, y);
   agx_xor_to(b, x, x, y);
}

struct copy_ctx {
   /* Number of copies being processed */
   unsigned entry_count;

   /* For each physreg, the number of pending copy entries that use it as a
    * source. Once this drops to zero, the physreg is unblocked and can be
    * moved to.
    */
   unsigned physreg_use_count[AGX_NUM_REGS];

   /* For each physreg, the pending copy_entry that uses it as a dest. */
   struct agx_copy *physreg_dest[AGX_NUM_REGS];

   struct agx_copy entries[AGX_NUM_REGS];
};

static bool
entry_blocked(struct agx_copy *entry, struct copy_ctx *ctx)
{
   for (unsigned i = 0; i < agx_size_align_16(entry->size); i++) {
      if (ctx->physreg_use_count[entry->dest + i] != 0)
         return true;
   }

   return false;
}

static bool
is_real(struct agx_copy *entry)
{
   /* TODO: Allow immediates in agx_copy */
   return true;
}

/* TODO: Generalize to other bit sizes */
static void
split_32bit_copy(struct copy_ctx *ctx, struct agx_copy *entry)
{
   assert(!entry->done);
   assert(is_real(entry));
   assert(agx_size_align_16(entry->size) == 2);
   struct agx_copy *new_entry = &ctx->entries[ctx->entry_count++];

   new_entry->dest = entry->dest + 1;
   new_entry->src = entry->src + 1;
   new_entry->done = false;
   entry->size = AGX_SIZE_16;
   new_entry->size = AGX_SIZE_16;
   ctx->physreg_dest[entry->dest + 1] = new_entry;
}
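
/*
 * For illustration: physregs are indexed in 16-bit units, so a single 32-bit
 * copy {dest = 4, src = 2} occupies registers {4, 5} and {2, 3}. Splitting it
 * yields two independent 16-bit copies, {dest = 4, src = 2} and
 * {dest = 5, src = 3}, which can then make progress separately.
 */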

void
agx_emit_parallel_copies(agx_builder *b,
                         struct agx_copy *copies,
                         unsigned num_copies)
{
   struct copy_ctx _ctx = {
      .entry_count = num_copies
   };

   struct copy_ctx *ctx = &_ctx;

   /* Set up the bookkeeping */
   memset(ctx->physreg_dest, 0, sizeof(ctx->physreg_dest));
   memset(ctx->physreg_use_count, 0, sizeof(ctx->physreg_use_count));

   for (unsigned i = 0; i < ctx->entry_count; i++) {
      struct agx_copy *entry = &copies[i];

      ctx->entries[i] = *entry;

      for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) {
         if (is_real(entry))
            ctx->physreg_use_count[entry->src + j]++;

         /* Copies should not have overlapping destinations. */
         assert(!ctx->physreg_dest[entry->dest + j]);
         ctx->physreg_dest[entry->dest + j] = entry;
      }
   }

   bool progress = true;
   while (progress) {
      progress = false;

      /* Step 1: resolve paths in the transfer graph. This means finding
       * copies whose destinations aren't blocked by something else and then
       * emitting them, continuing this process until every copy is blocked
       * and there are only cycles left.
       *
       * TODO: We should note that src is also available in dest to unblock
       * cycles that src is involved in.
       */
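
      /* As a worked example (illustrative only): with pending copies
       * {1 <- 0} and {2 <- 1}, the copy into register 2 is unblocked, since
       * no pending copy still reads register 2. Emitting it drops register
       * 1's use count to zero, which in turn unblocks {1 <- 0}.
       */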

      for (unsigned i = 0; i < ctx->entry_count; i++) {
         struct agx_copy *entry = &ctx->entries[i];
         if (!entry->done && !entry_blocked(entry, ctx)) {
            entry->done = true;
            progress = true;
            do_copy(b, entry);
            for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) {
               if (is_real(entry))
                  ctx->physreg_use_count[entry->src + j]--;
               ctx->physreg_dest[entry->dest + j] = NULL;
            }
         }
      }

      if (progress)
         continue;

      /* Step 2: Find partially blocked copies and split them. In the
       * mergedregs case, we can have 32-bit copies that are only blocked on
       * one 16-bit half, and splitting them helps get things moving.
       *
       * We can skip splitting copies if the source isn't a register,
       * however, because it does not unblock anything and therefore doesn't
       * contribute to making forward progress with step 1. These copies
       * should still be resolved eventually in step 1 because they can't be
       * part of a cycle.
       */
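      /* For illustration: if a pending 32-bit copy is {4, 5} <- {2, 3} and
       * some other unresolved copy still reads register 4 but nothing reads
       * register 5, the copy is stuck as a whole. After splitting, the
       * 16-bit half {5 <- 3} can be emitted by step 1 while {4 <- 2} waits.
       */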
      for (unsigned i = 0; i < ctx->entry_count; i++) {
         struct agx_copy *entry = &ctx->entries[i];
         if (entry->done || (agx_size_align_16(entry->size) != 2))
            continue;

         if ((ctx->physreg_use_count[entry->dest] == 0 ||
              ctx->physreg_use_count[entry->dest + 1] == 0) &&
             is_real(entry)) {
            split_32bit_copy(ctx, entry);
            progress = true;
         }
      }
   }

   /* Step 3: resolve cycles through swapping.
    *
    * At this point, the transfer graph should consist of only cycles.
    * The reason is that, given any physreg n_1 that's the source of a
    * remaining entry, it has a destination n_2, which (because every
    * copy is blocked) is the source of some other copy whose destination
    * is n_3, and so we can follow the chain until we get a cycle. If we
    * reached some other node than n_1:
    *
    * n_1 -> n_2 -> ... -> n_i
    *        ^             |
    *        |-------------|
    *
    * then n_2 would be the destination of 2 copies, which is illegal
    * (checked above in an assert). So n_1 must be part of a cycle:
    *
    * n_1 -> n_2 -> ... -> n_i
    * ^                    |
    * |--------------------|
    *
    * and this must be the only cycle n_1 is involved in, because any other
    * path starting from n_1 would also have to end in n_1, resulting in
    * a node somewhere along the way being the destination of 2 copies
    * when the 2 paths merge.
    *
    * The way we resolve the cycle is through picking a copy (n_1, n_2)
    * and swapping n_1 and n_2. This moves n_1 to n_2, so n_2 is taken
    * out of the cycle:
    *
    * n_1 -> ... -> n_i
    * ^             |
    * |-------------|
    *
    * and we can keep repeating this until the cycle is empty.
    */
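
   /* As a concrete example (illustrative only): for the 3-cycle
    * {1 <- 0, 2 <- 1, 0 <- 2}, swapping registers 0 and 1 satisfies
    * {1 <- 0} outright and leaves register 1's old value in register 0, so
    * the remaining copies are rewritten to {2 <- 0, 0 <- 2}. One further
    * swap of registers 0 and 2 finishes the whole cycle.
    */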

   for (unsigned i = 0; i < ctx->entry_count; i++) {
      struct agx_copy *entry = &ctx->entries[i];
      if (entry->done)
         continue;

      assert(is_real(entry));

      /* Catch trivial copies */
      if (entry->dest == entry->src) {
         entry->done = true;
         continue;
      }

      do_swap(b, entry);

      /* Split any blocking copies whose sources are only partially
       * contained within our destination.
       */
      if (agx_size_align_16(entry->size) == 1) {
         for (unsigned j = 0; j < ctx->entry_count; j++) {
            struct agx_copy *blocking = &ctx->entries[j];

            if (blocking->done)
               continue;

            if (blocking->src <= entry->dest &&
                blocking->src + 1 >= entry->dest &&
                agx_size_align_16(blocking->size) == 2) {
               split_32bit_copy(ctx, blocking);
            }
         }
      }

      /* Update sources of blocking copies.
       *
       * Note: at this point, every blocking copy's source should be
       * contained within our destination.
       */
      for (unsigned j = 0; j < ctx->entry_count; j++) {
         struct agx_copy *blocking = &ctx->entries[j];
         if (blocking->src >= entry->dest &&
             blocking->src < entry->dest + agx_size_align_16(entry->size)) {
            blocking->src = entry->src + (blocking->src - entry->dest);
         }
      }

      entry->done = true;
   }
}
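
/*
 * Usage sketch (illustrative, not from the original source): lowering a
 * parallel swap of two 16-bit registers, given an agx_builder "b" whose
 * cursor is already positioned where the copies should be emitted.
 *
 *    struct agx_copy copies[] = {
 *       { .dest = 0, .src = 1, .size = AGX_SIZE_16 },
 *       { .dest = 1, .src = 0, .size = AGX_SIZE_16 },
 *    };
 *
 *    agx_emit_parallel_copies(b, copies, 2);
 *
 * Neither copy can be emitted first without clobbering the other's source,
 * so steps 1 and 2 make no progress and step 3 emits a single three-XOR
 * swap of registers 0 and 1.
 */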