/*
 * Copyright (C) 2022 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"
#include "agx_builder.h"

/*
 * Emits code for
 *
 *    for (int i = 0; i < n; ++i)
 *       registers[dests[i]] = registers[srcs[i]];
 *
 * ...with all copies happening in parallel.
 *
 * That is, emit machine instructions equivalent to a parallel copy. This is
 * used to lower not only parallel copies but also collects and splits, which
 * also have parallel copy semantics.
 *
 * We only handle register-register copies, not general agx_index sources;
 * this suffices for the pass's internal use in register allocation.
 */
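
/*
 * For example (register numbers hypothetical), the parallel copy
 *
 *    r0 = r1;
 *    r1 = r0;
 *
 * cannot be lowered as two sequential moves, since the first move would
 * clobber the source of the second; it is instead resolved by the swap
 * logic below.
 */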

static void
do_copy(agx_builder *b, const struct agx_copy *copy)
{
   agx_mov_to(b, agx_register(copy->dest, copy->size),
                 agx_register(copy->src, copy->size));
}

static void
do_swap(agx_builder *b, const struct agx_copy *copy)
{
   if (copy->dest == copy->src)
      return;

   agx_index x = agx_register(copy->dest, copy->size);
   agx_index y = agx_register(copy->src, copy->size);

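   /* Swap x and y in place with three XORs, avoiding a scratch register.
    * With initial values x = a and y = b, the sequence computes
    *
    *    x = a ^ b
    *    y = (a ^ b) ^ b = a
    *    x = (a ^ b) ^ a = b
    */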
   agx_xor_to(b, x, x, y);
   agx_xor_to(b, y, x, y);
   agx_xor_to(b, x, x, y);
}

struct copy_ctx {
   /* Number of copies being processed */
   unsigned entry_count;

   /* For each physreg, the number of pending copy entries that use it as a
    * source. Once this drops to zero, then the physreg is unblocked and can
    * be moved to.
    */
   unsigned physreg_use_count[AGX_NUM_REGS];

   /* For each physreg, the pending copy_entry that uses it as a dest. */
   struct agx_copy *physreg_dest[AGX_NUM_REGS];

   struct agx_copy entries[AGX_NUM_REGS];
};
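
/*
 * For illustration (register numbers hypothetical): given the 16-bit copy
 * set {r0 <- r1, r1 <- r2}, physreg_use_count is 1 for r1 and r2 and 0
 * everywhere else, while physreg_dest[r0] and physreg_dest[r1] point at the
 * two entries. Since r0 has a use count of 0, the copy into r0 is unblocked
 * and can be emitted immediately, which in turn unblocks the copy into r1.
 */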

static bool
entry_blocked(struct agx_copy *entry, struct copy_ctx *ctx)
{
   for (unsigned i = 0; i < agx_size_align_16(entry->size); i++) {
      if (ctx->physreg_use_count[entry->dest + i] != 0)
         return true;
   }

   return false;
}

static bool
is_real(struct agx_copy *entry)
{
   /* TODO: Allow immediates in agx_copy */
   return true;
}

/* TODO: Generalize to other bit sizes */
static void
split_32bit_copy(struct copy_ctx *ctx, struct agx_copy *entry)
{
   assert(!entry->done);
   assert(is_real(entry));
   assert(agx_size_align_16(entry->size) == 2);
   struct agx_copy *new_entry = &ctx->entries[ctx->entry_count++];

   new_entry->dest = entry->dest + 1;
   new_entry->src = entry->src + 1;
   new_entry->done = false;
   entry->size = AGX_SIZE_16;
   new_entry->size = AGX_SIZE_16;
   ctx->physreg_dest[entry->dest + 1] = new_entry;
}
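
/*
 * For example (register numbers hypothetical), a 32-bit copy spanning two
 * 16-bit halves,
 *
 *    r0:r1 <- r2:r3
 *
 * that is blocked only because r0 is still a pending source splits into the
 * 16-bit copies r0 <- r2 and r1 <- r3; the second half is then unblocked
 * and can make progress.
 */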

void
agx_emit_parallel_copies(agx_builder *b,
                         struct agx_copy *copies,
                         unsigned num_copies)
{
   struct copy_ctx _ctx = {
      .entry_count = num_copies
   };

   struct copy_ctx *ctx = &_ctx;

   /* Set up the bookkeeping */
   memset(ctx->physreg_dest, 0, sizeof(ctx->physreg_dest));
   memset(ctx->physreg_use_count, 0, sizeof(ctx->physreg_use_count));

   for (unsigned i = 0; i < ctx->entry_count; i++) {
      struct agx_copy *entry = &copies[i];

      ctx->entries[i] = *entry;

      for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) {
         if (is_real(entry))
            ctx->physreg_use_count[entry->src + j]++;

         /* Copies should not have overlapping destinations. */
         assert(!ctx->physreg_dest[entry->dest + j]);
         ctx->physreg_dest[entry->dest + j] = entry;
      }
   }

   bool progress = true;
   while (progress) {
      progress = false;

      /* Step 1: resolve paths in the transfer graph. This means finding
       * copies whose destinations aren't blocked by something else and then
       * emitting them, continuing this process until every copy is blocked
       * and there are only cycles left.
       *
       * TODO: We should note that src is also available in dest to unblock
       * cycles that src is involved in.
       */
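
      /* For example (hypothetical registers), with the copies
       *
       *    r0 <- r1, r1 <- r2
       *
       * the copy into r0 is unblocked (r0 is not a pending source) and is
       * emitted first; that drops r1's use count to zero and unblocks the
       * copy into r1.
       */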

      for (unsigned i = 0; i < ctx->entry_count; i++) {
         struct agx_copy *entry = &ctx->entries[i];
         if (!entry->done && !entry_blocked(entry, ctx)) {
            entry->done = true;
            progress = true;
            do_copy(b, entry);
            for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) {
               if (is_real(entry))
                  ctx->physreg_use_count[entry->src + j]--;
               ctx->physreg_dest[entry->dest + j] = NULL;
            }
         }
      }

      if (progress)
         continue;

      /* Step 2: Find partially blocked copies and split them. In the
       * mergedregs case, we can have 32-bit copies that are blocked on only
       * one 16-bit half, and splitting them helps get things moving.
       *
       * We can skip splitting copies if the source isn't a register,
       * however, because it does not unblock anything and therefore doesn't
       * contribute to making forward progress with step 1. These copies
       * should still be resolved eventually in step 1 because they can't be
       * part of a cycle.
       */
      for (unsigned i = 0; i < ctx->entry_count; i++) {
         struct agx_copy *entry = &ctx->entries[i];
         if (entry->done || (agx_size_align_16(entry->size) != 2))
            continue;

         if ((ctx->physreg_use_count[entry->dest] == 0 ||
              ctx->physreg_use_count[entry->dest + 1] == 0) &&
             is_real(entry)) {
            split_32bit_copy(ctx, entry);
            progress = true;
         }
      }
   }

   /* Step 3: resolve cycles through swapping.
    *
    * At this point, the transfer graph should consist of only cycles.
    * The reason is that, given any physreg n_1 that's the source of a
    * remaining entry, it has a destination n_2, which (because every
    * copy is blocked) is the source of some other copy whose destination
    * is n_3, and so we can follow the chain until we get a cycle. If we
    * reached some node other than n_1:
    *
    *  n_1 -> n_2 -> ... -> n_i
    *          ^             |
    *          |-------------|
    *
    *  then n_2 would be the destination of 2 copies, which is illegal
    *  (checked above in an assert). So n_1 must be part of a cycle:
    *
    *  n_1 -> n_2 -> ... -> n_i
    *  ^                     |
    *  |---------------------|
    *
    *  and this must be the only cycle n_1 is involved in, because any other
    *  path starting from n_1 would also have to end in n_1, resulting in
    *  a node somewhere along the way being the destination of 2 copies
    *  when the 2 paths merge.
    *
    *  The way we resolve the cycle is through picking a copy (n_1, n_2)
    *  and swapping n_1 and n_2. This moves n_1 to n_2, so n_2 is taken
    *  out of the cycle:
    *
    *  n_1 -> ... -> n_i
    *  ^              |
    *  |--------------|
    *
    *  and we can keep repeating this until the cycle is empty.
    */
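
   /*
    * For example (hypothetical registers), the 3-cycle
    *
    *    r0 <- r1, r1 <- r2, r2 <- r0
    *
    * is resolved by first swapping r0 and r1: r0 now holds its final value,
    * and after redirecting sources the remaining copies form the 2-cycle
    * r1 <- r2, r2 <- r1, which one more swap resolves (the leftover
    * r2 <- r2 is caught by the trivial-copy check below).
    */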

   for (unsigned i = 0; i < ctx->entry_count; i++) {
      struct agx_copy *entry = &ctx->entries[i];
      if (entry->done)
         continue;

      assert(is_real(entry));

      /* catch trivial copies */
      if (entry->dest == entry->src) {
         entry->done = true;
         continue;
      }

      do_swap(b, entry);

      /* Split any blocking copies whose sources are only partially
       * contained within our destination.
       */
      if (agx_size_align_16(entry->size) == 1) {
         for (unsigned j = 0; j < ctx->entry_count; j++) {
            struct agx_copy *blocking = &ctx->entries[j];

            if (blocking->done)
               continue;

            if (blocking->src <= entry->dest &&
                blocking->src + 1 >= entry->dest &&
                agx_size_align_16(blocking->size) == 2) {
               split_32bit_copy(ctx, blocking);
            }
         }
      }

      /* Update sources of blocking copies.
       *
       * Note: at this point, every blocking copy's source should be
       * contained within our destination.
       */
      for (unsigned j = 0; j < ctx->entry_count; j++) {
         struct agx_copy *blocking = &ctx->entries[j];
         if (blocking->src >= entry->dest &&
             blocking->src < entry->dest + agx_size_align_16(entry->size)) {
            blocking->src = entry->src + (blocking->src - entry->dest);
         }
      }

      entry->done = true;
   }
}
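
/*
 * Example usage (a sketch; register numbers hypothetical, but the fields
 * are those used above):
 *
 *    struct agx_copy copies[] = {
 *       { .dest = 0, .src = 2, .size = AGX_SIZE_16 },
 *       { .dest = 2, .src = 0, .size = AGX_SIZE_16 },
 *    };
 *
 *    agx_emit_parallel_copies(b, copies, 2);
 *
 * Since the two copies form a swap cycle, this lowers to the XOR sequence
 * in do_swap rather than two sequential (and incorrect) moves.
 */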