• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef _IR3_RA_H
25 #define _IR3_RA_H
26 
27 #include "util/rb_tree.h"
28 #include "ir3.h"
29 #include "ir3_compiler.h"
30 
31 #ifdef DEBUG
32 #define RA_DEBUG (ir3_shader_debug & IR3_DBG_RAMSGS)
33 #else
34 #define RA_DEBUG 0
35 #endif
36 #define d(fmt, ...)                                                            \
37    do {                                                                        \
38       if (RA_DEBUG) {                                                          \
39          mesa_logi("RA: " fmt, ##__VA_ARGS__);                                 \
40       }                                                                        \
41    } while (0)
42 
43 #define di(instr, fmt, ...)                                                    \
44    do {                                                                        \
45       if (RA_DEBUG) {                                                          \
46          struct log_stream *stream = mesa_log_streami();                       \
47          mesa_log_stream_printf(stream, "RA: " fmt ": ", ##__VA_ARGS__);       \
48          ir3_print_instr_stream(stream, instr);                                \
49          mesa_log_stream_destroy(stream);                                      \
50       }                                                                        \
51    } while (0)
52 
53 typedef uint16_t physreg_t;
54 
55 static inline unsigned
ra_physreg_to_num(physreg_t physreg,unsigned flags)56 ra_physreg_to_num(physreg_t physreg, unsigned flags)
57 {
58    if (!(flags & IR3_REG_HALF))
59       physreg /= 2;
60    if (flags & IR3_REG_SHARED)
61       physreg += 48 * 4;
62    return physreg;
63 }
64 
65 static inline physreg_t
ra_num_to_physreg(unsigned num,unsigned flags)66 ra_num_to_physreg(unsigned num, unsigned flags)
67 {
68    if (flags & IR3_REG_SHARED)
69       num -= 48 * 4;
70    if (!(flags & IR3_REG_HALF))
71       num *= 2;
72    return num;
73 }
74 
75 static inline unsigned
ra_reg_get_num(const struct ir3_register * reg)76 ra_reg_get_num(const struct ir3_register *reg)
77 {
78    return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num;
79 }
80 
81 static inline physreg_t
ra_reg_get_physreg(const struct ir3_register * reg)82 ra_reg_get_physreg(const struct ir3_register *reg)
83 {
84    return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags);
85 }
86 
87 static inline bool
def_is_gpr(const struct ir3_register * reg)88 def_is_gpr(const struct ir3_register *reg)
89 {
90    return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0;
91 }
92 
93 /* Note: don't count undef as a source.
94  */
95 static inline bool
ra_reg_is_src(const struct ir3_register * reg)96 ra_reg_is_src(const struct ir3_register *reg)
97 {
98    return (reg->flags & IR3_REG_SSA) && reg->def && def_is_gpr(reg->def);
99 }
100 
101 static inline bool
ra_reg_is_dst(const struct ir3_register * reg)102 ra_reg_is_dst(const struct ir3_register *reg)
103 {
104    return (reg->flags & IR3_REG_SSA) && def_is_gpr(reg) &&
105           ((reg->flags & IR3_REG_ARRAY) || reg->wrmask);
106 }
107 
108 /* Iterators for sources and destinations which:
109  * - Don't include fake sources (irrelevant for RA)
110  * - Don't include non-SSA sources (immediates and constants, also irrelevant)
111  */
112 
113 #define ra_foreach_src_n(__srcreg, __n, __instr)                               \
114    foreach_src_n(__srcreg, __n, __instr)                                       \
115       if (ra_reg_is_src(__srcreg))
116 
117 #define ra_foreach_src(__srcreg, __instr)                                      \
118    ra_foreach_src_n(__srcreg, __i, __instr)
119 
120 #define ra_foreach_src_rev(__srcreg, __instr)                                  \
121    for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
122       for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0;       \
123            __i--)                                                              \
124          if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))
125 
126 #define ra_foreach_dst_n(__dstreg, __n, __instr)                               \
127    foreach_dst_n(__dstreg, __n, __instr)                                         \
128       if (ra_reg_is_dst(__dstreg))
129 
130 #define ra_foreach_dst(__dstreg, __instr)                                      \
131    ra_foreach_dst_n(__dstreg, __i, __instr)
132 
133 #define RA_HALF_SIZE     (4 * 48)
134 #define RA_FULL_SIZE     (4 * 48 * 2)
135 #define RA_SHARED_SIZE   (2 * 4 * 8)
136 #define RA_MAX_FILE_SIZE RA_FULL_SIZE
137 
138 struct ir3_liveness {
139    unsigned block_count;
140    unsigned interval_offset;
141    DECLARE_ARRAY(struct ir3_register *, definitions);
142    DECLARE_ARRAY(BITSET_WORD *, live_out);
143    DECLARE_ARRAY(BITSET_WORD *, live_in);
144 };
145 
146 struct ir3_liveness *ir3_calc_liveness(void *mem_ctx, struct ir3 *ir);
147 
148 bool ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def,
149                         struct ir3_instruction *instr);
150 
151 void ir3_create_parallel_copies(struct ir3 *ir);
152 
153 void ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir);
154 
155 void ir3_force_merge(struct ir3_register *a, struct ir3_register *b,
156                      int b_offset);
157 
158 struct ir3_pressure {
159    unsigned full, half, shared;
160 };
161 
162 void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
163                        struct ir3_pressure *max_pressure);
164 
165 bool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
166                struct ir3_liveness **live,
167                const struct ir3_pressure *limit_pressure);
168 
169 bool ir3_lower_spill(struct ir3 *ir);
170 
171 void ir3_ra_shared(struct ir3_shader_variant *v, struct ir3_liveness *live);
172 
173 void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
174                      unsigned half_size, unsigned block_count, bool shared_ra);
175 
176 void ir3_lower_copies(struct ir3_shader_variant *v);
177 
178 /* Register interval datastructure
179  *
180  * ir3_reg_ctx is used to track which registers are live. The tricky part is
181  * that some registers may overlap each other, when registers with overlapping
182  * live ranges get coalesced. For example, splits will overlap with their
183  * parent vector and sometimes collect sources will also overlap with the
184  * collect'ed vector. ir3_merge_regs guarantees for us that none of the
185  * registers in a merge set that are live at any given point partially
186  * overlap, which means that we can organize them into a forest. While each
187  * register has a per-merge-set offset, ir3_merge_regs also computes a
188  * "global" offset which allows us to throw away the original merge sets and
189  * think of registers as just intervals in a forest of live intervals. When a
190  * register becomes live, we insert it into the forest, and when it dies we
191  * remove it from the forest (and then its children get moved up a level). We
192  * use red-black trees to keep track of each level of the forest, so insertion
193  * and deletion should be fast operations. ir3_reg_ctx handles all the
194  * internal bookkeeping for this, so that it can be shared between RA,
195  * spilling, and register pressure tracking.
196  */
197 
198 struct ir3_reg_interval {
199    struct rb_node node;
200 
201    struct rb_tree children;
202 
203    struct ir3_reg_interval *parent;
204 
205    struct ir3_register *reg;
206 
207    bool inserted;
208 };
209 
210 struct ir3_reg_ctx {
211    /* The tree of top-level intervals in the forest. */
212    struct rb_tree intervals;
213 
214    /* Users of ir3_reg_ctx need to keep around additional state that is
215     * modified when top-level intervals are added or removed. For register
216     * pressure tracking, this is just the register pressure, but for RA we
217     * need to keep track of the physreg of each top-level interval. These
218     * callbacks provide a place to let users deriving from ir3_reg_ctx update
219     * their state when top-level intervals are inserted/removed.
220     */
221 
222    /* Called when an interval is added and it turns out to be at the top
223     * level.
224     */
225    void (*interval_add)(struct ir3_reg_ctx *ctx,
226                         struct ir3_reg_interval *interval);
227 
228    /* Called when an interval is deleted from the top level. */
229    void (*interval_delete)(struct ir3_reg_ctx *ctx,
230                            struct ir3_reg_interval *interval);
231 
232    /* Called when an interval is deleted and its child becomes top-level.
233     */
234    void (*interval_readd)(struct ir3_reg_ctx *ctx,
235                           struct ir3_reg_interval *parent,
236                           struct ir3_reg_interval *child);
237 };
238 
239 static inline struct ir3_reg_interval *
ir3_rb_node_to_interval(struct rb_node * node)240 ir3_rb_node_to_interval(struct rb_node *node)
241 {
242    return rb_node_data(struct ir3_reg_interval, node, node);
243 }
244 
245 static inline const struct ir3_reg_interval *
ir3_rb_node_to_interval_const(const struct rb_node * node)246 ir3_rb_node_to_interval_const(const struct rb_node *node)
247 {
248    return rb_node_data(struct ir3_reg_interval, node, node);
249 }
250 
251 static inline struct ir3_reg_interval *
ir3_reg_interval_next(struct ir3_reg_interval * interval)252 ir3_reg_interval_next(struct ir3_reg_interval *interval)
253 {
254    struct rb_node *next = rb_node_next(&interval->node);
255    return next ? ir3_rb_node_to_interval(next) : NULL;
256 }
257 
258 static inline struct ir3_reg_interval *
ir3_reg_interval_next_or_null(struct ir3_reg_interval * interval)259 ir3_reg_interval_next_or_null(struct ir3_reg_interval *interval)
260 {
261    return interval ? ir3_reg_interval_next(interval) : NULL;
262 }
263 
264 static inline void
ir3_reg_interval_init(struct ir3_reg_interval * interval,struct ir3_register * reg)265 ir3_reg_interval_init(struct ir3_reg_interval *interval,
266                       struct ir3_register *reg)
267 {
268    rb_tree_init(&interval->children);
269    interval->reg = reg;
270    interval->parent = NULL;
271    interval->inserted = false;
272 }
273 
274 void ir3_reg_interval_dump(struct log_stream *stream,
275                            struct ir3_reg_interval *interval);
276 
277 void ir3_reg_interval_insert(struct ir3_reg_ctx *ctx,
278                              struct ir3_reg_interval *interval);
279 
280 void ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
281                              struct ir3_reg_interval *interval);
282 
283 void ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,
284                                  struct ir3_reg_interval *interval);
285 
286 #endif
287