/*
 * Copyright © 2021 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "ir3_ra.h"

/* The spilling pass leaves out a few details required for ldp/stp to work
 * correctly:
 *
 * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
 *    that and just spills/restores entire values, including arrays and values
 *    created for texture setup which can be more than 4 components.
 * 2. The immediate offset only has 13 bits and is signed, so if we spill a lot
 *    or have very large arrays before spilling then we could run out.
 * 3. The spiller doesn't add the barrier dependencies needed for post-RA
 *    scheduling.
 *
 * The first one, in particular, is much easier to handle after RA because
 * arrays and normal values can be treated the same way. Therefore this pass
 * runs after RA and handles all three issues. This keeps the complexity out of
 * the spiller.
 */

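/* As a rough sketch (the syntax here is schematic, not exact ir3 disassembly),
 * an 8-component full-register spill such as
 *
 *    spill.macro @offset=0, r4.x, 8
 *
 * is split into two 4-component stores, advancing the source register and the
 * byte offset by 4 components (16 bytes) per chunk:
 *
 *    stp @offset=0,  r4.x, 4
 *    stp @offset=16, r5.x, 4
 */
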
static unsigned
component_bytes(struct ir3_register *src)
{
   return (src->flags & IR3_REG_HALF) ? 2 : 4;
}

/* Note: this won't work if the base register is anything other than 0!
 * Dynamic bases, which we'll need for "real" function call support, will
 * probably be a lot harder to handle and may require reserving another
 * register.
 */
static void
set_base_reg(struct ir3_instruction *mem, unsigned val)
{
   struct ir3_instruction *mov =
      ir3_instr_create_at(ir3_before_instr(mem), OPC_MOV, 1, 1);
   ir3_dst_create(mov, mem->srcs[0]->num, mem->srcs[0]->flags);
   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = val;
   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
}

static void
reset_base_reg(struct ir3_instruction *mem)
{
   /* If the base register is killed, then we don't need to clobber it, and it
    * may be reused as a destination, so we can't always clobber it after the
    * instruction anyway.
    */
   struct ir3_register *base = mem->srcs[0];
   if (base->flags & IR3_REG_KILL)
      return;

   struct ir3_instruction *mov =
      ir3_instr_create_at(ir3_after_instr(mem), OPC_MOV, 1, 1);
   ir3_dst_create(mov, base->num, base->flags);
   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = 0;
   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
}

/* There are 13 bits, but 1 << 12 will be sign-extended into a negative offset
 * so it can't be used directly. Therefore only offsets under 1 << 12 can be
 * used without any adjustments.
 */
#define MAX_CAT6_SIZE (1u << 12)

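/* When a byte offset doesn't fit in the immediate, the handlers below fold it
 * into the base register instead. Schematically (again illustrative syntax,
 * not exact ir3 disassembly):
 *
 *    stp @offset=0x2000, r4.x, 4
 *
 * becomes
 *
 *    mov base, 0x2000
 *    stp @offset=0, r4.x, 4
 *    mov base, 0       (omitted when the stp kills the base register)
 */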
static void
handle_oob_offset_spill(struct ir3_instruction *spill)
{
   unsigned components = spill->srcs[2]->uim_val;

   if (spill->cat6.dst_offset + components * component_bytes(spill->srcs[1]) <
       MAX_CAT6_SIZE)
      return;

   set_base_reg(spill, spill->cat6.dst_offset);
   reset_base_reg(spill);
   spill->cat6.dst_offset = 0;
}

static void
handle_oob_offset_reload(struct ir3_instruction *reload)
{
   unsigned components = reload->srcs[2]->uim_val;
   unsigned offset = reload->srcs[1]->uim_val;
   if (offset + components * component_bytes(reload->dsts[0]) < MAX_CAT6_SIZE)
      return;

   set_base_reg(reload, offset);
   reset_base_reg(reload);
   reload->srcs[1]->uim_val = 0;
}

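/* Splitting clones the spill/reload once per 4-component chunk, bumps the
 * register number and byte offset on each clone, and then deletes the
 * original instruction. Because RA has already assigned arrays a physical
 * base register, array accesses are also lowered here to plain registers at
 * array.base plus the component offset.
 */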
static void
split_spill(struct ir3_instruction *spill)
{
   unsigned orig_components = spill->srcs[2]->uim_val;

   /* We don't handle splitting dependencies. */
   assert(spill->deps_count == 0);

   if (orig_components <= 4) {
      if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
         spill->srcs[1]->wrmask = MASK(orig_components);
         spill->srcs[1]->num = spill->srcs[1]->array.base;
         spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(spill);
      ir3_instr_move_before(clone, spill);

      clone->srcs[1]->wrmask = MASK(components);
      if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
         clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
         clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
      } else {
         clone->srcs[1]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      clone->cat6.dst_offset += comp * component_bytes(spill->srcs[1]);
   }

   list_delinit(&spill->node);
}

static void
split_reload(struct ir3_instruction *reload)
{
   unsigned orig_components = reload->srcs[2]->uim_val;

   assert(reload->deps_count == 0);

   if (orig_components <= 4) {
      if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
         reload->dsts[0]->wrmask = MASK(orig_components);
         reload->dsts[0]->num = reload->dsts[0]->array.base;
         reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(reload);
      ir3_instr_move_before(clone, reload);

      clone->dsts[0]->wrmask = MASK(components);
      if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
         clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
         clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
      } else {
         clone->dsts[0]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      clone->srcs[1]->uim_val += comp * component_bytes(reload->dsts[0]);
   }

   list_delinit(&reload->node);
}

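/* The split spills/reloads become plain stp/ldp with no register-level
 * dependencies between them, so they could otherwise be reordered past each
 * other by post-RA scheduling. Serialize them here: each spill/reload is
 * ordered after the nearest preceding spill, and each spill is ordered after
 * every preceding spill/reload back to the previous spill. Reloads remain
 * free to reorder among themselves, like loads moving past loads but not
 * past stores.
 */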
static void
add_spill_reload_deps(struct ir3_block *block)
{
   struct ir3_instruction *last_spill = NULL;

   foreach_instr (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(instr, last_spill);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }

   last_spill = NULL;

   foreach_instr_rev (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(last_spill, instr);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }
}

bool
ir3_lower_spill(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO) {
            handle_oob_offset_spill(instr);
            split_spill(instr);
         } else if (instr->opc == OPC_RELOAD_MACRO) {
            handle_oob_offset_reload(instr);
            split_reload(instr);
         }
      }

      add_spill_reload_deps(block);

      foreach_instr (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO)
            instr->opc = OPC_STP;
         else if (instr->opc == OPC_RELOAD_MACRO)
            instr->opc = OPC_LDP;
      }
   }

   return true;
}