/*
 * Copyright © 2021 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "ir3_ra.h"

/* The spilling pass leaves out a few details required to successfully use
 * ldp/stp:
 *
 * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
 *    that and just spills/restores entire values, including arrays and values
 *    created for texture setup, which can be more than 4 components.
 * 2. The immediate offset only has 13 bits and is signed, so if we spill a lot
 *    or have very large arrays before spilling then we could run out of range.
 * 3. The spiller doesn't add the barrier dependencies needed for post-RA
 *    scheduling.
 *
 * The first one, in particular, is much easier to handle after RA because
 * arrays and normal values can be treated the same way. Therefore this pass
 * runs after RA and handles all three issues. This keeps the complexity out of
 * the spiller.
 */

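/* Size in bytes of a single component, which depends on whether the register
 * is a half-register.
 */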
static unsigned
component_bytes(struct ir3_register *src)
{
   return (src->flags & IR3_REG_HALF) ? 2 : 4;
}

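/* Load an immediate offset into the base register (the first source of the
 * ldp/stp) via a mov inserted right before the memory instruction.
 */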
/* Note: this won't work if the base register is anything other than 0!
 * Dynamic bases, which we'll need for "real" function call support, will
 * probably be a lot harder to handle and may require reserving another
 * register.
 */
static void
set_base_reg(struct ir3_instruction *mem, unsigned val)
{
   struct ir3_instruction *mov =
      ir3_instr_create_at(ir3_before_instr(mem), OPC_MOV, 1, 1);
   ir3_dst_create(mov, mem->srcs[0]->num, mem->srcs[0]->flags);
   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = val;
   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
}

static void
reset_base_reg(struct ir3_instruction *mem)
{
   /* If the base register is killed, then we don't need to reset it, and it
    * may be reused as a destination, so we can't always clobber it after the
    * instruction anyway.
    */
   struct ir3_register *base = mem->srcs[0];
   if (base->flags & IR3_REG_KILL)
      return;

   struct ir3_instruction *mov =
      ir3_instr_create_at(ir3_after_instr(mem), OPC_MOV, 1, 1);
   ir3_dst_create(mov, base->num, base->flags);
   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = 0;
   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
}

/* There are 13 bits, but 1 << 12 will be sign-extended into a negative offset
 * so it can't be used directly. Therefore only offsets under 1 << 12 can be
 * used without any adjustments.
 */
#define MAX_CAT6_SIZE (1u << 12)

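/* If the spill's immediate offset plus the extent of the store would overflow
 * the signed 13-bit immediate, move the offset into the base register and
 * zero out the immediate instead.
 */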
static void
handle_oob_offset_spill(struct ir3_instruction *spill)
{
   unsigned components = spill->srcs[2]->uim_val;

   if (spill->cat6.dst_offset + components * component_bytes(spill->srcs[1]) <
       MAX_CAT6_SIZE)
      return;

   set_base_reg(spill, spill->cat6.dst_offset);
   reset_base_reg(spill);
   spill->cat6.dst_offset = 0;
}

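/* Same as above, except that reloads take their offset as an immediate source
 * rather than in cat6.dst_offset.
 */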
static void
handle_oob_offset_reload(struct ir3_instruction *reload)
{
   unsigned components = reload->srcs[2]->uim_val;
   unsigned offset = reload->srcs[1]->uim_val;
   if (offset + components * component_bytes(reload->dsts[0]) < MAX_CAT6_SIZE)
      return;

   set_base_reg(reload, offset);
   reset_base_reg(reload);
   reload->srcs[1]->uim_val = 0;
}

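/* Split a spill of more than 4 components into a sequence of stp's that each
 * store at most 4 components, bumping the destination offset for each clone.
 * Array sources are rewritten to plain registers at the array's base along
 * the way.
 */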
static void
split_spill(struct ir3_instruction *spill)
{
   unsigned orig_components = spill->srcs[2]->uim_val;

   /* We don't handle splitting dependencies. */
   assert(spill->deps_count == 0);

   if (orig_components <= 4) {
      if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
         spill->srcs[1]->wrmask = MASK(orig_components);
         spill->srcs[1]->num = spill->srcs[1]->array.base;
         spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(spill);
      ir3_instr_move_before(clone, spill);

      clone->srcs[1]->wrmask = MASK(components);
      if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
         clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
         clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
      } else {
         clone->srcs[1]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      clone->cat6.dst_offset += comp * component_bytes(spill->srcs[1]);
   }

   list_delinit(&spill->node);
}

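/* Same as split_spill, but for reloads: split a wide reload into ldp's of at
 * most 4 components each, bumping the immediate source offset and the
 * destination register for each clone.
 */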
static void
split_reload(struct ir3_instruction *reload)
{
   unsigned orig_components = reload->srcs[2]->uim_val;

   assert(reload->deps_count == 0);

   if (orig_components <= 4) {
      if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
         reload->dsts[0]->wrmask = MASK(orig_components);
         reload->dsts[0]->num = reload->dsts[0]->array.base;
         reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(reload);
      ir3_instr_move_before(clone, reload);

      clone->dsts[0]->wrmask = MASK(components);
      if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
         clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
         clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
      } else {
         clone->dsts[0]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      clone->srcs[1]->uim_val += comp * component_bytes(reload->dsts[0]);
   }

   list_delinit(&reload->node);
}

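/* Add the barrier dependencies the spiller left out: nothing may be reordered
 * across a spill in either direction, while reloads stay free to move
 * relative to each other.
 */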
static void
add_spill_reload_deps(struct ir3_block *block)
{
   struct ir3_instruction *last_spill = NULL;

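   /* Forward pass: every spill/reload depends on the last spill before it. */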
   foreach_instr (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(instr, last_spill);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }

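   /* Backward pass: every spill depends on the spills/reloads before it. */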
   last_spill = NULL;

   foreach_instr_rev (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(last_spill, instr);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }
}

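/* Lower the spill/reload macros the spiller emitted into real stp/ldp
 * instructions, fixing up out-of-range offsets, splitting accesses wider than
 * 4 components, and adding post-RA scheduling dependencies along the way.
 */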
bool
ir3_lower_spill(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO) {
            handle_oob_offset_spill(instr);
            split_spill(instr);
         } else if (instr->opc == OPC_RELOAD_MACRO) {
            handle_oob_offset_reload(instr);
            split_reload(instr);
         }
      }

      add_spill_reload_deps(block);

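      /* With all fixups done, turn the macros into the real instructions. */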
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO)
            instr->opc = OPC_STP;
         else if (instr->opc == OPC_RELOAD_MACRO)
            instr->opc = OPC_LDP;
      }
   }

   return true;
}