1 /*
2 * Copyright (C) 2021 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ir3_ra.h"
25
26 /* The spilling pass leaves out a few details required to successfully operate
27 * ldp/stp:
28 *
29 * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
30 * that and just spills/restores entire values, including arrays and values
31 * created for texture setup which can be more than 4 components.
32 * 2. The spiller doesn't add barrier dependencies needed for post-RA
33 * scheduling.
34 *
35 * The first one, in particular, is much easier to handle after RA because
36 * arrays and normal values can be treated the same way. Therefore this pass
37 * runs after RA, and handles both issues. This keeps the complexity out of the
38 * spiller.
39 */
40
41 static void
split_spill(struct ir3_instruction * spill)42 split_spill(struct ir3_instruction *spill)
43 {
44 unsigned orig_components = spill->srcs[2]->uim_val;
45
46 /* We don't handle splitting dependencies. */
47 assert(spill->deps_count == 0);
48
49 if (orig_components <= 4) {
50 if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
51 spill->srcs[1]->wrmask = MASK(orig_components);
52 spill->srcs[1]->num = spill->srcs[1]->array.base;
53 spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
54 }
55 return;
56 }
57
58 for (unsigned comp = 0; comp < orig_components; comp += 4) {
59 unsigned components = MIN2(orig_components - comp, 4);
60 struct ir3_instruction *clone = ir3_instr_clone(spill);
61 ir3_instr_move_before(clone, spill);
62
63 clone->srcs[1]->wrmask = MASK(components);
64 if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
65 clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
66 clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
67 }
68
69 clone->srcs[2]->uim_val = components;
70 clone->cat6.dst_offset +=
71 comp * ((spill->srcs[1]->flags & IR3_REG_HALF) ? 2 : 4);
72 }
73
74 list_delinit(&spill->node);
75 }
76
77 static void
split_reload(struct ir3_instruction * reload)78 split_reload(struct ir3_instruction *reload)
79 {
80 unsigned orig_components = reload->srcs[2]->uim_val;
81
82 assert(reload->deps_count == 0);
83
84 if (orig_components <= 4) {
85 if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
86 reload->dsts[0]->wrmask = MASK(orig_components);
87 reload->dsts[0]->num = reload->dsts[0]->array.base;
88 reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
89 }
90 return;
91 }
92
93 for (unsigned comp = 0; comp < orig_components; comp += 4) {
94 unsigned components = MIN2(orig_components - comp, 4);
95 struct ir3_instruction *clone = ir3_instr_clone(reload);
96 ir3_instr_move_before(clone, reload);
97
98 clone->dsts[0]->wrmask = MASK(components);
99 if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
100 clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
101 clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
102 }
103
104 clone->srcs[2]->uim_val = components;
105 clone->srcs[1]->uim_val +=
106 comp * ((reload->dsts[0]->flags & IR3_REG_HALF) ? 2 : 4);
107 }
108
109 list_delinit(&reload->node);
110 }
111
112 static void
add_spill_reload_deps(struct ir3_block * block)113 add_spill_reload_deps(struct ir3_block *block)
114 {
115 struct ir3_instruction *last_spill = NULL;
116
117 foreach_instr (instr, &block->instr_list) {
118 if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
119 last_spill) {
120 ir3_instr_add_dep(instr, last_spill);
121 }
122
123 if (instr->opc == OPC_SPILL_MACRO)
124 last_spill = instr;
125 }
126
127
128 last_spill = NULL;
129
130 foreach_instr_rev (instr, &block->instr_list) {
131 if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
132 last_spill) {
133 ir3_instr_add_dep(last_spill, instr);
134 }
135
136 if (instr->opc == OPC_SPILL_MACRO)
137 last_spill = instr;
138 }
139 }
140
141 bool
ir3_lower_spill(struct ir3 * ir)142 ir3_lower_spill(struct ir3 *ir)
143 {
144 foreach_block (block, &ir->block_list) {
145 foreach_instr_safe (instr, &block->instr_list) {
146 if (instr->opc == OPC_SPILL_MACRO)
147 split_spill(instr);
148 else if (instr->opc == OPC_RELOAD_MACRO)
149 split_reload(instr);
150 }
151
152 add_spill_reload_deps(block);
153
154 foreach_instr (instr, &block->instr_list) {
155 if (instr->opc == OPC_SPILL_MACRO)
156 instr->opc = OPC_STP;
157 else if (instr->opc == OPC_RELOAD_MACRO)
158 instr->opc = OPC_LDP;
159 }
160 }
161
162 return true;
163 }
164