/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"

/* Midgard's generic load/store instructions, particularly to implement SSBOs
 * and globals, have support for address arithmetic natively. In particular,
 * they take two indirect arguments A, B and two immediates #s, #c,
 * calculating the address:
 *
 *      A + (zext?(B) << #s) + #c
 *
 * This allows for fast indexing into arrays. This file tries to pattern
 * match the offset in NIR with this form to reduce pressure on the ALU
 * pipe.
 */
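
/* For example, an SSBO access with a (hypothetical) 16-byte element stride
 * might reach NIR as
 *
 *      iadd(base, u2u64(ishl(index, 4)))
 *
 * which the matchers below decompose into A = base, B = index, zext = true,
 * #s = 4, #c = 0, folding the whole address computation into the load/store
 * itself.
 */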

struct mir_address {
        nir_ssa_scalar A;
        nir_ssa_scalar B;

        bool zext;
        unsigned shift;
        unsigned bias;
};

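/* Checks whether the first count sources of the ALU op producing s are SSA,
 * since only SSA values can be chased by the matchers below */
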
static bool
mir_args_ssa(nir_ssa_scalar s, unsigned count)
{
        nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
        assert(count <= nir_op_infos[alu->op].num_inputs);

        for (unsigned i = 0; i < count; ++i) {
                if (!alu->src[i].src.is_ssa)
                        return false;
        }

        return true;
}

/* Matches a constant in either slot and moves it to the bias */
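/* (So a fully constant offset ends up as a bare #c with no indirect
 * arguments at all.) */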

static void
mir_match_constant(struct mir_address *address)
{
        if (address->A.def && nir_ssa_scalar_is_const(address->A)) {
                address->bias += nir_ssa_scalar_as_uint(address->A);
                address->A.def = NULL;
        }

        if (address->B.def && nir_ssa_scalar_is_const(address->B)) {
                address->bias += nir_ssa_scalar_as_uint(address->B);
                address->B.def = NULL;
        }
}

/* Matches an iadd when there is a free slot or constant */
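/* (e.g. iadd(x, #16) folds the 16 into the bias, while iadd(x, y) can fill
 * both indirect slots provided A is still free.) */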

static void
mir_match_iadd(struct mir_address *address, bool first_free)
{
        if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
                return;

        if (!mir_args_ssa(address->B, 2))
                return;

        nir_op op = nir_ssa_scalar_alu_op(address->B);

        if (op != nir_op_iadd) return;

        nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0);
        nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1);

        if (nir_ssa_scalar_is_const(op1)) {
                address->bias += nir_ssa_scalar_as_uint(op1);
                address->B = op2;
        } else if (nir_ssa_scalar_is_const(op2)) {
                address->bias += nir_ssa_scalar_as_uint(op2);
                address->B = op1;
        } else if (first_free && !address->A.def) {
                address->A = op1;
                address->B = op2;
        }
}

/* Matches u2u64 and sets zext */
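/* (32-bit NIR indices are commonly u2u64-converted before being added to a
 * 64-bit base address; the hardware can do that zero-extension itself.) */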

static void
mir_match_u2u64(struct mir_address *address)
{
        if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
                return;

        if (!mir_args_ssa(address->B, 1))
                return;

        nir_op op = nir_ssa_scalar_alu_op(address->B);
        if (op != nir_op_u2u64) return;
        nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);

        address->B = arg;
        address->zext = true;
}

/* Matches ishl to shift */
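/* (e.g. ishl(index, #2) from a 4-byte array stride becomes #s = 2; shifts
 * above 7 cannot be encoded and are left to the ALU.) */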

static void
mir_match_ishl(struct mir_address *address)
{
        if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
                return;

        if (!mir_args_ssa(address->B, 2))
                return;

        nir_op op = nir_ssa_scalar_alu_op(address->B);
        if (op != nir_op_ishl) return;
        nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0);
        nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1);

        if (!nir_ssa_scalar_is_const(op2)) return;

        unsigned shift = nir_ssa_scalar_as_uint(op2);
        if (shift > 0x7) return;

        address->B = op1;
        address->shift = shift;
}

/* Chases through movs, which NIR vectorization can introduce */
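/* (Interleaved between the other matchers, since each of them only looks
 * one level deep.) */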

static void
mir_match_mov(struct mir_address *address)
{
        if (address->A.def && nir_ssa_scalar_is_alu(address->A)) {
                nir_op op = nir_ssa_scalar_alu_op(address->A);

                if (op == nir_op_mov && mir_args_ssa(address->A, 1))
                        address->A = nir_ssa_scalar_chase_alu_src(address->A, 0);
        }

        if (address->B.def && nir_ssa_scalar_is_alu(address->B)) {
                nir_op op = nir_ssa_scalar_alu_op(address->B);

                if (op == nir_op_mov && mir_args_ssa(address->B, 1))
                        address->B = nir_ssa_scalar_chase_alu_src(address->B, 0);
        }
}

/* Tries to pattern match the offset into a mir_address */
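/* (The matchers run from the outside of the expression tree inwards:
 * constants first, then iadd to split the sum, then u2u64 and ishl on the
 * index that remains in B.) */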

static struct mir_address
mir_match_offset(nir_ssa_def *offset, bool first_free)
{
        struct mir_address address = {
                .B = { .def = offset }
        };

        mir_match_mov(&address);
        mir_match_constant(&address);
        mir_match_mov(&address);
        mir_match_iadd(&address, first_free);
        mir_match_mov(&address);
        mir_match_u2u64(&address);
        mir_match_mov(&address);
        mir_match_ishl(&address);

        return address;
}
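/* Fills the matched address into a load/store instruction, using sentinel
 * values for any unused indirect slot. Note the A slot is never used for
 * shared memory, hence first_free = !is_shared below. */
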
void
mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, bool is_shared)
{
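        /* Default to reading component zero of each address source; the
         * matched components are filled in below */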
        for (unsigned i = 0; i < 16; ++i) {
                ins->swizzle[1][i] = 0;
                ins->swizzle[2][i] = 0;
        }

        bool force_zext = (nir_src_bit_size(*offset) < 64);

        if (!offset->is_ssa) {
                ins->load_store.arg_1 |= is_shared ? 0x6E : 0x7E;
                ins->src[2] = nir_src_index(ctx, offset);
                ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset);

                if (force_zext)
                        ins->load_store.arg_1 |= 0x80;

                return;
        }

        struct mir_address match = mir_match_offset(offset->ssa, !is_shared);

        if (match.A.def) {
                ins->src[1] = nir_ssa_index(match.A.def);
                ins->swizzle[1][0] = match.A.comp;
                ins->src_types[1] = nir_type_uint | match.A.def->bit_size;
        } else
                ins->load_store.arg_1 |= is_shared ? 0x6E : 0x7E;

        if (match.B.def) {
                ins->src[2] = nir_ssa_index(match.B.def);
                ins->swizzle[2][0] = match.B.comp;
                ins->src_types[2] = nir_type_uint | match.B.def->bit_size;
        } else
                ins->load_store.arg_2 = 0x1E;

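        /* Request zero-extension of B, either matched from an explicit u2u64
         * or forced because the offset is narrower than 64-bit */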
        if (match.zext || force_zext)
                ins->load_store.arg_1 |= 0x80;

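        /* The shift is encoded in the upper bits of arg_2, so it must fit
         * in 3 bits */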
        assert(match.shift <= 7);
        ins->load_store.arg_2 |= (match.shift) << 5;

        ins->constants.u32[0] = match.bias;
}