• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include "compiler.h"
28 
29 bool
bi_has_arg(const bi_instr * ins,bi_index arg)30 bi_has_arg(const bi_instr *ins, bi_index arg)
31 {
32         if (!ins)
33                 return false;
34 
35         bi_foreach_src(ins, s) {
36                 if (bi_is_equiv(ins->src[s], arg))
37                         return true;
38         }
39 
40         return false;
41 }
42 
/* Precondition: valid 16-bit or 32-bit register format. Returns whether it is
 * 16-bit. Note auto reads to 32-bit registers even if the memory format is
 * 16-bit, so it is considered 32-bit here */
46 
47 bool
bi_is_regfmt_16(enum bi_register_format fmt)48 bi_is_regfmt_16(enum bi_register_format fmt)
49 {
50         switch  (fmt) {
51         case BI_REGISTER_FORMAT_F16:
52         case BI_REGISTER_FORMAT_S16:
53         case BI_REGISTER_FORMAT_U16:
54                 return true;
55         case BI_REGISTER_FORMAT_F32:
56         case BI_REGISTER_FORMAT_S32:
57         case BI_REGISTER_FORMAT_U32:
58         case BI_REGISTER_FORMAT_AUTO:
59                 return false;
60         default:
61                 unreachable("Invalid register format");
62         }
63 }
64 
65 static unsigned
bi_count_staging_registers(const bi_instr * ins)66 bi_count_staging_registers(const bi_instr *ins)
67 {
68         enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
69         unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
70 
71         switch (count) {
72         case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
73                 return count;
74         case BI_SR_COUNT_FORMAT:
75                 return bi_is_regfmt_16(ins->register_format) ?
76                         DIV_ROUND_UP(vecsize, 2) : vecsize;
77         case BI_SR_COUNT_VECSIZE:
78                 return vecsize;
79         case BI_SR_COUNT_SR_COUNT:
80                 return ins->sr_count;
81         }
82 
83         unreachable("Invalid sr_count");
84 }
85 
86 unsigned
bi_count_read_registers(const bi_instr * ins,unsigned s)87 bi_count_read_registers(const bi_instr *ins, unsigned s)
88 {
89         /* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
90         if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
91                 return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
92         else if (s == 0 && bi_opcode_props[ins->op].sr_read)
93                 return bi_count_staging_registers(ins);
94         else if (s == 4 && ins->op == BI_OPCODE_BLEND)
95                 return ins->sr_count_2; /* Dual source blending */
96         else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
97                 return ins->nr_dests;
98         else
99                 return 1;
100 }
101 
102 unsigned
bi_count_write_registers(const bi_instr * ins,unsigned d)103 bi_count_write_registers(const bi_instr *ins, unsigned d)
104 {
105         if (d == 0 && bi_opcode_props[ins->op].sr_write) {
106                 switch (ins->op) {
107                 case BI_OPCODE_TEXC:
108                         if (ins->sr_count_2)
109                                 return ins->sr_count;
110                         else
111                                 return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
112 
113                 case BI_OPCODE_TEX_SINGLE:
114                 case BI_OPCODE_TEX_FETCH:
115                 case BI_OPCODE_TEX_GATHER: {
116                         unsigned chans = util_bitcount(ins->write_mask);
117 
118                         return bi_is_regfmt_16(ins->register_format) ?
119                                 DIV_ROUND_UP(chans, 2) : chans;
120                 }
121 
122                 case BI_OPCODE_ACMPXCHG_I32:
123                         /* Reads 2 but writes 1 */
124                         return 1;
125 
126                 case BI_OPCODE_ATOM1_RETURN_I32:
127                         /* Allow omitting the destination for plain ATOM1 */
128                         return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
129                 default:
130                         return bi_count_staging_registers(ins);
131                 }
132         } else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
133                 return 2;
134         } else if (ins->op == BI_OPCODE_TEXC && d == 1) {
135                 return ins->sr_count_2;
136         } else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
137                 return ins->nr_srcs;
138         }
139 
140         return 1;
141 }
142 
143 unsigned
bi_writemask(const bi_instr * ins,unsigned d)144 bi_writemask(const bi_instr *ins, unsigned d)
145 {
146         unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
147         unsigned shift = ins->dest[d].offset;
148         return (mask << shift);
149 }
150 
151 bi_clause *
bi_next_clause(bi_context * ctx,bi_block * block,bi_clause * clause)152 bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
153 {
154         if (!block && !clause)
155                 return NULL;
156 
157         /* Try the first clause in this block if we're starting from scratch */
158         if (!clause && !list_is_empty(&block->clauses))
159                 return list_first_entry(&block->clauses, bi_clause, link);
160 
161         /* Try the next clause in this block */
162         if (clause && clause->link.next != &block->clauses)
163                 return list_first_entry(&(clause->link), bi_clause, link);
164 
165         /* Try the next block, or the one after that if it's empty, etc .*/
166         bi_block *next_block = bi_next_block(block);
167 
168         bi_foreach_block_from(ctx, next_block, block) {
169                 if (!list_is_empty(&block->clauses))
170                         return list_first_entry(&block->clauses, bi_clause, link);
171         }
172 
173         return NULL;
174 }
175 
/* Does an instruction have a side effect not captured by its register
 * destination? Applies only to certain message-passing instructions, +DISCARD,
 * and branching. Used in dead code elimination. Branches are characterized by
 * `last`, which applies to them as well as to some atomics, +BARRIER, and
 * +BLEND; all of those have side effects too, so checking `last` implies no
 * loss of generality */
181 
182 bool
bi_side_effects(const bi_instr * I)183 bi_side_effects(const bi_instr *I)
184 {
185         if (bi_opcode_props[I->op].last)
186                 return true;
187 
188         switch (I->op) {
189         case BI_OPCODE_DISCARD_F32:
190         case BI_OPCODE_DISCARD_B32:
191                 return true;
192         default:
193                 break;
194         }
195 
196         switch (bi_opcode_props[I->op].message) {
197         case BIFROST_MESSAGE_NONE:
198         case BIFROST_MESSAGE_VARYING:
199         case BIFROST_MESSAGE_ATTRIBUTE:
200         case BIFROST_MESSAGE_TEX:
201         case BIFROST_MESSAGE_VARTEX:
202         case BIFROST_MESSAGE_LOAD:
203         case BIFROST_MESSAGE_64BIT:
204                 return false;
205 
206         case BIFROST_MESSAGE_STORE:
207         case BIFROST_MESSAGE_ATOMIC:
208         case BIFROST_MESSAGE_BARRIER:
209         case BIFROST_MESSAGE_BLEND:
210         case BIFROST_MESSAGE_Z_STENCIL:
211         case BIFROST_MESSAGE_ATEST:
212         case BIFROST_MESSAGE_JOB:
213                 return true;
214 
215         case BIFROST_MESSAGE_TILE:
216                 return (I->op != BI_OPCODE_LD_TILE);
217         }
218 
219         unreachable("Invalid message type");
220 }
221 
222 /* Branch reconvergence is required when the execution mask may change
223  * between adjacent instructions (clauses). This occurs for conditional
224  * branches and for the last instruction (clause) in a block whose
225  * fallthrough successor has multiple predecessors.
226  */
227 
228 bool
bi_reconverge_branches(bi_block * block)229 bi_reconverge_branches(bi_block *block)
230 {
231         /* Last block of a program */
232         if (!block->successors[0]) {
233                 assert(!block->successors[1]);
234                 return true;
235         }
236 
237         /* Multiple successors? We're branching */
238         if (block->successors[1])
239                 return true;
240 
241         /* Must have at least one successor */
242         struct bi_block *succ = block->successors[0];
243 
244         /* Reconverge if the successor has multiple predecessors */
245         return bi_num_predecessors(succ) > 1;
246 }
247 
248 /*
249  * When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be
250  * replaced by CSEL as follows:
251  *
252  *      MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y)
253  *      MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y)
254  *      MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y)
255  *
256  * MUX.bit cannot be transformed like this.
257  *
258  * Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks.
259  * So we must check the swizzles too.
260  */
261 bool
bi_can_replace_with_csel(bi_instr * I)262 bi_can_replace_with_csel(bi_instr *I)
263 {
264         return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
265                 (I->mux != BI_MUX_BIT) &&
266                 (I->src[0].swizzle == BI_SWIZZLE_H01) &&
267                 (I->src[1].swizzle == BI_SWIZZLE_H01) &&
268                 (I->src[2].swizzle == BI_SWIZZLE_H01);
269 }
270 
271 static enum bi_opcode
bi_csel_for_mux(bool must_sign,bool b32,enum bi_mux mux)272 bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux)
273 {
274         switch (mux) {
275         case BI_MUX_INT_ZERO:
276                 if (must_sign)
277                         return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
278                 else
279                         return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
280         case BI_MUX_NEG:
281                 return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
282         case BI_MUX_FP_ZERO:
283                 return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
284         default:
285              unreachable("No CSEL for MUX.bit");
286         }
287 }
288 
289 void
bi_replace_mux_with_csel(bi_instr * I,bool must_sign)290 bi_replace_mux_with_csel(bi_instr *I, bool must_sign)
291 {
292         assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
293         I->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
294         I->cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
295         I->mux = 0;
296 
297         bi_index vTrue = I->src[0], vFalse = I->src[1], cond = I->src[2];
298 
299         I->src[0] = cond;
300         I->src[1] = bi_zero();
301         I->src[2] = vTrue;
302         I->src[3] = vFalse;
303 }
304