• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 Collabora, Ltd.
3  * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "compiler.h"
26 #include "bi_builder.h"
27 
28 /*
29  * Due to a Bifrost encoding restriction, some instructions cannot have an abs
30  * modifier on both sources. Check if adding a fabs modifier to a given source
31  * of a binary instruction would cause this restriction to be hit.
32  */
33 static bool
bi_would_impact_abs(unsigned arch,bi_instr * I,bi_index repl,unsigned s)34 bi_would_impact_abs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
35 {
36         return (arch <= 8) && I->src[1 - s].abs &&
37                bi_is_word_equiv(I->src[1 - s], repl);
38 }
39 
40 static bool
bi_takes_fabs(unsigned arch,bi_instr * I,bi_index repl,unsigned s)41 bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
42 {
43         switch (I->op) {
44         case BI_OPCODE_FCMP_V2F16:
45         case BI_OPCODE_FMAX_V2F16:
46         case BI_OPCODE_FMIN_V2F16:
47                 return !bi_would_impact_abs(arch, I, repl, s);
48         case BI_OPCODE_FADD_V2F16:
49                 /*
50                  * For FADD.v2f16, the FMA pipe has the abs encoding hazard,
51                  * while the FADD pipe cannot encode a clamp. Either case in
52                  * isolation can be worked around in the scheduler, but both
53                  * together is impossible to encode. Avoid the hazard.
54                  */
55                 return !(I->clamp && bi_would_impact_abs(arch, I, repl, s));
56         case BI_OPCODE_V2F32_TO_V2F16:
57                 /* TODO: Needs both match or lower */
58                 return false;
59         case BI_OPCODE_FLOG_TABLE_F32:
60                 /* TODO: Need to check mode */
61                 return false;
62         default:
63                 return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
64         }
65 }
66 
67 static bool
bi_takes_fneg(unsigned arch,bi_instr * I,unsigned s)68 bi_takes_fneg(unsigned arch, bi_instr *I, unsigned s)
69 {
70         switch (I->op) {
71         case BI_OPCODE_CUBE_SSEL:
72         case BI_OPCODE_CUBE_TSEL:
73         case BI_OPCODE_CUBEFACE:
74                 /* TODO: Bifrost encoding restriction: need to match or lower */
75                 return arch >= 9;
76         case BI_OPCODE_FREXPE_F32:
77         case BI_OPCODE_FREXPE_V2F16:
78         case BI_OPCODE_FLOG_TABLE_F32:
79                 /* TODO: Need to check mode */
80                 return false;
81         default:
82                 return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
83         }
84 }
85 
86 static bool
bi_is_fabsneg(enum bi_opcode op,enum bi_size size)87 bi_is_fabsneg(enum bi_opcode op, enum bi_size size)
88 {
89         return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) ||
90                (size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16);
91 }
92 
93 static enum bi_swizzle
bi_compose_swizzle_16(enum bi_swizzle a,enum bi_swizzle b)94 bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
95 {
96         assert(a <= BI_SWIZZLE_H11);
97         assert(b <= BI_SWIZZLE_H11);
98 
99         bool al = (a & BI_SWIZZLE_H10);
100         bool ar = (a & BI_SWIZZLE_H01);
101         bool bl = (b & BI_SWIZZLE_H10);
102         bool br = (b & BI_SWIZZLE_H01);
103 
104         return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
105                ((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
106 }
107 
108 /* Like bi_replace_index, but composes instead of overwrites */
109 
110 static inline bi_index
bi_compose_float_index(bi_index old,bi_index repl)111 bi_compose_float_index(bi_index old, bi_index repl)
112 {
113         /* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
114          * -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
115         repl.neg = old.neg ^ (repl.neg && !old.abs);
116 
117         /* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
118         repl.abs |= old.abs;
119 
120         /* Use the old swizzle to select from the replacement swizzle */
121         repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
122 
123         return repl;
124 }
125 
126 /* DISCARD.b32(FCMP.f(x, y)) --> DISCARD.f(x, y) */
127 
128 static inline void
bi_fuse_discard_fcmp(bi_instr * I,bi_instr * mod,unsigned arch)129 bi_fuse_discard_fcmp(bi_instr *I, bi_instr *mod, unsigned arch)
130 {
131         if (I->op != BI_OPCODE_DISCARD_B32) return;
132         if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16) return;
133         if (mod->cmpf >= BI_CMPF_GTLT) return;
134 
135         /* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */
136         bool absneg = mod->src[0].neg || mod->src[0].abs;
137         absneg     |= mod->src[1].neg || mod->src[1].abs;
138 
139         if (arch <= 8 && absneg) return;
140 
141         enum bi_swizzle r = I->src[0].swizzle;
142 
143         /* result_type doesn't matter */
144         I->op = BI_OPCODE_DISCARD_F32;
145         I->cmpf = mod->cmpf;
146         I->src[0] = mod->src[0];
147         I->src[1] = mod->src[1];
148 
149         if (mod->op == BI_OPCODE_FCMP_V2F16) {
150                 I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle);
151                 I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle);
152         }
153 }
154 
155 void
bi_opt_mod_prop_forward(bi_context * ctx)156 bi_opt_mod_prop_forward(bi_context *ctx)
157 {
158         bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
159 
160         bi_foreach_instr_global_safe(ctx, I) {
161                 if (bi_is_ssa(I->dest[0]))
162                         lut[I->dest[0].value] = I;
163 
164                 bi_foreach_src(I, s) {
165                         if (!bi_is_ssa(I->src[s]))
166                                 continue;
167 
168                         bi_instr *mod = lut[I->src[s].value];
169 
170                         if (!mod)
171                                 continue;
172 
173                         unsigned size = bi_opcode_props[I->op].size;
174 
175                         bi_fuse_discard_fcmp(I, mod, ctx->arch);
176 
177                         if (bi_is_fabsneg(mod->op, size)) {
178                                 if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
179                                         continue;
180 
181                                 if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s))
182                                         continue;
183 
184                                 I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
185                         }
186                 }
187         }
188 
189         free(lut);
190 }
191 
192 /* RSCALE has restrictions on how the clamp may be used, only used for
193  * specialized transcendental sequences that set the clamp explicitly anyway */
194 
195 static bool
bi_takes_clamp(bi_instr * I)196 bi_takes_clamp(bi_instr *I)
197 {
198         switch (I->op) {
199         case BI_OPCODE_FMA_RSCALE_F32:
200         case BI_OPCODE_FMA_RSCALE_V2F16:
201         case BI_OPCODE_FADD_RSCALE_F32:
202                 return false;
203         case BI_OPCODE_FADD_V2F16:
204                 /* Encoding restriction */
205                 return !(I->src[0].abs && I->src[1].abs &&
206                          bi_is_word_equiv(I->src[0], I->src[1]));
207         default:
208                 return bi_opcode_props[I->op].clamp;
209         }
210 }
211 
212 static bool
bi_is_fclamp(enum bi_opcode op,enum bi_size size)213 bi_is_fclamp(enum bi_opcode op, enum bi_size size)
214 {
215         return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
216                (size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
217 }
218 
219 static bool
bi_optimizer_clamp(bi_instr * I,bi_instr * use)220 bi_optimizer_clamp(bi_instr *I, bi_instr *use)
221 {
222         if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) return false;
223         if (!bi_takes_clamp(I)) return false;
224 
225         /* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
226         I->clamp |= use->clamp;
227         I->dest[0] = use->dest[0];
228         return true;
229 }
230 
231 static enum bi_opcode
bi_sized_mux_op(unsigned size)232 bi_sized_mux_op(unsigned size)
233 {
234         switch (size) {
235         case  8: return BI_OPCODE_MUX_V4I8;
236         case 16: return BI_OPCODE_MUX_V2I16;
237         case 32: return BI_OPCODE_MUX_I32;
238         default: unreachable("invalid size");
239         }
240 }
241 
242 static bool
bi_is_fixed_mux(bi_instr * I,unsigned size,bi_index v1)243 bi_is_fixed_mux(bi_instr *I, unsigned size, bi_index v1)
244 {
245         return I->op == bi_sized_mux_op(size) &&
246                bi_is_value_equiv(I->src[0], bi_zero()) &&
247                bi_is_value_equiv(I->src[1], v1);
248 }
249 
250 static bool
bi_takes_int_result_type(enum bi_opcode op)251 bi_takes_int_result_type(enum bi_opcode op)
252 {
253         switch (op) {
254         case BI_OPCODE_ICMP_I32:
255         case BI_OPCODE_ICMP_S32:
256         case BI_OPCODE_ICMP_U32:
257         case BI_OPCODE_ICMP_V2I16:
258         case BI_OPCODE_ICMP_V2S16:
259         case BI_OPCODE_ICMP_V2U16:
260         case BI_OPCODE_ICMP_V4I8:
261         case BI_OPCODE_ICMP_V4S8:
262         case BI_OPCODE_ICMP_V4U8:
263         case BI_OPCODE_FCMP_F32:
264         case BI_OPCODE_FCMP_V2F16:
265                 return true;
266         default:
267                 return false;
268         }
269 }
270 
271 static bool
bi_takes_float_result_type(enum bi_opcode op)272 bi_takes_float_result_type(enum bi_opcode op)
273 {
274         return (op == BI_OPCODE_FCMP_F32) ||
275                (op == BI_OPCODE_FCMP_V2F16);
276 }
277 
278 /* CMP+MUX -> CMP with result type */
279 static bool
bi_optimizer_result_type(bi_instr * I,bi_instr * mux)280 bi_optimizer_result_type(bi_instr *I, bi_instr *mux)
281 {
282         if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size)
283                 return false;
284 
285         if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) ||
286             bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) {
287 
288                 if (!bi_takes_float_result_type(I->op))
289                         return false;
290 
291                 I->result_type = BI_RESULT_TYPE_F1;
292         } else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) ||
293                    bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) ||
294                    bi_is_fixed_mux(mux,  8, bi_imm_u8(1))) {
295 
296                 if (!bi_takes_int_result_type(I->op))
297                         return false;
298 
299                 I->result_type = BI_RESULT_TYPE_I1;
300         } else {
301                 return false;
302         }
303 
304         I->dest[0] = mux->dest[0];
305         return true;
306 }
307 
308 static bool
bi_is_var_tex(bi_instr * var,bi_instr * tex)309 bi_is_var_tex(bi_instr *var, bi_instr *tex)
310 {
311         return (var->op == BI_OPCODE_LD_VAR_IMM) &&
312                 (tex->op == BI_OPCODE_TEXS_2D_F16 || tex->op == BI_OPCODE_TEXS_2D_F32) &&
313                 (var->register_format == BI_REGISTER_FORMAT_F32) &&
314                 ((var->sample == BI_SAMPLE_CENTER && var->update == BI_UPDATE_STORE) ||
315                  (var->sample == BI_SAMPLE_NONE && var->update == BI_UPDATE_RETRIEVE)) &&
316                 (tex->texture_index == tex->sampler_index) &&
317                 (tex->texture_index < 4) &&
318                 (var->index < 8);
319 }
320 
321 static bool
bi_optimizer_var_tex(bi_context * ctx,bi_instr * var,bi_instr * tex)322 bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex)
323 {
324         if (!bi_is_var_tex(var, tex)) return false;
325 
326         /* Construct the corresponding VAR_TEX intruction */
327         bi_builder b = bi_init_builder(ctx, bi_after_instr(var));
328 
329         bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode,
330                         var->sample, var->update, tex->texture_index, var->index);
331         I->skip = tex->skip;
332 
333         if (tex->op == BI_OPCODE_TEXS_2D_F16)
334                 I->op = BI_OPCODE_VAR_TEX_F16;
335 
336         /* Dead code elimination will clean up for us */
337         return true;
338 }
339 
340 void
bi_opt_mod_prop_backward(bi_context * ctx)341 bi_opt_mod_prop_backward(bi_context *ctx)
342 {
343         unsigned count = ctx->ssa_alloc;
344         bi_instr **uses = calloc(count, sizeof(*uses));
345         BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
346 
347         bi_foreach_instr_global_rev(ctx, I) {
348                 bi_foreach_src(I, s) {
349                         if (bi_is_ssa(I->src[s])) {
350                                 unsigned v = I->src[s].value;
351 
352                                 if (uses[v] && uses[v] != I)
353                                         BITSET_SET(multiple, v);
354                                 else
355                                         uses[v] = I;
356                         }
357                 }
358 
359                 if (!bi_is_ssa(I->dest[0]))
360                         continue;
361 
362                 bi_instr *use = uses[I->dest[0].value];
363 
364                 if (!use || BITSET_TEST(multiple, I->dest[0].value))
365                         continue;
366 
367                 /* Destination has a single use, try to propagate */
368                 bool propagated =
369                         bi_optimizer_clamp(I, use) ||
370                         bi_optimizer_result_type(I, use);
371 
372                 if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM && use->op == BI_OPCODE_SPLIT_I32) {
373                         /* Need to see through the split in a
374                          * ld_var_imm/split/var_tex  sequence
375                          */
376                         assert(bi_is_ssa(use->dest[0]));
377                         bi_instr *tex = uses[use->dest[0].value];
378 
379                         if (!tex || BITSET_TEST(multiple, use->dest[0].value))
380                                 continue;
381 
382                         use = tex;
383                         propagated = bi_optimizer_var_tex(ctx, I, use);
384                 }
385 
386                 if (propagated) {
387                         bi_remove_instruction(use);
388                         continue;
389                 }
390         }
391 
392         free(uses);
393         free(multiple);
394 }
395 
396 /** Lower pseudo instructions that exist to simplify the optimizer */
397 
398 void
bi_lower_opt_instruction(bi_instr * I)399 bi_lower_opt_instruction(bi_instr *I)
400 {
401         switch (I->op) {
402         case BI_OPCODE_FABSNEG_F32:
403         case BI_OPCODE_FABSNEG_V2F16:
404         case BI_OPCODE_FCLAMP_F32:
405         case BI_OPCODE_FCLAMP_V2F16:
406                 I->op = (bi_opcode_props[I->op].size == BI_SIZE_32) ?
407                         BI_OPCODE_FADD_F32 : BI_OPCODE_FADD_V2F16;
408 
409                 I->round = BI_ROUND_NONE;
410                 I->src[1] = bi_negzero();
411                 break;
412 
413         case BI_OPCODE_DISCARD_B32:
414                 I->op = BI_OPCODE_DISCARD_F32;
415                 I->src[1] = bi_imm_u32(0);
416                 I->cmpf = BI_CMPF_NE;
417                 break;
418 
419         default:
420                 break;
421         }
422 }
423