• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 Collabora, Ltd.
3  * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "compiler.h"
26 #include "bi_builder.h"
27 
28 static bool
bi_takes_fabs(unsigned arch,bi_instr * I,bi_index repl,unsigned s)29 bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
30 {
31         switch (I->op) {
32         case BI_OPCODE_FCMP_V2F16:
33         case BI_OPCODE_FMAX_V2F16:
34         case BI_OPCODE_FMIN_V2F16:
35                 /* Bifrost encoding restriction: can't have both abs if equal sources */
36                 return !(arch <= 8 && I->src[1 - s].abs
37                                    && bi_is_word_equiv(I->src[1 - s], repl));
38         case BI_OPCODE_V2F32_TO_V2F16:
39                 /* TODO: Needs both match or lower */
40                 return false;
41         case BI_OPCODE_FLOG_TABLE_F32:
42                 /* TODO: Need to check mode */
43                 return false;
44         default:
45                 return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
46         }
47 }
48 
49 static bool
bi_takes_fneg(unsigned arch,bi_instr * I,unsigned s)50 bi_takes_fneg(unsigned arch, bi_instr *I, unsigned s)
51 {
52         switch (I->op) {
53         case BI_OPCODE_CUBE_SSEL:
54         case BI_OPCODE_CUBE_TSEL:
55         case BI_OPCODE_CUBEFACE:
56                 /* TODO: Bifrost encoding restriction: need to match or lower */
57                 return arch >= 9;
58         case BI_OPCODE_FREXPE_F32:
59         case BI_OPCODE_FREXPE_V2F16:
60         case BI_OPCODE_FLOG_TABLE_F32:
61                 /* TODO: Need to check mode */
62                 return false;
63         default:
64                 return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
65         }
66 }
67 
68 static bool
bi_is_fabsneg(enum bi_opcode op,enum bi_size size)69 bi_is_fabsneg(enum bi_opcode op, enum bi_size size)
70 {
71         return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) ||
72                (size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16);
73 }
74 
75 static enum bi_swizzle
bi_compose_swizzle_16(enum bi_swizzle a,enum bi_swizzle b)76 bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
77 {
78         assert(a <= BI_SWIZZLE_H11);
79         assert(b <= BI_SWIZZLE_H11);
80 
81         bool al = (a & BI_SWIZZLE_H10);
82         bool ar = (a & BI_SWIZZLE_H01);
83         bool bl = (b & BI_SWIZZLE_H10);
84         bool br = (b & BI_SWIZZLE_H01);
85 
86         return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
87                ((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
88 }
89 
90 /* Like bi_replace_index, but composes instead of overwrites */
91 
92 static inline bi_index
bi_compose_float_index(bi_index old,bi_index repl)93 bi_compose_float_index(bi_index old, bi_index repl)
94 {
95         /* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
96          * -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
97         repl.neg = old.neg ^ (repl.neg && !old.abs);
98 
99         /* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
100         repl.abs |= old.abs;
101 
102         /* Use the old swizzle to select from the replacement swizzle */
103         repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
104 
105         return repl;
106 }
107 
108 /* DISCARD.b32(FCMP.f(x, y)) --> DISCARD.f(x, y) */
109 
110 static inline void
bi_fuse_discard_fcmp(bi_instr * I,bi_instr * mod,unsigned arch)111 bi_fuse_discard_fcmp(bi_instr *I, bi_instr *mod, unsigned arch)
112 {
113         if (I->op != BI_OPCODE_DISCARD_B32) return;
114         if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16) return;
115         if (mod->cmpf >= BI_CMPF_GTLT) return;
116 
117         /* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */
118         bool absneg = mod->src[0].neg || mod->src[0].abs;
119         absneg     |= mod->src[1].neg || mod->src[1].abs;
120 
121         if (arch <= 8 && absneg) return;
122 
123         enum bi_swizzle r = I->src[0].swizzle;
124 
125         /* result_type doesn't matter */
126         I->op = BI_OPCODE_DISCARD_F32;
127         I->cmpf = mod->cmpf;
128         I->src[0] = mod->src[0];
129         I->src[1] = mod->src[1];
130 
131         if (mod->op == BI_OPCODE_FCMP_V2F16) {
132                 I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle);
133                 I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle);
134         }
135 }
136 
137 void
bi_opt_mod_prop_forward(bi_context * ctx)138 bi_opt_mod_prop_forward(bi_context *ctx)
139 {
140         bi_instr **lut = calloc(sizeof(bi_instr *), ((ctx->ssa_alloc + 1) << 2));
141 
142         bi_foreach_instr_global_safe(ctx, I) {
143                 if (bi_is_ssa(I->dest[0]))
144                         lut[bi_word_node(I->dest[0])] = I;
145 
146                 bi_foreach_src(I, s) {
147                         if (!bi_is_ssa(I->src[s]))
148                                 continue;
149 
150                         bi_instr *mod = lut[bi_word_node(I->src[s])];
151 
152                         if (!mod)
153                                 continue;
154 
155                         unsigned size = bi_opcode_props[I->op].size;
156 
157                         bi_fuse_discard_fcmp(I, mod, ctx->arch);
158 
159                         if (bi_is_fabsneg(mod->op, size)) {
160                                 if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
161                                         continue;
162 
163                                 if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s))
164                                         continue;
165 
166                                 I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
167                         }
168                 }
169         }
170 
171         free(lut);
172 }
173 
174 /* RSCALE has restrictions on how the clamp may be used, only used for
175  * specialized transcendental sequences that set the clamp explicitly anyway */
176 
177 static bool
bi_takes_clamp(bi_instr * I)178 bi_takes_clamp(bi_instr *I)
179 {
180         switch (I->op) {
181         case BI_OPCODE_FMA_RSCALE_F32:
182         case BI_OPCODE_FMA_RSCALE_V2F16:
183         case BI_OPCODE_FADD_RSCALE_F32:
184                 return false;
185         default:
186                 return bi_opcode_props[I->op].clamp;
187         }
188 }
189 
190 static bool
bi_is_fclamp(enum bi_opcode op,enum bi_size size)191 bi_is_fclamp(enum bi_opcode op, enum bi_size size)
192 {
193         return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
194                (size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
195 }
196 
197 static bool
bi_optimizer_clamp(bi_instr * I,bi_instr * use)198 bi_optimizer_clamp(bi_instr *I, bi_instr *use)
199 {
200         if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) return false;
201         if (!bi_takes_clamp(I)) return false;
202 
203         /* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
204         I->clamp |= use->clamp;
205         I->dest[0] = use->dest[0];
206         return true;
207 }
208 
209 static bool
bi_is_var_tex(bi_instr * var,bi_instr * tex)210 bi_is_var_tex(bi_instr *var, bi_instr *tex)
211 {
212         return (var->op == BI_OPCODE_LD_VAR_IMM) &&
213                 (tex->op == BI_OPCODE_TEXS_2D_F16 || tex->op == BI_OPCODE_TEXS_2D_F32) &&
214                 (var->register_format == BI_REGISTER_FORMAT_F32) &&
215                 ((var->sample == BI_SAMPLE_CENTER && var->update == BI_UPDATE_STORE) ||
216                  (var->sample == BI_SAMPLE_NONE && var->update == BI_UPDATE_RETRIEVE)) &&
217                 (tex->texture_index == tex->sampler_index) &&
218                 (tex->texture_index < 4) &&
219                 (var->index < 8);
220 }
221 
222 static bool
bi_optimizer_var_tex(bi_context * ctx,bi_instr * var,bi_instr * tex)223 bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex)
224 {
225         if (!bi_is_var_tex(var, tex)) return false;
226 
227         /* Construct the corresponding VAR_TEX intruction */
228         bi_builder b = bi_init_builder(ctx, bi_after_instr(var));
229 
230         bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode,
231                         var->sample, var->update, tex->texture_index, var->index);
232         I->skip = tex->skip;
233 
234         if (tex->op == BI_OPCODE_TEXS_2D_F16)
235                 I->op = BI_OPCODE_VAR_TEX_F16;
236 
237         /* Dead code elimination will clean up for us */
238         return true;
239 }
240 
241 void
bi_opt_mod_prop_backward(bi_context * ctx)242 bi_opt_mod_prop_backward(bi_context *ctx)
243 {
244         unsigned count = ((ctx->ssa_alloc + 1) << 2);
245         bi_instr **uses = calloc(count, sizeof(*uses));
246         BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
247 
248         bi_foreach_instr_global_rev(ctx, I) {
249                 bi_foreach_src(I, s) {
250                         if (bi_is_ssa(I->src[s])) {
251                                 unsigned v = bi_word_node(I->src[s]);
252 
253                                 if (uses[v] && uses[v] != I)
254                                         BITSET_SET(multiple, v);
255                                 else
256                                         uses[v] = I;
257                         }
258                 }
259 
260                 if (!bi_is_ssa(I->dest[0]))
261                         continue;
262 
263                 bi_instr *use = uses[bi_word_node(I->dest[0])];
264 
265                 if (!use || BITSET_TEST(multiple, bi_word_node(I->dest[0])))
266                         continue;
267 
268                 /* Destination has a single use, try to propagate */
269                 bool propagated =
270                         bi_optimizer_clamp(I, use) ||
271                         bi_optimizer_var_tex(ctx, I, use);
272 
273                 if (propagated) {
274                         bi_remove_instruction(use);
275                         continue;
276                 }
277         }
278 
279         free(uses);
280         free(multiple);
281 }
282 
283 /** Lower pseudo instructions that exist to simplify the optimizer */
284 
285 void
bi_lower_opt_instruction(bi_instr * I)286 bi_lower_opt_instruction(bi_instr *I)
287 {
288         switch (I->op) {
289         case BI_OPCODE_FABSNEG_F32:
290         case BI_OPCODE_FABSNEG_V2F16:
291         case BI_OPCODE_FCLAMP_F32:
292         case BI_OPCODE_FCLAMP_V2F16:
293                 I->op = (bi_opcode_props[I->op].size == BI_SIZE_32) ?
294                         BI_OPCODE_FADD_F32 : BI_OPCODE_FADD_V2F16;
295 
296                 I->round = BI_ROUND_NONE;
297                 I->src[1] = bi_negzero();
298                 break;
299 
300         case BI_OPCODE_DISCARD_B32:
301                 I->op = BI_OPCODE_DISCARD_F32;
302                 I->src[1] = bi_imm_u16(0);
303                 I->cmpf = BI_CMPF_NE;
304                 break;
305 
306         default:
307                 break;
308         }
309 }
310