/*
 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"
#include "agx_minifloat.h"

/* AGX peephole optimizer responsible for instruction combining. It operates in
 * a forward direction and a backward direction, traversing in source order for
 * the forward pass and in reverse source order for the backward pass. SSA
 * means the forward pass satisfies the invariant:
 *
 *    Every def is visited before any of its uses.
 *
 * Dually, the backward pass satisfies the invariant:
 *
 *    Every use of a def is visited before the def.
 *
 * This means the forward pass can propagate modifiers forward, whereas the
 * backwards pass propagates modifiers backward. Consider an example:
 *
 *    1 = fabs 0
 *    2 = fround 1
 *    3 = fsat 2
 *
 * The forwards pass would propagate the fabs to the fround (since we can
 * lookup the fabs from the fround source and do the replacement). By contrast
 * the backwards pass would propagate the fsat back to the fround (since when
 * we see the fround we know it has only a single user, fsat). Propagatable
 * instructions have natural directions (like pushforwards and pullbacks).
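 *
 * (Illustration: assuming 1 and 2 have no other users, running both passes on
 * the example above leaves a single fround that reads |0| through an abs
 * source modifier and has its saturate flag set, writing directly to 3; the
 * fabs is left without users.)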
 *
 * We are careful to update the tracked state whenever we modify an instruction
 * to ensure the passes are linear-time and converge in a single iteration.
 *
 * Size conversions are worth special discussion. Consider the snippet:
 *
 *    2 = fadd 0, 1
 *    3 = f2f16 2
 *    4 = fround 3
 *
 * A priori, we can move the f2f16 in either direction. But it's not equal --
 * if we move it up to the fadd, we get FP16 for two instructions, whereas if
 * we push it into the fround, we effectively get FP32 for two instructions. So
 * f2f16 is backwards. Likewise, consider
 *
 *    2 = fadd 0, 1
 *    3 = f2f32 2
 *    4 = fround 3
 *
 * This time if we move f2f32 up to the fadd, we get FP32 for two, but if we
 * move it down to the fround, we get FP16 for two. So f2f32 is forwards.
 */
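
/* An "fmov" is matched as an fadd whose second source is negative zero, since
 * x + (-0.0) leaves x unchanged.
 */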
static bool
agx_is_fmov(agx_instr *def)
{
   return (def->op == AGX_OPCODE_FADD)
      && agx_is_equiv(def->src[1], agx_negzero());
}

/* Compose floating-point modifiers with floating-point sources */
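/* For example, if the use applies an absolute value, any negation already on
 * the fmov's source is dropped (|-x| == |x|), while negations compose by XOR.
 */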

static agx_index
agx_compose_float_src(agx_index to, agx_index from)
{
   if (to.abs) {
      from.neg = false;
      from.abs = true;
   }

   from.neg ^= to.neg;

   return from;
}

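/* Forward direction: fold an fmov def's abs/neg source modifiers into each
 * floating-point instruction that reads it.
 */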
static void
agx_optimizer_fmov(agx_instr **defs, agx_instr *ins)
{
   agx_foreach_src(ins, s) {
      agx_index src = ins->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (!agx_is_fmov(def)) continue;
      if (def->saturate) continue;

      ins->src[s] = agx_compose_float_src(src, def->src[0]);
   }
}

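/* Inline MOV_IMM constants into sources where the encoding allows it: 8-bit
 * integer immediates directly, and float constants only when they are exactly
 * representable as an 8-bit minifloat.
 */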
static void
agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
      unsigned srcs, bool is_float)
{
   for (unsigned s = 0; s < srcs; ++s) {
      agx_index src = I->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (def->op != AGX_OPCODE_MOV_IMM) continue;

      uint8_t value = def->imm;
      bool float_src = is_float;

      /* cmpselsrc takes integer immediates only */
      if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;

      if (float_src) {
         bool fp16 = (def->dest[0].size == AGX_SIZE_16);
         assert(fp16 || (def->dest[0].size == AGX_SIZE_32));

         float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm);
         if (!agx_minifloat_exact(f)) continue;

         value = agx_minifloat_encode(f);
      } else if (value != def->imm) {
         continue;
      }

      I->src[s].type = AGX_INDEX_IMMEDIATE;
      I->src[s].value = value;
   }
}

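/* Backward direction: fuse a single-use fmov into its def, transferring the
 * saturate flag and retargeting the def to write the fmov's destination.
 */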
static bool
agx_optimizer_fmov_rev(agx_instr *I, agx_instr *use)
{
   if (!agx_is_fmov(use)) return false;
   if (use->src[0].neg || use->src[0].abs) return false;

   /* saturate(saturate(x)) = saturate(x) */
   I->saturate |= use->saturate;
   I->dest[0] = use->dest[0];
   return true;
}

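/* Copy propagation: rewrite a source that reads a plain MOV so that it reads
 * the MOV's source directly.
 */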
static void
agx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
{
   agx_foreach_src(I, s) {
      agx_index src = I->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (def->op != AGX_OPCODE_MOV) continue;

      /* At the moment, not all instructions support size conversions. Notably
       * RA pseudo instructions don't handle size conversions. This should be
       * refined in the future.
       */
      if (def->src[0].size != src.size) continue;

      /* Immediate inlining happens elsewhere */
      if (def->src[0].type == AGX_INDEX_IMMEDIATE) continue;

      I->src[s] = agx_replace_index(src, def->src[0]);
   }
}

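/* Forward pass: walk the shader in source order, recording the defining
 * instruction of each SSA value so that later uses can look up their def and
 * combine with it.
 */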
static void
agx_optimizer_forward(agx_context *ctx)
{
   agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));

   agx_foreach_instr_global(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      agx_foreach_dest(I, d) {
         if (I->dest[d].type == AGX_INDEX_NORMAL)
            defs[I->dest[d].value] = I;
      }

      /* Optimize moves */
      agx_optimizer_copyprop(defs, I);

      /* Propagate fmov down */
      if (info.is_float)
         agx_optimizer_fmov(defs, I);

      /* Inline immediates if we can. TODO: systematic */
      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
          I->op != AGX_OPCODE_P_EXTRACT && I->op != AGX_OPCODE_P_COMBINE)
         agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
   }

   free(defs);
}

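/* Backward pass: walk the shader in reverse source order, tracking the single
 * known use of each SSA value (a bitset marks values with multiple uses), and
 * fuse single-use fmovs back into their defs.
 */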
static void
agx_optimizer_backward(agx_context *ctx)
{
   agx_instr **uses = calloc(ctx->alloc, sizeof(*uses));
   BITSET_WORD *multiple = calloc(BITSET_WORDS(ctx->alloc), sizeof(*multiple));

   agx_foreach_instr_global_rev(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      for (unsigned s = 0; s < info.nr_srcs; ++s) {
         if (I->src[s].type == AGX_INDEX_NORMAL) {
            unsigned v = I->src[s].value;

            if (uses[v])
               BITSET_SET(multiple, v);
            else
               uses[v] = I;
         }
      }

      if (info.nr_dests != 1)
         continue;

      if (I->dest[0].type != AGX_INDEX_NORMAL)
         continue;

      agx_instr *use = uses[I->dest[0].value];

      if (!use || BITSET_TEST(multiple, I->dest[0].value))
         continue;

      /* Destination has a single use, try to propagate */
      if (info.is_float && agx_optimizer_fmov_rev(I, use)) {
         agx_remove_instruction(use);
         continue;
      }
   }

   free(uses);
   free(multiple);
}

void
agx_optimizer(agx_context *ctx)
{
   agx_optimizer_backward(ctx);
   agx_optimizer_forward(ctx);
}