1 /*
2  * Copyright © 2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir/nir_builder.h"
25 #include "nir.h"
26 #include "nir_constant_expressions.h"
27 #include "nir_control_flow.h"
28 #include "nir_loop_analyze.h"
29 
30 static bool
is_two_src_comparison(const nir_alu_instr * instr)31 is_two_src_comparison(const nir_alu_instr *instr)
32 {
33    switch (instr->op) {
34    case nir_op_flt:
35    case nir_op_flt32:
36    case nir_op_fge:
37    case nir_op_fge32:
38    case nir_op_feq:
39    case nir_op_feq32:
40    case nir_op_fneu:
41    case nir_op_fneu32:
42    case nir_op_ilt:
43    case nir_op_ilt32:
44    case nir_op_ult:
45    case nir_op_ult32:
46    case nir_op_ige:
47    case nir_op_ige32:
48    case nir_op_uge:
49    case nir_op_uge32:
50    case nir_op_ieq:
51    case nir_op_ieq32:
52    case nir_op_ine:
53    case nir_op_ine32:
54       return true;
55    default:
56       return false;
57    }
58 }
59 
60 static inline bool
is_zero(const nir_alu_instr * instr,unsigned src,unsigned num_components,const uint8_t * swizzle)61 is_zero(const nir_alu_instr *instr, unsigned src, unsigned num_components,
62         const uint8_t *swizzle)
63 {
64    /* only constant srcs: */
65    if (!nir_src_is_const(instr->src[src].src))
66       return false;
67 
68    for (unsigned i = 0; i < num_components; i++) {
69       nir_alu_type type = nir_op_infos[instr->op].input_types[src];
70       switch (nir_alu_type_get_base_type(type)) {
71       case nir_type_int:
72       case nir_type_uint: {
73          if (nir_src_comp_as_int(instr->src[src].src, swizzle[i]) != 0)
74             return false;
75          break;
76       }
77       case nir_type_float: {
78          if (nir_src_comp_as_float(instr->src[src].src, swizzle[i]) != 0)
79             return false;
80          break;
81       }
82       default:
83          return false;
84       }
85    }
86 
87    return true;
88 }
89 
90 static bool
all_uses_are_bcsel(const nir_alu_instr * instr)91 all_uses_are_bcsel(const nir_alu_instr *instr)
92 {
93    nir_foreach_use(use, &instr->def) {
94       if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
95          return false;
96 
97       nir_alu_instr *const alu = nir_instr_as_alu(nir_src_parent_instr(use));
98       if (alu->op != nir_op_bcsel &&
99           alu->op != nir_op_b32csel)
100          return false;
101 
102       /* Not only must the result be used by a bcsel, but it must be used as
103        * the first source (the condition).
104        */
105       if (alu->src[0].src.ssa != &instr->def)
106          return false;
107    }
108 
109    return true;
110 }
111 
112 static bool
all_uses_are_compare_with_zero(const nir_alu_instr * instr)113 all_uses_are_compare_with_zero(const nir_alu_instr *instr)
114 {
115    nir_foreach_use(use, &instr->def) {
116       if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
117          return false;
118 
119       nir_alu_instr *const alu = nir_instr_as_alu(nir_src_parent_instr(use));
120       if (!is_two_src_comparison(alu))
121          return false;
122 
123       if (!is_zero(alu, 0, 1, alu->src[0].swizzle) &&
124           !is_zero(alu, 1, 1, alu->src[1].swizzle))
125          return false;
126 
127       if (!all_uses_are_bcsel(alu))
128           return false;
129    }
130 
131    return true;
132 }
133 
134 static bool
nir_opt_rematerialize_compares_impl(nir_shader * shader,nir_function_impl * impl)135 nir_opt_rematerialize_compares_impl(nir_shader *shader, nir_function_impl *impl)
136 {
137    bool progress = false;
138 
139    nir_foreach_block(block, impl) {
140       nir_foreach_instr(instr, block) {
141          if (instr->type != nir_instr_type_alu)
142             continue;
143 
144          nir_alu_instr *const alu = nir_instr_as_alu(instr);
145          if (!is_two_src_comparison(alu))
146             continue;
147 
148          if (!all_uses_are_bcsel(alu))
149             continue;
150 
151          /* At this point it is known that alu is a comparison instruction
152           * that is only used by nir_op_bcsel and possibly by if-statements
153           * (though the latter has not been explicitly checked).
154           *
155           * Iterate through each use of the comparison.  For every use (or use
156           * by an if-statement) that is in a different block, emit a copy of
157           * the comparison.  Care must be taken here.  The original
158           * instruction must be duplicated only once in each block because CSE
159           * cannot be run after this pass.
160           */
161          nir_foreach_use_including_if_safe(use, &alu->def) {
162             if (nir_src_is_if(use)) {
163                nir_if *const if_stmt = nir_src_parent_if(use);
164 
165                nir_block *const prev_block =
166                   nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
167 
168                /* If the compare is from the previous block, don't
169                 * rematerialize.
170                 */
171                if (prev_block == alu->instr.block)
172                   continue;
173 
174                nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);
175 
176                nir_instr_insert_after_block(prev_block, &clone->instr);
177 
178                nir_src_rewrite(&if_stmt->condition, &clone->def);
179                progress = true;
180             } else {
181                nir_instr *const use_instr = nir_src_parent_instr(use);
182 
183                /* If the use is in the same block as the def, don't
184                 * rematerialize.
185                 */
186                if (use_instr->block == alu->instr.block)
187                   continue;
188 
189                nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);
190 
191                nir_instr_insert_before(use_instr, &clone->instr);
192 
193                nir_alu_instr *const use_alu = nir_instr_as_alu(use_instr);
194                for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) {
195                   if (use_alu->src[i].src.ssa == &alu->def) {
196                      nir_src_rewrite(&use_alu->src[i].src, &clone->def);
197                      progress = true;
198                   }
199                }
200             }
201          }
202       }
203    }
204 
205    if (progress) {
206       nir_metadata_preserve(impl, nir_metadata_control_flow);
207    } else {
208       nir_metadata_preserve(impl, nir_metadata_all);
209    }
210 
211    return progress;
212 }
213 
214 static bool
nir_opt_rematerialize_alu_impl(nir_shader * shader,nir_function_impl * impl)215 nir_opt_rematerialize_alu_impl(nir_shader *shader, nir_function_impl *impl)
216 {
217    bool progress = false;
218 
219    nir_foreach_block(block, impl) {
220       nir_foreach_instr(instr, block) {
221          if (instr->type != nir_instr_type_alu)
222             continue;
223 
224          nir_alu_instr *const alu = nir_instr_as_alu(instr);
225 
226          /* This list only include ALU ops that are likely to be able to have
227           * cmod propagation on Intel GPUs.
228           */
229          switch (alu->op) {
230          case nir_op_ineg:
231          case nir_op_iabs:
232          case nir_op_fneg:
233          case nir_op_fabs:
234          case nir_op_fadd:
235          case nir_op_iadd:
236          case nir_op_iadd_sat:
237          case nir_op_uadd_sat:
238          case nir_op_isub_sat:
239          case nir_op_usub_sat:
240          case nir_op_irhadd:
241          case nir_op_urhadd:
242          case nir_op_fmul:
243          case nir_op_inot:
244          case nir_op_iand:
245          case nir_op_ior:
246          case nir_op_ixor:
247          case nir_op_ffloor:
248          case nir_op_ffract:
249          case nir_op_uclz:
250          case nir_op_ishl:
251          case nir_op_ishr:
252          case nir_op_ushr:
253          case nir_op_urol:
254          case nir_op_uror:
255             break; /* ... from switch. */
256          default:
257             continue; /* ... with loop. */
258          }
259 
260          /* To help prevent increasing live ranges, require that one of the
261           * sources be a constant.
262           */
263          if (nir_op_infos[alu->op].num_inputs == 2 &&
264              !nir_src_is_const(alu->src[0].src) &&
265              !nir_src_is_const(alu->src[1].src))
266             continue;
267 
268          if (!all_uses_are_compare_with_zero(alu))
269             continue;
270 
271          /* At this point it is known that the alu is only used by a
272           * comparison with zero that is used by nir_op_bcsel and possibly by
273           * if-statements (though the latter has not been explicitly checked).
274           *
275           * Iterate through each use of the ALU.  For every use that is in a
276           * different block, emit a copy of the ALU.  Care must be taken here.
277           * The original instruction must be duplicated only once in each
278           * block because CSE cannot be run after this pass.
279           */
280          nir_foreach_use_safe(use, &alu->def) {
281             nir_instr *const use_instr = nir_src_parent_instr(use);
282 
283             /* If the use is in the same block as the def, don't
284              * rematerialize.
285              */
286             if (use_instr->block == alu->instr.block)
287                continue;
288 
289             nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);
290 
291             nir_instr_insert_before(use_instr, &clone->instr);
292 
293             nir_alu_instr *const use_alu = nir_instr_as_alu(use_instr);
294             for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) {
295                if (use_alu->src[i].src.ssa == &alu->def) {
296                   nir_src_rewrite(&use_alu->src[i].src, &clone->def);
297                   progress = true;
298                }
299             }
300          }
301       }
302    }
303 
304    if (progress) {
305       nir_metadata_preserve(impl, nir_metadata_control_flow);
306    } else {
307       nir_metadata_preserve(impl, nir_metadata_all);
308    }
309 
310    return progress;
311 }
312 
313 bool
nir_opt_rematerialize_compares(nir_shader * shader)314 nir_opt_rematerialize_compares(nir_shader *shader)
315 {
316    bool progress = false;
317 
318    nir_foreach_function_impl(impl, shader) {
319       progress = nir_opt_rematerialize_compares_impl(shader, impl) || progress;
320 
321       progress = nir_opt_rematerialize_alu_impl(shader, impl) || progress;
322    }
323 
324    return progress;
325 }
326