• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright Pavel Ondračka <pavel.ondracka@gmail.com>
 * SPDX-License-Identifier: MIT
 */
5 
6 #include <stdbool.h>
7 #include "nir_builder.h"
8 #include "r300_nir.h"
9 
10 static int
follow_modifiers(nir_instr * instr)11 follow_modifiers(nir_instr *instr)
12 {
13    /* We don't have texturing so the only other options besides alus are
14     * just load input, load ubo or phi. We can copy propagate the first two
15     * in most cases. The cases when the copy propagate is not guaranteed
16     * to work is with indirect ubo load and in the presence of control flow.
17     * So just be safe and count this as a separate tmp.
18     */
19    if (instr->type == nir_instr_type_intrinsic) {
20       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
21       /* It should be enough to check if any of the uses is in phi. */
22       if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 ||
23           intrin->intrinsic == nir_intrinsic_load_constant ||
24           intrin->intrinsic == nir_intrinsic_load_input) {
25          nir_foreach_use (use, &intrin->def) {
26             if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
27                return intrin->def.index;
28          }
29       }
30       if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 && !nir_src_is_const(intrin->src[1]))
31          return intrin->def.index;
32    }
33    /* Assume the worst when we see a phi. */
34    if (instr->type == nir_instr_type_phi)
35       return nir_instr_as_phi(instr)->def.index;
36 
37    if (instr->type != nir_instr_type_alu)
38       return -1;
39 
40    nir_alu_instr *alu = nir_instr_as_alu(instr);
41 
42    if (alu->op == nir_op_fneg || alu->op == nir_op_fabs) {
43       return follow_modifiers(alu->src[0].src.ssa->parent_instr);
44    }
45    return alu->def.index;
46 }
47 
48 static bool
has_three_different_tmp_sources(nir_alu_instr * fcsel)49 has_three_different_tmp_sources(nir_alu_instr *fcsel)
50 {
51    unsigned src_def_index[3];
52    for (unsigned i = 0; i < 3; i++) {
53       int index = follow_modifiers(fcsel->src[i].src.ssa->parent_instr);
54       if (index == -1)
55          return false;
56       else
57          src_def_index[i] = index;
58    }
59    return src_def_index[0] != src_def_index[1] && src_def_index[0] != src_def_index[2] &&
60           src_def_index[1] != src_def_index[2];
61 }
62 
63 static bool
is_comparison(nir_instr * instr)64 is_comparison(nir_instr *instr)
65 {
66    if (instr->type != nir_instr_type_alu)
67       return false;
68 
69    nir_alu_instr *alu = nir_instr_as_alu(instr);
70 
71    switch (alu->op) {
72    case nir_op_sge:
73    case nir_op_slt:
74    case nir_op_seq:
75    case nir_op_sne:
76       return true;
77    default:
78       return false;
79    }
80 }
81 
82 static bool
r300_nir_lower_fcsel_instr(nir_builder * b,nir_alu_instr * alu,void * data)83 r300_nir_lower_fcsel_instr(nir_builder *b, nir_alu_instr *alu, void *data)
84 {
85    if (alu->op != nir_op_fcsel && alu->op != nir_op_fcsel_ge && alu->op != nir_op_fcsel_gt)
86       return false;
87 
88    if (has_three_different_tmp_sources(alu)) {
89       nir_def *lrp;
90       b->cursor = nir_before_instr(&alu->instr);
91       /* Lower to LRP.
92        * At this point there are no fcsels as all bcsels were converted to
93        * fcsel_gt by nir_lower_bool_to_float, however we can save on the slt
94        * even for nir_op_fcsel_gt if the source is 0 or 1 anyway.
95        */
96       nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr;
97       if (alu->op == nir_op_fcsel || (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) {
98          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1),
99                         nir_ssa_for_alu_src(b, alu, 0));
100       } else if (alu->op == nir_op_fcsel_ge) {
101          nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), nir_imm_float(b, 0.0));
102          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), sge);
103       } else {
104          nir_def *slt =
105             nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)), nir_imm_float(b, 0.0));
106          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), slt);
107       }
108 
109       nir_def_replace(&alu->def, lrp);
110       return true;
111    }
112    return false;
113 }
114 
115 bool
r300_nir_lower_fcsel_r500(nir_shader * shader)116 r300_nir_lower_fcsel_r500(nir_shader *shader)
117 {
118    return nir_shader_alu_pass(shader, r300_nir_lower_fcsel_instr, nir_metadata_control_flow, NULL);
119 }
120