• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdbool.h>
2 #include "r300_nir.h"
3 #include "nir_builder.h"
4 
5 static int
follow_modifiers(nir_instr * instr)6 follow_modifiers(nir_instr *instr)
7 {
8    /* We don't have texturing so the only other options besides alus are
9     * just load input, load ubo or phi. We can copy propagate the first two
10     * in most cases. The cases when the copy propagate is not guaranteed
11     * to work is with indirect ubo load and in the presence of control flow.
12     * So just be safe and count this as a separate tmp.
13     */
14    if (instr->type == nir_instr_type_intrinsic) {
15       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
16       /* It should be enough to check if any of the uses is in phi. */
17       if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 ||
18           intrin->intrinsic == nir_intrinsic_load_constant ||
19           intrin->intrinsic == nir_intrinsic_load_input) {
20           nir_foreach_use(use, &intrin->def) {
21               if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
22                  return intrin->def.index;
23           }
24       }
25       if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 &&
26           !nir_src_is_const(intrin->src[1]))
27       return intrin->def.index;
28    }
29    /* Assume the worst when we see a phi. */
30    if (instr->type == nir_instr_type_phi)
31       return nir_instr_as_phi(instr)->def.index;
32 
33    if (instr->type != nir_instr_type_alu)
34       return -1;
35 
36    nir_alu_instr *alu = nir_instr_as_alu(instr);
37 
38    if (alu->op == nir_op_fneg || alu->op == nir_op_fabs) {
39       return follow_modifiers(alu->src[0].src.ssa->parent_instr);
40    }
41    return alu->def.index;
42 }
43 
44 static bool
has_three_different_tmp_sources(nir_alu_instr * fcsel)45 has_three_different_tmp_sources(nir_alu_instr *fcsel)
46 {
47    unsigned src_def_index[3];
48    for (unsigned i = 0; i < 3; i++) {
49       int index = follow_modifiers(fcsel->src[i].src.ssa->parent_instr);
50       if (index == -1)
51          return false;
52       else
53 	 src_def_index[i] = index;
54    }
55    return src_def_index[0] != src_def_index[1] &&
56           src_def_index[0] != src_def_index[2] &&
57           src_def_index[1] != src_def_index[2];
58 }
59 
60 static bool
is_comparison(nir_instr * instr)61 is_comparison(nir_instr *instr)
62 {
63    if (instr->type != nir_instr_type_alu)
64       return false;
65 
66    nir_alu_instr *alu = nir_instr_as_alu(instr);
67 
68    switch (alu->op) {
69    case nir_op_sge:
70    case nir_op_slt:
71    case nir_op_seq:
72    case nir_op_sne:
73       return true;
74    default:
75       return false;
76    }
77 }
78 
79 static bool
r300_nir_lower_fcsel_instr(nir_builder * b,nir_instr * instr,void * data)80 r300_nir_lower_fcsel_instr(nir_builder *b, nir_instr *instr, void *data)
81 {
82    if (instr->type != nir_instr_type_alu)
83       return false;
84 
85    nir_alu_instr *alu = nir_instr_as_alu(instr);
86 
87    if (alu->op != nir_op_fcsel && alu->op != nir_op_fcsel_ge && alu->op != nir_op_fcsel_gt)
88       return false;
89 
90    if (has_three_different_tmp_sources(alu)) {
91       nir_def *lrp;
92       b->cursor = nir_before_instr(&alu->instr);
93       /* Lower to LRP.
94        * At this point there are no fcsels as all bcsels were converted to
95        * fcsel_gt by nir_lower_bool_to_float, however we can save on the slt
96        * even for nir_op_fcsel_gt if the source is 0 or 1 anyway.
97        */
98       nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr;
99       if (alu->op == nir_op_fcsel ||
100           (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) {
101          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
102                         nir_ssa_for_alu_src(b, alu, 1),
103                         nir_ssa_for_alu_src(b, alu, 0));
104       } else if (alu->op == nir_op_fcsel_ge) {
105          nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), nir_imm_float(b, 0.0));
106          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
107                         nir_ssa_for_alu_src(b, alu, 1), sge);
108       } else {
109          nir_def *slt = nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)),
110                                 nir_imm_float(b, 0.0));
111          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
112                         nir_ssa_for_alu_src(b, alu, 1), slt);
113       }
114 
115       nir_def_rewrite_uses(&alu->def, lrp);
116       nir_instr_remove(&alu->instr);
117       return true;
118    }
119    return false;
120 }
121 
122 bool
r300_nir_lower_fcsel_r500(nir_shader * shader)123 r300_nir_lower_fcsel_r500(nir_shader *shader)
124 {
125    bool progress = nir_shader_instructions_pass(shader,
126                                                 r300_nir_lower_fcsel_instr,
127                                                 nir_metadata_block_index |
128                                                    nir_metadata_dominance,
129                                                 NULL);
130    return progress;
131 }
132