1 /*
2 * Copyright Pavel Ondračka <pavel.ondracka@gmail.com>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <stdbool.h>
7 #include "nir_builder.h"
8 #include "r300_nir.h"
9
10 static int
follow_modifiers(nir_instr * instr)11 follow_modifiers(nir_instr *instr)
12 {
13 /* We don't have texturing so the only other options besides alus are
14 * just load input, load ubo or phi. We can copy propagate the first two
15 * in most cases. The cases when the copy propagate is not guaranteed
16 * to work is with indirect ubo load and in the presence of control flow.
17 * So just be safe and count this as a separate tmp.
18 */
19 if (instr->type == nir_instr_type_intrinsic) {
20 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
21 /* It should be enough to check if any of the uses is in phi. */
22 if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 ||
23 intrin->intrinsic == nir_intrinsic_load_constant ||
24 intrin->intrinsic == nir_intrinsic_load_input) {
25 nir_foreach_use (use, &intrin->def) {
26 if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
27 return intrin->def.index;
28 }
29 }
30 if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 && !nir_src_is_const(intrin->src[1]))
31 return intrin->def.index;
32 }
33 /* Assume the worst when we see a phi. */
34 if (instr->type == nir_instr_type_phi)
35 return nir_instr_as_phi(instr)->def.index;
36
37 if (instr->type != nir_instr_type_alu)
38 return -1;
39
40 nir_alu_instr *alu = nir_instr_as_alu(instr);
41
42 if (alu->op == nir_op_fneg || alu->op == nir_op_fabs) {
43 return follow_modifiers(alu->src[0].src.ssa->parent_instr);
44 }
45 return alu->def.index;
46 }
47
48 static bool
has_three_different_tmp_sources(nir_alu_instr * fcsel)49 has_three_different_tmp_sources(nir_alu_instr *fcsel)
50 {
51 unsigned src_def_index[3];
52 for (unsigned i = 0; i < 3; i++) {
53 int index = follow_modifiers(fcsel->src[i].src.ssa->parent_instr);
54 if (index == -1)
55 return false;
56 else
57 src_def_index[i] = index;
58 }
59 return src_def_index[0] != src_def_index[1] && src_def_index[0] != src_def_index[2] &&
60 src_def_index[1] != src_def_index[2];
61 }
62
63 static bool
is_comparison(nir_instr * instr)64 is_comparison(nir_instr *instr)
65 {
66 if (instr->type != nir_instr_type_alu)
67 return false;
68
69 nir_alu_instr *alu = nir_instr_as_alu(instr);
70
71 switch (alu->op) {
72 case nir_op_sge:
73 case nir_op_slt:
74 case nir_op_seq:
75 case nir_op_sne:
76 return true;
77 default:
78 return false;
79 }
80 }
81
82 static bool
r300_nir_lower_fcsel_instr(nir_builder * b,nir_alu_instr * alu,void * data)83 r300_nir_lower_fcsel_instr(nir_builder *b, nir_alu_instr *alu, void *data)
84 {
85 if (alu->op != nir_op_fcsel && alu->op != nir_op_fcsel_ge && alu->op != nir_op_fcsel_gt)
86 return false;
87
88 if (has_three_different_tmp_sources(alu)) {
89 nir_def *lrp;
90 b->cursor = nir_before_instr(&alu->instr);
91 /* Lower to LRP.
92 * At this point there are no fcsels as all bcsels were converted to
93 * fcsel_gt by nir_lower_bool_to_float, however we can save on the slt
94 * even for nir_op_fcsel_gt if the source is 0 or 1 anyway.
95 */
96 nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr;
97 if (alu->op == nir_op_fcsel || (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) {
98 lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1),
99 nir_ssa_for_alu_src(b, alu, 0));
100 } else if (alu->op == nir_op_fcsel_ge) {
101 nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), nir_imm_float(b, 0.0));
102 lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), sge);
103 } else {
104 nir_def *slt =
105 nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)), nir_imm_float(b, 0.0));
106 lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), slt);
107 }
108
109 nir_def_replace(&alu->def, lrp);
110 return true;
111 }
112 return false;
113 }
114
115 bool
r300_nir_lower_fcsel_r500(nir_shader * shader)116 r300_nir_lower_fcsel_r500(nir_shader *shader)
117 {
118 return nir_shader_alu_pass(shader, r300_nir_lower_fcsel_instr, nir_metadata_control_flow, NULL);
119 }
120