1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 static bool
assert_ssa_def_is_not_1bit(nir_def * def,UNUSED void * unused)28 assert_ssa_def_is_not_1bit(nir_def *def, UNUSED void *unused)
29 {
30 assert(def->bit_size > 1);
31 return true;
32 }
33
34 static bool
rewrite_1bit_ssa_def_to_32bit(nir_def * def,void * _progress)35 rewrite_1bit_ssa_def_to_32bit(nir_def *def, void *_progress)
36 {
37 bool *progress = _progress;
38 if (def->bit_size == 1) {
39 def->bit_size = 32;
40 *progress = true;
41 }
42 return true;
43 }
44
45 static bool
lower_alu_instr(nir_builder * b,nir_alu_instr * alu,bool has_fcsel_ne,bool has_fcsel_gt)46 lower_alu_instr(nir_builder *b, nir_alu_instr *alu, bool has_fcsel_ne,
47 bool has_fcsel_gt)
48 {
49 const nir_op_info *op_info = &nir_op_infos[alu->op];
50
51 b->cursor = nir_before_instr(&alu->instr);
52
53 /* Replacement SSA value */
54 nir_def *rep = NULL;
55 switch (alu->op) {
56 case nir_op_mov:
57 case nir_op_vec2:
58 case nir_op_vec3:
59 case nir_op_vec4:
60 case nir_op_vec5:
61 case nir_op_vec8:
62 case nir_op_vec16:
63 if (alu->def.bit_size != 1)
64 return false;
65 /* These we expect to have booleans but the opcode doesn't change */
66 break;
67
68 case nir_op_b2f32:
69 alu->op = nir_op_mov;
70 break;
71 case nir_op_b2i32:
72 alu->op = nir_op_mov;
73 break;
74 case nir_op_b2b1:
75 alu->op = nir_op_mov;
76 break;
77
78 case nir_op_flt:
79 alu->op = nir_op_slt;
80 break;
81 case nir_op_fge:
82 alu->op = nir_op_sge;
83 break;
84 case nir_op_feq:
85 alu->op = nir_op_seq;
86 break;
87 case nir_op_fneu:
88 alu->op = nir_op_sne;
89 break;
90 case nir_op_ilt:
91 alu->op = nir_op_slt;
92 break;
93 case nir_op_ige:
94 alu->op = nir_op_sge;
95 break;
96 case nir_op_ieq:
97 alu->op = nir_op_seq;
98 break;
99 case nir_op_ine:
100 alu->op = nir_op_sne;
101 break;
102 case nir_op_ult:
103 alu->op = nir_op_slt;
104 break;
105 case nir_op_uge:
106 alu->op = nir_op_sge;
107 break;
108
109 case nir_op_ball_fequal2:
110 alu->op = nir_op_fall_equal2;
111 break;
112 case nir_op_ball_fequal3:
113 alu->op = nir_op_fall_equal3;
114 break;
115 case nir_op_ball_fequal4:
116 alu->op = nir_op_fall_equal4;
117 break;
118 case nir_op_bany_fnequal2:
119 alu->op = nir_op_fany_nequal2;
120 break;
121 case nir_op_bany_fnequal3:
122 alu->op = nir_op_fany_nequal3;
123 break;
124 case nir_op_bany_fnequal4:
125 alu->op = nir_op_fany_nequal4;
126 break;
127 case nir_op_ball_iequal2:
128 alu->op = nir_op_fall_equal2;
129 break;
130 case nir_op_ball_iequal3:
131 alu->op = nir_op_fall_equal3;
132 break;
133 case nir_op_ball_iequal4:
134 alu->op = nir_op_fall_equal4;
135 break;
136 case nir_op_bany_inequal2:
137 alu->op = nir_op_fany_nequal2;
138 break;
139 case nir_op_bany_inequal3:
140 alu->op = nir_op_fany_nequal3;
141 break;
142 case nir_op_bany_inequal4:
143 alu->op = nir_op_fany_nequal4;
144 break;
145
146 case nir_op_bcsel:
147 if (has_fcsel_gt)
148 alu->op = nir_op_fcsel_gt;
149 else if (has_fcsel_ne)
150 alu->op = nir_op_fcsel;
151 else {
152 /* Only a few pre-VS 4.0 platforms (e.g., r300 vertex shaders) should
153 * hit this path.
154 */
155 rep = nir_flrp(b,
156 nir_ssa_for_alu_src(b, alu, 2),
157 nir_ssa_for_alu_src(b, alu, 1),
158 nir_ssa_for_alu_src(b, alu, 0));
159 }
160
161 break;
162
163 case nir_op_iand:
164 alu->op = nir_op_fmul;
165 break;
166 case nir_op_ixor:
167 alu->op = nir_op_sne;
168 break;
169 case nir_op_ior:
170 alu->op = nir_op_fmax;
171 break;
172
173 case nir_op_inot:
174 rep = nir_seq(b, nir_ssa_for_alu_src(b, alu, 0),
175 nir_imm_float(b, 0));
176 break;
177
178 default:
179 assert(alu->def.bit_size > 1);
180 for (unsigned i = 0; i < op_info->num_inputs; i++)
181 assert(alu->src[i].src.ssa->bit_size > 1);
182 return false;
183 }
184
185 if (rep) {
186 /* We've emitted a replacement instruction */
187 nir_def_rewrite_uses(&alu->def, rep);
188 nir_instr_remove(&alu->instr);
189 } else {
190 if (alu->def.bit_size == 1)
191 alu->def.bit_size = 32;
192 }
193
194 return true;
195 }
196
197 static bool
lower_tex_instr(nir_tex_instr * tex)198 lower_tex_instr(nir_tex_instr *tex)
199 {
200 bool progress = false;
201 rewrite_1bit_ssa_def_to_32bit(&tex->def, &progress);
202 if (tex->dest_type == nir_type_bool1) {
203 tex->dest_type = nir_type_bool32;
204 progress = true;
205 }
206 return progress;
207 }
208
/* Per-pass options handed to the instruction callback through cb_data. */
struct lower_bool_to_float_data {
   /* bcsel is lowered to nir_op_fcsel (used only if has_fcsel_gt is false) */
   bool has_fcsel_ne;

   /* bcsel is lowered to nir_op_fcsel_gt; checked before has_fcsel_ne */
   bool has_fcsel_gt;
};
213
214 static bool
nir_lower_bool_to_float_instr(nir_builder * b,nir_instr * instr,void * cb_data)215 nir_lower_bool_to_float_instr(nir_builder *b,
216 nir_instr *instr,
217 void *cb_data)
218 {
219 struct lower_bool_to_float_data *data = cb_data;
220
221 switch (instr->type) {
222 case nir_instr_type_alu:
223 return lower_alu_instr(b, nir_instr_as_alu(instr),
224 data->has_fcsel_ne, data->has_fcsel_gt);
225
226 case nir_instr_type_load_const: {
227 nir_load_const_instr *load = nir_instr_as_load_const(instr);
228 if (load->def.bit_size == 1) {
229 nir_const_value *value = load->value;
230 for (unsigned i = 0; i < load->def.num_components; i++)
231 load->value[i].f32 = value[i].b ? 1.0 : 0.0;
232 load->def.bit_size = 32;
233 return true;
234 }
235 return false;
236 }
237
238 case nir_instr_type_intrinsic:
239 case nir_instr_type_undef:
240 case nir_instr_type_phi: {
241 bool progress = false;
242 nir_foreach_def(instr, rewrite_1bit_ssa_def_to_32bit, &progress);
243 return progress;
244 }
245
246 case nir_instr_type_tex:
247 return lower_tex_instr(nir_instr_as_tex(instr));
248
249 default:
250 nir_foreach_def(instr, assert_ssa_def_is_not_1bit, NULL);
251 return false;
252 }
253 }
254
255 bool
nir_lower_bool_to_float(nir_shader * shader,bool has_fcsel_ne)256 nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne)
257 {
258 struct lower_bool_to_float_data data = {
259 .has_fcsel_ne = has_fcsel_ne,
260 .has_fcsel_gt = shader->options->has_fused_comp_and_csel
261 };
262
263 return nir_shader_instructions_pass(shader, nir_lower_bool_to_float_instr,
264 nir_metadata_block_index |
265 nir_metadata_dominance,
266 &data);
267 }
268