1 /*
2 * Copyright © 2021 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 /*
28 * This pass tries to reduce the bitsize of phi instructions by either
29 * moving narrowing conversions from the phi's consumers to the phi's
30 * sources, if all the uses of the phi are equivalent narrowing
31 * instructions. In other words, convert:
32 *
33 * vec1 32 ssa_124 = load_const (0x00000000)
34 * ...
35 * loop {
36 * ...
37 * vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
38 * vec1 16 ssa_8 = i2imp ssa_155
39 * ...
40 * vec1 32 ssa_53 = i2i32 ssa_52
41 * }
42 *
43 * into:
44 *
45 * vec1 32 ssa_124 = load_const (0x00000000)
46 * vec1 16 ssa_156 = i2imp ssa_124
47 * ...
48 * loop {
49 * ...
50 * vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
51 * ...
52 * vec1 32 ssa_53 = i2i32 ssa_52
53 * vec1 16 ssa_157 = i2i16 ssa_53
54 * }
55 *
 * Or failing that, tries to push widening conversions of phi srcs to
 * the phi def.  In this case, since load_const is frequently one
 * of the phi sources, this pass checks if the constant can be narrowed
 * without a loss of precision:
60 *
61 * vec1 32 ssa_0 = load_const (0x00000000)
62 * ...
63 * loop {
64 * ...
65 * vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
66 * ...
67 * vec1 16 ssa_18 = iadd ssa_21, ssa_3
68 * vec1 32 ssa_19 = i2i32 ssa_18
69 * }
70 *
71 * into:
72 *
73 * vec1 32 ssa_0 = load_const (0x00000000)
74 * vec1 16 ssa_22 = i2i16 ssa_0
75 * ...
76 * loop {
77 * ...
78 * vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
79 * vec1 32 ssa_23 = i2i32 ssa_8
80 * ...
81 * vec1 16 ssa_18 = iadd ssa_21, ssa_3
82 * }
83 *
 * Note that either transformation can convert x2ymp into x2y16, which
 * is normally done later in nir_opt_algebraic_late(), losing the option
 * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
 * cannot see through phis.
88 */
89
90 #define INVALID_OP nir_num_opcodes
91
92 /**
93 * Get the corresponding exact conversion for a x2ymp conversion
94 */
95 static nir_op
concrete_conversion(nir_op op)96 concrete_conversion(nir_op op)
97 {
98 switch (op) {
99 case nir_op_i2imp: return nir_op_i2i16;
100 case nir_op_i2fmp: return nir_op_i2f16;
101 case nir_op_u2fmp: return nir_op_u2f16;
102 case nir_op_f2fmp: return nir_op_f2f16;
103 case nir_op_f2imp: return nir_op_f2i16;
104 case nir_op_f2ump: return nir_op_f2u16;
105 default: return op;
106 }
107 }
108
109 static nir_op
narrowing_conversion_op(nir_instr * instr,nir_op current_op)110 narrowing_conversion_op(nir_instr *instr, nir_op current_op)
111 {
112 if (instr->type != nir_instr_type_alu)
113 return INVALID_OP;
114
115 nir_op op = nir_instr_as_alu(instr)->op;
116 switch (op) {
117 case nir_op_i2imp:
118 case nir_op_i2i16:
119 case nir_op_i2fmp:
120 case nir_op_i2f16:
121 case nir_op_u2fmp:
122 case nir_op_u2f16:
123 case nir_op_f2fmp:
124 case nir_op_f2f16:
125 case nir_op_f2imp:
126 case nir_op_f2i16:
127 case nir_op_f2ump:
128 case nir_op_f2u16:
129 case nir_op_f2f16_rtne:
130 case nir_op_f2f16_rtz:
131 break;
132 default:
133 return INVALID_OP;
134 }
135
136 /* If we've already picked a conversion op from a previous phi use,
137 * make sure it is compatible with the current use
138 */
139 if (current_op != INVALID_OP) {
140 if (current_op != op) {
141 /* If we have different conversions, but one can be converted
142 * to the other, then let's do that:
143 */
144 if (concrete_conversion(current_op) == concrete_conversion(op)) {
145 op = concrete_conversion(op);
146 } else {
147 return INVALID_OP;
148 }
149 }
150 }
151
152 return op;
153 }
154
155 static nir_op
widening_conversion_op(nir_instr * instr,unsigned * bit_size)156 widening_conversion_op(nir_instr *instr, unsigned *bit_size)
157 {
158 if (instr->type != nir_instr_type_alu)
159 return INVALID_OP;
160
161 nir_alu_instr *alu = nir_instr_as_alu(instr);
162 switch (alu->op) {
163 case nir_op_i2i32:
164 case nir_op_i2f32:
165 case nir_op_u2f32:
166 case nir_op_f2f32:
167 case nir_op_f2i32:
168 case nir_op_f2u32:
169 break;
170 default:
171 return INVALID_OP;
172 }
173
174 *bit_size = nir_src_bit_size(alu->src[0].src);
175
176 /* We also need to check that the conversion's dest was actually
177 * wider:
178 */
179 if (nir_dest_bit_size(alu->dest.dest) <= *bit_size)
180 return INVALID_OP;
181
182 return alu->op;
183 }
184
185 static nir_alu_type
op_to_type(nir_op op)186 op_to_type(nir_op op)
187 {
188 return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
189 }
190
191 /* Try to move narrowing instructions consuming the phi into the phi's
192 * sources to reduce the phi's precision:
193 */
194 static bool
try_move_narrowing_dst(nir_builder * b,nir_phi_instr * phi)195 try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
196 {
197 nir_op op = INVALID_OP;
198
199 assert(phi->dest.is_ssa);
200
201 /* If the phi has already been narrowed, nothing more to do: */
202 if (phi->dest.ssa.bit_size != 32)
203 return false;
204
205 /* Are the only uses of the phi conversion instructions, and
206 * are they all the same conversion?
207 */
208 nir_foreach_use (use, &phi->dest.ssa) {
209 op = narrowing_conversion_op(use->parent_instr, op);
210
211 /* Not a (compatible) narrowing conversion: */
212 if (op == INVALID_OP)
213 return false;
214 }
215
216 /* an if_uses means the phi is used directly in a conditional, ie.
217 * without a conversion
218 */
219 if (!list_is_empty(&phi->dest.ssa.if_uses))
220 return false;
221
222 /* If the phi has no uses, then nothing to do: */
223 if (op == INVALID_OP)
224 return false;
225
226 /* construct replacement phi instruction: */
227 nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
228 nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
229 phi->dest.ssa.num_components,
230 nir_alu_type_get_type_size(nir_op_infos[op].output_type),
231 NULL);
232
233 /* Push the conversion into the new phi sources: */
234 nir_foreach_phi_src (src, phi) {
235 assert(src->src.is_ssa);
236
237 /* insert new conversion instr in block of original phi src: */
238 b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
239 nir_ssa_def *old_src = src->src.ssa;
240 nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);
241
242 /* and add corresponding phi_src to the new_phi: */
243 nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
244 }
245
246 /* And finally rewrite the original uses of the original phi uses to
247 * directly use the new phi, skipping the conversion out of the orig
248 * phi
249 */
250 nir_foreach_use (use, &phi->dest.ssa) {
251 /* We've previously established that all the uses were alu
252 * conversion ops:
253 */
254 nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
255
256 assert(alu->dest.dest.is_ssa);
257
258 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, &new_phi->dest.ssa);
259 }
260
261 /* And finally insert the new phi after all sources are in place: */
262 b->cursor = nir_after_instr(&phi->instr);
263 nir_builder_instr_insert(b, &new_phi->instr);
264
265 return true;
266 }
267
268 static bool
can_convert_load_const(nir_load_const_instr * lc,nir_op op)269 can_convert_load_const(nir_load_const_instr *lc, nir_op op)
270 {
271 nir_alu_type type = op_to_type(op);
272
273 /* Note that we only handle phi's with bit_size == 32: */
274 assert(lc->def.bit_size == 32);
275
276 for (unsigned i = 0; i < lc->def.num_components; i++) {
277 switch (type) {
278 case nir_type_int:
279 if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
280 return false;
281 break;
282 case nir_type_uint:
283 if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
284 return false;
285 break;
286 case nir_type_float:
287 if (lc->value[i].f32 != _mesa_half_to_float(
288 _mesa_float_to_half(lc->value[i].f32)))
289 return false;
290 break;
291 default:
292 unreachable("bad type");
293 return false;
294 }
295 }
296
297 return true;
298 }
299
300 /* Check all the phi sources to see if they are the same widening op, in
301 * which case we can push the widening op to the other side of the phi
302 */
303 static nir_op
find_widening_op(nir_phi_instr * phi,unsigned * bit_size)304 find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
305 {
306 nir_op op = INVALID_OP;
307
308 bool has_load_const = false;
309 *bit_size = 0;
310
311 nir_foreach_phi_src (src, phi) {
312 assert(src->src.is_ssa);
313
314 nir_instr *instr = src->src.ssa->parent_instr;
315 if (instr->type == nir_instr_type_load_const) {
316 has_load_const = true;
317 continue;
318 }
319
320 unsigned src_bit_size;
321 nir_op src_op = widening_conversion_op(instr, &src_bit_size);
322
323 /* Not a widening conversion: */
324 if (src_op == INVALID_OP)
325 return INVALID_OP;
326
327 /* If it is a widening conversion, it needs to be the same op as
328 * other phi sources:
329 */
330 if ((op != INVALID_OP) && (op != src_op))
331 return INVALID_OP;
332
333 if (*bit_size && (*bit_size != src_bit_size))
334 return INVALID_OP;
335
336 op = src_op;
337 *bit_size = src_bit_size;
338 }
339
340 if ((op == INVALID_OP) || !has_load_const)
341 return op;
342
343 /* If we could otherwise move widening sources, but load_const is
344 * one of the phi sources (and does not have a widening conversion,
345 * but could have a narrowing->widening sequence inserted without
346 * loss of precision), then we could insert a narrowing->widening
347 * sequence to make the rest of the transformation possible:
348 */
349 nir_foreach_phi_src (src, phi) {
350 assert(src->src.is_ssa);
351
352 nir_instr *instr = src->src.ssa->parent_instr;
353 if (instr->type != nir_instr_type_load_const)
354 continue;
355
356 if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
357 return INVALID_OP;
358 }
359
360 return op;
361 }
362
363 /* Try to move widening conversions into the phi to the phi's output
364 * to reduce the phi's precision:
365 */
366 static bool
try_move_widening_src(nir_builder * b,nir_phi_instr * phi)367 try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
368 {
369 assert(phi->dest.is_ssa);
370
371 /* If the phi has already been narrowed, nothing more to do: */
372 if (phi->dest.ssa.bit_size != 32)
373 return false;
374
375 unsigned bit_size;
376 nir_op op = find_widening_op(phi, &bit_size);
377
378 if (op == INVALID_OP)
379 return false;
380
381 /* construct replacement phi instruction: */
382 nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
383 nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
384 phi->dest.ssa.num_components,
385 bit_size, NULL);
386
387 /* Remove the widening conversions from the phi sources: */
388 nir_foreach_phi_src (src, phi) {
389 assert(src->src.is_ssa);
390
391 nir_instr *instr = src->src.ssa->parent_instr;
392 nir_ssa_def *new_src;
393
394 b->cursor = nir_after_instr(instr);
395
396 if (instr->type == nir_instr_type_load_const) {
397 /* if the src is a load_const, we've already verified that it
398 * is safe to insert a narrowing conversion to make the rest
399 * of this transformation legal:
400 */
401 nir_load_const_instr *lc = nir_instr_as_load_const(instr);
402
403 if (op_to_type(op) == nir_type_float) {
404 new_src = nir_f2f16(b, &lc->def);
405 } else {
406 new_src = nir_i2i16(b, &lc->def);
407 }
408 } else {
409 /* at this point we know the sources source is a conversion: */
410 nir_alu_instr *alu = nir_instr_as_alu(instr);
411
412 /* The conversion we are stripping off could have had a swizzle,
413 * so replace it with a mov if necessary:
414 */
415 unsigned num_comp = nir_dest_num_components(alu->dest.dest);
416 new_src = nir_mov_alu(b, alu->src[0], num_comp);
417 }
418
419 /* add corresponding phi_src to the new_phi: */
420 nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
421 }
422
423 /* And insert the new phi after all sources are in place: */
424 b->cursor = nir_after_instr(&phi->instr);
425 nir_builder_instr_insert(b, &new_phi->instr);
426
427 /* And finally add back the widening conversion after the phi,
428 * and re-write the original phi's uses
429 */
430 b->cursor = nir_after_instr_and_phis(&new_phi->instr);
431 nir_ssa_def *def = nir_build_alu(b, op, &new_phi->dest.ssa, NULL, NULL, NULL);
432
433 nir_ssa_def_rewrite_uses(&phi->dest.ssa, def);
434
435 return true;
436 }
437
438 static bool
lower_phi(nir_builder * b,nir_phi_instr * phi)439 lower_phi(nir_builder *b, nir_phi_instr *phi)
440 {
441 bool progress = try_move_narrowing_dst(b, phi);
442 if (!progress)
443 progress = try_move_widening_src(b, phi);
444 return progress;
445 }
446
447 bool
nir_opt_phi_precision(nir_shader * shader)448 nir_opt_phi_precision(nir_shader *shader)
449 {
450 bool progress = false;
451
452 /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
453 unsigned bit_sizes_used = shader->info.bit_sizes_float |
454 shader->info.bit_sizes_int;
455
456 if (!bit_sizes_used) {
457 nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
458 bit_sizes_used = shader->info.bit_sizes_float |
459 shader->info.bit_sizes_int;
460 }
461
462 if (!(bit_sizes_used & (8 | 16)))
463 return false;
464
465 nir_foreach_function(function, shader) {
466 if (!function->impl)
467 continue;
468
469 nir_builder b;
470 nir_builder_init(&b, function->impl);
471
472 nir_foreach_block (block, function->impl) {
473 nir_foreach_instr_safe (instr, block) {
474 if (instr->type != nir_instr_type_phi)
475 break;
476
477 progress |= lower_phi(&b, nir_instr_as_phi(instr));
478 }
479 }
480
481 if (progress) {
482 nir_metadata_preserve(function->impl,
483 nir_metadata_block_index |
484 nir_metadata_dominance);
485 } else {
486 nir_metadata_preserve(function->impl, nir_metadata_all);
487 }
488 }
489
490 return progress;
491 }
492
493