1 /*
2 * Copyright © 2021 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 /*
28 * This pass tries to reduce the bitsize of phi instructions by either
29 * moving narrowing conversions from the phi's consumers to the phi's
30 * sources, if all the uses of the phi are equivalent narrowing
31 * instructions. In other words, convert:
32 *
33 * vec1 32 ssa_124 = load_const (0x00000000)
34 * ...
35 * loop {
36 * ...
37 * vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
38 * vec1 16 ssa_8 = i2imp ssa_155
39 * ...
40 * vec1 32 ssa_53 = i2i32 ssa_52
41 * }
42 *
43 * into:
44 *
45 * vec1 32 ssa_124 = load_const (0x00000000)
46 * vec1 16 ssa_156 = i2imp ssa_124
47 * ...
48 * loop {
49 * ...
50 * vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
51 * ...
52 * vec1 32 ssa_53 = i2i32 ssa_52
53 * vec1 16 ssa_157 = i2i16 ssa_53
54 * }
55 *
 * Or failing that, tries to push a widening conversion of phi srcs to
 * the phi def.  In this case, since load_const is frequently one
 * of the phi sources, this pass checks if it can be narrowed without a
 * loss of precision:
60 *
61 * vec1 32 ssa_0 = load_const (0x00000000)
62 * ...
63 * loop {
64 * ...
65 * vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
66 * ...
67 * vec1 16 ssa_18 = iadd ssa_21, ssa_3
68 * vec1 32 ssa_19 = i2i32 ssa_18
69 * }
70 *
71 * into:
72 *
73 * vec1 32 ssa_0 = load_const (0x00000000)
74 * vec1 16 ssa_22 = i2i16 ssa_0
75 * ...
76 * loop {
77 * ...
78 * vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
79 * vec1 32 ssa_23 = i2i32 ssa_8
80 * ...
81 * vec1 16 ssa_18 = iadd ssa_21, ssa_3
82 * }
83 *
 * Note that either transformation can convert x2ymp into x2y16, which
 * is normally done later in nir_opt_algebraic_late(), losing the option
 * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
 * cannot see through phis.
88 */
89
90 #define INVALID_OP nir_num_opcodes
91
92 /**
93 * Get the corresponding exact conversion for a x2ymp conversion
94 */
95 static nir_op
concrete_conversion(nir_op op)96 concrete_conversion(nir_op op)
97 {
98 switch (op) {
99 case nir_op_i2imp: return nir_op_i2i16;
100 case nir_op_i2fmp: return nir_op_i2f16;
101 case nir_op_u2fmp: return nir_op_u2f16;
102 case nir_op_f2fmp: return nir_op_f2f16;
103 case nir_op_f2imp: return nir_op_f2i16;
104 case nir_op_f2ump: return nir_op_f2u16;
105 default: return op;
106 }
107 }
108
109 static nir_op
narrowing_conversion_op(nir_instr * instr,nir_op current_op)110 narrowing_conversion_op(nir_instr *instr, nir_op current_op)
111 {
112 if (instr->type != nir_instr_type_alu)
113 return INVALID_OP;
114
115 nir_op op = nir_instr_as_alu(instr)->op;
116 switch (op) {
117 case nir_op_i2imp:
118 case nir_op_i2i16:
119 case nir_op_i2fmp:
120 case nir_op_i2f16:
121 case nir_op_u2fmp:
122 case nir_op_u2f16:
123 case nir_op_f2fmp:
124 case nir_op_f2f16:
125 case nir_op_f2imp:
126 case nir_op_f2i16:
127 case nir_op_f2ump:
128 case nir_op_f2u16:
129 case nir_op_f2f16_rtne:
130 case nir_op_f2f16_rtz:
131 break;
132 default:
133 return INVALID_OP;
134 }
135
136 /* If we've already picked a conversion op from a previous phi use,
137 * make sure it is compatible with the current use
138 */
139 if (current_op != INVALID_OP) {
140 if (current_op != op) {
141 /* If we have different conversions, but one can be converted
142 * to the other, then let's do that:
143 */
144 if (concrete_conversion(current_op) == concrete_conversion(op)) {
145 op = concrete_conversion(op);
146 } else {
147 return INVALID_OP;
148 }
149 }
150 }
151
152 return op;
153 }
154
155 static nir_op
widening_conversion_op(nir_instr * instr,unsigned * bit_size)156 widening_conversion_op(nir_instr *instr, unsigned *bit_size)
157 {
158 if (instr->type != nir_instr_type_alu)
159 return INVALID_OP;
160
161 nir_alu_instr *alu = nir_instr_as_alu(instr);
162 switch (alu->op) {
163 case nir_op_i2i32:
164 case nir_op_i2f32:
165 case nir_op_u2f32:
166 case nir_op_f2f32:
167 case nir_op_f2i32:
168 case nir_op_f2u32:
169 break;
170 default:
171 return INVALID_OP;
172 }
173
174 *bit_size = nir_src_bit_size(alu->src[0].src);
175
176 /* We also need to check that the conversion's dest was actually
177 * wider:
178 */
179 if (nir_dest_bit_size(alu->dest.dest) <= *bit_size)
180 return INVALID_OP;
181
182 return alu->op;
183 }
184
185 static nir_alu_type
op_to_type(nir_op op)186 op_to_type(nir_op op)
187 {
188 return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
189 }
190
/* Try to move narrowing instructions consuming the phi into the phi's
 * sources to reduce the phi's precision:
 *
 * Returns true (and rewrites the IR) if all uses of the phi were the
 * same (or compatible) narrowing conversion; the original phi and its
 * converts are left behind as dead/trivial code for later cleanup.
 */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same conversion?
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      op = narrowing_conversion_op(use->parent_instr, op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* an if_uses means the phi is used directly in a conditional, ie.
    * without a conversion
    */
   if (!list_is_empty(&phi->dest.ssa.if_uses))
      return false;

   /* If the phi has no uses, then nothing to do: */
   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction with the narrow bit size
    * implied by the conversion op's output type:
    */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     nir_alu_type_get_type_size(nir_op_infos[op].output_type),
                     NULL);

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      /* insert new conversion instr in block of original phi src,
       * after any phis at the top of that block:
       */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_ssa_def *old_src = src->src.ssa;
      nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* and add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And finally rewrite the original uses of the original phi uses to
    * directly use the new phi, skipping the conversion out of the orig
    * phi
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      /* We've previously established that all the uses were alu
       * conversion ops.  Turn them into movs instead.
       */
      nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
      alu->op = nir_op_mov;
   }
   /* The rewrite below re-points those movs' srcs at the new (narrow)
    * phi, so they become trivial copies -- presumably cleaned up by a
    * later copy-propagation pass:
    */
   nir_ssa_def_rewrite_uses(&phi->dest.ssa, &new_phi->dest.ssa);

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}
265
266 static bool
can_convert_load_const(nir_load_const_instr * lc,nir_op op)267 can_convert_load_const(nir_load_const_instr *lc, nir_op op)
268 {
269 nir_alu_type type = op_to_type(op);
270
271 /* Note that we only handle phi's with bit_size == 32: */
272 assert(lc->def.bit_size == 32);
273
274 for (unsigned i = 0; i < lc->def.num_components; i++) {
275 switch (type) {
276 case nir_type_int:
277 if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
278 return false;
279 break;
280 case nir_type_uint:
281 if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
282 return false;
283 break;
284 case nir_type_float:
285 if (lc->value[i].f32 != _mesa_half_to_float(
286 _mesa_float_to_half(lc->value[i].f32)))
287 return false;
288 break;
289 default:
290 unreachable("bad type");
291 return false;
292 }
293 }
294
295 return true;
296 }
297
/* Check all the phi sources to see if they are the same widening op, in
 * which case we can push the widening op to the other side of the phi
 *
 * Returns the common widening opcode (or INVALID_OP) and stores the
 * common narrow source bit size in *bit_size.
 */
static nir_op
find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
{
   nir_op op = INVALID_OP;

   bool has_load_const = false;
   *bit_size = 0;

   /* First pass: every non-load_const src must be the same widening
    * conversion from the same narrow bit size:
    */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type == nir_instr_type_load_const) {
         /* Defer load_const srcs to the second pass below: */
         has_load_const = true;
         continue;
      }

      unsigned src_bit_size;
      nir_op src_op = widening_conversion_op(instr, &src_bit_size);

      /* Not a widening conversion: */
      if (src_op == INVALID_OP)
         return INVALID_OP;

      /* If it is a widening conversion, it needs to be the same op as
       * other phi sources:
       */
      if ((op != INVALID_OP) && (op != src_op))
         return INVALID_OP;

      if (*bit_size && (*bit_size != src_bit_size))
         return INVALID_OP;

      op = src_op;
      *bit_size = src_bit_size;
   }

   /* If there were no widening conversions at all (op still invalid),
    * or no load_const srcs to validate, we are done:
    */
   if ((op == INVALID_OP) || !has_load_const)
      return op;

   /* If we could otherwise move widening sources, but load_const is
    * one of the phi sources (and does not have a widening conversion,
    * but could have a narrowing->widening sequence inserted without
    * loss of precision), then we could insert a narrowing->widening
    * sequence to make the rest of the transformation possible:
    */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type != nir_instr_type_load_const)
         continue;

      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
         return INVALID_OP;
   }

   return op;
}
360
/* Try to move widening conversions on the phi's srcs to the phi's
 * output to reduce the phi's precision:
 *
 * Returns true (and rewrites the IR) if all phi srcs were the same
 * widening conversion (with load_const srcs handled by inserting a
 * lossless narrowing conversion).
 */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction at the narrow bit size: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     bit_size, NULL);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      nir_ssa_def *new_src;

      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* if the src is a load_const, we've already verified that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* at this point we know the sources source is a conversion: */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = nir_dest_num_components(alu->dest.dest);
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi
    * (and after any other phis in the block), and re-write the
    * original phi's uses
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_ssa_def *def = nir_build_alu(b, op, &new_phi->dest.ssa, NULL, NULL, NULL);

   nir_ssa_def_rewrite_uses(&phi->dest.ssa, def);

   return true;
}
435
436 static bool
lower_phi(nir_builder * b,nir_phi_instr * phi)437 lower_phi(nir_builder *b, nir_phi_instr *phi)
438 {
439 bool progress = try_move_narrowing_dst(b, phi);
440 if (!progress)
441 progress = try_move_widening_src(b, phi);
442 return progress;
443 }
444
445 bool
nir_opt_phi_precision(nir_shader * shader)446 nir_opt_phi_precision(nir_shader *shader)
447 {
448 bool progress = false;
449
450 /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
451 unsigned bit_sizes_used = shader->info.bit_sizes_float |
452 shader->info.bit_sizes_int;
453
454 if (!bit_sizes_used) {
455 nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
456 bit_sizes_used = shader->info.bit_sizes_float |
457 shader->info.bit_sizes_int;
458 }
459
460 if (!(bit_sizes_used & (8 | 16)))
461 return false;
462
463 nir_foreach_function(function, shader) {
464 if (!function->impl)
465 continue;
466
467 nir_builder b;
468 nir_builder_init(&b, function->impl);
469
470 nir_foreach_block (block, function->impl) {
471 nir_foreach_instr_safe (instr, block) {
472 if (instr->type != nir_instr_type_phi)
473 break;
474
475 progress |= lower_phi(&b, nir_instr_as_phi(instr));
476 }
477 }
478
479 if (progress) {
480 nir_metadata_preserve(function->impl,
481 nir_metadata_block_index |
482 nir_metadata_dominance);
483 } else {
484 nir_metadata_preserve(function->impl, nir_metadata_all);
485 }
486 }
487
488 return progress;
489 }
490
491