1 /*
2 * Copyright 2022 Alyssa Rosenzweig
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "compiler/nir/nir_builder.h"
7 #include "agx_compiler.h"
8
9 /*
10 * Lower load_interpolated_input instructions with unused components of their
11 * destination, duplicating the intrinsic and shrinking to avoid the holes.
12 * load_interpolated_input becomes iter instructions, which lack a write mask.
13 */
14 static bool
pass(struct nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)15 pass(struct nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
16 {
17 if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
18 return false;
19
20 unsigned mask = nir_def_components_read(&intr->def);
21 if (mask == 0 || mask == nir_component_mask(intr->num_components))
22 return false;
23
24 b->cursor = nir_before_instr(&intr->instr);
25 unsigned bit_size = intr->def.bit_size;
26 nir_def *comps[4] = {NULL};
27
28 for (unsigned c = 0; c < intr->num_components; ++c) {
29 if (mask & BITFIELD_BIT(c)) {
30 /* Count contiguous components to combine with */
31 unsigned next_mask = mask >> c;
32 unsigned next_zero = ffs(~next_mask);
33 unsigned count = next_zero - 1;
34
35 assert(next_zero >= 2);
36 assert(count >= 1);
37
38 nir_instr *clone = nir_instr_clone(b->shader, &intr->instr);
39 nir_intrinsic_instr *clone_intr = nir_instr_as_intrinsic(clone);
40
41 /* Shrink the load to count contiguous components */
42 nir_def_init(clone, &clone_intr->def, count, bit_size);
43 nir_def *clone_vec = &clone_intr->def;
44 clone_intr->num_components = count;
45
46 /* The load starts from component c relative to the original load */
47 nir_intrinsic_set_component(clone_intr,
48 nir_intrinsic_component(intr) + c);
49
50 nir_builder_instr_insert(b, &clone_intr->instr);
51
52 /* The destination is a vector with `count` components, extract the
53 * components so we can recombine into the final vector.
54 */
55 for (unsigned d = 0; d < count; ++d)
56 comps[c + d] = nir_channel(b, clone_vec, d);
57
58 c += (count - 1);
59 } else {
60 /* The value of unused components is irrelevant, but use an undef for
61 * semantics. It will be eliminated by DCE after copyprop.
62 */
63 comps[c] = nir_undef(b, 1, bit_size);
64 }
65 }
66
67 nir_def_rewrite_uses(&intr->def, nir_vec(b, comps, intr->num_components));
68 return true;
69 }
70
71 bool
agx_nir_lower_load_mask(nir_shader * shader)72 agx_nir_lower_load_mask(nir_shader *shader)
73 {
74 return nir_shader_intrinsics_pass(
75 shader, pass, nir_metadata_block_index | nir_metadata_dominance, NULL);
76 }
77