1 /*
2 * Copyright © 2022 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_builder.h"
25 #include "nir_search_helpers.h"
26
27 /**
28 * Attempt to reassociate and optimize some combinations of bfi instructions.
29 *
30 * Many shaders will use a sequence of bfi instructions to build complex
31 * bitfields. Sequences in real shaders look like:
32 *
33 * bfi(A, B, bfi(C, D, 0))
34 *
35 * Let A and C be constants,
36 *
37 * (A & (B << find_lsb(A)) | (~A & bfi(C, D, 0))
38 *
39 * If find_lsb(A) = 0 (i.e., A is odd), then
40 *
41 * (A & B) | (~A & bfi(C, D, 0))
42 * (A & B) | (~A & ((D << find_lsb(C)) & C) | (0 & ~C))
43 * (A & B) | (~A & ((D << find_lsb(C)) & C))
44 * (A & B) | ((D << find_lsb(C)) & (~A & C))
45 *
46 * If A and C are completely disjoint, that is (A & C) == 0, then (~A & C) == C
47 *
48 * (A & B) | ((D << find_lsb(C)) & C)
49 *
50 * If A and C are completely disjoint, then A & ~C == A
51 *
52 * (~C & (A & B)) | ((D << find_lsb(C)) & C)
53 * bfi(C, D, A & B)
54 *
55 * On some architectures, bfi instructions cannot use immediate values as
56 * sources, so the constants A, C, and 0 would have to be loaded into
57 * registers. After reassociation, only C must be loaded into a register.
58 * This saves instructions and register pressure.
59 *
60 * Ideally this would be implemented as an algebraic optimization, but there
61 * is no way to enforce the requirement that (A & C) == 0.
62 */
63
64 static bool
nir_opt_reassociate_bfi_instr(nir_builder * b,nir_instr * instr,UNUSED void * cb_data)65 nir_opt_reassociate_bfi_instr(nir_builder *b,
66 nir_instr *instr,
67 UNUSED void *cb_data)
68 {
69 if (instr->type != nir_instr_type_alu)
70 return false;
71
72 nir_alu_instr *bfiCD0 = nir_instr_as_alu(instr);
73 if (bfiCD0->op != nir_op_bfi || bfiCD0->def.num_components != 1)
74 return false;
75
76 /* Enforce the bfi('#c', d, 0) part of the pattern. */
77 if (!nir_src_is_const(bfiCD0->src[0].src) ||
78 !nir_src_is_const(bfiCD0->src[2].src) ||
79 nir_src_comp_as_uint(bfiCD0->src[2].src,
80 bfiCD0->src[2].swizzle[0]) != 0) {
81 return false;
82 }
83
84 const uint64_t C = nir_src_comp_as_uint(bfiCD0->src[0].src,
85 bfiCD0->src[0].swizzle[0]);
86
87 if (!is_used_once(bfiCD0))
88 return false;
89
90 nir_src *use = list_first_entry(&bfiCD0->def.uses,
91 nir_src, use_link);
92
93 if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
94 return false;
95
96 nir_alu_instr *bfiABx = nir_instr_as_alu(nir_src_parent_instr(use));
97 if (bfiABx->op != nir_op_bfi || bfiABx->def.num_components != 1)
98 return false;
99
100 /* Enforce the bfi('#a', b, ...) part of the pattern. */
101 if (!nir_src_is_const(bfiABx->src[0].src) ||
102 bfiABx->src[2].src.ssa != &bfiCD0->def) {
103 return false;
104 }
105
106 const uint64_t A = nir_src_comp_as_uint(bfiABx->src[0].src,
107 bfiABx->src[0].swizzle[0]);
108
109 /* Enforce the find_lsb(A) == 0 part of the pattern. */
110 if ((A & 1) == 0)
111 return false;
112
113 /* Enforce the "A and C are disjoint" part of the pattern. */
114 if ((A & C) != 0)
115 return false;
116
117 /* Now emit the new instructions. */
118 b->cursor = nir_before_instr(&bfiABx->instr);
119
120 /* The extra nir_mov_alu are to handle swizzles that might be on the
121 * original sources.
122 */
123 nir_def *new_bfi = nir_bfi(b,
124 nir_mov_alu(b, bfiCD0->src[0], 1),
125 nir_mov_alu(b, bfiCD0->src[1], 1),
126 nir_iand(b,
127 nir_mov_alu(b, bfiABx->src[0], 1),
128 nir_mov_alu(b, bfiABx->src[1], 1)));
129
130 nir_def_rewrite_uses(&bfiABx->def, new_bfi);
131 return true;
132 }
133
134 bool
nir_opt_reassociate_bfi(nir_shader * shader)135 nir_opt_reassociate_bfi(nir_shader *shader)
136 {
137 bool progress = nir_shader_instructions_pass(shader,
138 nir_opt_reassociate_bfi_instr,
139 nir_metadata_block_index |
140 nir_metadata_dominance,
141 NULL);
142
143 return progress;
144 }
145