1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "compiler.h"
28
29 /* Does an instruction respect outmods and source mods? Depend
30 * on the types involved */
31
32 bool
bi_has_outmod(bi_instruction * ins)33 bi_has_outmod(bi_instruction *ins)
34 {
35 bool classy = bi_class_props[ins->type] & BI_MODS;
36 bool floaty = nir_alu_type_get_base_type(ins->dest_type) == nir_type_float;
37
38 return classy && floaty;
39 }
40
41 /* Have to check source for e.g. compares */
42
43 bool
bi_has_source_mods(bi_instruction * ins)44 bi_has_source_mods(bi_instruction *ins)
45 {
46 bool classy = bi_class_props[ins->type] & BI_MODS;
47 bool floaty = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_float;
48
49 return classy && floaty;
50 }
51
52 /* A source is swizzled if the op is swizzlable, in 8-bit or
53 * 16-bit mode, and the swizzled op. TODO: multi args */
54
55 bool
bi_is_src_swizzled(bi_instruction * ins,unsigned s)56 bi_is_src_swizzled(bi_instruction *ins, unsigned s)
57 {
58 bool classy = bi_class_props[ins->type] & BI_SWIZZLABLE;
59 bool small = nir_alu_type_get_type_size(ins->dest_type) < 32;
60 bool first = (s == 0); /* TODO: prop? */
61
62 return classy && small && first;
63 }
64
65 bool
bi_has_arg(bi_instruction * ins,unsigned arg)66 bi_has_arg(bi_instruction *ins, unsigned arg)
67 {
68 if (!ins)
69 return false;
70
71 bi_foreach_src(ins, s) {
72 if (ins->src[s] == arg)
73 return true;
74 }
75
76 return false;
77 }
78
/* Collapses a 16-bit per-byte mask into a mask with one bit per group of
 * `bytes` consecutive bytes: bit d of the result is set iff byte (d * bytes)
 * of the bytemask is set. Every byte within a group is asserted to agree
 * with the first byte of that group. `bytes` must be nonzero. */

uint16_t
bi_from_bytemask(uint16_t bytemask, unsigned bytes)
{
        unsigned value = 0;

        /* A zero stride would never advance the loop below */
        assert(bytes > 0);

        for (unsigned c = 0, d = 0; c < 16; c += bytes, ++d) {
                bool a = (bytemask & (1u << c)) != 0;

                /* Validate the whole group [c, c + bytes), clamped to the 16
                 * bytes the mask can describe. The bound must be relative to
                 * c; bounding by `bytes` alone would only ever check the
                 * first group. */
                unsigned end = (c + bytes < 16) ? (c + bytes) : 16;

                for (unsigned q = c + 1; q < end; ++q)
                        assert(((bytemask & (1u << q)) != 0) == a);

                value |= ((unsigned) a << d);
        }

        return value;
}
95
96 unsigned
bi_get_component_count(bi_instruction * ins,signed src)97 bi_get_component_count(bi_instruction *ins, signed src)
98 {
99 /* Discards and branches are oddball since they're not BI_VECTOR but no
100 * destination. So special case.. */
101 if (ins->type == BI_DISCARD || ins->type == BI_BRANCH)
102 return 1;
103
104 if (bi_class_props[ins->type] & BI_VECTOR) {
105 assert(ins->vector_channels);
106 return (src <= 0) ? ins->vector_channels : 1;
107 } else {
108 unsigned dest_bytes = nir_alu_type_get_type_size(ins->dest_type);
109 unsigned src_bytes = nir_alu_type_get_type_size(ins->src_types[MAX2(src, 0)]);
110
111 /* If there's either f32 on either end, it's only a single
112 * component, etc. */
113
114 unsigned bytes = src < 0 ? dest_bytes : src_bytes;
115
116 if (ins->type == BI_CONVERT)
117 bytes = MAX2(dest_bytes, src_bytes);
118
119 if (ins->type == BI_ATEST || ins->type == BI_SELECT)
120 return 1;
121
122 return MAX2(32 / bytes, 1);
123 }
124 }
125
126 uint16_t
bi_bytemask_of_read_components(bi_instruction * ins,unsigned node)127 bi_bytemask_of_read_components(bi_instruction *ins, unsigned node)
128 {
129 uint16_t mask = 0x0;
130
131 bi_foreach_src(ins, s) {
132 if (ins->src[s] != node) continue;
133 unsigned component_count = bi_get_component_count(ins, s);
134 nir_alu_type T = ins->src_types[s];
135 unsigned size = nir_alu_type_get_type_size(T);
136 unsigned bytes = size / 8;
137 unsigned cmask = (1 << bytes) - 1;
138
139 for (unsigned i = 0; i < component_count; ++i) {
140 unsigned c = ins->swizzle[s][i];
141 mask |= (cmask << (c * bytes));
142 }
143 }
144
145 return mask;
146 }
147
148 uint64_t
bi_get_immediate(bi_instruction * ins,unsigned index)149 bi_get_immediate(bi_instruction *ins, unsigned index)
150 {
151 unsigned v = ins->src[index];
152 assert(v & BIR_INDEX_CONSTANT);
153 unsigned shift = v & ~BIR_INDEX_CONSTANT;
154 uint64_t shifted = ins->constant.u64 >> shift;
155
156 /* Mask off the accessed part */
157 unsigned sz = nir_alu_type_get_type_size(ins->src_types[index]);
158
159 if (sz == 64)
160 return shifted;
161 else
162 return shifted & ((1ull << sz) - 1);
163 }
164
165 bool
bi_writes_component(bi_instruction * ins,unsigned comp)166 bi_writes_component(bi_instruction *ins, unsigned comp)
167 {
168 return comp < bi_get_component_count(ins, -1);
169 }
170
171 /* Determine effective writemask for RA/DCE, noting that we currently act
172 * per-register hence aligning. TODO: when real write masks are handled in
173 * packing (not for a while), update this routine, removing the align */
174
175 unsigned
bi_writemask(bi_instruction * ins)176 bi_writemask(bi_instruction *ins)
177 {
178 nir_alu_type T = ins->dest_type;
179 unsigned size = nir_alu_type_get_type_size(T);
180 unsigned bytes_per_comp = size / 8;
181 unsigned components = bi_get_component_count(ins, -1);
182 unsigned bytes = ALIGN_POT(bytes_per_comp * components, 4);
183 unsigned mask = (1 << bytes) - 1;
184 unsigned shift = ins->dest_offset * 4; /* 32-bit words */
185 return (mask << shift);
186 }
187
188 /* Rewrites uses of an index. This is O(nc) to the program and number of
189 * uses, so combine lowering is effectively O(n^2). Better bookkeeping
190 * would bring down to linear if that's an issue. */
191
192 void
bi_rewrite_uses(bi_context * ctx,unsigned old,unsigned oldc,unsigned new,unsigned newc)193 bi_rewrite_uses(bi_context *ctx,
194 unsigned old, unsigned oldc,
195 unsigned new, unsigned newc)
196 {
197 assert(newc >= oldc);
198
199 bi_foreach_instr_global(ctx, ins) {
200 bi_foreach_src(ins, s) {
201 if (ins->src[s] != old) continue;
202
203 for (unsigned i = 0; i < 16; ++i)
204 ins->swizzle[s][i] += (newc - oldc);
205
206 ins->src[s] = new;
207 }
208 }
209 }
210
211
212