/*
 * Copyright (C) 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"

/* Only some special immediates are available, as specified in the Table of
 * Immediates in the specification. Other immediates must be lowered, either to
 * uniforms or to moves.
 */

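/* Materialize an arbitrary 32-bit immediate by adding it to the zero constant
 * (entry 0 of the immediate table). Fallback for values that cannot be read
 * directly from the table.
 */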
static bi_index
va_mov_imm(bi_builder *b, uint32_t imm)
{
   bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false);
   return bi_iadd_imm_i32(b, zero, imm);
}

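/* Look up a 32-bit value in the table of special immediates, returning the
 * corresponding LUT index if present and a null index otherwise.
 */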
static bi_index
va_lut_index_32(uint32_t imm)
{
   for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) {
      if (valhall_immediates[i] == imm)
         return va_lut(i);
   }

   return bi_null();
}

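/* Look up a 16-bit value as either half of a 32-bit entry in the table of
 * special immediates, returning a half-swizzled LUT index if present.
 */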
static bi_index
va_lut_index_16(uint16_t imm)
{
   uint16_t *arr16 = (uint16_t *) valhall_immediates;

   for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) {
      if (arr16[i] == imm)
         return bi_half(va_lut(i >> 1), i & 1);
   }

   return bi_null();
}

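/* Look up an 8-bit value as any byte of a 32-bit entry in the table of special
 * immediates, returning a byte-swizzled LUT index if present.
 */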
UNUSED static bi_index
va_lut_index_8(uint8_t imm)
{
   uint8_t *arr8 = (uint8_t *) valhall_immediates;

   for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) {
      if (arr8[i] == imm)
         return bi_byte(va_lut(i >> 2), i & 3);
   }

   return bi_null();
}

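/* Try to demote an FP32 constant to FP16 and look up the result in the table
 * of special immediates. Returns a null index if the conversion is inexact or
 * the demoted value is not in the table.
 */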
static bi_index
va_demote_constant_fp16(uint32_t value)
{
   uint16_t fp16 = _mesa_float_to_half(uif(value));

   /* Only convert if it is exact */
   if (fui(_mesa_half_to_float(fp16)) == value)
      return va_lut_index_16(fp16);
   else
      return bi_null();
}

/*
 * Test if a 32-bit word arises as a sign or zero extension of some 8/16-bit
 * value. For sign extension, either the word fits in the positive range of the
 * narrow type, or the narrow sign bit and every bit above it are set.
 */
static bool
is_extension_of_8(uint32_t x, bool is_signed)
{
   if (is_signed)
      return (x <= INT8_MAX) || ((x >> 7) == BITFIELD_MASK(24 + 1));
   else
      return (x <= UINT8_MAX);
}

static bool
is_extension_of_16(uint32_t x, bool is_signed)
{
   if (is_signed)
      return (x <= INT16_MAX) || ((x >> 15) == BITFIELD_MASK(16 + 1));
   else
      return (x <= UINT16_MAX);
}

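/* Resolve a 32-bit constant to a source index. Non-staging sources prefer the
 * table of special immediates, trying the value as-is and then, depending on
 * what the source supports, negated, as a replicated FP16 half, extended from
 * a narrower type, or demoted to FP16. Anything else, including every staging
 * source (which skips the table paths), falls back to a move.
 */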
static bi_index
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
                    bool is_signed, bool staging)
{
   /* Try the constant as-is */
   if (!staging) {
      bi_index lut = va_lut_index_32(value);
      if (!bi_is_null(lut)) return lut;

      /* ...or negated as a FP32 constant */
      if (info.absneg && info.size == VA_SIZE_32) {
         lut = bi_neg(va_lut_index_32(fui(-uif(value))));
         if (!bi_is_null(lut)) return lut;
      }

      /* ...or negated as a FP16 constant */
      if (info.absneg && info.size == VA_SIZE_16) {
         lut = bi_neg(va_lut_index_32(value ^ 0x80008000));
         if (!bi_is_null(lut)) return lut;
      }
   }

   /* Try using a single half of a FP16 constant */
   bool replicated_halves = (value & 0xFFFF) == (value >> 16);
   if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) {
      bi_index lut = va_lut_index_16(value & 0xFFFF);
      if (!bi_is_null(lut)) return lut;

      /* ...possibly negated */
      if (info.absneg) {
         lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000));
         if (!bi_is_null(lut)) return lut;
      }
   }

   /* Try extending a byte */
   if (!staging && (info.widen || info.lanes) &&
       is_extension_of_8(value, is_signed)) {

      bi_index lut = va_lut_index_8(value & 0xFF);
      if (!bi_is_null(lut)) return lut;
   }

   /* Try extending a halfword */
   if (!staging && info.widen &&
       is_extension_of_16(value, is_signed)) {

      bi_index lut = va_lut_index_16(value & 0xFFFF);
      if (!bi_is_null(lut)) return lut;
   }

   /* Try demoting the constant to FP16 */
   if (!staging && info.swizzle && info.size == VA_SIZE_32) {
      bi_index lut = va_demote_constant_fp16(value);
      if (!bi_is_null(lut)) return lut;

      if (info.absneg) {
         bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value))));
         if (!bi_is_null(lut)) return lut;
      }
   }

   /* TODO: Optimize to uniform */
   return va_mov_imm(b, value);
}

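/* Lower the constant sources of an instruction. Each 32-bit constant source
 * has its swizzle folded into the value and is then replaced by a table
 * lookup or by a move emitted before the instruction.
 */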
void
va_lower_constants(bi_context *ctx, bi_instr *I)
{
   bi_builder b = bi_init_builder(ctx, bi_before_instr(I));

   bi_foreach_src(I, s) {
      if (I->src[s].type == BI_INDEX_CONSTANT) {
         /* abs(#c) is pointless, but -#c occurs in transcendental sequences */
         assert(!I->src[s].abs && "redundant .abs modifier");

         bool is_signed = valhall_opcodes[I->op].is_signed;
         bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
         struct va_src_info info = va_src_info(I->op, s);
         uint32_t value = I->src[s].value;
         enum bi_swizzle swz = I->src[s].swizzle;

         /* Resolve any swizzle, keeping in mind the different interpretations
          * swizzles have in different contexts.
          */
         if (info.size == VA_SIZE_32) {
            /* Extracting a half from the 32-bit value */
            if (swz == BI_SWIZZLE_H00)
               value = (value & 0xFFFF);
            else if (swz == BI_SWIZZLE_H11)
               value = (value >> 16);
            else
               assert(swz == BI_SWIZZLE_H01);

            /* FP16 -> FP32 */
            if (info.swizzle && swz != BI_SWIZZLE_H01)
               value = fui(_mesa_half_to_float(value));
         } else if (info.size == VA_SIZE_16) {
            assert(swz >= BI_SWIZZLE_H00 && swz <= BI_SWIZZLE_H11);
            value = bi_apply_swizzle(value, swz);
         } else if (info.size == VA_SIZE_8 && info.lanes) {
            /* 8-bit extract */
            unsigned chan = (swz - BI_SWIZZLE_B0000);
            assert(chan < 4);

            value = (value >> (8 * chan)) & 0xFF;
         } else {
            /* TODO: Any other special handling? */
            value = bi_apply_swizzle(value, swz);
         }

         bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging);
         cons.neg ^= I->src[s].neg;
         I->src[s] = cons;
      }
   }
}