1 /*
2 * Copyright (C) 2021 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "compiler.h"
25 #include "bi_test.h"
26 #include "bi_builder.h"
27
/*
 * Run one optimizer test case.
 *
 * Two builders are created from ralloc_ctx. `instr` is emitted through the
 * first and `expected` through the second; each fragment sees its builder
 * under the local name `b`. The first shader is then passed through
 * bi_opt_mod_prop_forward(), bi_opt_mod_prop_backward() and
 * bi_opt_dead_code_eliminate(), and compared against the second with
 * bit_shader_equal(). A match increments nr_pass; a mismatch prints both
 * shaders to stderr and increments nr_fail.
 *
 * The expansion requires ralloc_ctx, nr_pass and nr_fail to be in scope at
 * the call site.
 */
#define CASE(instr, expected) do { \
   bi_builder *actual_builder = bit_builder(ralloc_ctx); \
   bi_builder *wanted_builder = bit_builder(ralloc_ctx); \
   { \
      bi_builder *b = actual_builder; \
      instr; \
   } \
   { \
      bi_builder *b = wanted_builder; \
      expected; \
   } \
   bi_opt_mod_prop_forward(actual_builder->shader); \
   bi_opt_mod_prop_backward(actual_builder->shader); \
   bi_opt_dead_code_eliminate(actual_builder->shader); \
   if (!bit_shader_equal(actual_builder->shader, wanted_builder->shader)) { \
      fprintf(stderr, "Got:\n"); \
      bi_print_shader(actual_builder->shader, stderr); \
      fprintf(stderr, "Expected:\n"); \
      bi_print_shader(wanted_builder->shader, stderr); \
      fprintf(stderr, "\n"); \
      nr_fail++; \
   } else { \
      nr_pass++; \
   } \
} while(0)
53
/* Negative test: the same fragment serves as both the input and the expected
 * result, so the check succeeds only if the passes run by CASE leave the
 * program as written. */
#define NEGCASE(unchanged) CASE(unchanged, unchanged)
55
56 int
main(int argc,const char ** argv)57 main(int argc, const char **argv)
58 {
59 unsigned nr_fail = 0, nr_pass = 0;
60 void *ralloc_ctx = ralloc_context(NULL);
61 bi_index zero = bi_zero();
62 bi_index reg = bi_register(0);
63 bi_index x = bi_register(1);
64 bi_index y = bi_register(2);
65 bi_index negabsx = bi_neg(bi_abs(x));
66
67 /* Check absneg is fused */
68
69 CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), y, BI_ROUND_NONE),
70 bi_fadd_f32_to(b, reg, bi_abs(x), y, BI_ROUND_NONE));
71
72 CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_neg(x)), y, BI_ROUND_NONE),
73 bi_fadd_f32_to(b, reg, bi_neg(x), y, BI_ROUND_NONE));
74
75 CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), y, BI_ROUND_NONE),
76 bi_fadd_f32_to(b, reg, negabsx, y, BI_ROUND_NONE));
77
78 CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, x), y, BI_ROUND_NONE),
79 bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE));
80
81 /* Check absneg is fused on a variety of instructions */
82
83 CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), y, BI_ROUND_RTP),
84 bi_fadd_f32_to(b, reg, negabsx, y, BI_ROUND_RTP));
85
86 CASE(bi_fmin_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), bi_neg(y)),
87 bi_fmin_f32_to(b, reg, negabsx, bi_neg(y)));
88
89 /* Check absneg is fused on fp16 */
90
91 CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y, BI_ROUND_RTP),
92 bi_fadd_v2f16_to(b, reg, negabsx, y, BI_ROUND_RTP));
93
94 CASE(bi_fmin_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), bi_neg(y)),
95 bi_fmin_v2f16_to(b, reg, negabsx, bi_neg(y)));
96
97 /* Check that swizzles are composed for fp16 */
98
99 CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y, BI_ROUND_RTP),
100 bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y, BI_ROUND_RTP));
101
102 CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y, BI_ROUND_RTP),
103 bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y, BI_ROUND_RTP));
104
105 CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y, BI_ROUND_RTP),
106 bi_fadd_v2f16_to(b, reg, negabsx, y, BI_ROUND_RTP));
107
108 CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y, BI_ROUND_RTP),
109 bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y, BI_ROUND_RTP));
110
111 CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y, BI_ROUND_RTP),
112 bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y, BI_ROUND_RTP));
113
114 /* Check that widens are passed through */
115
116 CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, false)), y, BI_ROUND_NONE),
117 bi_fadd_f32_to(b, reg, bi_half(negabsx, false), y, BI_ROUND_NONE));
118
119 CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y, BI_ROUND_NONE),
120 bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y, BI_ROUND_NONE));
121
122 CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false)), BI_ROUND_NONE),
123 bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false), BI_ROUND_NONE));
124
125 /* Refuse to mix sizes for fabsneg, that's wrong */
126
127 NEGCASE(bi_fadd_f32_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y, BI_ROUND_NONE));
128 NEGCASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_f32(b, negabsx), y, BI_ROUND_NONE));
129
130 /* It's tempting to use addition by 0.0 as the absneg primitive, but that
131 * has footguns around signed zero and round modes. Check we don't
132 * incorrectly fuse these rules. */
133
134 NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), zero, BI_ROUND_NONE), y, BI_ROUND_NONE));
135 NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(x), zero, BI_ROUND_NONE), y, BI_ROUND_NONE));
136 NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(bi_abs(x)), zero, BI_ROUND_NONE), y, BI_ROUND_NONE));
137 NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, zero, BI_ROUND_NONE), y, BI_ROUND_NONE));
138
139 /* Check clamps are propagated */
140 CASE({
141 bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y, BI_ROUND_NONE));
142 I->clamp = BI_CLAMP_CLAMP_0_INF;
143 }, {
144 bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE);
145 I->clamp = BI_CLAMP_CLAMP_0_INF;
146 });
147
148 CASE({
149 bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE));
150 I->clamp = BI_CLAMP_CLAMP_0_1;
151 }, {
152 bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE);
153 I->clamp = BI_CLAMP_CLAMP_0_1;
154 });
155
156 /* Check clamps are composed */
157 CASE({
158 bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
159 bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
160 I->clamp = BI_CLAMP_CLAMP_M1_1;
161 J->clamp = BI_CLAMP_CLAMP_0_INF;
162 }, {
163 bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE);
164 I->clamp = BI_CLAMP_CLAMP_0_1;
165 });
166
167 CASE({
168 bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
169 bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
170 I->clamp = BI_CLAMP_CLAMP_0_1;
171 J->clamp = BI_CLAMP_CLAMP_0_INF;
172 }, {
173 bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE);
174 I->clamp = BI_CLAMP_CLAMP_0_1;
175 });
176
177 CASE({
178 bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
179 bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
180 I->clamp = BI_CLAMP_CLAMP_0_INF;
181 J->clamp = BI_CLAMP_CLAMP_0_INF;
182 }, {
183 bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE);
184 I->clamp = BI_CLAMP_CLAMP_0_INF;
185 });
186
187 CASE({
188 bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
189 bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
190 I->clamp = BI_CLAMP_CLAMP_M1_1;
191 J->clamp = BI_CLAMP_CLAMP_0_INF;
192 }, {
193 bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE);
194 I->clamp = BI_CLAMP_CLAMP_0_1;
195 });
196
197 CASE({
198 bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
199 bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
200 I->clamp = BI_CLAMP_CLAMP_0_1;
201 J->clamp = BI_CLAMP_CLAMP_0_INF;
202 }, {
203 bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE);
204 I->clamp = BI_CLAMP_CLAMP_0_1;
205 });
206
207 CASE({
208 bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
209 bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
210 I->clamp = BI_CLAMP_CLAMP_0_INF;
211 J->clamp = BI_CLAMP_CLAMP_0_INF;
212 }, {
213 bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE);
214 I->clamp = BI_CLAMP_CLAMP_0_INF;
215 });
216
217 /* We can't mix sizes */
218
219 NEGCASE({
220 bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE));
221 I->clamp = BI_CLAMP_CLAMP_0_1;
222 });
223
224 NEGCASE({
225 bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_f32(b, x, y, BI_ROUND_NONE));
226 I->clamp = BI_CLAMP_CLAMP_0_1;
227 });
228
229 /* We can't use addition by 0.0 for clamps due to signed zeros. */
230 NEGCASE({
231 bi_instr *I = bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, y, BI_ROUND_NONE), zero, BI_ROUND_NONE);
232 I->clamp = BI_CLAMP_CLAMP_M1_1;
233 });
234
235 NEGCASE({
236 bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE), zero, BI_ROUND_NONE);
237 I->clamp = BI_CLAMP_CLAMP_0_1;
238 });
239
240 /* Check that we fuse comparisons with DISCARD */
241
242 CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1)),
243 bi_discard_f32(b, x, y, BI_CMPF_LE));
244
245 CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1)),
246 bi_discard_f32(b, x, y, BI_CMPF_NE));
247
248 CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1)),
249 bi_discard_f32(b, x, y, BI_CMPF_EQ));
250
251 for (unsigned h = 0; h < 2; ++h) {
252 CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1), h)),
253 bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_LE));
254
255 CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1), h)),
256 bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_NE));
257
258 CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1), h)),
259 bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_EQ));
260 }
261
262 /* Refuse to fuse special comparisons */
263 NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1)));
264 NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1)));
265
266 ralloc_free(ralloc_ctx);
267 TEST_END(nr_pass, nr_fail);
268 }
269