1 /*
2 * Copyright © 2010 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "brw_fs.h"
7 #include "brw_fs_builder.h"
8
9 using namespace brw;
10
11 static uint64_t
src_as_uint(const fs_reg & src)12 src_as_uint(const fs_reg &src)
13 {
14 assert(src.file == IMM);
15
16 switch (src.type) {
17 case BRW_REGISTER_TYPE_W:
18 return (uint64_t)(int16_t)(src.ud & 0xffff);
19
20 case BRW_REGISTER_TYPE_UW:
21 return (uint64_t)(uint16_t)(src.ud & 0xffff);
22
23 case BRW_REGISTER_TYPE_D:
24 return (uint64_t)src.d;
25
26 case BRW_REGISTER_TYPE_UD:
27 return (uint64_t)src.ud;
28
29 case BRW_REGISTER_TYPE_Q:
30 return src.d64;
31
32 case BRW_REGISTER_TYPE_UQ:
33 return src.u64;
34
35 default:
36 unreachable("Invalid integer type.");
37 }
38 }
39
40 static fs_reg
brw_imm_for_type(uint64_t value,enum brw_reg_type type)41 brw_imm_for_type(uint64_t value, enum brw_reg_type type)
42 {
43 switch (type) {
44 case BRW_REGISTER_TYPE_W:
45 return brw_imm_w(value);
46
47 case BRW_REGISTER_TYPE_UW:
48 return brw_imm_uw(value);
49
50 case BRW_REGISTER_TYPE_D:
51 return brw_imm_d(value);
52
53 case BRW_REGISTER_TYPE_UD:
54 return brw_imm_ud(value);
55
56 case BRW_REGISTER_TYPE_Q:
57 return brw_imm_d(value);
58
59 case BRW_REGISTER_TYPE_UQ:
60 return brw_imm_uq(value);
61
62 default:
63 unreachable("Invalid integer type.");
64 }
65 }
66
67 bool
brw_fs_opt_algebraic(fs_visitor & s)68 brw_fs_opt_algebraic(fs_visitor &s)
69 {
70 const intel_device_info *devinfo = s.devinfo;
71 bool progress = false;
72
73 foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
74 switch (inst->opcode) {
75 case BRW_OPCODE_MOV:
76 if (!devinfo->has_64bit_float &&
77 inst->dst.type == BRW_REGISTER_TYPE_DF) {
78 assert(inst->dst.type == inst->src[0].type);
79 assert(!inst->saturate);
80 assert(!inst->src[0].abs);
81 assert(!inst->src[0].negate);
82 const brw::fs_builder ibld(&s, block, inst);
83
84 if (!inst->is_partial_write())
85 ibld.emit_undef_for_dst(inst);
86
87 ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1),
88 subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
89 ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0),
90 subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
91
92 inst->remove(block);
93 progress = true;
94 }
95
96 if (!devinfo->has_64bit_int &&
97 (inst->dst.type == BRW_REGISTER_TYPE_UQ ||
98 inst->dst.type == BRW_REGISTER_TYPE_Q)) {
99 assert(inst->dst.type == inst->src[0].type);
100 assert(!inst->saturate);
101 assert(!inst->src[0].abs);
102 assert(!inst->src[0].negate);
103 const brw::fs_builder ibld(&s, block, inst);
104
105 if (!inst->is_partial_write())
106 ibld.emit_undef_for_dst(inst);
107
108 ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
109 subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
110 ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
111 subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
112
113 inst->remove(block);
114 progress = true;
115 }
116
117 if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
118 inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
119 inst->dst.is_null() &&
120 (inst->src[0].abs || inst->src[0].negate)) {
121 inst->src[0].abs = false;
122 inst->src[0].negate = false;
123 progress = true;
124 break;
125 }
126
127 if (inst->src[0].file != IMM)
128 break;
129
130 if (inst->saturate) {
131 /* Full mixed-type saturates don't happen. However, we can end up
132 * with things like:
133 *
134 * mov.sat(8) g21<1>DF -1F
135 *
136 * Other mixed-size-but-same-base-type cases may also be possible.
137 */
138 if (inst->dst.type != inst->src[0].type &&
139 inst->dst.type != BRW_REGISTER_TYPE_DF &&
140 inst->src[0].type != BRW_REGISTER_TYPE_F)
141 assert(!"unimplemented: saturate mixed types");
142
143 if (brw_saturate_immediate(inst->src[0].type,
144 &inst->src[0].as_brw_reg())) {
145 inst->saturate = false;
146 progress = true;
147 }
148 }
149 break;
150
151 case BRW_OPCODE_MUL:
152 if (inst->src[1].file != IMM)
153 continue;
154
155 if (brw_reg_type_is_floating_point(inst->src[1].type))
156 break;
157
158 /* From the BDW PRM, Vol 2a, "mul - Multiply":
159 *
160 * "When multiplying integer datatypes, if src0 is DW and src1
161 * is W, irrespective of the destination datatype, the
162 * accumulator maintains full 48-bit precision."
163 * ...
164 * "When multiplying integer data types, if one of the sources
165 * is a DW, the resulting full precision data is stored in
166 * the accumulator."
167 *
168 * There are also similar notes in earlier PRMs.
169 *
170 * The MOV instruction can copy the bits of the source, but it
171 * does not clear the higher bits of the accumulator. So, because
172 * we might use the full accumulator in the MUL/MACH macro, we
173 * shouldn't replace such MULs with MOVs.
174 */
175 if ((brw_reg_type_to_size(inst->src[0].type) == 4 ||
176 brw_reg_type_to_size(inst->src[1].type) == 4) &&
177 (inst->dst.is_accumulator() ||
178 inst->writes_accumulator_implicitly(devinfo)))
179 break;
180
181 /* a * 1.0 = a */
182 if (inst->src[1].is_one()) {
183 inst->opcode = BRW_OPCODE_MOV;
184 inst->sources = 1;
185 inst->src[1] = reg_undef;
186 progress = true;
187 break;
188 }
189
190 /* a * -1.0 = -a */
191 if (inst->src[1].is_negative_one()) {
192 inst->opcode = BRW_OPCODE_MOV;
193 inst->sources = 1;
194 inst->src[0].negate = !inst->src[0].negate;
195 inst->src[1] = reg_undef;
196 progress = true;
197 break;
198 }
199
200 break;
201 case BRW_OPCODE_ADD:
202 if (inst->src[1].file != IMM)
203 continue;
204
205 if (brw_reg_type_is_integer(inst->src[1].type) &&
206 inst->src[1].is_zero()) {
207 inst->opcode = BRW_OPCODE_MOV;
208 inst->sources = 1;
209 inst->src[1] = reg_undef;
210 progress = true;
211 break;
212 }
213
214 if (inst->src[0].file == IMM) {
215 assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
216 inst->opcode = BRW_OPCODE_MOV;
217 inst->sources = 1;
218 inst->src[0].f += inst->src[1].f;
219 inst->src[1] = reg_undef;
220 progress = true;
221 break;
222 }
223 break;
224
225 case BRW_OPCODE_AND:
226 if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
227 const uint64_t src0 = src_as_uint(inst->src[0]);
228 const uint64_t src1 = src_as_uint(inst->src[1]);
229
230 inst->opcode = BRW_OPCODE_MOV;
231 inst->sources = 1;
232 inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
233 inst->src[1] = reg_undef;
234 progress = true;
235 break;
236 }
237
238 break;
239
240 case BRW_OPCODE_OR:
241 if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
242 const uint64_t src0 = src_as_uint(inst->src[0]);
243 const uint64_t src1 = src_as_uint(inst->src[1]);
244
245 inst->opcode = BRW_OPCODE_MOV;
246 inst->sources = 1;
247 inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
248 inst->src[1] = reg_undef;
249 progress = true;
250 break;
251 }
252
253 if (inst->src[0].equals(inst->src[1]) ||
254 inst->src[1].is_zero()) {
255 /* On Gfx8+, the OR instruction can have a source modifier that
256 * performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
257 * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
258 */
259 if (inst->src[0].negate) {
260 inst->opcode = BRW_OPCODE_NOT;
261 inst->sources = 1;
262 inst->src[0].negate = false;
263 } else {
264 inst->opcode = BRW_OPCODE_MOV;
265 inst->sources = 1;
266 }
267 inst->src[1] = reg_undef;
268 progress = true;
269 break;
270 }
271 break;
272 case BRW_OPCODE_CMP:
273 if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
274 inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
275 inst->src[1].is_zero() &&
276 (inst->src[0].abs || inst->src[0].negate)) {
277 inst->src[0].abs = false;
278 inst->src[0].negate = false;
279 progress = true;
280 break;
281 }
282 break;
283 case BRW_OPCODE_SEL:
284 if (!devinfo->has_64bit_float &&
285 !devinfo->has_64bit_int &&
286 (inst->dst.type == BRW_REGISTER_TYPE_DF ||
287 inst->dst.type == BRW_REGISTER_TYPE_UQ ||
288 inst->dst.type == BRW_REGISTER_TYPE_Q)) {
289 assert(inst->dst.type == inst->src[0].type);
290 assert(!inst->saturate);
291 assert(!inst->src[0].abs && !inst->src[0].negate);
292 assert(!inst->src[1].abs && !inst->src[1].negate);
293 const brw::fs_builder ibld(&s, block, inst);
294
295 if (!inst->is_partial_write())
296 ibld.emit_undef_for_dst(inst);
297
298 set_predicate(inst->predicate,
299 ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
300 subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
301 subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
302 set_predicate(inst->predicate,
303 ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
304 subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
305 subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
306
307 inst->remove(block);
308 progress = true;
309 }
310 if (inst->src[0].equals(inst->src[1])) {
311 inst->opcode = BRW_OPCODE_MOV;
312 inst->sources = 1;
313 inst->src[1] = reg_undef;
314 inst->predicate = BRW_PREDICATE_NONE;
315 inst->predicate_inverse = false;
316 progress = true;
317 } else if (inst->saturate && inst->src[1].file == IMM) {
318 switch (inst->conditional_mod) {
319 case BRW_CONDITIONAL_LE:
320 case BRW_CONDITIONAL_L:
321 switch (inst->src[1].type) {
322 case BRW_REGISTER_TYPE_F:
323 if (inst->src[1].f >= 1.0f) {
324 inst->opcode = BRW_OPCODE_MOV;
325 inst->sources = 1;
326 inst->src[1] = reg_undef;
327 inst->conditional_mod = BRW_CONDITIONAL_NONE;
328 progress = true;
329 }
330 break;
331 default:
332 break;
333 }
334 break;
335 case BRW_CONDITIONAL_GE:
336 case BRW_CONDITIONAL_G:
337 switch (inst->src[1].type) {
338 case BRW_REGISTER_TYPE_F:
339 if (inst->src[1].f <= 0.0f) {
340 inst->opcode = BRW_OPCODE_MOV;
341 inst->sources = 1;
342 inst->src[1] = reg_undef;
343 inst->conditional_mod = BRW_CONDITIONAL_NONE;
344 progress = true;
345 }
346 break;
347 default:
348 break;
349 }
350 default:
351 break;
352 }
353 }
354 break;
355 case BRW_OPCODE_MAD:
356 if (inst->src[0].type != BRW_REGISTER_TYPE_F ||
357 inst->src[1].type != BRW_REGISTER_TYPE_F ||
358 inst->src[2].type != BRW_REGISTER_TYPE_F)
359 break;
360 if (inst->src[1].is_one()) {
361 inst->opcode = BRW_OPCODE_ADD;
362 inst->sources = 2;
363 inst->src[1] = inst->src[2];
364 inst->src[2] = reg_undef;
365 progress = true;
366 } else if (inst->src[2].is_one()) {
367 inst->opcode = BRW_OPCODE_ADD;
368 inst->sources = 2;
369 inst->src[2] = reg_undef;
370 progress = true;
371 }
372 break;
373 case BRW_OPCODE_SHL:
374 if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
375 /* It's not currently possible to generate this, and this constant
376 * folding does not handle it.
377 */
378 assert(!inst->saturate);
379
380 fs_reg result;
381
382 switch (type_sz(inst->src[0].type)) {
383 case 2:
384 result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
385 break;
386 case 4:
387 result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
388 break;
389 case 8:
390 result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
391 break;
392 default:
393 /* Just in case a future platform re-enables B or UB types. */
394 unreachable("Invalid source size.");
395 }
396
397 inst->opcode = BRW_OPCODE_MOV;
398 inst->src[0] = retype(result, inst->dst.type);
399 inst->src[1] = reg_undef;
400 inst->sources = 1;
401
402 progress = true;
403 }
404 break;
405
406 case SHADER_OPCODE_BROADCAST:
407 if (is_uniform(inst->src[0])) {
408 inst->opcode = BRW_OPCODE_MOV;
409 inst->sources = 1;
410 inst->force_writemask_all = true;
411 progress = true;
412 } else if (inst->src[1].file == IMM) {
413 inst->opcode = BRW_OPCODE_MOV;
414 /* It's possible that the selected component will be too large and
415 * overflow the register. This can happen if someone does a
416 * readInvocation() from GLSL or SPIR-V and provides an OOB
417 * invocationIndex. If this happens and we some how manage
418 * to constant fold it in and get here, then component() may cause
419 * us to start reading outside of the VGRF which will lead to an
420 * assert later. Instead, just let it wrap around if it goes over
421 * exec_size.
422 */
423 const unsigned comp = inst->src[1].ud & (inst->exec_size - 1);
424 inst->src[0] = component(inst->src[0], comp);
425 inst->sources = 1;
426 inst->force_writemask_all = true;
427 progress = true;
428 }
429 break;
430
431 case SHADER_OPCODE_SHUFFLE:
432 if (is_uniform(inst->src[0])) {
433 inst->opcode = BRW_OPCODE_MOV;
434 inst->sources = 1;
435 progress = true;
436 } else if (inst->src[1].file == IMM) {
437 inst->opcode = BRW_OPCODE_MOV;
438 inst->src[0] = component(inst->src[0],
439 inst->src[1].ud);
440 inst->sources = 1;
441 progress = true;
442 }
443 break;
444
445 default:
446 break;
447 }
448
449 /* Ensure that the correct source has the immediate value. 2-source
450 * instructions must have the immediate in src[1]. On Gfx12 and later,
451 * some 3-source instructions can have the immediate in src[0] or
452 * src[2]. It's complicated, so don't mess with 3-source instructions
453 * here.
454 */
455 if (progress && inst->sources == 2 && inst->is_commutative()) {
456 if (inst->src[0].file == IMM) {
457 fs_reg tmp = inst->src[1];
458 inst->src[1] = inst->src[0];
459 inst->src[0] = tmp;
460 }
461 }
462 }
463
464 if (progress)
465 s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
466 DEPENDENCY_INSTRUCTION_DETAIL);
467
468 return progress;
469 }
470