• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "brw_fs.h"
7 #include "brw_fs_builder.h"
8 
9 using namespace brw;
10 
11 static uint64_t
src_as_uint(const fs_reg & src)12 src_as_uint(const fs_reg &src)
13 {
14    assert(src.file == IMM);
15 
16    switch (src.type) {
17    case BRW_REGISTER_TYPE_W:
18       return (uint64_t)(int16_t)(src.ud & 0xffff);
19 
20    case BRW_REGISTER_TYPE_UW:
21       return (uint64_t)(uint16_t)(src.ud & 0xffff);
22 
23    case BRW_REGISTER_TYPE_D:
24       return (uint64_t)src.d;
25 
26    case BRW_REGISTER_TYPE_UD:
27       return (uint64_t)src.ud;
28 
29    case BRW_REGISTER_TYPE_Q:
30       return src.d64;
31 
32    case BRW_REGISTER_TYPE_UQ:
33       return src.u64;
34 
35    default:
36       unreachable("Invalid integer type.");
37    }
38 }
39 
40 static fs_reg
brw_imm_for_type(uint64_t value,enum brw_reg_type type)41 brw_imm_for_type(uint64_t value, enum brw_reg_type type)
42 {
43    switch (type) {
44    case BRW_REGISTER_TYPE_W:
45       return brw_imm_w(value);
46 
47    case BRW_REGISTER_TYPE_UW:
48       return brw_imm_uw(value);
49 
50    case BRW_REGISTER_TYPE_D:
51       return brw_imm_d(value);
52 
53    case BRW_REGISTER_TYPE_UD:
54       return brw_imm_ud(value);
55 
56    case BRW_REGISTER_TYPE_Q:
57       return brw_imm_d(value);
58 
59    case BRW_REGISTER_TYPE_UQ:
60       return brw_imm_uq(value);
61 
62    default:
63       unreachable("Invalid integer type.");
64    }
65 }
66 
67 bool
brw_fs_opt_algebraic(fs_visitor & s)68 brw_fs_opt_algebraic(fs_visitor &s)
69 {
70    const intel_device_info *devinfo = s.devinfo;
71    bool progress = false;
72 
73    foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
74       switch (inst->opcode) {
75       case BRW_OPCODE_MOV:
76          if (!devinfo->has_64bit_float &&
77              inst->dst.type == BRW_REGISTER_TYPE_DF) {
78             assert(inst->dst.type == inst->src[0].type);
79             assert(!inst->saturate);
80             assert(!inst->src[0].abs);
81             assert(!inst->src[0].negate);
82             const brw::fs_builder ibld(&s, block, inst);
83 
84             if (!inst->is_partial_write())
85                ibld.emit_undef_for_dst(inst);
86 
87             ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1),
88                      subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
89             ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0),
90                      subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
91 
92             inst->remove(block);
93             progress = true;
94          }
95 
96          if (!devinfo->has_64bit_int &&
97              (inst->dst.type == BRW_REGISTER_TYPE_UQ ||
98               inst->dst.type == BRW_REGISTER_TYPE_Q)) {
99             assert(inst->dst.type == inst->src[0].type);
100             assert(!inst->saturate);
101             assert(!inst->src[0].abs);
102             assert(!inst->src[0].negate);
103             const brw::fs_builder ibld(&s, block, inst);
104 
105             if (!inst->is_partial_write())
106                ibld.emit_undef_for_dst(inst);
107 
108             ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
109                      subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
110             ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
111                      subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
112 
113             inst->remove(block);
114             progress = true;
115          }
116 
117          if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
118               inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
119              inst->dst.is_null() &&
120              (inst->src[0].abs || inst->src[0].negate)) {
121             inst->src[0].abs = false;
122             inst->src[0].negate = false;
123             progress = true;
124             break;
125          }
126 
127          if (inst->src[0].file != IMM)
128             break;
129 
130          if (inst->saturate) {
131             /* Full mixed-type saturates don't happen.  However, we can end up
132              * with things like:
133              *
134              *    mov.sat(8) g21<1>DF       -1F
135              *
136              * Other mixed-size-but-same-base-type cases may also be possible.
137              */
138             if (inst->dst.type != inst->src[0].type &&
139                 inst->dst.type != BRW_REGISTER_TYPE_DF &&
140                 inst->src[0].type != BRW_REGISTER_TYPE_F)
141                assert(!"unimplemented: saturate mixed types");
142 
143             if (brw_saturate_immediate(inst->src[0].type,
144                                        &inst->src[0].as_brw_reg())) {
145                inst->saturate = false;
146                progress = true;
147             }
148          }
149          break;
150 
151       case BRW_OPCODE_MUL:
152          if (inst->src[1].file != IMM)
153             continue;
154 
155          if (brw_reg_type_is_floating_point(inst->src[1].type))
156             break;
157 
158          /* From the BDW PRM, Vol 2a, "mul - Multiply":
159           *
160           *    "When multiplying integer datatypes, if src0 is DW and src1
161           *    is W, irrespective of the destination datatype, the
162           *    accumulator maintains full 48-bit precision."
163           *    ...
164           *    "When multiplying integer data types, if one of the sources
165           *    is a DW, the resulting full precision data is stored in
166           *    the accumulator."
167           *
168           * There are also similar notes in earlier PRMs.
169           *
170           * The MOV instruction can copy the bits of the source, but it
171           * does not clear the higher bits of the accumulator. So, because
172           * we might use the full accumulator in the MUL/MACH macro, we
173           * shouldn't replace such MULs with MOVs.
174           */
175          if ((brw_reg_type_to_size(inst->src[0].type) == 4 ||
176               brw_reg_type_to_size(inst->src[1].type) == 4) &&
177              (inst->dst.is_accumulator() ||
178               inst->writes_accumulator_implicitly(devinfo)))
179             break;
180 
181          /* a * 1.0 = a */
182          if (inst->src[1].is_one()) {
183             inst->opcode = BRW_OPCODE_MOV;
184             inst->sources = 1;
185             inst->src[1] = reg_undef;
186             progress = true;
187             break;
188          }
189 
190          /* a * -1.0 = -a */
191          if (inst->src[1].is_negative_one()) {
192             inst->opcode = BRW_OPCODE_MOV;
193             inst->sources = 1;
194             inst->src[0].negate = !inst->src[0].negate;
195             inst->src[1] = reg_undef;
196             progress = true;
197             break;
198          }
199 
200          break;
201       case BRW_OPCODE_ADD:
202          if (inst->src[1].file != IMM)
203             continue;
204 
205          if (brw_reg_type_is_integer(inst->src[1].type) &&
206              inst->src[1].is_zero()) {
207             inst->opcode = BRW_OPCODE_MOV;
208             inst->sources = 1;
209             inst->src[1] = reg_undef;
210             progress = true;
211             break;
212          }
213 
214          if (inst->src[0].file == IMM) {
215             assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
216             inst->opcode = BRW_OPCODE_MOV;
217             inst->sources = 1;
218             inst->src[0].f += inst->src[1].f;
219             inst->src[1] = reg_undef;
220             progress = true;
221             break;
222          }
223          break;
224 
225       case BRW_OPCODE_AND:
226          if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
227             const uint64_t src0 = src_as_uint(inst->src[0]);
228             const uint64_t src1 = src_as_uint(inst->src[1]);
229 
230             inst->opcode = BRW_OPCODE_MOV;
231             inst->sources = 1;
232             inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
233             inst->src[1] = reg_undef;
234             progress = true;
235             break;
236          }
237 
238          break;
239 
240       case BRW_OPCODE_OR:
241          if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
242             const uint64_t src0 = src_as_uint(inst->src[0]);
243             const uint64_t src1 = src_as_uint(inst->src[1]);
244 
245             inst->opcode = BRW_OPCODE_MOV;
246             inst->sources = 1;
247             inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
248             inst->src[1] = reg_undef;
249             progress = true;
250             break;
251          }
252 
253          if (inst->src[0].equals(inst->src[1]) ||
254              inst->src[1].is_zero()) {
255             /* On Gfx8+, the OR instruction can have a source modifier that
256              * performs logical not on the operand.  Cases of 'OR r0, ~r1, 0'
257              * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
258              */
259             if (inst->src[0].negate) {
260                inst->opcode = BRW_OPCODE_NOT;
261                inst->sources = 1;
262                inst->src[0].negate = false;
263             } else {
264                inst->opcode = BRW_OPCODE_MOV;
265                inst->sources = 1;
266             }
267             inst->src[1] = reg_undef;
268             progress = true;
269             break;
270          }
271          break;
272       case BRW_OPCODE_CMP:
273          if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
274               inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
275              inst->src[1].is_zero() &&
276              (inst->src[0].abs || inst->src[0].negate)) {
277             inst->src[0].abs = false;
278             inst->src[0].negate = false;
279             progress = true;
280             break;
281          }
282          break;
283       case BRW_OPCODE_SEL:
284          if (!devinfo->has_64bit_float &&
285              !devinfo->has_64bit_int &&
286              (inst->dst.type == BRW_REGISTER_TYPE_DF ||
287               inst->dst.type == BRW_REGISTER_TYPE_UQ ||
288               inst->dst.type == BRW_REGISTER_TYPE_Q)) {
289             assert(inst->dst.type == inst->src[0].type);
290             assert(!inst->saturate);
291             assert(!inst->src[0].abs && !inst->src[0].negate);
292             assert(!inst->src[1].abs && !inst->src[1].negate);
293             const brw::fs_builder ibld(&s, block, inst);
294 
295             if (!inst->is_partial_write())
296                ibld.emit_undef_for_dst(inst);
297 
298             set_predicate(inst->predicate,
299                           ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
300                                    subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
301                                    subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
302             set_predicate(inst->predicate,
303                           ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
304                                    subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
305                                    subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
306 
307             inst->remove(block);
308             progress = true;
309          }
310          if (inst->src[0].equals(inst->src[1])) {
311             inst->opcode = BRW_OPCODE_MOV;
312             inst->sources = 1;
313             inst->src[1] = reg_undef;
314             inst->predicate = BRW_PREDICATE_NONE;
315             inst->predicate_inverse = false;
316             progress = true;
317          } else if (inst->saturate && inst->src[1].file == IMM) {
318             switch (inst->conditional_mod) {
319             case BRW_CONDITIONAL_LE:
320             case BRW_CONDITIONAL_L:
321                switch (inst->src[1].type) {
322                case BRW_REGISTER_TYPE_F:
323                   if (inst->src[1].f >= 1.0f) {
324                      inst->opcode = BRW_OPCODE_MOV;
325                      inst->sources = 1;
326                      inst->src[1] = reg_undef;
327                      inst->conditional_mod = BRW_CONDITIONAL_NONE;
328                      progress = true;
329                   }
330                   break;
331                default:
332                   break;
333                }
334                break;
335             case BRW_CONDITIONAL_GE:
336             case BRW_CONDITIONAL_G:
337                switch (inst->src[1].type) {
338                case BRW_REGISTER_TYPE_F:
339                   if (inst->src[1].f <= 0.0f) {
340                      inst->opcode = BRW_OPCODE_MOV;
341                      inst->sources = 1;
342                      inst->src[1] = reg_undef;
343                      inst->conditional_mod = BRW_CONDITIONAL_NONE;
344                      progress = true;
345                   }
346                   break;
347                default:
348                   break;
349                }
350             default:
351                break;
352             }
353          }
354          break;
355       case BRW_OPCODE_MAD:
356          if (inst->src[0].type != BRW_REGISTER_TYPE_F ||
357              inst->src[1].type != BRW_REGISTER_TYPE_F ||
358              inst->src[2].type != BRW_REGISTER_TYPE_F)
359             break;
360          if (inst->src[1].is_one()) {
361             inst->opcode = BRW_OPCODE_ADD;
362             inst->sources = 2;
363             inst->src[1] = inst->src[2];
364             inst->src[2] = reg_undef;
365             progress = true;
366          } else if (inst->src[2].is_one()) {
367             inst->opcode = BRW_OPCODE_ADD;
368             inst->sources = 2;
369             inst->src[2] = reg_undef;
370             progress = true;
371          }
372          break;
373       case BRW_OPCODE_SHL:
374          if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
375             /* It's not currently possible to generate this, and this constant
376              * folding does not handle it.
377              */
378             assert(!inst->saturate);
379 
380             fs_reg result;
381 
382             switch (type_sz(inst->src[0].type)) {
383             case 2:
384                result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
385                break;
386             case 4:
387                result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
388                break;
389             case 8:
390                result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
391                break;
392             default:
393                /* Just in case a future platform re-enables B or UB types. */
394                unreachable("Invalid source size.");
395             }
396 
397             inst->opcode = BRW_OPCODE_MOV;
398             inst->src[0] = retype(result, inst->dst.type);
399             inst->src[1] = reg_undef;
400             inst->sources = 1;
401 
402             progress = true;
403          }
404          break;
405 
406       case SHADER_OPCODE_BROADCAST:
407          if (is_uniform(inst->src[0])) {
408             inst->opcode = BRW_OPCODE_MOV;
409             inst->sources = 1;
410             inst->force_writemask_all = true;
411             progress = true;
412          } else if (inst->src[1].file == IMM) {
413             inst->opcode = BRW_OPCODE_MOV;
414             /* It's possible that the selected component will be too large and
415              * overflow the register.  This can happen if someone does a
416              * readInvocation() from GLSL or SPIR-V and provides an OOB
417              * invocationIndex.  If this happens and we some how manage
418              * to constant fold it in and get here, then component() may cause
419              * us to start reading outside of the VGRF which will lead to an
420              * assert later.  Instead, just let it wrap around if it goes over
421              * exec_size.
422              */
423             const unsigned comp = inst->src[1].ud & (inst->exec_size - 1);
424             inst->src[0] = component(inst->src[0], comp);
425             inst->sources = 1;
426             inst->force_writemask_all = true;
427             progress = true;
428          }
429          break;
430 
431       case SHADER_OPCODE_SHUFFLE:
432          if (is_uniform(inst->src[0])) {
433             inst->opcode = BRW_OPCODE_MOV;
434             inst->sources = 1;
435             progress = true;
436          } else if (inst->src[1].file == IMM) {
437             inst->opcode = BRW_OPCODE_MOV;
438             inst->src[0] = component(inst->src[0],
439                                      inst->src[1].ud);
440             inst->sources = 1;
441             progress = true;
442          }
443          break;
444 
445       default:
446 	 break;
447       }
448 
449       /* Ensure that the correct source has the immediate value. 2-source
450        * instructions must have the immediate in src[1]. On Gfx12 and later,
451        * some 3-source instructions can have the immediate in src[0] or
452        * src[2]. It's complicated, so don't mess with 3-source instructions
453        * here.
454        */
455       if (progress && inst->sources == 2 && inst->is_commutative()) {
456          if (inst->src[0].file == IMM) {
457             fs_reg tmp = inst->src[1];
458             inst->src[1] = inst->src[0];
459             inst->src[0] = tmp;
460          }
461       }
462    }
463 
464    if (progress)
465       s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
466                             DEPENDENCY_INSTRUCTION_DETAIL);
467 
468    return progress;
469 }
470