• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "brw_fs.h"
7 #include "brw_builder.h"
8 #include "util/half_float.h"
9 
10 using namespace brw;
11 
12 static uint64_t
src_as_uint(const brw_reg & src)13 src_as_uint(const brw_reg &src)
14 {
15    assert(src.file == IMM);
16 
17    switch (src.type) {
18    case BRW_TYPE_W:
19       return (uint64_t)(int16_t)(src.ud & 0xffff);
20 
21    case BRW_TYPE_UW:
22       return (uint64_t)(uint16_t)(src.ud & 0xffff);
23 
24    case BRW_TYPE_D:
25       return (uint64_t)src.d;
26 
27    case BRW_TYPE_UD:
28       return (uint64_t)src.ud;
29 
30    case BRW_TYPE_Q:
31       return src.d64;
32 
33    case BRW_TYPE_UQ:
34       return src.u64;
35 
36    default:
37       unreachable("Invalid integer type.");
38    }
39 }
40 
41 static double
src_as_float(const brw_reg & src)42 src_as_float(const brw_reg &src)
43 {
44    assert(src.file == IMM);
45 
46    switch (src.type) {
47    case BRW_TYPE_HF:
48       return _mesa_half_to_float((uint16_t)src.d);
49 
50    case BRW_TYPE_F:
51       return src.f;
52 
53    case BRW_TYPE_DF:
54       return src.df;
55 
56    default:
57       unreachable("Invalid float type.");
58    }
59 }
60 
61 static brw_reg
brw_imm_for_type(uint64_t value,enum brw_reg_type type)62 brw_imm_for_type(uint64_t value, enum brw_reg_type type)
63 {
64    switch (type) {
65    case BRW_TYPE_W:
66       return brw_imm_w(value);
67 
68    case BRW_TYPE_UW:
69       return brw_imm_uw(value);
70 
71    case BRW_TYPE_D:
72       return brw_imm_d(value);
73 
74    case BRW_TYPE_UD:
75       return brw_imm_ud(value);
76 
77    case BRW_TYPE_Q:
78       return brw_imm_d(value);
79 
80    case BRW_TYPE_UQ:
81       return brw_imm_uq(value);
82 
83    default:
84       unreachable("Invalid integer type.");
85    }
86 }
87 
88 /**
89  * Converts a MAD to an ADD by folding the multiplicand sources.
90  */
91 static void
fold_multiplicands_of_MAD(fs_inst * inst)92 fold_multiplicands_of_MAD(fs_inst *inst)
93 {
94    assert(inst->opcode == BRW_OPCODE_MAD);
95    assert (inst->src[1].file == IMM &&
96            inst->src[2].file == IMM &&
97            !brw_type_is_vector_imm(inst->src[1].type) &&
98            !brw_type_is_vector_imm(inst->src[2].type));
99 
100    if (brw_type_is_int(inst->src[1].type)) {
101       const uint64_t imm1 = src_as_uint(inst->src[1]);
102       const uint64_t imm2 = src_as_uint(inst->src[2]);
103 
104       brw_reg product = brw_imm_ud(imm1 * imm2);
105 
106       inst->src[1] = retype(product,
107                             brw_type_larger_of(inst->src[1].type,
108                                                inst->src[2].type));
109    } else {
110       const double product = src_as_float(inst->src[1]) *
111          src_as_float(inst->src[2]);
112 
113       switch (brw_type_larger_of(inst->src[1].type,
114                                  inst->src[2].type)) {
115       case BRW_TYPE_HF:
116          inst->src[1] = retype(brw_imm_w(_mesa_float_to_half(product)),
117                                BRW_TYPE_HF);
118          break;
119 
120       case BRW_TYPE_F:
121          inst->src[1] = brw_imm_f(product);
122          break;
123 
124       case BRW_TYPE_DF:
125          unreachable("float64 should be impossible.");
126          break;
127 
128       default:
129          unreachable("Invalid float type.");
130       }
131    }
132 
133    inst->opcode = BRW_OPCODE_ADD;
134    inst->resize_sources(2);
135 }
136 
137 bool
brw_opt_constant_fold_instruction(const intel_device_info * devinfo,fs_inst * inst)138 brw_opt_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
139 {
140    bool progress = false;
141 
142    switch (inst->opcode) {
143    case BRW_OPCODE_ADD:
144       if (inst->src[0].file != IMM || inst->src[1].file != IMM)
145          break;
146 
147       if (brw_type_is_int(inst->src[0].type)) {
148          const uint64_t src0 = src_as_uint(inst->src[0]);
149          const uint64_t src1 = src_as_uint(inst->src[1]);
150 
151          inst->src[0] = brw_imm_for_type(src0 + src1, inst->dst.type);
152       } else {
153          assert(inst->src[0].type == BRW_TYPE_F);
154          inst->src[0].f += inst->src[1].f;
155       }
156 
157       inst->opcode = BRW_OPCODE_MOV;
158       inst->resize_sources(1);
159       progress = true;
160       break;
161 
162    case BRW_OPCODE_ADD3:
163       if (inst->src[0].file == IMM &&
164           inst->src[1].file == IMM &&
165           inst->src[2].file == IMM) {
166          const uint64_t src0 = src_as_uint(inst->src[0]);
167          const uint64_t src1 = src_as_uint(inst->src[1]);
168          const uint64_t src2 = src_as_uint(inst->src[2]);
169 
170          inst->opcode = BRW_OPCODE_MOV;
171          inst->src[0] = brw_imm_for_type(src0 + src1 + src2,
172                                          inst->dst.type);
173          inst->resize_sources(1);
174          progress = true;
175       }
176 
177       break;
178 
179    case BRW_OPCODE_AND:
180       if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
181          const uint64_t src0 = src_as_uint(inst->src[0]);
182          const uint64_t src1 = src_as_uint(inst->src[1]);
183 
184          inst->opcode = BRW_OPCODE_MOV;
185          inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
186          inst->resize_sources(1);
187          progress = true;
188          break;
189       }
190 
191       break;
192 
193    case BRW_OPCODE_MAD:
194       if (inst->src[1].file == IMM &&
195           inst->src[2].file == IMM &&
196           inst->src[3].file == IMM &&
197           !brw_type_is_vector_imm(inst->src[1].type) &&
198           !brw_type_is_vector_imm(inst->src[2].type) &&
199           !brw_type_is_vector_imm(inst->src[3].type)) {
200          fold_multiplicands_of_MAD(inst);
201          assert(inst->opcode == BRW_OPCODE_ADD);
202 
203          ASSERTED bool folded = brw_opt_constant_fold_instruction(devinfo, inst);
204          assert(folded);
205 
206          progress = true;
207          break;
208       }
209 
210       break;
211 
212    case BRW_OPCODE_MUL:
213       if (brw_type_is_float(inst->src[1].type))
214          break;
215 
216       /* From the BDW PRM, Vol 2a, "mul - Multiply":
217        *
218        *    "When multiplying integer datatypes, if src0 is DW and src1
219        *    is W, irrespective of the destination datatype, the
220        *    accumulator maintains full 48-bit precision."
221        *    ...
222        *    "When multiplying integer data types, if one of the sources
223        *    is a DW, the resulting full precision data is stored in
224        *    the accumulator."
225        *
226        * There are also similar notes in earlier PRMs.
227        *
228        * The MOV instruction can copy the bits of the source, but it
229        * does not clear the higher bits of the accumulator. So, because
230        * we might use the full accumulator in the MUL/MACH macro, we
231        * shouldn't replace such MULs with MOVs.
232        */
233       if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
234            brw_type_size_bytes(inst->src[1].type) == 4) &&
235           (inst->dst.is_accumulator() ||
236            inst->writes_accumulator_implicitly(devinfo)))
237          break;
238 
239       if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
240          inst->opcode = BRW_OPCODE_MOV;
241          inst->src[0] = brw_imm_d(0);
242          inst->resize_sources(1);
243          progress = true;
244          break;
245       }
246 
247       if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
248          const uint64_t src0 = src_as_uint(inst->src[0]);
249          const uint64_t src1 = src_as_uint(inst->src[1]);
250 
251          inst->opcode = BRW_OPCODE_MOV;
252          inst->src[0] = brw_imm_for_type(src0 * src1, inst->dst.type);
253          inst->resize_sources(1);
254          progress = true;
255          break;
256       }
257       break;
258 
259    case BRW_OPCODE_OR:
260       if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
261          const uint64_t src0 = src_as_uint(inst->src[0]);
262          const uint64_t src1 = src_as_uint(inst->src[1]);
263 
264          inst->opcode = BRW_OPCODE_MOV;
265          inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
266          inst->resize_sources(1);
267          progress = true;
268          break;
269       }
270 
271       break;
272 
273    case BRW_OPCODE_SHL:
274       if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
275          /* It's not currently possible to generate this, and this constant
276           * folding does not handle it.
277           */
278          assert(!inst->saturate);
279 
280          brw_reg result;
281 
282          switch (brw_type_size_bytes(inst->src[0].type)) {
283          case 2:
284             result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
285             break;
286          case 4:
287             result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
288             break;
289          case 8:
290             result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
291             break;
292          default:
293             /* Just in case a future platform re-enables B or UB types. */
294             unreachable("Invalid source size.");
295          }
296 
297          inst->opcode = BRW_OPCODE_MOV;
298          inst->src[0] = retype(result, inst->dst.type);
299          inst->resize_sources(1);
300 
301          progress = true;
302       }
303       break;
304 
305    default:
306       break;
307    }
308 
309 #ifndef NDEBUG
310    /* The function is only intended to do constant folding, so the result of
311     * progress must be a MOV of an immediate value.
312     */
313    if (progress) {
314       assert(inst->opcode == BRW_OPCODE_MOV);
315       assert(inst->src[0].file == IMM);
316    }
317 #endif
318 
319    return progress;
320 }
321 
/**
 * Algebraic simplification pass over every instruction in the shader's CFG.
 *
 * For each instruction, constant folding is attempted first via
 * brw_opt_constant_fold_instruction().  If that does not apply, a set of
 * per-opcode identities is tried (ADD, ADD3, MOV, MUL, OR, CMP, SEL, CSEL,
 * MAD, BROADCAST, SHUFFLE).  Instructions are rewritten in place.
 *
 * \param s  Visitor whose CFG is walked; its analyses are invalidated when
 *           anything changed.
 *
 * \return true if at least one instruction was modified.
 */
bool
brw_opt_algebraic(fs_visitor &s)
{
   const intel_device_info *devinfo = s.devinfo;
   bool progress = false;

   foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
      /* A fully folded instruction is already a MOV of an immediate, so
       * none of the identities below are tried on it.
       */
      if (brw_opt_constant_fold_instruction(devinfo, inst)) {
         progress = true;
         continue;
      }

      switch (inst->opcode) {
      case BRW_OPCODE_ADD:
         /* Integer a + 0 = a.  Float adds are left alone. */
         if (brw_type_is_int(inst->src[1].type) &&
                    inst->src[1].is_zero()) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->resize_sources(1);
            progress = true;
         }

         break;

      case BRW_OPCODE_ADD3: {
         const unsigned num_imm = (inst->src[0].file == IMM) +
                                  (inst->src[1].file == IMM) +
                                  (inst->src[2].file == IMM);

         /* If there is more than one immediate value, fold the values and
          * convert the instruction to either ADD or MOV.
          *
          * Three immediates would already have been folded by
          * brw_opt_constant_fold_instruction above.
          */
         assert(num_imm < 3);
         if (num_imm == 2) {
            uint64_t sum = 0;
            brw_reg src;

            for (unsigned i = 0; i < 3; i++) {
               if (inst->src[i].file == IMM) {
                  sum += src_as_uint(inst->src[i]);
               } else {
                  assert(src.file == BAD_FILE);
                  src = inst->src[i];
               }
            }

            assert(src.file != BAD_FILE);

            /* Only the low 32 bits of the sum decide between MOV and ADD. */
            if (uint32_t(sum) == 0) {
               inst->opcode = BRW_OPCODE_MOV;
               inst->src[0] = src;
               inst->resize_sources(1);
            } else {
               inst->opcode = BRW_OPCODE_ADD;
               inst->src[0] = src;
               inst->src[1] = brw_imm_ud(sum);
               inst->resize_sources(2);
            }

            progress = true;
         } else if (num_imm == 1) {
            /* If there is a single constant, and that constant is zero,
             * convert the instruction to regular ADD.
             */
            for (unsigned i = 0; i < 3; i++) {
               if (inst->src[i].is_zero()) {
                  inst->opcode = BRW_OPCODE_ADD;
                  /* Move the last source into the zero's slot so that the
                   * two remaining operands occupy src[0] and src[1].
                   */
                  inst->src[i] = inst->src[2];
                  inst->resize_sources(2);
                  progress = true;
                  break;
               }
            }
         }

         break;
      }

      case BRW_OPCODE_MOV:
         /* For a Z/NZ comparison written to the null register, abs and
          * negate source modifiers are dropped.
          */
         if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
              inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
             inst->dst.is_null() &&
             (inst->src[0].abs || inst->src[0].negate)) {
            inst->src[0].abs = false;
            inst->src[0].negate = false;
            progress = true;
            break;
         }

         if (inst->src[0].file != IMM)
            break;

         if (inst->saturate) {
            /* Full mixed-type saturates don't happen.  However, we can end up
             * with things like:
             *
             *    mov.sat(8) g21<1>DF       -1F
             *
             * Other mixed-size-but-same-base-type cases may also be possible.
             */
            if (inst->dst.type != inst->src[0].type &&
                inst->dst.type != BRW_TYPE_DF &&
                inst->src[0].type != BRW_TYPE_F)
               assert(!"unimplemented: saturate mixed types");

            if (brw_reg_saturate_immediate(&inst->src[0])) {
               inst->saturate = false;
               progress = true;
            }
         }
         break;

      case BRW_OPCODE_MUL:
         if (brw_type_is_int(inst->src[0].type)){
            /* From the BDW PRM, Vol 2a, "mul - Multiply":
             *
             *    "When multiplying integer datatypes, if src0 is DW and src1
             *    is W, irrespective of the destination datatype, the
             *    accumulator maintains full 48-bit precision."
             *    ...
             *    "When multiplying integer data types, if one of the sources
             *    is a DW, the resulting full precision data is stored in the
             *    accumulator."
             *
             * There are also similar notes in earlier PRMs.
             *
             * The MOV instruction can copy the bits of the source, but it
             * does not clear the higher bits of the accumulator. So, because
             * we might use the full accumulator in the MUL/MACH macro, we
             * shouldn't replace such MULs with MOVs.
             */
            if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
                 brw_type_size_bytes(inst->src[1].type) == 4) &&
                (inst->dst.is_accumulator() ||
                 inst->writes_accumulator_implicitly(devinfo)))
               break;

            for (unsigned i = 0; i < 2; i++) {
               /* a * 1 = a */
               if (inst->src[i].is_one()) {
                  inst->opcode = BRW_OPCODE_MOV;
               } else if (inst->src[i].is_negative_one()) {
                  /* a * -1 = -a */
                  inst->opcode = BRW_OPCODE_MOV;

                  /* If the source other than the -1 is immediate, just
                   * toggling the negation flag will not work. Due to the
                   * previous call to brw_opt_constant_fold_instruction,
                   * this should not be possible.
                   */
                  assert(inst->src[1 - i].file != IMM);
                  inst->src[1 - i].negate = !inst->src[1 - i].negate;
               }

               if (inst->opcode == BRW_OPCODE_MOV) {
                  /* If the literal 1 was src0, put the old src1 in src0. */
                  if (i == 0)
                     inst->src[0] = inst->src[1];

                  inst->resize_sources(1);
                  progress = true;
                  break;
               }
            }
         }
         break;
      case BRW_OPCODE_OR:
         /* a | a = a and a | 0 = a. */
         if (inst->src[0].equals(inst->src[1]) || inst->src[1].is_zero()) {
            /* On Gfx8+, the OR instruction can have a source modifier that
             * performs logical not on the operand.  Cases of 'OR r0, ~r1, 0'
             * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
             */
            if (inst->src[0].negate) {
               inst->opcode = BRW_OPCODE_NOT;
               inst->src[0].negate = false;
            } else {
               inst->opcode = BRW_OPCODE_MOV;
            }
            inst->resize_sources(1);
            progress = true;
            break;
         }
         break;
      case BRW_OPCODE_CMP:
         /* For a Z/NZ comparison against zero, abs and negate modifiers on
          * src[0] are dropped.
          */
         if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
              inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
             inst->src[1].is_zero() &&
             (inst->src[0].abs || inst->src[0].negate)) {
            inst->src[0].abs = false;
            inst->src[0].negate = false;
            progress = true;
            break;
         }
         break;
      case BRW_OPCODE_SEL:
         if (inst->src[0].equals(inst->src[1])) {
            /* Both choices are the same, so the selection is irrelevant. */
            inst->opcode = BRW_OPCODE_MOV;
            inst->predicate = BRW_PREDICATE_NONE;
            inst->predicate_inverse = false;
            inst->resize_sources(1);
            progress = true;
         } else if (inst->saturate && inst->src[1].file == IMM) {
            /* A saturating SEL against a float immediate: an L/LE select
             * against a value >= 1.0 or a G/GE select against a value
             * <= 0.0 is replaced by a saturating MOV of src[0].
             */
            switch (inst->conditional_mod) {
            case BRW_CONDITIONAL_LE:
            case BRW_CONDITIONAL_L:
               switch (inst->src[1].type) {
               case BRW_TYPE_F:
                  if (inst->src[1].f >= 1.0f) {
                     inst->opcode = BRW_OPCODE_MOV;
                     inst->conditional_mod = BRW_CONDITIONAL_NONE;
                     inst->resize_sources(1);
                     progress = true;
                  }
                  break;
               default:
                  break;
               }
               break;
            case BRW_CONDITIONAL_GE:
            case BRW_CONDITIONAL_G:
               switch (inst->src[1].type) {
               case BRW_TYPE_F:
                  if (inst->src[1].f <= 0.0f) {
                     inst->opcode = BRW_OPCODE_MOV;
                     inst->conditional_mod = BRW_CONDITIONAL_NONE;
                     inst->resize_sources(1);
                     progress = true;
                  }
                  break;
               default:
                  break;
               }
               /* Falls through to the default case, which only breaks. */
            default:
               break;
            }
         }
         break;
      case BRW_OPCODE_CSEL:
         if (brw_type_is_float(inst->dst.type)) {
            /* This transformation can both clean up spurious modifiers
             * (making assembly dumps easier to read) and convert GE with -abs
             * to LE with abs. See abs handling below.
             */
            if (inst->src[2].negate) {
               inst->conditional_mod = brw_swap_cmod(inst->conditional_mod);
               inst->src[2].negate = false;
               progress = true;
            }

            if (inst->src[2].abs) {
               switch (inst->conditional_mod) {
               case BRW_CONDITIONAL_Z:
               case BRW_CONDITIONAL_NZ:
                  inst->src[2].abs = false;
                  progress = true;
                  break;

               case BRW_CONDITIONAL_LE:
                  /* Converting to Z can help constant propagation into src0
                   * and src1.
                   */
                  inst->conditional_mod = BRW_CONDITIONAL_Z;
                  inst->src[2].abs = false;
                  progress = true;
                  break;

               default:
                  /* GE or L conditions with absolute value could be used to
                   * implement isnan(x) in CSEL. Transforming G with absolute
                   * value to NZ is **not** NaN safe.
                   */
                  break;
               }
            }
         } else if (brw_type_is_sint(inst->src[2].type)) {
            /* Integer transformations are more challenging than floating
             * point transformations due to INT_MIN == -(INT_MIN) ==
             * abs(INT_MIN).
             */
            if (inst->src[2].negate && inst->src[2].abs) {
               switch (inst->conditional_mod) {
               case BRW_CONDITIONAL_GE:
                  inst->src[2].negate = false;
                  inst->src[2].abs = false;
                  inst->conditional_mod = BRW_CONDITIONAL_Z;
                  progress = true;
                  break;
               case BRW_CONDITIONAL_L:
                  inst->src[2].negate = false;
                  inst->src[2].abs = false;
                  inst->conditional_mod = BRW_CONDITIONAL_NZ;
                  progress = true;
                  break;
               case BRW_CONDITIONAL_G:
                  /* This is a contradiction. -abs(x) cannot be > 0. */
                  inst->opcode = BRW_OPCODE_MOV;
                  inst->src[0] = inst->src[1];
                  inst->resize_sources(1);
                  progress = true;
                  break;
               case BRW_CONDITIONAL_LE:
                  /* This is a tautology. -abs(x) must be <= 0. */
                  inst->opcode = BRW_OPCODE_MOV;
                  inst->resize_sources(1);
                  progress = true;
                  break;
               case BRW_CONDITIONAL_Z:
               case BRW_CONDITIONAL_NZ:
                  inst->src[2].negate = false;
                  inst->src[2].abs = false;
                  progress = true;
                  break;
               default:
                  unreachable("Impossible icsel condition.");
               }
            }
         }
         break;
      case BRW_OPCODE_MAD:
         /* Two immediate multiplicands: replace them by their product and
          * turn the MAD into an ADD.
          */
         if (inst->src[1].file == IMM &&
             inst->src[2].file == IMM &&
             !brw_type_is_vector_imm(inst->src[1].type) &&
             !brw_type_is_vector_imm(inst->src[2].type)) {
            fold_multiplicands_of_MAD(inst);

            /* This could result in (x + 0). For floats, we want to leave this
             * as an ADD so that a subnormal x will get flushed to zero.
             */
            assert(inst->opcode == BRW_OPCODE_ADD);
            progress = true;
            break;
         }

         /* a + 1 * b = a + b, with the 1 in either multiplicand slot. */
         if (inst->src[1].is_one()) {
            inst->opcode = BRW_OPCODE_ADD;
            inst->src[1] = inst->src[2];
            inst->resize_sources(2);
            progress = true;
         } else if (inst->src[2].is_one()) {
            inst->opcode = BRW_OPCODE_ADD;
            inst->resize_sources(2);
            progress = true;
         }
         break;
      case SHADER_OPCODE_BROADCAST:
         if (is_uniform(inst->src[0])) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->force_writemask_all = true;
            inst->exec_size = 8 * reg_unit(devinfo);
            assert(inst->size_written == inst->dst.component_size(inst->exec_size));
            inst->resize_sources(1);
            progress = true;
         } else if (inst->src[1].file == IMM) {
            inst->opcode = BRW_OPCODE_MOV;
            /* It's possible that the selected component will be too large and
             * overflow the register.  This can happen if someone does a
             * readInvocation() from GLSL or SPIR-V and provides an OOB
             * invocationIndex.  If this happens and we somehow manage
             * to constant fold it in and get here, then component() may cause
             * us to start reading outside of the VGRF which will lead to an
             * assert later.  Instead, just let it wrap around if it goes over
             * exec_size.
             */
            const unsigned comp = inst->src[1].ud & (inst->exec_size - 1);
            inst->src[0] = component(inst->src[0], comp);
            inst->force_writemask_all = true;
            inst->exec_size = 8 * reg_unit(devinfo);
            assert(inst->size_written == inst->dst.component_size(inst->exec_size));
            inst->resize_sources(1);
            progress = true;
         }
         break;

      case SHADER_OPCODE_SHUFFLE:
         if (is_uniform(inst->src[0])) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->resize_sources(1);
            progress = true;
         } else if (inst->src[1].file == IMM) {
            /* Unlike BROADCAST above, the immediate index is used as-is,
             * without wrapping to exec_size.
             */
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0] = component(inst->src[0],
                                     inst->src[1].ud);
            inst->resize_sources(1);
            progress = true;
         }
         break;

      default:
	 break;
      }

      /* Ensure that the correct source has the immediate value. 2-source
       * instructions must have the immediate in src[1]. On Gfx12 and later,
       * some 3-source instructions can have the immediate in src[0] or
       * src[2]. It's complicated, so don't mess with 3-source instructions
       * here.
       *
       * Note that progress is cumulative across the whole pass, so this
       * check runs for every instruction once anything has changed.
       */
      if (progress && inst->sources == 2 && inst->is_commutative()) {
         if (inst->src[0].file == IMM) {
            brw_reg tmp = inst->src[1];
            inst->src[1] = inst->src[0];
            inst->src[0] = tmp;
         }
      }
   }

   if (progress)
      s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
                            DEPENDENCY_INSTRUCTION_DETAIL);

   return progress;
}
733