/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_fs_builder.h"

using namespace brw;

namespace {
   /* From the SKL PRM Vol 2a, "Move":
    *
    * "A mov with the same source and destination type, no source modifier,
    *  and no saturation is a raw move. A packed byte destination region (B
    *  or UB type with HorzStride == 1 and ExecSize > 1) can only be written
    *  using raw move."
    */
   bool
   is_byte_raw_mov(const fs_inst *inst)
   {
      return type_sz(inst->dst.type) == 1 &&
             inst->opcode == BRW_OPCODE_MOV &&
             inst->src[0].type == inst->dst.type &&
             !inst->saturate &&
             !inst->src[0].negate &&
             !inst->src[0].abs;
   }
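
   /* Illustrative (hypothetical) examples: mov(16) g10<1>UB g20<1>UB
    * qualifies as a raw byte MOV, while mov(16) g10<1>UB -g20<1>UB or
    * mov.sat(16) g10<1>UB g20<1>UB do not, since a source modifier or
    * saturation disqualifies a raw move.
    */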

   /*
    * Return an acceptable byte stride for the destination of an instruction
    * that requires it to have some particular alignment.
    */
   unsigned
   required_dst_byte_stride(const fs_inst *inst)
   {
      if (inst->dst.is_accumulator()) {
         /* If the destination is an accumulator, insist that we leave the
          * stride alone.  We cannot "fix" accumulator destinations by writing
          * to a temporary and emitting a MOV into the original destination.
          * For multiply instructions (our one use of the accumulator), the
          * MUL writes the full 66 bits of the accumulator whereas the MOV we
          * would emit only writes 33 bits and leaves the top 33 bits
          * undefined.
          *
          * It's safe to just require the original stride here because the
          * lowering pass will detect the mismatch in has_invalid_src_region
          * and fix the sources of the multiply instead of the destination.
          */
         return inst->dst.stride * type_sz(inst->dst.type);
      } else if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
                 !is_byte_raw_mov(inst)) {
         return get_exec_type_size(inst);
      } else {
         /* Calculate the maximum byte stride and the minimum/maximum type
          * size across all source and destination operands we are required to
          * lower.
          */
         unsigned max_stride = inst->dst.stride * type_sz(inst->dst.type);
         unsigned min_size = type_sz(inst->dst.type);
         unsigned max_size = type_sz(inst->dst.type);

         for (unsigned i = 0; i < inst->sources; i++) {
            if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) {
               const unsigned size = type_sz(inst->src[i].type);
               max_stride = MAX2(max_stride, inst->src[i].stride * size);
               min_size = MIN2(min_size, size);
               max_size = MAX2(max_size, size);
            }
         }

         /* All operands involved in lowering need to fit in the calculated
          * stride.
          */
         assert(max_size <= 4 * min_size);

         /* Attempt to use the largest byte stride among all present operands,
          * but never exceed a stride of 4 since that would lead to illegal
          * destination regions during lowering.
          */
         return MIN2(max_stride, 4 * min_size);
      }
   }
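
   /* Worked example (hypothetical instruction, for illustration only): for
    * add(8) g10<1>UD g20<1>UD g30<2>UW the operand byte strides are 4, 4
    * and 2 * 2 = 4 respectively, min_size is 2 and max_size is 4, so the
    * else-branch returns MIN2(4, 4 * 2) = 4, i.e. the destination keeps a
    * byte stride of 4 (one UD element per channel).
    */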

   /*
    * Return an acceptable byte sub-register offset for the destination of an
    * instruction that requires it to be aligned to the sub-register offset of
    * the sources.
    */
   unsigned
   required_dst_byte_offset(const fs_inst *inst)
   {
      for (unsigned i = 0; i < inst->sources; i++) {
         if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))
            if (reg_offset(inst->src[i]) % REG_SIZE !=
                reg_offset(inst->dst) % REG_SIZE)
               return 0;
      }

      return reg_offset(inst->dst) % REG_SIZE;
   }
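
   /* E.g. (hypothetically) if the destination starts at byte 16 of its
    * register and every non-uniform source also starts at byte 16 of its
    * respective register, the offset of 16 is acceptable; if any source
    * disagrees, the conservative answer is a sub-register offset of 0.
    */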

   /*
    * Return the closest legal execution type for an instruction on
    * the specified platform.
    */
   brw_reg_type
   required_exec_type(const intel_device_info *devinfo, const fs_inst *inst)
   {
      const brw_reg_type t = get_exec_type(inst);
      const bool has_64bit = brw_reg_type_is_floating_point(t) ?
         devinfo->has_64bit_float : devinfo->has_64bit_int;

      switch (inst->opcode) {
      case SHADER_OPCODE_SHUFFLE:
         /* IVB has an issue (which we found empirically) where it reads
          * two address register components per channel for indirectly
          * addressed 64-bit sources.
          *
          * From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *    "When source or destination datatype is 64b or operation is
          *    integer DWord multiply, indirect addressing must not be
          *    used."
          *
          * Work around both of the above and handle platforms that
          * don't support 64-bit types at all.
          */
         if ((!has_64bit || devinfo->verx10 == 70 ||
              devinfo->platform == INTEL_PLATFORM_CHV ||
              intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
            return BRW_REGISTER_TYPE_UD;
         else if (has_dst_aligned_region_restriction(devinfo, inst))
            return brw_int_type(type_sz(t), false);
         else
            return t;

      case SHADER_OPCODE_SEL_EXEC:
         if (!has_64bit && type_sz(t) > 4)
            return BRW_REGISTER_TYPE_UD;
         else
            return t;

      case SHADER_OPCODE_QUAD_SWIZZLE:
         if (has_dst_aligned_region_restriction(devinfo, inst))
            return brw_int_type(type_sz(t), false);
         else
            return t;

      case SHADER_OPCODE_CLUSTER_BROADCAST:
         /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *    "When source or destination datatype is 64b or operation is
          *    integer DWord multiply, indirect addressing must not be
          *    used."
          *
          * Work around the above and handle platforms that don't
          * support 64-bit types at all.
          */
         if ((!has_64bit || devinfo->platform == INTEL_PLATFORM_CHV ||
              intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
            return BRW_REGISTER_TYPE_UD;
         else
            return brw_int_type(type_sz(t), false);

      case SHADER_OPCODE_BROADCAST:
      case SHADER_OPCODE_MOV_INDIRECT:
         if (((devinfo->verx10 == 70 ||
               devinfo->platform == INTEL_PLATFORM_CHV ||
               intel_device_info_is_9lp(devinfo) ||
               devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) ||
             (devinfo->verx10 >= 125 &&
              brw_reg_type_is_floating_point(inst->src[0].type)))
            return brw_int_type(type_sz(t), false);
         else
            return t;

      default:
         return t;
      }
   }
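
   /* For instance (hypothetically), a SHADER_OPCODE_SHUFFLE with a DF
    * execution type on CHV is given a required execution type of UD, and
    * lower_exec_type() below then splits it into two UD shuffles acting on
    * the low and high halves of each 64-bit channel.
    */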

   /*
    * Return whether the instruction has an unsupported channel bit layout
    * specified for the i-th source region.
    */
   bool
   has_invalid_src_region(const intel_device_info *devinfo, const fs_inst *inst,
                          unsigned i)
   {
      if (is_unordered(inst) || inst->is_control_source(i))
         return false;

      /* Empirical testing shows that Broadwell has a bug affecting half-float
       * MAD instructions when any of its sources has a non-zero offset, such
       * as:
       *
       * mad(8) g18<1>HF -g17<4,4,1>HF g14.8<4,4,1>HF g11<4,4,1>HF { align16 1Q };
       *
       * We used to generate code like this for SIMD8 executions where
       * components Y and W of a vector were packed at offset 16B of a SIMD
       * register. The problem doesn't occur if the stride of the source is 0.
       */
      if (devinfo->ver == 8 &&
          inst->opcode == BRW_OPCODE_MAD &&
          inst->src[i].type == BRW_REGISTER_TYPE_HF &&
          reg_offset(inst->src[i]) % REG_SIZE > 0 &&
          inst->src[i].stride != 0) {
         return true;
      }

      const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
      const unsigned src_byte_stride = inst->src[i].stride *
         type_sz(inst->src[i].type);
      const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE;
      const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE;

      return has_dst_aligned_region_restriction(devinfo, inst) &&
             !is_uniform(inst->src[i]) &&
             (src_byte_stride != dst_byte_stride ||
              src_byte_offset != dst_byte_offset);
   }
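
   /* As a hypothetical example, on a platform where
    * has_dst_aligned_region_restriction() holds, add(8) g10<1>HF g20<2>HF
    * g30<1>HF would be flagged for source 0: its byte stride of 4 doesn't
    * match the destination byte stride of 2.
    */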

   /*
    * Return whether the instruction has an unsupported channel bit layout
    * specified for the destination region.
    */
   bool
   has_invalid_dst_region(const intel_device_info *devinfo,
                          const fs_inst *inst)
   {
      if (is_unordered(inst)) {
         return false;
      } else {
         const brw_reg_type exec_type = get_exec_type(inst);
         const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE;
         const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
         const bool is_narrowing_conversion = !is_byte_raw_mov(inst) &&
            type_sz(inst->dst.type) < type_sz(exec_type);

         return (has_dst_aligned_region_restriction(devinfo, inst) &&
                 (required_dst_byte_stride(inst) != dst_byte_stride ||
                  required_dst_byte_offset(inst) != dst_byte_offset)) ||
                (is_narrowing_conversion &&
                 required_dst_byte_stride(inst) != dst_byte_stride);
      }
   }
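
   /* A narrowing conversion such as mov(8) g10<1>W g20<1>D (again purely
    * illustrative) has a destination byte stride of 2 but a required byte
    * stride of 4 (the DWord execution type size), so it is reported as
    * invalid and lower_dst_region() will spread the destination out.
    */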

   /**
    * Return a non-zero value if the execution type of the instruction is
    * unsupported.  The destination and sources matching the returned mask
    * will be bit-cast to an integer type of appropriate size, lowering any
    * source or destination modifiers into separate MOV instructions.
    */
   unsigned
   has_invalid_exec_type(const intel_device_info *devinfo, const fs_inst *inst)
   {
      if (required_exec_type(devinfo, inst) != get_exec_type(inst)) {
         switch (inst->opcode) {
         case SHADER_OPCODE_SHUFFLE:
         case SHADER_OPCODE_QUAD_SWIZZLE:
         case SHADER_OPCODE_CLUSTER_BROADCAST:
         case SHADER_OPCODE_BROADCAST:
         case SHADER_OPCODE_MOV_INDIRECT:
            return 0x1;

         case SHADER_OPCODE_SEL_EXEC:
            return 0x3;

         default:
            unreachable("Unknown invalid execution type source mask.");
         }
      } else {
         return 0;
      }
   }
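
   /* The mask is a bitfield over sources: 0x1 means only src[0] needs the
    * bit-cast, 0x3 means src[0] and src[1] both do.  E.g. (hypothetically)
    * a 64-bit SHADER_OPCODE_SEL_EXEC on hardware without 64-bit integer
    * support yields 0x3, so both sources and the destination get bit-cast
    * to an integer type before the instruction is split by
    * lower_exec_type().
    */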

   /*
    * Return whether the instruction has unsupported source modifiers
    * specified for the i-th source region.
    */
   bool
   has_invalid_src_modifiers(const intel_device_info *devinfo,
                             const fs_inst *inst, unsigned i)
   {
      return (!inst->can_do_source_mods(devinfo) &&
              (inst->src[i].negate || inst->src[i].abs)) ||
             ((has_invalid_exec_type(devinfo, inst) & (1u << i)) &&
              (inst->src[i].negate || inst->src[i].abs ||
               inst->src[i].type != get_exec_type(inst)));
   }

   /*
    * Return whether the instruction has an unsupported type conversion
    * specified for the destination.
    */
   bool
   has_invalid_conversion(const intel_device_info *devinfo, const fs_inst *inst)
   {
      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         return false;
      case BRW_OPCODE_SEL:
         return inst->dst.type != get_exec_type(inst);
      default:
         /* FIXME: We assume the opcodes not explicitly mentioned before just
          * work fine with arbitrary conversions, unless they need to be
          * bit-cast.
          */
         return has_invalid_exec_type(devinfo, inst) &&
                inst->dst.type != get_exec_type(inst);
      }
   }

   /**
    * Return whether the instruction has unsupported destination modifiers.
    */
   bool
   has_invalid_dst_modifiers(const intel_device_info *devinfo, const fs_inst *inst)
   {
      return (has_invalid_exec_type(devinfo, inst) &&
              (inst->saturate || inst->conditional_mod)) ||
             has_invalid_conversion(devinfo, inst);
   }

   /**
    * Return whether the instruction has non-standard semantics for the
    * conditional mod which don't cause the flag register to be updated with
    * the comparison result.
    */
   bool
   has_inconsistent_cmod(const fs_inst *inst)
   {
      return inst->opcode == BRW_OPCODE_SEL ||
             inst->opcode == BRW_OPCODE_CSEL ||
             inst->opcode == BRW_OPCODE_IF ||
             inst->opcode == BRW_OPCODE_WHILE;
   }

   bool
   lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst);
}
namespace brw {
   /**
    * Remove any modifiers from the \p i-th source region of the instruction,
    * including negate, abs and any implicit type conversion to the execution
    * type.  Instead any source modifiers will be implemented as a separate
    * MOV instruction prior to the original instruction.
    */
   bool
   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i)
   {
      assert(inst->components_read(i) == 1);
      assert(v->devinfo->has_integer_dword_mul ||
             inst->opcode != BRW_OPCODE_MUL ||
             brw_reg_type_is_floating_point(get_exec_type(inst)) ||
             MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4 ||
             type_sz(inst->src[i].type) == get_exec_type_size(inst));

      const fs_builder ibld(v, block, inst);
      const fs_reg tmp = ibld.vgrf(get_exec_type(inst));

      lower_instruction(v, block, ibld.MOV(tmp, inst->src[i]));
      inst->src[i] = tmp;

      return true;
   }
}
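
/* Conceptually (illustrative pseudo-assembly, not emitted verbatim), an
 * instruction like some_op(8) g10<1>D -g20<1>D that cannot take a negate
 * modifier becomes:
 *
 *    mov(8) tmp<1>D -g20<1>D
 *    some_op(8) g10<1>D tmp<1>D
 *
 * with the MOV absorbing the modifier and possibly being lowered further
 * itself.
 */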

namespace {
   /**
    * Remove any modifiers from the destination region of the instruction,
    * including saturate, conditional mod and any implicit type conversion
    * from the execution type.  Instead any destination modifiers will be
    * implemented as a separate MOV instruction after the original
    * instruction.
    */
   bool
   lower_dst_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst)
   {
      const fs_builder ibld(v, block, inst);
      const brw_reg_type type = get_exec_type(inst);
      /* Not strictly necessary, but if possible use a temporary with the same
       * channel alignment as the current destination in order to avoid
       * violating the restrictions enforced later on by lower_src_region()
       * and lower_dst_region(), which would introduce additional copy
       * instructions into the program unnecessarily.
       */
      const unsigned stride =
         type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 :
         type_sz(inst->dst.type) * inst->dst.stride / type_sz(type);
      fs_reg tmp = ibld.vgrf(type, stride);
      ibld.UNDEF(tmp);
      tmp = horiz_stride(tmp, stride);

      /* Emit a MOV taking care of all the destination modifiers. */
      fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp);
      mov->saturate = inst->saturate;
      if (!has_inconsistent_cmod(inst))
         mov->conditional_mod = inst->conditional_mod;
      if (inst->opcode != BRW_OPCODE_SEL) {
         mov->predicate = inst->predicate;
         mov->predicate_inverse = inst->predicate_inverse;
      }
      mov->flag_subreg = inst->flag_subreg;
      lower_instruction(v, block, mov);

      /* Point the original instruction at the temporary, and clean up any
       * destination modifiers.
       */
      assert(inst->size_written == inst->dst.component_size(inst->exec_size));
      inst->dst = tmp;
      inst->size_written = inst->dst.component_size(inst->exec_size);
      inst->saturate = false;
      if (!has_inconsistent_cmod(inst))
         inst->conditional_mod = BRW_CONDITIONAL_NONE;

      assert(!inst->flags_written(v->devinfo) || !mov->predicate);
      return true;
   }
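
   /* The net effect (sketched as pseudo-assembly, for illustration): a
    * hypothetical add.sat(8) g10<1>F g20<1>F g30<1>F is rewritten as
    *
    *    add(8) tmp<1>F g20<1>F g30<1>F
    *    mov.sat(8) g10<1>F tmp<1>F
    *
    * so the arithmetic instruction itself no longer carries the saturate
    * modifier.
    */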

   /**
    * Remove any non-trivial shuffling of data from the \p i-th source region
    * of the instruction.  Instead implement the region as a series of integer
    * copies into a temporary with the same channel layout as the destination.
    */
   bool
   lower_src_region(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i)
   {
      assert(inst->components_read(i) == 1);
      const fs_builder ibld(v, block, inst);
      const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride /
                              type_sz(inst->src[i].type);
      assert(stride > 0);
      fs_reg tmp = ibld.vgrf(inst->src[i].type, stride);
      ibld.UNDEF(tmp);
      tmp = horiz_stride(tmp, stride);

      /* Emit a series of integer copies of up to 32 bits each, with any
       * source modifiers cleaned up (because their semantics are dependent
       * on the type).
       */
      const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4),
                                                 false);
      const unsigned n = type_sz(tmp.type) / type_sz(raw_type);
      fs_reg raw_src = inst->src[i];
      raw_src.negate = false;
      raw_src.abs = false;

      for (unsigned j = 0; j < n; j++)
         ibld.MOV(subscript(tmp, raw_type, j), subscript(raw_src, raw_type, j));

      /* Point the original instruction at the temporary, making sure to keep
       * any source modifiers in the instruction.
       */
      fs_reg lower_src = tmp;
      lower_src.negate = inst->src[i].negate;
      lower_src.abs = inst->src[i].abs;
      inst->src[i] = lower_src;

      return true;
   }
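
   /* For a 64-bit source (illustrative example) the raw type is UD and
    * n == 2, so the region is implemented as two UD copies per channel,
    * one for the low and one for the high dword, into a temporary whose
    * stride matches the destination; for 32-bit and smaller sources a
    * single raw copy per channel suffices (n == 1).
    */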

   /**
    * Remove any non-trivial shuffling of data from the destination region of
    * the instruction.  Instead implement the region as a series of integer
    * copies from a temporary with a channel layout compatible with the
    * sources.
    */
   bool
   lower_dst_region(fs_visitor *v, bblock_t *block, fs_inst *inst)
   {
      /* We cannot replace the result of an integer multiply which writes the
       * accumulator because MUL+MACH pairs act on the accumulator as a 66-bit
       * value whereas the MOV will act on only 32 or 33 bits of the
       * accumulator.
       */
      assert(inst->opcode != BRW_OPCODE_MUL || !inst->dst.is_accumulator() ||
             brw_reg_type_is_floating_point(inst->dst.type));

      const fs_builder ibld(v, block, inst);
      const unsigned stride = required_dst_byte_stride(inst) /
                              type_sz(inst->dst.type);
      assert(stride > 0);
      fs_reg tmp = ibld.vgrf(inst->dst.type, stride);
      ibld.UNDEF(tmp);
      tmp = horiz_stride(tmp, stride);

      /* Emit a series of integer copies of up to 32 bits each from the
       * temporary into the original destination.
       */
      const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4),
                                                 false);
      const unsigned n = type_sz(tmp.type) / type_sz(raw_type);

      if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) {
         /* Note that in general we cannot simply predicate the copies on the
          * same flag register as the original instruction, since it may have
          * been overwritten by the instruction itself.  Instead initialize
          * the temporary with the previous contents of the destination
          * register.
          */
         for (unsigned j = 0; j < n; j++)
            ibld.MOV(subscript(tmp, raw_type, j),
                     subscript(inst->dst, raw_type, j));
      }

      for (unsigned j = 0; j < n; j++)
         ibld.at(block, inst->next).MOV(subscript(inst->dst, raw_type, j),
                                        subscript(tmp, raw_type, j));

      /* Point the original instruction at the temporary, making sure to keep
       * any destination modifiers in the instruction.
       */
      assert(inst->size_written == inst->dst.component_size(inst->exec_size));
      inst->dst = tmp;
      inst->size_written = inst->dst.component_size(inst->exec_size);

      return true;
   }
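
   /* Design note: for a predicated instruction this amounts to a
    * read-modify-write sequence (sketched, purely illustrative):
    *
    *    mov(8) tmp<2>W g10<2>W      ; preload old destination contents
    *    (+f0) op(8) tmp<2>W ...     ; original instruction, now writing tmp
    *    mov(8) g10<2>W tmp<2>W      ; unconditional copy-back
    *
    * which preserves the values of disabled channels without having to
    * predicate the copy-back on a flag the instruction may have clobbered.
    */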

   /**
    * Change sources and destination of the instruction to an
    * appropriate legal type, splitting the instruction into multiple
    * ones of smaller execution type if necessary, to be used in cases
    * where the execution type of an instruction is unsupported.
    */
   bool
   lower_exec_type(fs_visitor *v, bblock_t *block, fs_inst *inst)
   {
      assert(inst->dst.type == get_exec_type(inst));
      const unsigned mask = has_invalid_exec_type(v->devinfo, inst);
      const brw_reg_type raw_type = required_exec_type(v->devinfo, inst);
      const unsigned n = get_exec_type_size(inst) / type_sz(raw_type);
      const fs_builder ibld(v, block, inst);

      fs_reg tmp = ibld.vgrf(inst->dst.type, inst->dst.stride);
      ibld.UNDEF(tmp);
      tmp = horiz_stride(tmp, inst->dst.stride);

      for (unsigned j = 0; j < n; j++) {
         fs_inst sub_inst = *inst;

         for (unsigned i = 0; i < inst->sources; i++) {
            if (mask & (1u << i)) {
               assert(inst->src[i].type == inst->dst.type);
               sub_inst.src[i] = subscript(inst->src[i], raw_type, j);
            }
         }

         sub_inst.dst = subscript(tmp, raw_type, j);

         assert(sub_inst.size_written == sub_inst.dst.component_size(sub_inst.exec_size));
         assert(!sub_inst.flags_written(v->devinfo) && !sub_inst.saturate);
         ibld.emit(sub_inst);

         fs_inst *mov = ibld.MOV(subscript(inst->dst, raw_type, j),
                                 subscript(tmp, raw_type, j));
         if (inst->opcode != BRW_OPCODE_SEL) {
            mov->predicate = inst->predicate;
            mov->predicate_inverse = inst->predicate_inverse;
         }
         lower_instruction(v, block, mov);
      }

      inst->remove(block);

      return true;
   }
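
   /* E.g. (hypothetical) a 64-bit SHADER_OPCODE_SEL_EXEC on hardware
    * without 64-bit integer support has raw_type UD and n == 2, so it is
    * replaced by two UD SEL_EXECs, one operating on the low dwords of
    * every channel and one on the high dwords, each followed by a copy
    * into the corresponding half of the real destination.
    */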

   /**
    * Legalize the source and destination regioning controls of the specified
    * instruction.
    */
   bool
   lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst)
   {
      const intel_device_info *devinfo = v->devinfo;
      bool progress = false;

      if (has_invalid_dst_modifiers(devinfo, inst))
         progress |= lower_dst_modifiers(v, block, inst);

      if (has_invalid_dst_region(devinfo, inst))
         progress |= lower_dst_region(v, block, inst);

      for (unsigned i = 0; i < inst->sources; i++) {
         if (has_invalid_src_modifiers(devinfo, inst, i))
            progress |= lower_src_modifiers(v, block, inst, i);

         if (has_invalid_src_region(devinfo, inst, i))
            progress |= lower_src_region(v, block, inst, i);
      }

      if (has_invalid_exec_type(devinfo, inst))
         progress |= lower_exec_type(v, block, inst);

      return progress;
   }
}

bool
fs_visitor::lower_regioning()
{
   bool progress = false;

   foreach_block_and_inst_safe(block, fs_inst, inst, cfg)
      progress |= lower_instruction(this, block, inst);

   if (progress)
      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);

   return progress;
}