• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- c++ -*- */
2 /*
3  * Copyright © 2010-2015 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #ifndef ELK_IR_FS_H
26 #define ELK_IR_FS_H
27 
28 #include "elk_shader.h"
29 
30 class elk_fs_inst;
31 
32 class elk_fs_reg : public elk_backend_reg {
33 public:
34    DECLARE_RALLOC_CXX_OPERATORS(elk_fs_reg)
35 
36    void init();
37 
38    elk_fs_reg();
39    elk_fs_reg(struct ::elk_reg reg);
40    elk_fs_reg(enum elk_reg_file file, unsigned nr);
41    elk_fs_reg(enum elk_reg_file file, unsigned nr, enum elk_reg_type type);
42 
43    bool equals(const elk_fs_reg &r) const;
44    bool negative_equals(const elk_fs_reg &r) const;
45    bool is_contiguous() const;
46 
47    /**
48     * Return the size in bytes of a single logical component of the
49     * register assuming the given execution width.
50     */
51    unsigned component_size(unsigned width) const;
52 
53    /** Register region horizontal stride */
54    uint8_t stride;
55 };
56 
57 static inline elk_fs_reg
negate(elk_fs_reg reg)58 negate(elk_fs_reg reg)
59 {
60    assert(reg.file != IMM);
61    reg.negate = !reg.negate;
62    return reg;
63 }
64 
65 static inline elk_fs_reg
retype(elk_fs_reg reg,enum elk_reg_type type)66 retype(elk_fs_reg reg, enum elk_reg_type type)
67 {
68    reg.type = type;
69    return reg;
70 }
71 
72 static inline elk_fs_reg
byte_offset(elk_fs_reg reg,unsigned delta)73 byte_offset(elk_fs_reg reg, unsigned delta)
74 {
75    switch (reg.file) {
76    case BAD_FILE:
77       break;
78    case VGRF:
79    case ATTR:
80    case UNIFORM:
81       reg.offset += delta;
82       break;
83    case MRF: {
84       const unsigned suboffset = reg.offset + delta;
85       reg.nr += suboffset / REG_SIZE;
86       reg.offset = suboffset % REG_SIZE;
87       break;
88    }
89    case ARF:
90    case FIXED_GRF: {
91       const unsigned suboffset = reg.subnr + delta;
92       reg.nr += suboffset / REG_SIZE;
93       reg.subnr = suboffset % REG_SIZE;
94       break;
95    }
96    case IMM:
97    default:
98       assert(delta == 0);
99    }
100    return reg;
101 }
102 
103 static inline elk_fs_reg
horiz_offset(const elk_fs_reg & reg,unsigned delta)104 horiz_offset(const elk_fs_reg &reg, unsigned delta)
105 {
106    switch (reg.file) {
107    case BAD_FILE:
108    case UNIFORM:
109    case IMM:
110       /* These only have a single component that is implicitly splatted.  A
111        * horizontal offset should be a harmless no-op.
112        * XXX - Handle vector immediates correctly.
113        */
114       return reg;
115    case VGRF:
116    case MRF:
117    case ATTR:
118       return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
119    case ARF:
120    case FIXED_GRF:
121       if (reg.is_null()) {
122          return reg;
123       } else {
124          const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
125          const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
126          const unsigned width = 1 << reg.width;
127 
128          if (delta % width == 0) {
129             return byte_offset(reg, delta / width * vstride * type_sz(reg.type));
130          } else {
131             assert(vstride == hstride * width);
132             return byte_offset(reg, delta * hstride * type_sz(reg.type));
133          }
134       }
135    }
136    unreachable("Invalid register file");
137 }
138 
139 static inline elk_fs_reg
offset(elk_fs_reg reg,unsigned width,unsigned delta)140 offset(elk_fs_reg reg, unsigned width, unsigned delta)
141 {
142    switch (reg.file) {
143    case BAD_FILE:
144       break;
145    case ARF:
146    case FIXED_GRF:
147    case MRF:
148    case VGRF:
149    case ATTR:
150    case UNIFORM:
151       return byte_offset(reg, delta * reg.component_size(width));
152    case IMM:
153       assert(delta == 0);
154    }
155    return reg;
156 }
157 
158 /**
159  * Get the scalar channel of \p reg given by \p idx and replicate it to all
160  * channels of the result.
161  */
162 static inline elk_fs_reg
component(elk_fs_reg reg,unsigned idx)163 component(elk_fs_reg reg, unsigned idx)
164 {
165    reg = horiz_offset(reg, idx);
166    reg.stride = 0;
167    if (reg.file == ARF || reg.file == FIXED_GRF) {
168       reg.vstride = ELK_VERTICAL_STRIDE_0;
169       reg.width = ELK_WIDTH_1;
170       reg.hstride = ELK_HORIZONTAL_STRIDE_0;
171    }
172    return reg;
173 }
174 
175 /**
176  * Return an integer identifying the discrete address space a register is
177  * contained in.  A register is by definition fully contained in the single
178  * reg_space it belongs to, so two registers with different reg_space ids are
179  * guaranteed not to overlap.  Most register files are a single reg_space of
180  * its own, only the VGRF and ATTR files are composed of multiple discrete
181  * address spaces, one for each allocation and input attribute respectively.
182  */
183 static inline uint32_t
reg_space(const elk_fs_reg & r)184 reg_space(const elk_fs_reg &r)
185 {
186    return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
187 }
188 
189 /**
190  * Return the base offset in bytes of a register relative to the start of its
191  * reg_space().
192  */
193 static inline unsigned
reg_offset(const elk_fs_reg & r)194 reg_offset(const elk_fs_reg &r)
195 {
196    return (r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
197           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
198           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
199 }
200 
201 /**
202  * Return the amount of padding in bytes left unused between individual
203  * components of register \p r due to a (horizontal) stride value greater than
204  * one, or zero if components are tightly packed in the register file.
205  */
206 static inline unsigned
reg_padding(const elk_fs_reg & r)207 reg_padding(const elk_fs_reg &r)
208 {
209    const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
210                             r.hstride == 0 ? 0 :
211                             1 << (r.hstride - 1));
212    return (MAX2(1, stride) - 1) * type_sz(r.type);
213 }
214 
215 /* Do not call this directly. Call regions_overlap() instead. */
216 static inline bool
regions_overlap_MRF(const elk_fs_reg & r,unsigned dr,const elk_fs_reg & s,unsigned ds)217 regions_overlap_MRF(const elk_fs_reg &r, unsigned dr, const elk_fs_reg &s, unsigned ds)
218 {
219    if (r.nr & ELK_MRF_COMPR4) {
220       elk_fs_reg t = r;
221       t.nr &= ~ELK_MRF_COMPR4;
222       /* COMPR4 regions are translated by the hardware during decompression
223        * into two separate half-regions 4 MRFs apart from each other.
224        *
225        * Note: swapping s and t in this parameter list eliminates one possible
226        * level of recursion (since the s in the called versions of
227        * regions_overlap_MRF can't be COMPR4), and that makes the compiled
228        * code a lot smaller.
229        */
230       return regions_overlap_MRF(s, ds, t, dr / 2) ||
231              regions_overlap_MRF(s, ds, byte_offset(t, 4 * REG_SIZE), dr / 2);
232    } else if (s.nr & ELK_MRF_COMPR4) {
233       return regions_overlap_MRF(s, ds, r, dr);
234    }
235 
236    return !((r.nr * REG_SIZE + r.offset + dr) <= (s.nr * REG_SIZE + s.offset) ||
237             (s.nr * REG_SIZE + s.offset + ds) <= (r.nr * REG_SIZE + r.offset));
238 }
239 
240 /**
241  * Return whether the register region starting at \p r and spanning \p dr
242  * bytes could potentially overlap the register region starting at \p s and
243  * spanning \p ds bytes.
244  */
245 static inline bool
regions_overlap(const elk_fs_reg & r,unsigned dr,const elk_fs_reg & s,unsigned ds)246 regions_overlap(const elk_fs_reg &r, unsigned dr, const elk_fs_reg &s, unsigned ds)
247 {
248    if (r.file != s.file)
249       return false;
250 
251    if (r.file == VGRF) {
252       return r.nr == s.nr &&
253              !(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
254    } else if (r.file != MRF) {
255       return !(reg_offset(r) + dr <= reg_offset(s) ||
256                reg_offset(s) + ds <= reg_offset(r));
257    } else {
258       return regions_overlap_MRF(r, dr, s, ds);
259    }
260 }
261 
262 /**
263  * Check that the register region given by r [r.offset, r.offset + dr[
264  * is fully contained inside the register region given by s
265  * [s.offset, s.offset + ds[.
266  */
267 static inline bool
region_contained_in(const elk_fs_reg & r,unsigned dr,const elk_fs_reg & s,unsigned ds)268 region_contained_in(const elk_fs_reg &r, unsigned dr, const elk_fs_reg &s, unsigned ds)
269 {
270    return reg_space(r) == reg_space(s) &&
271           reg_offset(r) >= reg_offset(s) &&
272           reg_offset(r) + dr <= reg_offset(s) + ds;
273 }
274 
275 /**
276  * Return whether the given register region is n-periodic, i.e. whether the
277  * original region remains invariant after shifting it by \p n scalar
278  * channels.
279  */
280 static inline bool
is_periodic(const elk_fs_reg & reg,unsigned n)281 is_periodic(const elk_fs_reg &reg, unsigned n)
282 {
283    if (reg.file == BAD_FILE || reg.is_null()) {
284       return true;
285 
286    } else if (reg.file == IMM) {
287       const unsigned period = (reg.type == ELK_REGISTER_TYPE_UV ||
288                                reg.type == ELK_REGISTER_TYPE_V ? 8 :
289                                reg.type == ELK_REGISTER_TYPE_VF ? 4 :
290                                1);
291       return n % period == 0;
292 
293    } else if (reg.file == ARF || reg.file == FIXED_GRF) {
294       const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
295                                reg.vstride == 0 ? 1 << reg.width :
296                                ~0);
297       return n % period == 0;
298 
299    } else {
300       return reg.stride == 0;
301    }
302 }
303 
304 static inline bool
is_uniform(const elk_fs_reg & reg)305 is_uniform(const elk_fs_reg &reg)
306 {
307    return is_periodic(reg, 1);
308 }
309 
310 /**
311  * Get the specified 8-component quarter of a register.
312  */
313 static inline elk_fs_reg
quarter(const elk_fs_reg & reg,unsigned idx)314 quarter(const elk_fs_reg &reg, unsigned idx)
315 {
316    assert(idx < 4);
317    return horiz_offset(reg, 8 * idx);
318 }
319 
320 /**
321  * Reinterpret each channel of register \p reg as a vector of values of the
322  * given smaller type and take the i-th subcomponent from each.
323  */
324 static inline elk_fs_reg
subscript(elk_fs_reg reg,elk_reg_type type,unsigned i)325 subscript(elk_fs_reg reg, elk_reg_type type, unsigned i)
326 {
327    assert((i + 1) * type_sz(type) <= type_sz(reg.type));
328 
329    if (reg.file == ARF || reg.file == FIXED_GRF) {
330       /* The stride is encoded inconsistently for fixed GRF and ARF registers
331        * as the log2 of the actual vertical and horizontal strides.
332        */
333       const int delta = util_logbase2(type_sz(reg.type)) -
334                         util_logbase2(type_sz(type));
335       reg.hstride += (reg.hstride ? delta : 0);
336       reg.vstride += (reg.vstride ? delta : 0);
337 
338    } else if (reg.file == IMM) {
339       unsigned bit_size = type_sz(type) * 8;
340       reg.u64 >>= i * bit_size;
341       reg.u64 &= BITFIELD64_MASK(bit_size);
342       if (bit_size <= 16)
343          reg.u64 |= reg.u64 << 16;
344       return retype(reg, type);
345    } else {
346       reg.stride *= type_sz(reg.type) / type_sz(type);
347    }
348 
349    return byte_offset(retype(reg, type), i * type_sz(type));
350 }
351 
352 static inline elk_fs_reg
horiz_stride(elk_fs_reg reg,unsigned s)353 horiz_stride(elk_fs_reg reg, unsigned s)
354 {
355    reg.stride *= s;
356    return reg;
357 }
358 
359 static const elk_fs_reg reg_undef;
360 
361 class elk_fs_inst : public elk_backend_instruction {
362    elk_fs_inst &operator=(const elk_fs_inst &);
363 
364    void init(enum elk_opcode opcode, uint8_t exec_width, const elk_fs_reg &dst,
365              const elk_fs_reg *src, unsigned sources);
366 
367 public:
368    DECLARE_RALLOC_CXX_OPERATORS(elk_fs_inst)
369 
370    elk_fs_inst();
371    elk_fs_inst(enum elk_opcode opcode, uint8_t exec_size);
372    elk_fs_inst(enum elk_opcode opcode, uint8_t exec_size, const elk_fs_reg &dst);
373    elk_fs_inst(enum elk_opcode opcode, uint8_t exec_size, const elk_fs_reg &dst,
374            const elk_fs_reg &src0);
375    elk_fs_inst(enum elk_opcode opcode, uint8_t exec_size, const elk_fs_reg &dst,
376            const elk_fs_reg &src0, const elk_fs_reg &src1);
377    elk_fs_inst(enum elk_opcode opcode, uint8_t exec_size, const elk_fs_reg &dst,
378            const elk_fs_reg &src0, const elk_fs_reg &src1, const elk_fs_reg &src2);
379    elk_fs_inst(enum elk_opcode opcode, uint8_t exec_size, const elk_fs_reg &dst,
380            const elk_fs_reg src[], unsigned sources);
381    elk_fs_inst(const elk_fs_inst &that);
382    ~elk_fs_inst();
383 
384    void resize_sources(uint8_t num_sources);
385 
386    bool is_send_from_grf() const;
387    bool is_payload(unsigned arg) const;
388    bool is_partial_write() const;
389    unsigned components_read(unsigned i) const;
390    unsigned size_read(int arg) const;
391    bool can_do_source_mods(const struct intel_device_info *devinfo) const;
392    bool can_do_cmod();
393    bool can_change_types() const;
394    bool has_source_and_destination_hazard() const;
395    unsigned implied_mrf_writes() const;
396 
397    /**
398     * Return whether \p arg is a control source of a virtual instruction which
399     * shouldn't contribute to the execution type and usual regioning
400     * restriction calculations of arithmetic instructions.
401     */
402    bool is_control_source(unsigned arg) const;
403 
404    /**
405     * Return the subset of flag registers read by the instruction as a bitset
406     * with byte granularity.
407     */
408    unsigned flags_read(const intel_device_info *devinfo) const;
409 
410    /**
411     * Return the subset of flag registers updated by the instruction (either
412     * partially or fully) as a bitset with byte granularity.
413     */
414    unsigned flags_written(const intel_device_info *devinfo) const;
415 
416    /**
417     * Return true if this instruction is a sampler message gathering residency
418     * data.
419     */
420    bool has_sampler_residency() const;
421 
422    elk_fs_reg dst;
423    elk_fs_reg *src;
424 
425    uint8_t sources; /**< Number of elk_fs_reg sources. */
426 
427    bool last_rt:1;
428    bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
429    bool keep_payload_trailing_zeros;
430 
431    tgl_swsb sched; /**< Scheduling info. */
432 };
433 
434 /**
435  * Make the execution of \p inst dependent on the evaluation of a possibly
436  * inverted predicate.
437  */
438 static inline elk_fs_inst *
set_predicate_inv(enum elk_predicate pred,bool inverse,elk_fs_inst * inst)439 set_predicate_inv(enum elk_predicate pred, bool inverse,
440                   elk_fs_inst *inst)
441 {
442    inst->predicate = pred;
443    inst->predicate_inverse = inverse;
444    return inst;
445 }
446 
447 /**
448  * Make the execution of \p inst dependent on the evaluation of a predicate.
449  */
450 static inline elk_fs_inst *
set_predicate(enum elk_predicate pred,elk_fs_inst * inst)451 set_predicate(enum elk_predicate pred, elk_fs_inst *inst)
452 {
453    return set_predicate_inv(pred, false, inst);
454 }
455 
456 /**
457  * Write the result of evaluating the condition given by \p mod to a flag
458  * register.
459  */
460 static inline elk_fs_inst *
set_condmod(enum elk_conditional_mod mod,elk_fs_inst * inst)461 set_condmod(enum elk_conditional_mod mod, elk_fs_inst *inst)
462 {
463    inst->conditional_mod = mod;
464    return inst;
465 }
466 
467 /**
468  * Clamp the result of \p inst to the saturation range of its destination
469  * datatype.
470  */
471 static inline elk_fs_inst *
set_saturate(bool saturate,elk_fs_inst * inst)472 set_saturate(bool saturate, elk_fs_inst *inst)
473 {
474    inst->saturate = saturate;
475    return inst;
476 }
477 
478 /**
479  * Return the number of dataflow registers written by the instruction (either
480  * fully or partially) counted from 'floor(reg_offset(inst->dst) /
481  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
482  * UNIFORM and IMM files and 32B for all other files.
483  */
484 inline unsigned
regs_written(const elk_fs_inst * inst)485 regs_written(const elk_fs_inst *inst)
486 {
487    assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
488    return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE +
489                        inst->size_written -
490                        MIN2(inst->size_written, reg_padding(inst->dst)),
491                        REG_SIZE);
492 }
493 
494 /**
495  * Return the number of dataflow registers read by the instruction (either
496  * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
497  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
498  * UNIFORM files and 32B for all other files.
499  */
500 inline unsigned
regs_read(const elk_fs_inst * inst,unsigned i)501 regs_read(const elk_fs_inst *inst, unsigned i)
502 {
503    if (inst->src[i].file == IMM)
504       return 1;
505 
506    const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE;
507    return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
508                        inst->size_read(i) -
509                        MIN2(inst->size_read(i), reg_padding(inst->src[i])),
510                        reg_size);
511 }
512 
513 static inline enum elk_reg_type
get_exec_type(const elk_fs_inst * inst)514 get_exec_type(const elk_fs_inst *inst)
515 {
516    elk_reg_type exec_type = ELK_REGISTER_TYPE_B;
517 
518    for (int i = 0; i < inst->sources; i++) {
519       if (inst->src[i].file != BAD_FILE &&
520           !inst->is_control_source(i)) {
521          const elk_reg_type t = get_exec_type(inst->src[i].type);
522          if (type_sz(t) > type_sz(exec_type))
523             exec_type = t;
524          else if (type_sz(t) == type_sz(exec_type) &&
525                   elk_reg_type_is_floating_point(t))
526             exec_type = t;
527       }
528    }
529 
530    if (exec_type == ELK_REGISTER_TYPE_B)
531       exec_type = inst->dst.type;
532 
533    assert(exec_type != ELK_REGISTER_TYPE_B);
534 
535    /* Promotion of the execution type to 32-bit for conversions from or to
536     * half-float seems to be consistent with the following text from the
537     * Cherryview PRM Vol. 7, "Execution Data Type":
538     *
539     * "When single precision and half precision floats are mixed between
540     *  source operands or between source and destination operand [..] single
541     *  precision float is the execution datatype."
542     *
543     * and from "Register Region Restrictions":
544     *
545     * "Conversion between Integer and HF (Half Float) must be DWord aligned
546     *  and strided by a DWord on the destination."
547     */
548    if (type_sz(exec_type) == 2 &&
549        inst->dst.type != exec_type) {
550       if (exec_type == ELK_REGISTER_TYPE_HF)
551          exec_type = ELK_REGISTER_TYPE_F;
552       else if (inst->dst.type == ELK_REGISTER_TYPE_HF)
553          exec_type = ELK_REGISTER_TYPE_D;
554    }
555 
556    return exec_type;
557 }
558 
559 static inline unsigned
get_exec_type_size(const elk_fs_inst * inst)560 get_exec_type_size(const elk_fs_inst *inst)
561 {
562    return type_sz(get_exec_type(inst));
563 }
564 
565 static inline bool
is_send(const elk_fs_inst * inst)566 is_send(const elk_fs_inst *inst)
567 {
568    return inst->mlen || inst->is_send_from_grf();
569 }
570 
571 /**
572  * Return whether the instruction isn't an ALU instruction and cannot be
573  * assumed to complete in-order.
574  */
575 static inline bool
is_unordered(const intel_device_info * devinfo,const elk_fs_inst * inst)576 is_unordered(const intel_device_info *devinfo, const elk_fs_inst *inst)
577 {
578    return is_send(inst) || (devinfo->ver < 20 && inst->is_math()) ||
579           inst->opcode == ELK_OPCODE_DPAS ||
580           (devinfo->has_64bit_float_via_math_pipe &&
581            (get_exec_type(inst) == ELK_REGISTER_TYPE_DF ||
582             inst->dst.type == ELK_REGISTER_TYPE_DF));
583 }
584 
585 /**
586  * Return whether the following regioning restriction applies to the specified
587  * instruction.  From the Cherryview PRM Vol 7. "Register Region
588  * Restrictions":
589  *
590  * "When source or destination datatype is 64b or operation is integer DWord
591  *  multiply, regioning in Align1 must follow these rules:
592  *
593  *  1. Source and Destination horizontal stride must be aligned to the same qword.
594  *  2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
595  *  3. Source and Destination offset must be the same, except the case of
596  *     scalar source."
597  */
598 static inline bool
has_dst_aligned_region_restriction(const intel_device_info * devinfo,const elk_fs_inst * inst,elk_reg_type dst_type)599 has_dst_aligned_region_restriction(const intel_device_info *devinfo,
600                                    const elk_fs_inst *inst,
601                                    elk_reg_type dst_type)
602 {
603    const elk_reg_type exec_type = get_exec_type(inst);
604    /* Even though the hardware spec claims that "integer DWord multiply"
605     * operations are restricted, empirical evidence and the behavior of the
606     * simulator suggest that only 32x32-bit integer multiplication is
607     * restricted.
608     */
609    const bool is_dword_multiply = !elk_reg_type_is_floating_point(exec_type) &&
610       ((inst->opcode == ELK_OPCODE_MUL &&
611         MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
612        (inst->opcode == ELK_OPCODE_MAD &&
613         MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
614 
615    if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
616        (type_sz(exec_type) == 4 && is_dword_multiply))
617       return devinfo->platform == INTEL_PLATFORM_CHV ||
618              intel_device_info_is_9lp(devinfo) ||
619              devinfo->verx10 >= 125;
620 
621    else if (elk_reg_type_is_floating_point(dst_type))
622       return devinfo->verx10 >= 125;
623 
624    else
625       return false;
626 }
627 
628 static inline bool
has_dst_aligned_region_restriction(const intel_device_info * devinfo,const elk_fs_inst * inst)629 has_dst_aligned_region_restriction(const intel_device_info *devinfo,
630                                    const elk_fs_inst *inst)
631 {
632    return has_dst_aligned_region_restriction(devinfo, inst, inst->dst.type);
633 }
634 
635 /**
636  * Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from
637  * the specified register file into a VGRF.
638  *
639  * This implies identity register regions without any source-destination
640  * overlap, but otherwise has no implications on the location of sources and
641  * destination in the register file: Gathering any number of portions from
642  * multiple virtual registers in any order is allowed.
643  */
644 inline bool
is_copy_payload(elk_reg_file file,const elk_fs_inst * inst)645 is_copy_payload(elk_reg_file file, const elk_fs_inst *inst)
646 {
647    if (inst->opcode != ELK_SHADER_OPCODE_LOAD_PAYLOAD ||
648        inst->is_partial_write() || inst->saturate ||
649        inst->dst.file != VGRF)
650       return false;
651 
652    for (unsigned i = 0; i < inst->sources; i++) {
653       if (inst->src[i].file != file ||
654           inst->src[i].abs || inst->src[i].negate)
655          return false;
656 
657       if (!inst->src[i].is_contiguous())
658          return false;
659 
660       if (regions_overlap(inst->dst, inst->size_written,
661                           inst->src[i], inst->size_read(i)))
662          return false;
663    }
664 
665    return true;
666 }
667 
668 /**
669  * Like is_copy_payload(), but the instruction is required to copy a single
670  * contiguous block of registers from the given register file into the
671  * destination without any reordering.
672  */
673 inline bool
is_identity_payload(elk_reg_file file,const elk_fs_inst * inst)674 is_identity_payload(elk_reg_file file, const elk_fs_inst *inst) {
675    if (is_copy_payload(file, inst)) {
676       elk_fs_reg reg = inst->src[0];
677 
678       for (unsigned i = 0; i < inst->sources; i++) {
679          reg.type = inst->src[i].type;
680          if (!inst->src[i].equals(reg))
681             return false;
682 
683          reg = byte_offset(reg, inst->size_read(i));
684       }
685 
686       return true;
687    } else {
688       return false;
689    }
690 }
691 
692 /**
693  * Like is_copy_payload(), but the instruction is required to source data from
694  * at least two disjoint VGRFs.
695  *
696  * This doesn't necessarily rule out the elimination of this instruction
697  * through register coalescing, but due to limitations of the register
698  * coalesce pass it might be impossible to do so directly until a later stage,
699  * when the LOAD_PAYLOAD instruction is unrolled into a sequence of MOV
700  * instructions.
701  */
702 inline bool
is_multi_copy_payload(const elk_fs_inst * inst)703 is_multi_copy_payload(const elk_fs_inst *inst) {
704    if (is_copy_payload(VGRF, inst)) {
705       for (unsigned i = 0; i < inst->sources; i++) {
706             if (inst->src[i].nr != inst->src[0].nr)
707                return true;
708       }
709    }
710 
711    return false;
712 }
713 
714 /**
715  * Like is_identity_payload(), but the instruction is required to copy the
716  * whole contents of a single VGRF into the destination.
717  *
718  * This means that there is a good chance that the instruction will be
719  * eliminated through register coalescing, but it's neither a necessary nor a
720  * sufficient condition for that to happen -- E.g. consider the case where
721  * source and destination registers diverge due to other instructions in the
722  * program overwriting part of their contents, which isn't something we can
723  * predict up front based on a cheap strictly local test of the copy
724  * instruction.
725  */
726 inline bool
is_coalescing_payload(const elk::simple_allocator & alloc,const elk_fs_inst * inst)727 is_coalescing_payload(const elk::simple_allocator &alloc, const elk_fs_inst *inst)
728 {
729    return is_identity_payload(VGRF, inst) &&
730           inst->src[0].offset == 0 &&
731           alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
732 }
733 
734 bool
735 elk_has_bank_conflict(const struct elk_isa_info *isa, const elk_fs_inst *inst);
736 
737 #endif
738