• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- c++ -*- */
2 /*
3  * Copyright © 2010-2015 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #ifndef BRW_IR_FS_H
26 #define BRW_IR_FS_H
27 
28 #include "brw_shader.h"
29 
30 class fs_inst;
31 
32 class fs_reg : public backend_reg {
33 public:
34    DECLARE_RALLOC_CXX_OPERATORS(fs_reg)
35 
36    void init();
37 
38    fs_reg();
39    fs_reg(struct ::brw_reg reg);
40    fs_reg(enum brw_reg_file file, unsigned nr);
41    fs_reg(enum brw_reg_file file, unsigned nr, enum brw_reg_type type);
42 
43    bool equals(const fs_reg &r) const;
44    bool negative_equals(const fs_reg &r) const;
45    bool is_contiguous() const;
46 
47    /**
48     * Return the size in bytes of a single logical component of the
49     * register assuming the given execution width.
50     */
51    unsigned component_size(unsigned width) const;
52 
53    /** Register region horizontal stride */
54    uint8_t stride;
55 };
56 
57 static inline fs_reg
negate(fs_reg reg)58 negate(fs_reg reg)
59 {
60    assert(reg.file != IMM);
61    reg.negate = !reg.negate;
62    return reg;
63 }
64 
65 static inline fs_reg
retype(fs_reg reg,enum brw_reg_type type)66 retype(fs_reg reg, enum brw_reg_type type)
67 {
68    reg.type = type;
69    return reg;
70 }
71 
72 static inline fs_reg
byte_offset(fs_reg reg,unsigned delta)73 byte_offset(fs_reg reg, unsigned delta)
74 {
75    switch (reg.file) {
76    case BAD_FILE:
77       break;
78    case VGRF:
79    case ATTR:
80    case UNIFORM:
81       reg.offset += delta;
82       break;
83    case ARF:
84    case FIXED_GRF: {
85       const unsigned suboffset = reg.subnr + delta;
86       reg.nr += suboffset / REG_SIZE;
87       reg.subnr = suboffset % REG_SIZE;
88       break;
89    }
90    case IMM:
91    default:
92       assert(delta == 0);
93    }
94    return reg;
95 }
96 
97 static inline fs_reg
horiz_offset(const fs_reg & reg,unsigned delta)98 horiz_offset(const fs_reg &reg, unsigned delta)
99 {
100    switch (reg.file) {
101    case BAD_FILE:
102    case UNIFORM:
103    case IMM:
104       /* These only have a single component that is implicitly splatted.  A
105        * horizontal offset should be a harmless no-op.
106        * XXX - Handle vector immediates correctly.
107        */
108       return reg;
109    case VGRF:
110    case ATTR:
111       return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
112    case ARF:
113    case FIXED_GRF:
114       if (reg.is_null()) {
115          return reg;
116       } else {
117          const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
118          const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
119          const unsigned width = 1 << reg.width;
120 
121          if (delta % width == 0) {
122             return byte_offset(reg, delta / width * vstride * type_sz(reg.type));
123          } else {
124             assert(vstride == hstride * width);
125             return byte_offset(reg, delta * hstride * type_sz(reg.type));
126          }
127       }
128    }
129    unreachable("Invalid register file");
130 }
131 
132 static inline fs_reg
offset(fs_reg reg,unsigned width,unsigned delta)133 offset(fs_reg reg, unsigned width, unsigned delta)
134 {
135    switch (reg.file) {
136    case BAD_FILE:
137       break;
138    case ARF:
139    case FIXED_GRF:
140    case VGRF:
141    case ATTR:
142    case UNIFORM:
143       return byte_offset(reg, delta * reg.component_size(width));
144    case IMM:
145       assert(delta == 0);
146    }
147    return reg;
148 }
149 
150 /**
151  * Get the scalar channel of \p reg given by \p idx and replicate it to all
152  * channels of the result.
153  */
154 static inline fs_reg
component(fs_reg reg,unsigned idx)155 component(fs_reg reg, unsigned idx)
156 {
157    reg = horiz_offset(reg, idx);
158    reg.stride = 0;
159    if (reg.file == ARF || reg.file == FIXED_GRF) {
160       reg.vstride = BRW_VERTICAL_STRIDE_0;
161       reg.width = BRW_WIDTH_1;
162       reg.hstride = BRW_HORIZONTAL_STRIDE_0;
163    }
164    return reg;
165 }
166 
167 /**
168  * Return an integer identifying the discrete address space a register is
169  * contained in.  A register is by definition fully contained in the single
170  * reg_space it belongs to, so two registers with different reg_space ids are
171  * guaranteed not to overlap.  Most register files are a single reg_space of
172  * its own, only the VGRF and ATTR files are composed of multiple discrete
173  * address spaces, one for each allocation and input attribute respectively.
174  */
175 static inline uint32_t
reg_space(const fs_reg & r)176 reg_space(const fs_reg &r)
177 {
178    return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
179 }
180 
181 /**
182  * Return the base offset in bytes of a register relative to the start of its
183  * reg_space().
184  */
185 static inline unsigned
reg_offset(const fs_reg & r)186 reg_offset(const fs_reg &r)
187 {
188    return (r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
189           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
190           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
191 }
192 
193 /**
194  * Return the amount of padding in bytes left unused between individual
195  * components of register \p r due to a (horizontal) stride value greater than
196  * one, or zero if components are tightly packed in the register file.
197  */
198 static inline unsigned
reg_padding(const fs_reg & r)199 reg_padding(const fs_reg &r)
200 {
201    const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
202                             r.hstride == 0 ? 0 :
203                             1 << (r.hstride - 1));
204    return (MAX2(1, stride) - 1) * type_sz(r.type);
205 }
206 
207 /**
208  * Return whether the register region starting at \p r and spanning \p dr
209  * bytes could potentially overlap the register region starting at \p s and
210  * spanning \p ds bytes.
211  */
212 static inline bool
regions_overlap(const fs_reg & r,unsigned dr,const fs_reg & s,unsigned ds)213 regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
214 {
215    if (r.file != s.file)
216       return false;
217 
218    if (r.file == VGRF) {
219       return r.nr == s.nr &&
220              !(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
221    } else {
222       return !(reg_offset(r) + dr <= reg_offset(s) ||
223                reg_offset(s) + ds <= reg_offset(r));
224    }
225 }
226 
227 /**
228  * Check that the register region given by r [r.offset, r.offset + dr[
229  * is fully contained inside the register region given by s
230  * [s.offset, s.offset + ds[.
231  */
232 static inline bool
region_contained_in(const fs_reg & r,unsigned dr,const fs_reg & s,unsigned ds)233 region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
234 {
235    return reg_space(r) == reg_space(s) &&
236           reg_offset(r) >= reg_offset(s) &&
237           reg_offset(r) + dr <= reg_offset(s) + ds;
238 }
239 
240 /**
241  * Return whether the given register region is n-periodic, i.e. whether the
242  * original region remains invariant after shifting it by \p n scalar
243  * channels.
244  */
245 static inline bool
is_periodic(const fs_reg & reg,unsigned n)246 is_periodic(const fs_reg &reg, unsigned n)
247 {
248    if (reg.file == BAD_FILE || reg.is_null()) {
249       return true;
250 
251    } else if (reg.file == IMM) {
252       const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV ||
253                                reg.type == BRW_REGISTER_TYPE_V ? 8 :
254                                reg.type == BRW_REGISTER_TYPE_VF ? 4 :
255                                1);
256       return n % period == 0;
257 
258    } else if (reg.file == ARF || reg.file == FIXED_GRF) {
259       const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
260                                reg.vstride == 0 ? 1 << reg.width :
261                                ~0);
262       return n % period == 0;
263 
264    } else {
265       return reg.stride == 0;
266    }
267 }
268 
269 static inline bool
is_uniform(const fs_reg & reg)270 is_uniform(const fs_reg &reg)
271 {
272    return is_periodic(reg, 1);
273 }
274 
275 /**
276  * Get the specified 8-component quarter of a register.
277  */
278 static inline fs_reg
quarter(const fs_reg & reg,unsigned idx)279 quarter(const fs_reg &reg, unsigned idx)
280 {
281    assert(idx < 4);
282    return horiz_offset(reg, 8 * idx);
283 }
284 
285 /**
286  * Reinterpret each channel of register \p reg as a vector of values of the
287  * given smaller type and take the i-th subcomponent from each.
288  */
289 static inline fs_reg
subscript(fs_reg reg,brw_reg_type type,unsigned i)290 subscript(fs_reg reg, brw_reg_type type, unsigned i)
291 {
292    assert((i + 1) * type_sz(type) <= type_sz(reg.type));
293 
294    if (reg.file == ARF || reg.file == FIXED_GRF) {
295       /* The stride is encoded inconsistently for fixed GRF and ARF registers
296        * as the log2 of the actual vertical and horizontal strides.
297        */
298       const int delta = util_logbase2(type_sz(reg.type)) -
299                         util_logbase2(type_sz(type));
300       reg.hstride += (reg.hstride ? delta : 0);
301       reg.vstride += (reg.vstride ? delta : 0);
302 
303    } else if (reg.file == IMM) {
304       unsigned bit_size = type_sz(type) * 8;
305       reg.u64 >>= i * bit_size;
306       reg.u64 &= BITFIELD64_MASK(bit_size);
307       if (bit_size <= 16)
308          reg.u64 |= reg.u64 << 16;
309       return retype(reg, type);
310    } else {
311       reg.stride *= type_sz(reg.type) / type_sz(type);
312    }
313 
314    return byte_offset(retype(reg, type), i * type_sz(type));
315 }
316 
317 static inline fs_reg
horiz_stride(fs_reg reg,unsigned s)318 horiz_stride(fs_reg reg, unsigned s)
319 {
320    reg.stride *= s;
321    return reg;
322 }
323 
/* Default-constructed fs_reg with internal linkage.
 * NOTE(review): presumably used as the "no register" placeholder -- confirm
 * against callers.
 */
static const fs_reg reg_undef;
325 
326 class fs_inst : public backend_instruction {
327    fs_inst &operator=(const fs_inst &);
328 
329    void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
330              const fs_reg *src, unsigned sources);
331 
332 public:
333    DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
334 
335    fs_inst();
336    fs_inst(enum opcode opcode, uint8_t exec_size);
337    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
338    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
339            const fs_reg &src0);
340    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
341            const fs_reg &src0, const fs_reg &src1);
342    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
343            const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
344    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
345            const fs_reg src[], unsigned sources);
346    fs_inst(const fs_inst &that);
347    ~fs_inst();
348 
349    void resize_sources(uint8_t num_sources);
350 
351    bool is_send_from_grf() const;
352    bool is_payload(unsigned arg) const;
353    bool is_partial_write() const;
354    unsigned components_read(unsigned i) const;
355    unsigned size_read(int arg) const;
356    bool can_do_source_mods(const struct intel_device_info *devinfo) const;
357    bool can_do_cmod();
358    bool can_change_types() const;
359    bool has_source_and_destination_hazard() const;
360 
361    /**
362     * Return whether \p arg is a control source of a virtual instruction which
363     * shouldn't contribute to the execution type and usual regioning
364     * restriction calculations of arithmetic instructions.
365     */
366    bool is_control_source(unsigned arg) const;
367 
368    /**
369     * Return the subset of flag registers read by the instruction as a bitset
370     * with byte granularity.
371     */
372    unsigned flags_read(const intel_device_info *devinfo) const;
373 
374    /**
375     * Return the subset of flag registers updated by the instruction (either
376     * partially or fully) as a bitset with byte granularity.
377     */
378    unsigned flags_written(const intel_device_info *devinfo) const;
379 
380    /**
381     * Return true if this instruction is a sampler message gathering residency
382     * data.
383     */
384    bool has_sampler_residency() const;
385 
386    fs_reg dst;
387    fs_reg *src;
388 
389    uint8_t sources; /**< Number of fs_reg sources. */
390 
391    bool last_rt:1;
392    bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
393    bool keep_payload_trailing_zeros;
394 
395    tgl_swsb sched; /**< Scheduling info. */
396 
397    /* Hint that this instruction has combined LOD/LOD bias with array index */
398    bool has_packed_lod_ai_src;
399 };
400 
401 /**
402  * Make the execution of \p inst dependent on the evaluation of a possibly
403  * inverted predicate.
404  */
405 static inline fs_inst *
set_predicate_inv(enum brw_predicate pred,bool inverse,fs_inst * inst)406 set_predicate_inv(enum brw_predicate pred, bool inverse,
407                   fs_inst *inst)
408 {
409    inst->predicate = pred;
410    inst->predicate_inverse = inverse;
411    return inst;
412 }
413 
414 /**
415  * Make the execution of \p inst dependent on the evaluation of a predicate.
416  */
417 static inline fs_inst *
set_predicate(enum brw_predicate pred,fs_inst * inst)418 set_predicate(enum brw_predicate pred, fs_inst *inst)
419 {
420    return set_predicate_inv(pred, false, inst);
421 }
422 
423 /**
424  * Write the result of evaluating the condition given by \p mod to a flag
425  * register.
426  */
427 static inline fs_inst *
set_condmod(enum brw_conditional_mod mod,fs_inst * inst)428 set_condmod(enum brw_conditional_mod mod, fs_inst *inst)
429 {
430    inst->conditional_mod = mod;
431    return inst;
432 }
433 
434 /**
435  * Clamp the result of \p inst to the saturation range of its destination
436  * datatype.
437  */
438 static inline fs_inst *
set_saturate(bool saturate,fs_inst * inst)439 set_saturate(bool saturate, fs_inst *inst)
440 {
441    inst->saturate = saturate;
442    return inst;
443 }
444 
445 /**
446  * Return the number of dataflow registers written by the instruction (either
447  * fully or partially) counted from 'floor(reg_offset(inst->dst) /
448  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
449  * UNIFORM and IMM files and 32B for all other files.
450  */
451 inline unsigned
regs_written(const fs_inst * inst)452 regs_written(const fs_inst *inst)
453 {
454    assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
455    return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE +
456                        inst->size_written -
457                        MIN2(inst->size_written, reg_padding(inst->dst)),
458                        REG_SIZE);
459 }
460 
461 /**
462  * Return the number of dataflow registers read by the instruction (either
463  * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
464  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
465  * UNIFORM files and 32B for all other files.
466  */
467 inline unsigned
regs_read(const fs_inst * inst,unsigned i)468 regs_read(const fs_inst *inst, unsigned i)
469 {
470    if (inst->src[i].file == IMM)
471       return 1;
472 
473    const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE;
474    return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
475                        inst->size_read(i) -
476                        MIN2(inst->size_read(i), reg_padding(inst->src[i])),
477                        reg_size);
478 }
479 
480 static inline enum brw_reg_type
get_exec_type(const fs_inst * inst)481 get_exec_type(const fs_inst *inst)
482 {
483    brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
484 
485    for (int i = 0; i < inst->sources; i++) {
486       if (inst->src[i].file != BAD_FILE &&
487           !inst->is_control_source(i)) {
488          const brw_reg_type t = get_exec_type(inst->src[i].type);
489          if (type_sz(t) > type_sz(exec_type))
490             exec_type = t;
491          else if (type_sz(t) == type_sz(exec_type) &&
492                   brw_reg_type_is_floating_point(t))
493             exec_type = t;
494       }
495    }
496 
497    if (exec_type == BRW_REGISTER_TYPE_B)
498       exec_type = inst->dst.type;
499 
500    assert(exec_type != BRW_REGISTER_TYPE_B);
501 
502    /* Promotion of the execution type to 32-bit for conversions from or to
503     * half-float seems to be consistent with the following text from the
504     * Cherryview PRM Vol. 7, "Execution Data Type":
505     *
506     * "When single precision and half precision floats are mixed between
507     *  source operands or between source and destination operand [..] single
508     *  precision float is the execution datatype."
509     *
510     * and from "Register Region Restrictions":
511     *
512     * "Conversion between Integer and HF (Half Float) must be DWord aligned
513     *  and strided by a DWord on the destination."
514     */
515    if (type_sz(exec_type) == 2 &&
516        inst->dst.type != exec_type) {
517       if (exec_type == BRW_REGISTER_TYPE_HF)
518          exec_type = BRW_REGISTER_TYPE_F;
519       else if (inst->dst.type == BRW_REGISTER_TYPE_HF)
520          exec_type = BRW_REGISTER_TYPE_D;
521    }
522 
523    return exec_type;
524 }
525 
526 static inline unsigned
get_exec_type_size(const fs_inst * inst)527 get_exec_type_size(const fs_inst *inst)
528 {
529    return type_sz(get_exec_type(inst));
530 }
531 
532 static inline bool
is_send(const fs_inst * inst)533 is_send(const fs_inst *inst)
534 {
535    return inst->mlen || inst->is_send_from_grf();
536 }
537 
538 /**
539  * Return whether the instruction isn't an ALU instruction and cannot be
540  * assumed to complete in-order.
541  */
542 static inline bool
is_unordered(const intel_device_info * devinfo,const fs_inst * inst)543 is_unordered(const intel_device_info *devinfo, const fs_inst *inst)
544 {
545    return is_send(inst) || (devinfo->ver < 20 && inst->is_math()) ||
546           inst->opcode == BRW_OPCODE_DPAS ||
547           (devinfo->has_64bit_float_via_math_pipe &&
548            (get_exec_type(inst) == BRW_REGISTER_TYPE_DF ||
549             inst->dst.type == BRW_REGISTER_TYPE_DF));
550 }
551 
552 /**
553  * Return whether the following regioning restriction applies to the specified
554  * instruction.  From the Cherryview PRM Vol 7. "Register Region
555  * Restrictions":
556  *
557  * "When source or destination datatype is 64b or operation is integer DWord
558  *  multiply, regioning in Align1 must follow these rules:
559  *
560  *  1. Source and Destination horizontal stride must be aligned to the same qword.
561  *  2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
562  *  3. Source and Destination offset must be the same, except the case of
563  *     scalar source."
564  */
565 static inline bool
has_dst_aligned_region_restriction(const intel_device_info * devinfo,const fs_inst * inst,brw_reg_type dst_type)566 has_dst_aligned_region_restriction(const intel_device_info *devinfo,
567                                    const fs_inst *inst,
568                                    brw_reg_type dst_type)
569 {
570    const brw_reg_type exec_type = get_exec_type(inst);
571    /* Even though the hardware spec claims that "integer DWord multiply"
572     * operations are restricted, empirical evidence and the behavior of the
573     * simulator suggest that only 32x32-bit integer multiplication is
574     * restricted.
575     */
576    const bool is_dword_multiply = !brw_reg_type_is_floating_point(exec_type) &&
577       ((inst->opcode == BRW_OPCODE_MUL &&
578         MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
579        (inst->opcode == BRW_OPCODE_MAD &&
580         MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
581 
582    if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
583        (type_sz(exec_type) == 4 && is_dword_multiply))
584       return devinfo->platform == INTEL_PLATFORM_CHV ||
585              intel_device_info_is_9lp(devinfo) ||
586              devinfo->verx10 >= 125;
587 
588    else if (brw_reg_type_is_floating_point(dst_type))
589       return devinfo->verx10 >= 125;
590 
591    else
592       return false;
593 }
594 
595 static inline bool
has_dst_aligned_region_restriction(const intel_device_info * devinfo,const fs_inst * inst)596 has_dst_aligned_region_restriction(const intel_device_info *devinfo,
597                                    const fs_inst *inst)
598 {
599    return has_dst_aligned_region_restriction(devinfo, inst, inst->dst.type);
600 }
601 
602 /**
603  * Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from
604  * the specified register file into a VGRF.
605  *
606  * This implies identity register regions without any source-destination
607  * overlap, but otherwise has no implications on the location of sources and
608  * destination in the register file: Gathering any number of portions from
609  * multiple virtual registers in any order is allowed.
610  */
611 inline bool
is_copy_payload(brw_reg_file file,const fs_inst * inst)612 is_copy_payload(brw_reg_file file, const fs_inst *inst)
613 {
614    if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD ||
615        inst->is_partial_write() || inst->saturate ||
616        inst->dst.file != VGRF)
617       return false;
618 
619    for (unsigned i = 0; i < inst->sources; i++) {
620       if (inst->src[i].file != file ||
621           inst->src[i].abs || inst->src[i].negate)
622          return false;
623 
624       if (!inst->src[i].is_contiguous())
625          return false;
626 
627       if (regions_overlap(inst->dst, inst->size_written,
628                           inst->src[i], inst->size_read(i)))
629          return false;
630    }
631 
632    return true;
633 }
634 
635 /**
636  * Like is_copy_payload(), but the instruction is required to copy a single
637  * contiguous block of registers from the given register file into the
638  * destination without any reordering.
639  */
640 inline bool
is_identity_payload(brw_reg_file file,const fs_inst * inst)641 is_identity_payload(brw_reg_file file, const fs_inst *inst) {
642    if (is_copy_payload(file, inst)) {
643       fs_reg reg = inst->src[0];
644 
645       for (unsigned i = 0; i < inst->sources; i++) {
646          reg.type = inst->src[i].type;
647          if (!inst->src[i].equals(reg))
648             return false;
649 
650          reg = byte_offset(reg, inst->size_read(i));
651       }
652 
653       return true;
654    } else {
655       return false;
656    }
657 }
658 
659 /**
660  * Like is_copy_payload(), but the instruction is required to source data from
661  * at least two disjoint VGRFs.
662  *
663  * This doesn't necessarily rule out the elimination of this instruction
664  * through register coalescing, but due to limitations of the register
665  * coalesce pass it might be impossible to do so directly until a later stage,
666  * when the LOAD_PAYLOAD instruction is unrolled into a sequence of MOV
667  * instructions.
668  */
669 inline bool
is_multi_copy_payload(const fs_inst * inst)670 is_multi_copy_payload(const fs_inst *inst) {
671    if (is_copy_payload(VGRF, inst)) {
672       for (unsigned i = 0; i < inst->sources; i++) {
673             if (inst->src[i].nr != inst->src[0].nr)
674                return true;
675       }
676    }
677 
678    return false;
679 }
680 
681 /**
682  * Like is_identity_payload(), but the instruction is required to copy the
683  * whole contents of a single VGRF into the destination.
684  *
685  * This means that there is a good chance that the instruction will be
686  * eliminated through register coalescing, but it's neither a necessary nor a
687  * sufficient condition for that to happen -- E.g. consider the case where
688  * source and destination registers diverge due to other instructions in the
689  * program overwriting part of their contents, which isn't something we can
690  * predict up front based on a cheap strictly local test of the copy
691  * instruction.
692  */
693 inline bool
is_coalescing_payload(const brw::simple_allocator & alloc,const fs_inst * inst)694 is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst)
695 {
696    return is_identity_payload(VGRF, inst) &&
697           inst->src[0].offset == 0 &&
698           alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
699 }
700 
/* Defined outside this header.
 * NOTE(review): presumably reports whether \p inst has a register bank
 * conflict on the given ISA -- confirm against the definition.
 */
bool
has_bank_conflict(const struct brw_isa_info *isa, const fs_inst *inst);
703 
704 /* Return the subset of flag registers that an instruction could
705  * potentially read or write based on the execution controls and flag
706  * subregister number of the instruction.
707  */
708 static inline unsigned
brw_fs_flag_mask(const fs_inst * inst,unsigned width)709 brw_fs_flag_mask(const fs_inst *inst, unsigned width)
710 {
711    assert(util_is_power_of_two_nonzero(width));
712    const unsigned start = (inst->flag_subreg * 16 + inst->group) &
713                           ~(width - 1);
714   const unsigned end = start + ALIGN(inst->exec_size, width);
715    return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
716 }
717 
/* Return a mask with the \p n least significant bits set.  Values of \p n
 * at or above the bit width of unsigned yield an all-ones mask instead of
 * performing an out-of-range shift.
 */
static inline unsigned
brw_fs_bit_mask(unsigned n)
{
   const unsigned mask_bits = CHAR_BIT * sizeof(unsigned);

   if (n >= mask_bits)
      return ~0u;

   return (1u << n) - 1;
}
723 
724 static inline unsigned
brw_fs_flag_mask(const fs_reg & r,unsigned sz)725 brw_fs_flag_mask(const fs_reg &r, unsigned sz)
726 {
727    if (r.file == ARF) {
728       const unsigned start = (r.nr - BRW_ARF_FLAG) * 4 + r.subnr;
729       const unsigned end = start + sz;
730       return brw_fs_bit_mask(end) & ~brw_fs_bit_mask(start);
731    } else {
732       return 0;
733    }
734 }
735 
736 #endif
737