/* -*- c++ -*- */
/*
 * Copyright © 2011-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_IR_VEC4_H
#define BRW_IR_VEC4_H

#include "brw_shader.h"

namespace brw {

class dst_reg;

class src_reg : public backend_reg
{
public:
   DECLARE_RALLOC_CXX_OPERATORS(src_reg)

   void init();

   src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
   src_reg();
   src_reg(struct ::brw_reg reg);

   bool equals(const src_reg &r) const;
   bool negative_equals(const src_reg &r) const;

   src_reg(class vec4_visitor *v, const struct glsl_type *type);
   src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);

   explicit src_reg(const dst_reg &reg);

   src_reg *reladdr;
};

static inline src_reg
retype(src_reg reg, enum brw_reg_type type)
{
   reg.type = type;
   return reg;
}

namespace detail {

static inline void
add_byte_offset(backend_reg *reg, unsigned bytes)
{
   switch (reg->file) {
      case BAD_FILE:
         break;
      case VGRF:
      case ATTR:
      case UNIFORM:
         reg->offset += bytes;
         assert(reg->offset % 16 == 0);
         break;
      case MRF: {
         const unsigned suboffset = reg->offset + bytes;
         reg->nr += suboffset / REG_SIZE;
         reg->offset = suboffset % REG_SIZE;
         assert(reg->offset % 16 == 0);
         break;
      }
      case ARF:
      case FIXED_GRF: {
         const unsigned suboffset = reg->subnr + bytes;
         reg->nr += suboffset / REG_SIZE;
         reg->subnr = suboffset % REG_SIZE;
         assert(reg->subnr % 16 == 0);
         break;
      }
      default:
         assert(bytes == 0);
   }
}

} /* namespace detail */

static inline src_reg
byte_offset(src_reg reg, unsigned bytes)
{
   detail::add_byte_offset(&reg, bytes);
   return reg;
}

static inline src_reg
offset(src_reg reg, unsigned width, unsigned delta)
{
   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
   const unsigned num_components = MAX2(width / 4 * stride, 4);
   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
}

static inline src_reg
horiz_offset(src_reg reg, unsigned delta)
{
   return byte_offset(reg, delta * type_sz(reg.type));
}
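/* Usage sketch (illustrative only; "v" is a hypothetical float vec4 source):
 * byte_offset() advances a register by a raw byte count, offset() by whole
 * logical elements of "width" channels, and horiz_offset() by individual
 * channels, so the following two expressions are equivalent:
 *
 *    src_reg a = offset(v, 4, 1);      // next vec4 element: 4 comps * 4 B
 *    src_reg b = byte_offset(v, 16);   // the same step expressed in bytes
 */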

/**
 * Reswizzle a given source register.
 * \sa brw_swizzle().
 */
static inline src_reg
swizzle(src_reg reg, unsigned swizzle)
{
   if (reg.file == IMM)
      reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
   else
      reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);

   return reg;
}
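/* Usage sketch (illustrative only; "a" is a hypothetical source and the
 * BRW_SWIZZLE_* macros are defined outside this header): swizzles compose
 * with whatever the register already carries rather than replacing it, so if
 * "a" currently holds a .yzwx swizzle,
 *
 *    src_reg b = swizzle(a, BRW_SWIZZLE_XXXX);
 *
 * broadcasts the underlying register's Y channel.  IMM sources are handled by
 * rewriting the immediate payload via brw_swizzle_immediate() instead.
 */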

static inline src_reg
negate(src_reg reg)
{
   assert(reg.file != IMM);
   reg.negate = !reg.negate;
   return reg;
}

static inline bool
is_uniform(const src_reg &reg)
{
   return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
          (!reg.reladdr || is_uniform(*reg.reladdr));
}
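/* In other words, a source counts as uniform when every channel is guaranteed
 * to observe the same value: immediates, push constants in the UNIFORM file
 * and null registers qualify, provided any reladdr chain used to index them
 * is itself uniform.
 */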

class dst_reg : public backend_reg
{
public:
   DECLARE_RALLOC_CXX_OPERATORS(dst_reg)

   void init();

   dst_reg();
   dst_reg(enum brw_reg_file file, int nr);
   dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
           unsigned writemask);
   dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
           unsigned writemask);
   dst_reg(struct ::brw_reg reg);
   dst_reg(class vec4_visitor *v, const struct glsl_type *type);

   explicit dst_reg(const src_reg &reg);

   bool equals(const dst_reg &r) const;

   src_reg *reladdr;
};

static inline dst_reg
retype(dst_reg reg, enum brw_reg_type type)
{
   reg.type = type;
   return reg;
}

static inline dst_reg
byte_offset(dst_reg reg, unsigned bytes)
{
   detail::add_byte_offset(&reg, bytes);
   return reg;
}

static inline dst_reg
offset(dst_reg reg, unsigned width, unsigned delta)
{
   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
   const unsigned num_components = MAX2(width / 4 * stride, 4);
   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
}

static inline dst_reg
horiz_offset(const dst_reg &reg, unsigned delta)
{
   if (is_uniform(src_reg(reg)))
      return reg;
   else
      return byte_offset(reg, delta * type_sz(reg.type));
}

static inline dst_reg
writemask(dst_reg reg, unsigned mask)
{
   assert(reg.file != IMM);
   assert((reg.writemask & mask) != 0);
   reg.writemask &= mask;
   return reg;
}
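/* Usage sketch (illustrative only; "d" is a hypothetical destination and the
 * WRITEMASK_* macros live outside this header): writemask() can only narrow
 * the set of enabled channels, never widen it -- the asserts reject IMM
 * destinations and masks that share no channel with the current writemask:
 *
 *    dst_reg dx = writemask(d, WRITEMASK_X);   // write the X channel only
 */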

/**
 * Return an integer identifying the discrete address space a register is
 * contained in.  A register is by definition fully contained in the single
 * reg_space it belongs to, so two registers with different reg_space ids are
 * guaranteed not to overlap.  Most register files form a single reg_space of
 * their own; only the VGRF file is composed of multiple discrete address
 * spaces, one for each VGRF allocation.
 */
static inline uint32_t
reg_space(const backend_reg &r)
{
   return r.file << 16 | (r.file == VGRF ? r.nr : 0);
}

/**
 * Return the base offset in bytes of a register relative to the start of its
 * reg_space().
 */
static inline unsigned
reg_offset(const backend_reg &r)
{
   return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
          (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
          (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
}
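/* Taken together, reg_space() and reg_offset() assign every register a
 * coordinate of the form (space, byte offset) that the overlap checks below
 * build on.  For example (hypothetical registers "a" and "b"), two VGRFs with
 * different nr values land in different spaces, so reg_space(a) !=
 * reg_space(b) already proves they cannot alias, whatever their offsets.
 */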

/**
 * Return whether the register region starting at \p r and spanning \p dr
 * bytes could potentially overlap the register region starting at \p s and
 * spanning \p ds bytes.
 */
static inline bool
regions_overlap(const backend_reg &r, unsigned dr,
                const backend_reg &s, unsigned ds)
{
   if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
      /* COMPR4 regions are translated by the hardware during decompression
       * into two separate half-regions 4 MRFs apart from each other.
       */
      backend_reg t0 = r;
      t0.nr &= ~BRW_MRF_COMPR4;
      backend_reg t1 = t0;
      t1.offset += 4 * REG_SIZE;
      return regions_overlap(t0, dr / 2, s, ds) ||
             regions_overlap(t1, dr / 2, s, ds);

   } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
      return regions_overlap(s, ds, r, dr);

   } else {
      return reg_space(r) == reg_space(s) &&
             !(reg_offset(r) + dr <= reg_offset(s) ||
               reg_offset(s) + ds <= reg_offset(r));
   }
}
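/* Usage sketch (illustrative only; "inst" is a hypothetical instruction): a
 * typical caller pairs each region's start with the number of bytes it spans,
 * e.g. to test whether an instruction's destination might clobber its first
 * source:
 *
 *    bool hazard = regions_overlap(inst->dst, inst->size_written,
 *                                  inst->src[0], inst->size_read(0));
 */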

class vec4_instruction : public backend_instruction {
public:
   DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)

   vec4_instruction(enum opcode opcode,
                    const dst_reg &dst = dst_reg(),
                    const src_reg &src0 = src_reg(),
                    const src_reg &src1 = src_reg(),
                    const src_reg &src2 = src_reg());

   dst_reg dst;
   src_reg src[3];

   enum brw_urb_write_flags urb_write_flags;

   unsigned sol_binding; /**< gfx6: SOL binding table index */
   bool sol_final_write; /**< gfx6: send commit message */
   unsigned sol_vertex; /**< gfx6: used for setting dst index in SVB header */

   bool is_send_from_grf() const;
   unsigned size_read(unsigned arg) const;
   bool can_reswizzle(const struct intel_device_info *devinfo,
                      int dst_writemask,
                      int swizzle, int swizzle_mask);
   void reswizzle(int dst_writemask, int swizzle);
   bool can_do_source_mods(const struct intel_device_info *devinfo);
   bool can_do_cmod();
   bool can_do_writemask(const struct intel_device_info *devinfo);
   bool can_change_types() const;
   bool has_source_and_destination_hazard() const;
   unsigned implied_mrf_writes() const;

   bool is_align1_partial_write()
   {
      return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
             opcode == VEC4_OPCODE_SET_HIGH_32BIT;
   }

   bool reads_flag() const
   {
      return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
   }

   bool reads_flag(unsigned c)
   {
      if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
         return true;

      switch (predicate) {
      case BRW_PREDICATE_NONE:
         return false;
      case BRW_PREDICATE_ALIGN16_REPLICATE_X:
         return c == 0;
      case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
         return c == 1;
      case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
         return c == 2;
      case BRW_PREDICATE_ALIGN16_REPLICATE_W:
         return c == 3;
      default:
         return true;
      }
   }

   bool writes_flag(const intel_device_info *devinfo) const
   {
      return (conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) &&
                                  opcode != BRW_OPCODE_CSEL &&
                                  opcode != BRW_OPCODE_IF &&
                                  opcode != BRW_OPCODE_WHILE));
   }

   bool reads_g0_implicitly() const
   {
      switch (opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
      case SHADER_OPCODE_TXD:
      case SHADER_OPCODE_TXF:
      case SHADER_OPCODE_TXF_CMS_W:
      case SHADER_OPCODE_TXF_CMS:
      case SHADER_OPCODE_TXF_MCS:
      case SHADER_OPCODE_TXS:
      case SHADER_OPCODE_TG4:
      case SHADER_OPCODE_TG4_OFFSET:
      case SHADER_OPCODE_SAMPLEINFO:
      case VS_OPCODE_PULL_CONSTANT_LOAD:
      case GS_OPCODE_SET_PRIMITIVE_ID:
      case GS_OPCODE_GET_INSTANCE_ID:
      case SHADER_OPCODE_GFX4_SCRATCH_READ:
      case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
         return true;
      default:
         return false;
      }
   }
};

/**
 * Make the execution of \p inst dependent on the evaluation of a possibly
 * inverted predicate.
 */
inline vec4_instruction *
set_predicate_inv(enum brw_predicate pred, bool inverse,
                  vec4_instruction *inst)
{
   inst->predicate = pred;
   inst->predicate_inverse = inverse;
   return inst;
}

/**
 * Make the execution of \p inst dependent on the evaluation of a predicate.
 */
inline vec4_instruction *
set_predicate(enum brw_predicate pred, vec4_instruction *inst)
{
   return set_predicate_inv(pred, false, inst);
}

/**
 * Write the result of evaluating the condition given by \p mod to a flag
 * register.
 */
inline vec4_instruction *
set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
{
   inst->conditional_mod = mod;
   return inst;
}

/**
 * Clamp the result of \p inst to the saturation range of its destination
 * datatype.
 */
inline vec4_instruction *
set_saturate(bool saturate, vec4_instruction *inst)
{
   inst->saturate = saturate;
   return inst;
}
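/* Usage sketch (illustrative only; "mem_ctx", "dst" and "src" are hypothetical
 * names): these helpers are meant to wrap an instruction inline as it is
 * created, so a saturated, predicated MOV could be assembled as
 *
 *    vec4_instruction *mov =
 *       set_saturate(true,
 *          set_predicate(BRW_PREDICATE_NORMAL,
 *             new(mem_ctx) vec4_instruction(BRW_OPCODE_MOV, dst, src)));
 *
 * with the placement new provided by DECLARE_RALLOC_CXX_OPERATORS above.
 */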

/**
 * Return the number of dataflow registers written by the instruction (either
 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
 * register_size)'.  The somewhat arbitrary register size unit is 16B for the
 * UNIFORM and IMM files and 32B for all other files.
 */
inline unsigned
regs_written(const vec4_instruction *inst)
{
   assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
   return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
                       REG_SIZE);
}

/**
 * Return the number of dataflow registers read by the instruction (either
 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
 * register_size)'.  The somewhat arbitrary register size unit is 16B for the
 * UNIFORM and IMM files and 32B for all other files.
 */
inline unsigned
regs_read(const vec4_instruction *inst, unsigned i)
{
   const unsigned reg_size =
      inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
   return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
                       reg_size);
}
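/* Illustrative arithmetic (hypothetical values): a destination that starts 16
 * bytes into its VGRF and writes 16 bytes stays within a single 32B register,
 * DIV_ROUND_UP(16 + 16, 32) == 1, whereas writing 32 bytes from the same
 * offset straddles two, DIV_ROUND_UP(16 + 32, 32) == 2.  regs_read() performs
 * the same computation with a 16B unit for UNIFORM and IMM sources.
 */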

static inline enum brw_reg_type
get_exec_type(const vec4_instruction *inst)
{
   enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;

   for (int i = 0; i < 3; i++) {
      if (inst->src[i].file != BAD_FILE) {
         const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
         if (type_sz(t) > type_sz(exec_type))
            exec_type = t;
         else if (type_sz(t) == type_sz(exec_type) &&
                  brw_reg_type_is_floating_point(t))
            exec_type = t;
      }
   }

   if (exec_type == BRW_REGISTER_TYPE_B)
      exec_type = inst->dst.type;

   /* TODO: We need to handle half-float conversions. */
   assert(exec_type != BRW_REGISTER_TYPE_HF ||
          inst->dst.type == BRW_REGISTER_TYPE_HF);
   assert(exec_type != BRW_REGISTER_TYPE_B);

   return exec_type;
}
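/* Illustrative example (hypothetical instruction): with one
 * BRW_REGISTER_TYPE_D and one BRW_REGISTER_TYPE_F source the sizes tie at
 * four bytes and the floating-point candidate wins, so the execution type is
 * F; an instruction with no sources falls back to its destination type.
 */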

static inline unsigned
get_exec_type_size(const vec4_instruction *inst)
{
   return type_sz(get_exec_type(inst));
}

} /* namespace brw */

#endif