• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
32 /** @file
33  *
34  * This file defines struct brw_reg, which is our representation for EU
35  * registers.  They're not a hardware specific format, just an abstraction
36  * that intends to capture the full flexibility of the hardware registers.
37  *
38  * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39  * the abstract brw_reg type into the actual hardware instruction encoding.
40  */
41 
42 #pragma once
43 
44 #include <stdbool.h>
45 #include "util/compiler.h"
46 #include "util/glheader.h"
47 #include "util/macros.h"
48 #include "util/rounding.h"
49 #include "util/u_math.h"
50 #include "brw_eu_defines.h"
51 #include "brw_reg_type.h"
52 
53 #ifdef __cplusplus
54 extern "C" {
55 #endif
56 
57 struct intel_device_info;
58 
59 /** Size of general purpose register space in REG_SIZE units */
60 #define BRW_MAX_GRF 128
61 #define XE2_MAX_GRF 256
62 
/**
 * BRW hardware swizzles.
 *
 * Only the X, Y, Z and W selectors are defined so that every selector fits
 * in two bits.
 */
#define BRW_SWIZZLE_X 0
#define BRW_SWIZZLE_Y 1
#define BRW_SWIZZLE_Z 2
#define BRW_SWIZZLE_W 3

/** Pack four 2-bit selectors; the first argument lands in the lowest bits. */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
/** Extract the 2-bit selector at position \p idx of packed swizzle \p swz. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Named swizzles.  NOOP and XYZW are the same identity encoding. */
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YXYX      BRW_SWIZZLE4(1,0,1,0)
#define BRW_SWIZZLE_XZXZ      BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_YWYW      BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
#define BRW_SWIZZLE_WZWZ      BRW_SWIZZLE4(3,2,3,2)
#define BRW_SWIZZLE_WZYX      BRW_SWIZZLE4(3,2,1,0)
#define BRW_SWIZZLE_XXZZ      BRW_SWIZZLE4(0,0,2,2)
#define BRW_SWIZZLE_YYWW      BRW_SWIZZLE4(1,1,3,3)
#define BRW_SWIZZLE_YXWZ      BRW_SWIZZLE4(1,0,3,2)

/* The identity swizzle shifted right / left by \p comp selector slots. */
#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
96 
97 static inline bool
brw_is_single_value_swizzle(unsigned swiz)98 brw_is_single_value_swizzle(unsigned swiz)
99 {
100    return (swiz == BRW_SWIZZLE_XXXX ||
101            swiz == BRW_SWIZZLE_YYYY ||
102            swiz == BRW_SWIZZLE_ZZZZ ||
103            swiz == BRW_SWIZZLE_WWWW);
104 }
105 
/**
 * Compute the swizzle obtained from the application of \p swz0 on the result
 * of \p swz1.  The argument ordering is expected to match function
 * composition.
 *
 * For each of the four slots, the selector stored in \p swz0 is used as the
 * index of the selector to fetch from \p swz1.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned slot = 0; slot < 4; slot++) {
      const unsigned selected = BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, slot));

      /* Each selector occupies two bits, lowest slot first. */
      composed |= selected << (slot * 2);
   }

   return composed;
}
120 
/**
 * Construct an identity swizzle for the set of enabled channels given by \p
 * mask.  The result will only reference channels enabled in the provided \p
 * mask, assuming that \p mask is non-zero.  The constructed swizzle will
 * satisfy the property that for any instruction OP and any mask:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * will be equivalent to the same instruction without swizzle:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 *
 * Enabled channels select themselves.  A disabled channel repeats the most
 * recent enabled channel below it, or the lowest enabled channel when there
 * is none below it.
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Selector reused for disabled channels; starts at the lowest set bit. */
   unsigned carry = 0;
   if (mask)
      carry = ffs(mask) - 1;

   unsigned packed = 0;
   for (unsigned chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
         carry = chan;

      packed |= carry << (chan * 2);
   }

   return packed;
}
145 
146 uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);
147 
148 #define REG_SIZE (8*4)
149 
150 /* These aren't hardware structs, just something useful for us to pass around:
151  *
152  * Align1 operation has a lot of control over input ranges.  Used in
153  * WM programs to implement shaders decomposed into "channel serial"
154  * or "structure of array" form:
155  */
156 typedef struct brw_reg {
157    union {
158       struct {
159          enum brw_reg_type type:5;
160          enum brw_reg_file file:3;
161          unsigned negate:1;             /* source only */
162          unsigned abs:1;                /* source only */
163          unsigned address_mode:1;       /* relative addressing, hopefully! */
164          unsigned pad0:16;
165          unsigned subnr:5;              /* :1 in align16 */
166       };
167       uint32_t bits;
168    };
169 
170    /** Offset from the start of the virtual register in bytes. */
171    uint16_t offset;
172 
173    /** Register region horizontal stride of virtual registers */
174    uint8_t stride;
175 
176    union {
177       struct {
178          unsigned nr;
179          unsigned swizzle:8;      /* src only, align16 only */
180          unsigned writemask:4;    /* dest only, align16 only */
181          int  indirect_offset:10; /* relative addressing offset */
182          unsigned vstride:4;      /* source only */
183          unsigned width:3;        /* src only, align1 only */
184          unsigned hstride:2;      /* align1 only */
185 
186          /**
187           * Does this register represent a scalar value?
188           *
189           * Registers are allocated in SIMD8 parcels, but may be used to
190           * represent convergent (i.e., scalar) values. As a destination, it
191           * is written as SIMD8. As a source, it may be read as <8,8,1> in
192           * SIMD8 instructions or <0,1,0> on other execution sizes.
193           *
194           * If the value represents a vector (e.g., a convergent load_uniform
195           * of a vec4), it will be stored as multiple SIMD8 registers.
196           */
197          unsigned is_scalar:1;
198       };
199 
200       double df;
201       uint64_t u64;
202       int64_t d64;
203       float f;
204       int   d;
205       unsigned ud;
206    };
207 
208 #ifdef __cplusplus
209    /* TODO: Remove this constructor to make this type a POD.  Need
210     * to make sure that rest of compiler doesn't rely on type or
211     * stride of BAD_FILE registers.
212     */
brw_regbrw_reg213    brw_reg() {
214       memset((void*)this, 0, sizeof(*this));
215       this->type = BRW_TYPE_UD;
216       this->stride = 1;
217       this->file = BAD_FILE;
218    }
219 
220    bool equals(const brw_reg &r) const;
221    bool negative_equals(const brw_reg &r) const;
222    bool is_contiguous() const;
223 
224    bool is_zero() const;
225    bool is_one() const;
226    bool is_negative_one() const;
227    bool is_null() const;
228    bool is_accumulator() const;
229    bool is_address() const;
230 
231    unsigned address_slot(unsigned byte_offset) const;
232 
233    /**
234     * Return the size in bytes of a single logical component of the
235     * register assuming the given execution width.
236     */
237    unsigned component_size(unsigned width) const;
238 #endif /* __cplusplus */
239 } brw_reg;
240 
241 static inline unsigned
phys_file(const struct brw_reg reg)242 phys_file(const struct brw_reg reg)
243 {
244    switch (reg.file) {
245    case ARF:
246    case FIXED_GRF:
247    case IMM:
248       return reg.file;
249 
250    case ADDRESS:
251       return ARF;
252 
253    default:
254       unreachable("register type should have been lowered");
255    }
256 }
257 
258 static inline unsigned
phys_nr(const struct intel_device_info * devinfo,const struct brw_reg reg)259 phys_nr(const struct intel_device_info *devinfo, const struct brw_reg reg)
260 {
261    if (devinfo->ver >= 20) {
262       if (reg.file == FIXED_GRF)
263          return reg.nr / 2;
264       else if (reg.file == ADDRESS)
265          return BRW_ARF_ADDRESS;
266       else if (reg.file == ARF &&
267                reg.nr >= BRW_ARF_ACCUMULATOR &&
268                reg.nr < BRW_ARF_FLAG)
269          return BRW_ARF_ACCUMULATOR + (reg.nr - BRW_ARF_ACCUMULATOR) / 2;
270       else
271          return reg.nr;
272    } else if (reg.file == ADDRESS) {
273       return BRW_ARF_ADDRESS;
274    } else {
275       return reg.nr;
276    }
277 }
278 
279 static inline unsigned
phys_subnr(const struct intel_device_info * devinfo,const struct brw_reg reg)280 phys_subnr(const struct intel_device_info *devinfo, const struct brw_reg reg)
281 {
282    if (devinfo->ver >= 20) {
283       if (reg.file == FIXED_GRF ||
284           (reg.file == ARF &&
285            reg.nr >= BRW_ARF_ACCUMULATOR &&
286            reg.nr < BRW_ARF_FLAG))
287          return (reg.nr & 1) * REG_SIZE + reg.subnr;
288       else
289          return reg.subnr;
290    } else {
291       return reg.subnr;
292    }
293 }
294 
295 static inline bool
brw_regs_equal(const struct brw_reg * a,const struct brw_reg * b)296 brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
297 {
298    return a->bits   == b->bits &&
299           a->u64    == b->u64 &&
300           a->offset == b->offset &&
301           a->stride == b->stride;
302 }
303 
304 static inline bool
brw_regs_negative_equal(const struct brw_reg * a,const struct brw_reg * b)305 brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
306 {
307    if (a->file == IMM) {
308       if (a->bits != b->bits)
309          return false;
310 
311       switch ((enum brw_reg_type) a->type) {
312       case BRW_TYPE_UQ:
313       case BRW_TYPE_Q:
314          return a->d64 == -b->d64;
315       case BRW_TYPE_DF:
316          return a->df == -b->df;
317       case BRW_TYPE_UD:
318       case BRW_TYPE_D:
319          return a->d == -b->d;
320       case BRW_TYPE_F:
321          return a->f == -b->f;
322       case BRW_TYPE_VF:
323          /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
324           * of -0).  There are occasions where 0 or -0 is used and the exact
325           * bit pattern is desired.  At the very least, changing this to allow
326           * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
327           */
328          return a->ud == (b->ud ^ 0x80808080);
329       case BRW_TYPE_UW:
330       case BRW_TYPE_W:
331       case BRW_TYPE_UV:
332       case BRW_TYPE_V:
333       case BRW_TYPE_HF:
334          /* FINISHME: Implement support for these types once there is
335           * something in the compiler that can generate them.  Until then,
336           * they cannot be tested.
337           */
338          return false;
339       case BRW_TYPE_UB:
340       case BRW_TYPE_B:
341       default:
342          unreachable("not reached");
343       }
344    } else {
345       struct brw_reg tmp = *a;
346 
347       tmp.negate = !tmp.negate;
348 
349       return brw_regs_equal(&tmp, b);
350    }
351 }
352 
353 static inline enum brw_reg_type
get_exec_type(const enum brw_reg_type type)354 get_exec_type(const enum brw_reg_type type)
355 {
356    switch (type) {
357    case BRW_TYPE_B:
358    case BRW_TYPE_V:
359       return BRW_TYPE_W;
360    case BRW_TYPE_UB:
361    case BRW_TYPE_UV:
362       return BRW_TYPE_UW;
363    case BRW_TYPE_VF:
364       return BRW_TYPE_F;
365    default:
366       return type;
367    }
368 }
369 
370 /**
371  * Return an integer type of the requested size and signedness.
372  */
373 static inline enum brw_reg_type
brw_int_type(unsigned sz,bool is_signed)374 brw_int_type(unsigned sz, bool is_signed)
375 {
376    switch (sz) {
377    case 1:
378       return (is_signed ? BRW_TYPE_B : BRW_TYPE_UB);
379    case 2:
380       return (is_signed ? BRW_TYPE_W : BRW_TYPE_UW);
381    case 4:
382       return (is_signed ? BRW_TYPE_D : BRW_TYPE_UD);
383    case 8:
384       return (is_signed ? BRW_TYPE_Q : BRW_TYPE_UQ);
385    default:
386       unreachable("Not reached.");
387    }
388 }
389 
390 /**
391  * Construct a brw_reg.
392  * \param file      one of the BRW_x_REGISTER_FILE values
393  * \param nr        register number/index
394  * \param subnr     register sub number
395  * \param negate    register negate modifier
396  * \param abs       register abs modifier
397  * \param type      one of BRW_TYPE_x
398  * \param vstride   one of BRW_VERTICAL_STRIDE_x
399  * \param width     one of BRW_WIDTH_x
400  * \param hstride   one of BRW_HORIZONTAL_STRIDE_x
401  * \param swizzle   one of BRW_SWIZZLE_x
402  * \param writemask WRITEMASK_X/Y/Z/W bitfield
403  */
404 static inline struct brw_reg
brw_make_reg(enum brw_reg_file file,unsigned nr,unsigned subnr,unsigned negate,unsigned abs,enum brw_reg_type type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)405 brw_make_reg(enum brw_reg_file file,
406              unsigned nr,
407              unsigned subnr,
408              unsigned negate,
409              unsigned abs,
410              enum brw_reg_type type,
411              unsigned vstride,
412              unsigned width,
413              unsigned hstride,
414              unsigned swizzle,
415              unsigned writemask)
416 {
417    struct brw_reg reg;
418    if (file == FIXED_GRF)
419       assert(nr < XE2_MAX_GRF);
420    else if (file == ARF)
421       assert(nr <= BRW_ARF_TIMESTAMP);
422 
423    reg.type = type;
424    reg.file = file;
425    reg.negate = negate;
426    reg.abs = abs;
427    reg.address_mode = BRW_ADDRESS_DIRECT;
428    reg.pad0 = 0;
429    reg.subnr = subnr * brw_type_size_bytes(type);
430    reg.nr = nr;
431 
432    /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
433     * set swizzle and writemask to W, as the lower bits of subnr will
434     * be lost when converted to align16.  This is probably too much to
435     * keep track of as you'd want it adjusted by suboffset(), etc.
436     * Perhaps fix up when converting to align16?
437     */
438    reg.swizzle = swizzle;
439    reg.writemask = writemask;
440    reg.indirect_offset = 0;
441    reg.vstride = vstride;
442    reg.width = width;
443    reg.hstride = hstride;
444    reg.is_scalar = 0;
445 
446    reg.offset = 0;
447    reg.stride = 1;
448    if (file == IMM &&
449        type != BRW_TYPE_V &&
450        type != BRW_TYPE_UV &&
451        type != BRW_TYPE_VF) {
452       reg.stride = 0;
453    }
454 
455    return reg;
456 }
457 
458 /** Construct float[16] register */
459 static inline struct brw_reg
brw_vec16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)460 brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
461 {
462    return brw_make_reg(file,
463                   nr,
464                   subnr,
465                   0,
466                   0,
467                   BRW_TYPE_F,
468                   BRW_VERTICAL_STRIDE_16,
469                   BRW_WIDTH_16,
470                   BRW_HORIZONTAL_STRIDE_1,
471                   BRW_SWIZZLE_XYZW,
472                   WRITEMASK_XYZW);
473 }
474 
475 /** Construct float[8] register */
476 static inline struct brw_reg
brw_vec8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)477 brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
478 {
479    return brw_make_reg(file,
480                   nr,
481                   subnr,
482                   0,
483                   0,
484                   BRW_TYPE_F,
485                   BRW_VERTICAL_STRIDE_8,
486                   BRW_WIDTH_8,
487                   BRW_HORIZONTAL_STRIDE_1,
488                   BRW_SWIZZLE_XYZW,
489                   WRITEMASK_XYZW);
490 }
491 
492 /** Construct float[4] register */
493 static inline struct brw_reg
brw_vec4_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)494 brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
495 {
496    return brw_make_reg(file,
497                   nr,
498                   subnr,
499                   0,
500                   0,
501                   BRW_TYPE_F,
502                   BRW_VERTICAL_STRIDE_4,
503                   BRW_WIDTH_4,
504                   BRW_HORIZONTAL_STRIDE_1,
505                   BRW_SWIZZLE_XYZW,
506                   WRITEMASK_XYZW);
507 }
508 
509 /** Construct float[2] register */
510 static inline struct brw_reg
brw_vec2_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)511 brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
512 {
513    return brw_make_reg(file,
514                   nr,
515                   subnr,
516                   0,
517                   0,
518                   BRW_TYPE_F,
519                   BRW_VERTICAL_STRIDE_2,
520                   BRW_WIDTH_2,
521                   BRW_HORIZONTAL_STRIDE_1,
522                   BRW_SWIZZLE_XYXY,
523                   WRITEMASK_XY);
524 }
525 
526 /** Construct float[1] register */
527 static inline struct brw_reg
brw_vec1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)528 brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
529 {
530    return brw_make_reg(file,
531                   nr,
532                   subnr,
533                   0,
534                   0,
535                   BRW_TYPE_F,
536                   BRW_VERTICAL_STRIDE_0,
537                   BRW_WIDTH_1,
538                   BRW_HORIZONTAL_STRIDE_0,
539                   BRW_SWIZZLE_XXXX,
540                   WRITEMASK_X);
541 }
542 
543 static inline struct brw_reg
brw_vecn_reg(unsigned width,enum brw_reg_file file,unsigned nr,unsigned subnr)544 brw_vecn_reg(unsigned width, enum brw_reg_file file,
545              unsigned nr, unsigned subnr)
546 {
547    switch (width) {
548    case 1:
549       return brw_vec1_reg(file, nr, subnr);
550    case 2:
551       return brw_vec2_reg(file, nr, subnr);
552    case 4:
553       return brw_vec4_reg(file, nr, subnr);
554    case 8:
555       return brw_vec8_reg(file, nr, subnr);
556    case 16:
557       return brw_vec16_reg(file, nr, subnr);
558    default:
559       unreachable("Invalid register width");
560    }
561 }
562 
563 static inline struct brw_reg
retype(struct brw_reg reg,enum brw_reg_type type)564 retype(struct brw_reg reg, enum brw_reg_type type)
565 {
566    reg.type = type;
567    return reg;
568 }
569 
570 static inline struct brw_reg
firsthalf(struct brw_reg reg)571 firsthalf(struct brw_reg reg)
572 {
573    return reg;
574 }
575 
576 static inline struct brw_reg
sechalf(struct brw_reg reg)577 sechalf(struct brw_reg reg)
578 {
579    if (reg.vstride)
580       reg.nr++;
581    return reg;
582 }
583 
584 static inline struct brw_reg
offset(struct brw_reg reg,unsigned delta)585 offset(struct brw_reg reg, unsigned delta)
586 {
587    reg.nr += delta;
588    return reg;
589 }
590 
591 
592 static inline struct brw_reg
byte_offset(struct brw_reg reg,unsigned bytes)593 byte_offset(struct brw_reg reg, unsigned bytes)
594 {
595    switch (reg.file) {
596    case BAD_FILE:
597       break;
598    case VGRF:
599    case ATTR:
600    case UNIFORM:
601       reg.offset += bytes;
602       break;
603    case ADDRESS:
604    case ARF:
605    case FIXED_GRF: {
606       const unsigned suboffset = reg.subnr + bytes;
607       reg.nr += suboffset / REG_SIZE;
608       reg.subnr = suboffset % REG_SIZE;
609       break;
610    }
611    case IMM:
612    default:
613       assert(bytes == 0);
614    }
615    return reg;
616 }
617 
618 static inline struct brw_reg
suboffset(struct brw_reg reg,unsigned delta)619 suboffset(struct brw_reg reg, unsigned delta)
620 {
621    return byte_offset(reg, delta * brw_type_size_bytes(reg.type));
622 }
623 
624 /** Construct unsigned word[16] register */
625 static inline struct brw_reg
brw_uw16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)626 brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
627 {
628    return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_TYPE_UW), subnr);
629 }
630 
631 /** Construct unsigned word[8] register */
632 static inline struct brw_reg
brw_uw8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)633 brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
634 {
635    return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_TYPE_UW), subnr);
636 }
637 
638 /** Construct unsigned word[1] register */
639 static inline struct brw_reg
brw_uw1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)640 brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
641 {
642    return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_TYPE_UW), subnr);
643 }
644 
645 static inline struct brw_reg
brw_ud8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)646 brw_ud8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
647 {
648    return retype(brw_vec8_reg(file, nr, subnr), BRW_TYPE_UD);
649 }
650 
651 static inline struct brw_reg
brw_ud1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)652 brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
653 {
654    return retype(brw_vec1_reg(file, nr, subnr), BRW_TYPE_UD);
655 }
656 
657 static inline struct brw_reg
brw_imm_reg(enum brw_reg_type type)658 brw_imm_reg(enum brw_reg_type type)
659 {
660    return brw_make_reg(IMM,
661                   0,
662                   0,
663                   0,
664                   0,
665                   type,
666                   BRW_VERTICAL_STRIDE_0,
667                   BRW_WIDTH_1,
668                   BRW_HORIZONTAL_STRIDE_0,
669                   0,
670                   0);
671 }
672 
673 /** Construct float immediate register */
674 static inline struct brw_reg
brw_imm_df(double df)675 brw_imm_df(double df)
676 {
677    struct brw_reg imm = brw_imm_reg(BRW_TYPE_DF);
678    imm.df = df;
679    return imm;
680 }
681 
682 static inline struct brw_reg
brw_imm_u64(uint64_t u64)683 brw_imm_u64(uint64_t u64)
684 {
685    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
686    imm.u64 = u64;
687    return imm;
688 }
689 
690 static inline struct brw_reg
brw_imm_f(float f)691 brw_imm_f(float f)
692 {
693    struct brw_reg imm = brw_imm_reg(BRW_TYPE_F);
694    imm.f = f;
695    return imm;
696 }
697 
698 /** Construct int64_t immediate register */
699 static inline struct brw_reg
brw_imm_q(int64_t q)700 brw_imm_q(int64_t q)
701 {
702    struct brw_reg imm = brw_imm_reg(BRW_TYPE_Q);
703    imm.d64 = q;
704    return imm;
705 }
706 
707 /** Construct int64_t immediate register */
708 static inline struct brw_reg
brw_imm_uq(uint64_t uq)709 brw_imm_uq(uint64_t uq)
710 {
711    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
712    imm.u64 = uq;
713    return imm;
714 }
715 
716 /** Construct integer immediate register */
717 static inline struct brw_reg
brw_imm_d(int d)718 brw_imm_d(int d)
719 {
720    struct brw_reg imm = brw_imm_reg(BRW_TYPE_D);
721    imm.d = d;
722    return imm;
723 }
724 
725 /** Construct uint immediate register */
726 static inline struct brw_reg
brw_imm_ud(unsigned ud)727 brw_imm_ud(unsigned ud)
728 {
729    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UD);
730    imm.ud = ud;
731    return imm;
732 }
733 
734 /** Construct ushort immediate register */
735 static inline struct brw_reg
brw_imm_uw(uint16_t uw)736 brw_imm_uw(uint16_t uw)
737 {
738    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UW);
739    imm.ud = uw | ((uint32_t)uw << 16);
740    return imm;
741 }
742 
743 /** Construct short immediate register */
744 static inline struct brw_reg
brw_imm_w(int16_t w)745 brw_imm_w(int16_t w)
746 {
747    struct brw_reg imm = brw_imm_reg(BRW_TYPE_W);
748    imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
749    return imm;
750 }
751 
752 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
753  * numbers alias with _V and _VF below:
754  */
755 
756 /** Construct vector of eight signed half-byte values */
757 static inline struct brw_reg
brw_imm_v(unsigned v)758 brw_imm_v(unsigned v)
759 {
760    struct brw_reg imm = brw_imm_reg(BRW_TYPE_V);
761    imm.ud = v;
762    return imm;
763 }
764 
765 /** Construct vector of eight unsigned half-byte values */
766 static inline struct brw_reg
brw_imm_uv(unsigned uv)767 brw_imm_uv(unsigned uv)
768 {
769    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UV);
770    imm.ud = uv;
771    return imm;
772 }
773 
774 /** Construct vector of four 8-bit float values */
775 static inline struct brw_reg
brw_imm_vf(unsigned v)776 brw_imm_vf(unsigned v)
777 {
778    struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
779    imm.ud = v;
780    return imm;
781 }
782 
783 static inline struct brw_reg
brw_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)784 brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
785 {
786    struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
787    imm.vstride = BRW_VERTICAL_STRIDE_0;
788    imm.width = BRW_WIDTH_4;
789    imm.hstride = BRW_HORIZONTAL_STRIDE_1;
790    imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
791    return imm;
792 }
793 
794 
795 static inline struct brw_reg
brw_address(struct brw_reg reg)796 brw_address(struct brw_reg reg)
797 {
798    return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
799 }
800 
801 /** Construct float[1] general-purpose register */
802 static inline struct brw_reg
brw_vec1_grf(unsigned nr,unsigned subnr)803 brw_vec1_grf(unsigned nr, unsigned subnr)
804 {
805    return brw_vec1_reg(FIXED_GRF, nr, subnr);
806 }
807 
808 static inline struct brw_reg
xe2_vec1_grf(unsigned nr,unsigned subnr)809 xe2_vec1_grf(unsigned nr, unsigned subnr)
810 {
811    return brw_vec1_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
812 }
813 
814 /** Construct float[2] general-purpose register */
815 static inline struct brw_reg
brw_vec2_grf(unsigned nr,unsigned subnr)816 brw_vec2_grf(unsigned nr, unsigned subnr)
817 {
818    return brw_vec2_reg(FIXED_GRF, nr, subnr);
819 }
820 
821 static inline struct brw_reg
xe2_vec2_grf(unsigned nr,unsigned subnr)822 xe2_vec2_grf(unsigned nr, unsigned subnr)
823 {
824    return brw_vec2_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
825 }
826 
827 /** Construct float[4] general-purpose register */
828 static inline struct brw_reg
brw_vec4_grf(unsigned nr,unsigned subnr)829 brw_vec4_grf(unsigned nr, unsigned subnr)
830 {
831    return brw_vec4_reg(FIXED_GRF, nr, subnr);
832 }
833 
834 static inline struct brw_reg
xe2_vec4_grf(unsigned nr,unsigned subnr)835 xe2_vec4_grf(unsigned nr, unsigned subnr)
836 {
837    return brw_vec4_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
838 }
839 
840 /** Construct float[8] general-purpose register */
841 static inline struct brw_reg
brw_vec8_grf(unsigned nr,unsigned subnr)842 brw_vec8_grf(unsigned nr, unsigned subnr)
843 {
844    return brw_vec8_reg(FIXED_GRF, nr, subnr);
845 }
846 
847 static inline struct brw_reg
xe2_vec8_grf(unsigned nr,unsigned subnr)848 xe2_vec8_grf(unsigned nr, unsigned subnr)
849 {
850    return brw_vec8_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
851 }
852 
853 /** Construct float[16] general-purpose register */
854 static inline struct brw_reg
brw_vec16_grf(unsigned nr,unsigned subnr)855 brw_vec16_grf(unsigned nr, unsigned subnr)
856 {
857    return brw_vec16_reg(FIXED_GRF, nr, subnr);
858 }
859 
860 static inline struct brw_reg
xe2_vec16_grf(unsigned nr,unsigned subnr)861 xe2_vec16_grf(unsigned nr, unsigned subnr)
862 {
863    return brw_vec16_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
864 }
865 
866 static inline struct brw_reg
brw_vecn_grf(unsigned width,unsigned nr,unsigned subnr)867 brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
868 {
869    return brw_vecn_reg(width, FIXED_GRF, nr, subnr);
870 }
871 
872 static inline struct brw_reg
xe2_vecn_grf(unsigned width,unsigned nr,unsigned subnr)873 xe2_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
874 {
875    return brw_vecn_reg(width, FIXED_GRF, nr + subnr / 8, subnr % 8);
876 }
877 
878 static inline struct brw_reg
brw_uw1_grf(unsigned nr,unsigned subnr)879 brw_uw1_grf(unsigned nr, unsigned subnr)
880 {
881    return brw_uw1_reg(FIXED_GRF, nr, subnr);
882 }
883 
884 static inline struct brw_reg
brw_uw8_grf(unsigned nr,unsigned subnr)885 brw_uw8_grf(unsigned nr, unsigned subnr)
886 {
887    return brw_uw8_reg(FIXED_GRF, nr, subnr);
888 }
889 
890 static inline struct brw_reg
brw_uw16_grf(unsigned nr,unsigned subnr)891 brw_uw16_grf(unsigned nr, unsigned subnr)
892 {
893    return brw_uw16_reg(FIXED_GRF, nr, subnr);
894 }
895 
896 static inline struct brw_reg
brw_ud8_grf(unsigned nr,unsigned subnr)897 brw_ud8_grf(unsigned nr, unsigned subnr)
898 {
899    return brw_ud8_reg(FIXED_GRF, nr, subnr);
900 }
901 
902 static inline struct brw_reg
brw_ud1_grf(unsigned nr,unsigned subnr)903 brw_ud1_grf(unsigned nr, unsigned subnr)
904 {
905    return brw_ud1_reg(FIXED_GRF, nr, subnr);
906 }
907 
908 
909 /** Construct null register (usually used for setting condition codes) */
910 static inline struct brw_reg
brw_null_reg(void)911 brw_null_reg(void)
912 {
913    return brw_vec8_reg(ARF, BRW_ARF_NULL, 0);
914 }
915 
916 static inline struct brw_reg
brw_null_vec(unsigned width)917 brw_null_vec(unsigned width)
918 {
919    return brw_vecn_reg(width, ARF, BRW_ARF_NULL, 0);
920 }
921 
922 static inline struct brw_reg
brw_address_reg(unsigned subnr)923 brw_address_reg(unsigned subnr)
924 {
925    return brw_uw1_reg(ADDRESS, 0, subnr);
926 }
927 
928 static inline struct brw_reg
brw_tdr_reg(void)929 brw_tdr_reg(void)
930 {
931    return brw_uw1_reg(ARF, BRW_ARF_TDR, 0);
932 }
933 
934 /* If/else instructions break in align16 mode if writemask & swizzle
935  * aren't xyzw.  This goes against the convention for other scalar
936  * regs:
937  */
938 static inline struct brw_reg
brw_ip_reg(void)939 brw_ip_reg(void)
940 {
941    return brw_make_reg(ARF,
942                   BRW_ARF_IP,
943                   0,
944                   0,
945                   0,
946                   BRW_TYPE_UD,
947                   BRW_VERTICAL_STRIDE_4, /* ? */
948                   BRW_WIDTH_1,
949                   BRW_HORIZONTAL_STRIDE_0,
950                   BRW_SWIZZLE_XYZW, /* NOTE! */
951                   WRITEMASK_XYZW); /* NOTE! */
952 }
953 
954 static inline struct brw_reg
brw_notification_reg(void)955 brw_notification_reg(void)
956 {
957    return brw_make_reg(ARF,
958                   BRW_ARF_NOTIFICATION_COUNT,
959                   0,
960                   0,
961                   0,
962                   BRW_TYPE_UD,
963                   BRW_VERTICAL_STRIDE_0,
964                   BRW_WIDTH_1,
965                   BRW_HORIZONTAL_STRIDE_0,
966                   BRW_SWIZZLE_XXXX,
967                   WRITEMASK_X);
968 }
969 
970 static inline struct brw_reg
brw_cr0_reg(unsigned subnr)971 brw_cr0_reg(unsigned subnr)
972 {
973    return brw_ud1_reg(ARF, BRW_ARF_CONTROL, subnr);
974 }
975 
976 static inline struct brw_reg
brw_sr0_reg(unsigned subnr)977 brw_sr0_reg(unsigned subnr)
978 {
979    return brw_ud1_reg(ARF, BRW_ARF_STATE, subnr);
980 }
981 
982 static inline struct brw_reg
brw_acc_reg(unsigned width)983 brw_acc_reg(unsigned width)
984 {
985    return brw_vecn_reg(width, ARF,
986                        BRW_ARF_ACCUMULATOR, 0);
987 }
988 
989 static inline struct brw_reg
brw_flag_reg(int reg,int subreg)990 brw_flag_reg(int reg, int subreg)
991 {
992    return brw_uw1_reg(ARF,
993                       BRW_ARF_FLAG + reg, subreg);
994 }
995 
996 static inline struct brw_reg
brw_flag_subreg(unsigned subreg)997 brw_flag_subreg(unsigned subreg)
998 {
999    return brw_uw1_reg(ARF,
1000                       BRW_ARF_FLAG + subreg / 2, subreg % 2);
1001 }
1002 
1003 /**
1004  * Return the mask register present in Gfx4-5, or the related register present
1005  * in Gfx7.5 and later hardware referred to as "channel enable" register in
1006  * the documentation.
1007  */
1008 static inline struct brw_reg
brw_mask_reg(unsigned subnr)1009 brw_mask_reg(unsigned subnr)
1010 {
1011    return brw_uw1_reg(ARF, BRW_ARF_MASK, subnr);
1012 }
1013 
1014 static inline struct brw_reg
brw_vmask_reg()1015 brw_vmask_reg()
1016 {
1017    return brw_sr0_reg(3);
1018 }
1019 
1020 static inline struct brw_reg
brw_dmask_reg()1021 brw_dmask_reg()
1022 {
1023    return brw_sr0_reg(2);
1024 }
1025 
1026 static inline struct brw_reg
brw_vgrf(unsigned nr,enum brw_reg_type type)1027 brw_vgrf(unsigned nr, enum brw_reg_type type)
1028 {
1029    struct brw_reg reg = {};
1030    reg.file = VGRF;
1031    reg.nr = nr;
1032    reg.type = type;
1033    reg.stride = 1;
1034    return reg;
1035 }
1036 
1037 static inline struct brw_reg
brw_attr_reg(unsigned nr,enum brw_reg_type type)1038 brw_attr_reg(unsigned nr, enum brw_reg_type type)
1039 {
1040    struct brw_reg reg = {};
1041    reg.file = ATTR;
1042    reg.nr = nr;
1043    reg.type = type;
1044    reg.stride = 1;
1045    return reg;
1046 }
1047 
1048 static inline struct brw_reg
brw_uniform_reg(unsigned nr,enum brw_reg_type type)1049 brw_uniform_reg(unsigned nr, enum brw_reg_type type)
1050 {
1051    struct brw_reg reg = {};
1052    reg.file = UNIFORM;
1053    reg.nr = nr;
1054    reg.type = type;
1055    reg.stride = 0;
1056    return reg;
1057 }
1058 
/* Map a stride/width count of 1, 2, 4, 8, 16 or 32 to 1..6 (log2 + 1).
 * Zero and every other value map to 0.
 *
 * This is almost always called with a numeric constant argument, so
 * it is written as a single constant-foldable expression:
 */
static inline unsigned cvt(unsigned val)
{
   return val == 1  ? 1 :
          val == 2  ? 2 :
          val == 4  ? 3 :
          val == 8  ? 4 :
          val == 16 ? 5 :
          val == 32 ? 6 :
          0;
}
1075 
1076 static inline struct brw_reg
stride(struct brw_reg reg,unsigned vstride,unsigned width,unsigned hstride)1077 stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
1078 {
1079    reg.vstride = cvt(vstride);
1080    reg.width = cvt(width) - 1;
1081    reg.hstride = cvt(hstride);
1082    return reg;
1083 }
1084 
1085 /**
1086  * Multiply the vertical and horizontal stride of a register by the given
1087  * factor \a s.
1088  */
1089 static inline struct brw_reg
spread(struct brw_reg reg,unsigned s)1090 spread(struct brw_reg reg, unsigned s)
1091 {
1092    if (s) {
1093       assert(util_is_power_of_two_nonzero(s));
1094 
1095       if (reg.hstride)
1096          reg.hstride += cvt(s) - 1;
1097 
1098       if (reg.vstride)
1099          reg.vstride += cvt(s) - 1;
1100 
1101       return reg;
1102    } else {
1103       return stride(reg, 0, 1, 0);
1104    }
1105 }
1106 
1107 /**
1108  * Reinterpret each channel of register \p reg as a vector of values of the
1109  * given smaller type and take the i-th subcomponent from each.
1110  */
1111 static inline struct brw_reg
subscript(struct brw_reg reg,enum brw_reg_type type,unsigned i)1112 subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1113 {
1114    assert((i + 1) * brw_type_size_bytes(type) <= brw_type_size_bytes(reg.type));
1115 
1116    if (reg.file == ARF || reg.file == FIXED_GRF) {
1117       /* The stride is encoded inconsistently for fixed GRF and ARF registers
1118        * as the log2 of the actual vertical and horizontal strides.
1119        */
1120       const int delta = util_logbase2(brw_type_size_bytes(reg.type)) -
1121                         util_logbase2(brw_type_size_bytes(type));
1122       reg.hstride += (reg.hstride ? delta : 0);
1123       reg.vstride += (reg.vstride ? delta : 0);
1124 
1125    } else if (reg.file == IMM) {
1126       unsigned bit_size = brw_type_size_bits(type);
1127       reg.u64 >>= i * bit_size;
1128       reg.u64 &= BITFIELD64_MASK(bit_size);
1129       if (bit_size <= 16)
1130          reg.u64 |= reg.u64 << 16;
1131       return retype(reg, type);
1132    } else {
1133       reg.stride *= brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
1134    }
1135 
1136    return byte_offset(retype(reg, type), i * brw_type_size_bytes(type));
1137 }
1138 
1139 static inline struct brw_reg
vec16(struct brw_reg reg)1140 vec16(struct brw_reg reg)
1141 {
1142    return stride(reg, 16,16,1);
1143 }
1144 
1145 static inline struct brw_reg
vec8(struct brw_reg reg)1146 vec8(struct brw_reg reg)
1147 {
1148    return stride(reg, 8,8,1);
1149 }
1150 
1151 static inline struct brw_reg
vec4(struct brw_reg reg)1152 vec4(struct brw_reg reg)
1153 {
1154    return stride(reg, 4,4,1);
1155 }
1156 
1157 static inline struct brw_reg
vec2(struct brw_reg reg)1158 vec2(struct brw_reg reg)
1159 {
1160    return stride(reg, 2,2,1);
1161 }
1162 
1163 static inline struct brw_reg
vec1(struct brw_reg reg)1164 vec1(struct brw_reg reg)
1165 {
1166    return stride(reg, 0,1,0);
1167 }
1168 
1169 
1170 static inline struct brw_reg
get_element(struct brw_reg reg,unsigned elt)1171 get_element(struct brw_reg reg, unsigned elt)
1172 {
1173    return vec1(suboffset(reg, elt));
1174 }
1175 
1176 static inline struct brw_reg
get_element_ud(struct brw_reg reg,unsigned elt)1177 get_element_ud(struct brw_reg reg, unsigned elt)
1178 {
1179    return vec1(suboffset(retype(reg, BRW_TYPE_UD), elt));
1180 }
1181 
1182 static inline struct brw_reg
get_element_d(struct brw_reg reg,unsigned elt)1183 get_element_d(struct brw_reg reg, unsigned elt)
1184 {
1185    return vec1(suboffset(retype(reg, BRW_TYPE_D), elt));
1186 }
1187 
1188 static inline struct brw_reg
brw_swizzle(struct brw_reg reg,unsigned swz)1189 brw_swizzle(struct brw_reg reg, unsigned swz)
1190 {
1191    if (reg.file == IMM)
1192       reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1193    else
1194       reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1195 
1196    return reg;
1197 }
1198 
1199 static inline struct brw_reg
brw_writemask(struct brw_reg reg,unsigned mask)1200 brw_writemask(struct brw_reg reg, unsigned mask)
1201 {
1202    assert(reg.file != IMM);
1203    reg.writemask &= mask;
1204    return reg;
1205 }
1206 
1207 static inline struct brw_reg
brw_set_writemask(struct brw_reg reg,unsigned mask)1208 brw_set_writemask(struct brw_reg reg, unsigned mask)
1209 {
1210    assert(reg.file != IMM);
1211    reg.writemask = mask;
1212    return reg;
1213 }
1214 
/**
 * Return a mask with the \p n lowest bits set, e.g. 0x7 for n == 3.
 *
 * \p n must be smaller than the bit width of unsigned.
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   /* Shift an unsigned one: shifting a signed 1 into the sign bit
    * (n == 31) is undefined behavior.
    */
   return (1u << n) - 1u;
}
1220 
/**
 * Return a mask of \p n consecutive bits whose lowest set bit is at
 * position \p first_component.  The run must fit in four components.
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);

   const int low_bits = (1 << n) - 1;

   return low_bits << first_component;
}
1227 
1228 static inline struct brw_reg
negate(struct brw_reg reg)1229 negate(struct brw_reg reg)
1230 {
1231    reg.negate ^= 1;
1232    return reg;
1233 }
1234 
1235 static inline struct brw_reg
brw_abs(struct brw_reg reg)1236 brw_abs(struct brw_reg reg)
1237 {
1238    reg.abs = 1;
1239    reg.negate = 0;
1240    return reg;
1241 }
1242 
1243 /************************************************************************/
1244 
1245 static inline struct brw_reg
brw_vec1_indirect(unsigned subnr,int offset)1246 brw_vec1_indirect(unsigned subnr, int offset)
1247 {
1248    struct brw_reg reg =  brw_vec1_grf(0, 0);
1249    reg.subnr = subnr;
1250    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1251    reg.indirect_offset = offset;
1252    return reg;
1253 }
1254 
1255 static inline struct brw_reg
brw_VxH_indirect(unsigned subnr,int offset)1256 brw_VxH_indirect(unsigned subnr, int offset)
1257 {
1258    struct brw_reg reg = brw_vec1_grf(0, 0);
1259    reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1260    reg.subnr = subnr;
1261    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1262    reg.indirect_offset = offset;
1263    return reg;
1264 }
1265 
1266 static inline bool
region_matches(struct brw_reg reg,enum brw_vertical_stride v,enum brw_width w,enum brw_horizontal_stride h)1267 region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1268                enum brw_width w, enum brw_horizontal_stride h)
1269 {
1270    return reg.vstride == v &&
1271           reg.width == w &&
1272           reg.hstride == h;
1273 }
1274 
/* True when reg has the scalar <0;1,0> region. */
#define has_scalar_region(reg)                                   \
   region_matches((reg), BRW_VERTICAL_STRIDE_0,                  \
                  BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0)
1278 
1279 /**
1280  * Return the size in bytes per data element of register \p reg on the
1281  * corresponding register file.
1282  */
1283 static inline unsigned
element_sz(struct brw_reg reg)1284 element_sz(struct brw_reg reg)
1285 {
1286    if (reg.file == IMM || has_scalar_region(reg)) {
1287       return brw_type_size_bytes(reg.type);
1288 
1289    } else if (reg.width == BRW_WIDTH_1 &&
1290               reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
1291       assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
1292       return brw_type_size_bytes(reg.type) << (reg.vstride - 1);
1293 
1294    } else {
1295       assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
1296       assert(reg.vstride == reg.hstride + reg.width);
1297       return brw_type_size_bytes(reg.type) << (reg.hstride - 1);
1298    }
1299 }
1300 
1301 /* brw_packed_float.c */
1302 int brw_float_to_vf(float f);
1303 float brw_vf_to_float(unsigned char vf);
1304 
1305 bool brw_reg_saturate_immediate(brw_reg *reg);
1306 bool brw_reg_negate_immediate(brw_reg *reg);
1307 bool brw_reg_abs_immediate(brw_reg *reg);
1308 
1309 #ifdef __cplusplus
1310 }
1311 #endif
1312 
1313 #ifdef __cplusplus
1314 
1315 static inline brw_reg
horiz_offset(const brw_reg & reg,unsigned delta)1316 horiz_offset(const brw_reg &reg, unsigned delta)
1317 {
1318    switch (reg.file) {
1319    case BAD_FILE:
1320    case UNIFORM:
1321    case IMM:
1322       /* These only have a single component that is implicitly splatted.  A
1323        * horizontal offset should be a harmless no-op.
1324        * XXX - Handle vector immediates correctly.
1325        */
1326       return reg;
1327    case VGRF:
1328    case ATTR:
1329       return byte_offset(reg, delta * reg.stride * brw_type_size_bytes(reg.type));
1330    case ADDRESS:
1331    case ARF:
1332    case FIXED_GRF:
1333       if (reg.is_null()) {
1334          return reg;
1335       } else {
1336          const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1337          const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1338          const unsigned width = 1 << reg.width;
1339 
1340          if (delta % width == 0) {
1341             return byte_offset(reg, delta / width * vstride * brw_type_size_bytes(reg.type));
1342          } else {
1343             assert(vstride == hstride * width);
1344             return byte_offset(reg, delta * hstride * brw_type_size_bytes(reg.type));
1345          }
1346       }
1347    }
1348    unreachable("Invalid register file");
1349 }
1350 
1351 static inline brw_reg
offset(brw_reg reg,unsigned width,unsigned delta)1352 offset(brw_reg reg, unsigned width, unsigned delta)
1353 {
1354    switch (reg.file) {
1355    case BAD_FILE:
1356       break;
1357    case ADDRESS:
1358    case ARF:
1359    case FIXED_GRF:
1360    case VGRF:
1361    case ATTR:
1362    case UNIFORM:
1363       return byte_offset(reg, delta * reg.component_size(width));
1364    case IMM:
1365       assert(delta == 0);
1366    }
1367    return reg;
1368 }
1369 
1370 /**
1371  * Get the scalar channel of \p reg given by \p idx and replicate it to all
1372  * channels of the result.
1373  */
1374 static inline brw_reg
component(brw_reg reg,unsigned idx)1375 component(brw_reg reg, unsigned idx)
1376 {
1377    reg = horiz_offset(reg, idx);
1378    reg.stride = 0;
1379    if (reg.file == ARF || reg.file == FIXED_GRF) {
1380       reg.vstride = BRW_VERTICAL_STRIDE_0;
1381       reg.width = BRW_WIDTH_1;
1382       reg.hstride = BRW_HORIZONTAL_STRIDE_0;
1383    }
1384    return reg;
1385 }
1386 
1387 /**
1388  * Return an integer identifying the discrete address space a register is
1389  * contained in.  A register is by definition fully contained in the single
1390  * reg_space it belongs to, so two registers with different reg_space ids are
1391  * guaranteed not to overlap.  Most register files are a single reg_space of
1392  * its own, only the VGRF and ATTR files are composed of multiple discrete
1393  * address spaces, one for each allocation and input attribute respectively.
1394  */
1395 static inline uint32_t
reg_space(const brw_reg & r)1396 reg_space(const brw_reg &r)
1397 {
1398    return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
1399 }
1400 
1401 /**
1402  * Return the base offset in bytes of a register relative to the start of its
1403  * reg_space().
1404  */
1405 static inline unsigned
reg_offset(const brw_reg & r)1406 reg_offset(const brw_reg &r)
1407 {
1408    return (r.file == ADDRESS || r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
1409           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
1410           (r.file == ADDRESS || r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
1411 }
1412 
1413 /**
1414  * Return the amount of padding in bytes left unused between individual
1415  * components of register \p r due to a (horizontal) stride value greater than
1416  * one, or zero if components are tightly packed in the register file.
1417  */
1418 static inline unsigned
reg_padding(const brw_reg & r)1419 reg_padding(const brw_reg &r)
1420 {
1421    const unsigned stride = ((r.file != ADDRESS &&
1422                              r.file != ARF &&
1423                              r.file != FIXED_GRF) ? r.stride :
1424                             r.hstride == 0 ? 0 :
1425                             1 << (r.hstride - 1));
1426    return (MAX2(1, stride) - 1) * brw_type_size_bytes(r.type);
1427 }
1428 
1429 /**
1430  * Return whether the register region starting at \p r and spanning \p dr
1431  * bytes could potentially overlap the register region starting at \p s and
1432  * spanning \p ds bytes.
1433  */
1434 static inline bool
regions_overlap(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1435 regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1436 {
1437    if (r.file != s.file)
1438       return false;
1439 
1440    if (r.file == VGRF) {
1441       return r.nr == s.nr &&
1442              !(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
1443    } else {
1444       return !(reg_offset(r) + dr <= reg_offset(s) ||
1445                reg_offset(s) + ds <= reg_offset(r));
1446    }
1447 }
1448 
1449 /**
1450  * Check that the register region given by r [r.offset, r.offset + dr[
1451  * is fully contained inside the register region given by s
1452  * [s.offset, s.offset + ds[.
1453  */
1454 static inline bool
region_contained_in(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1455 region_contained_in(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1456 {
1457    return reg_space(r) == reg_space(s) &&
1458           reg_offset(r) >= reg_offset(s) &&
1459           reg_offset(r) + dr <= reg_offset(s) + ds;
1460 }
1461 
1462 /**
1463  * Return whether the given register region is n-periodic, i.e. whether the
1464  * original region remains invariant after shifting it by \p n scalar
1465  * channels.
1466  */
1467 static inline bool
is_periodic(const brw_reg & reg,unsigned n)1468 is_periodic(const brw_reg &reg, unsigned n)
1469 {
1470    if (reg.file == BAD_FILE || reg.is_null()) {
1471       return true;
1472 
1473    } else if (reg.file == IMM) {
1474       const unsigned period = (reg.type == BRW_TYPE_UV ||
1475                                reg.type == BRW_TYPE_V ? 8 :
1476                                reg.type == BRW_TYPE_VF ? 4 :
1477                                1);
1478       return n % period == 0;
1479 
1480    } else if (reg.file == ADDRESS || reg.file == ARF || reg.file == FIXED_GRF) {
1481       const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
1482                                reg.vstride == 0 ? 1 << reg.width :
1483                                ~0);
1484       return n % period == 0;
1485 
1486    } else {
1487       return reg.stride == 0;
1488    }
1489 }
1490 
1491 static inline bool
is_uniform(const brw_reg & reg)1492 is_uniform(const brw_reg &reg)
1493 {
1494    return is_periodic(reg, 1);
1495 }
1496 
1497 /**
1498  * Get the specified 8-component quarter of a register.
1499  */
1500 static inline brw_reg
quarter(const brw_reg & reg,unsigned idx)1501 quarter(const brw_reg &reg, unsigned idx)
1502 {
1503    assert(idx < 4);
1504    return horiz_offset(reg, 8 * idx);
1505 }
1506 
1507 static inline brw_reg
horiz_stride(brw_reg reg,unsigned s)1508 horiz_stride(brw_reg reg, unsigned s)
1509 {
1510    reg.stride *= s;
1511    return reg;
1512 }
1513 
1514 static const brw_reg reg_undef;
1515 
1516 /*
1517  * Return the stride between channels of the specified register in
1518  * byte units, or ~0u if the region cannot be represented with a
1519  * single one-dimensional stride.
1520  */
1521 static inline unsigned
byte_stride(const brw_reg & reg)1522 byte_stride(const brw_reg &reg)
1523 {
1524    switch (reg.file) {
1525    case BAD_FILE:
1526    case UNIFORM:
1527    case IMM:
1528    case VGRF:
1529    case ATTR:
1530       return reg.stride * brw_type_size_bytes(reg.type);
1531    case ADDRESS:
1532    case ARF:
1533    case FIXED_GRF:
1534       if (reg.is_null()) {
1535          return 0;
1536       } else {
1537          const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1538          const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1539          const unsigned width = 1 << reg.width;
1540 
1541          if (width == 1) {
1542             return vstride * brw_type_size_bytes(reg.type);
1543          } else if (hstride * width == vstride) {
1544             return hstride * brw_type_size_bytes(reg.type);
1545          } else {
1546             return ~0u;
1547          }
1548       }
1549    default:
1550       unreachable("Invalid register file");
1551    }
1552 }
1553 
1554 #endif /* __cplusplus */
1555