• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
32 /** @file
33  *
34  * This file defines struct brw_reg, which is our representation for EU
35  * registers.  They're not a hardware specific format, just an abstraction
36  * that intends to capture the full flexibility of the hardware registers.
37  *
38  * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39  * the abstract brw_reg type into the actual hardware instruction encoding.
40  */
41 
42 #pragma once
43 
44 #include <stdbool.h>
45 #include "util/compiler.h"
46 #include "util/glheader.h"
47 #include "util/macros.h"
48 #include "util/rounding.h"
49 #include "util/u_math.h"
50 #include "brw_eu_defines.h"
51 #include "brw_reg_type.h"
52 
53 #ifdef __cplusplus
54 extern "C" {
55 #endif
56 
57 struct intel_device_info;
58 
59 /** Size of general purpose register space in REG_SIZE units */
60 #define BRW_MAX_GRF 128
61 #define XE2_MAX_GRF 256
62 #define XE3_MAX_GRF 512
63 
/**
 * BRW hardware swizzles.
 * Only defines XYZW to ensure it can be contained in 2 bits
 */
#define BRW_SWIZZLE_X 0
#define BRW_SWIZZLE_Y 1
#define BRW_SWIZZLE_Z 2
#define BRW_SWIZZLE_W 3

/* Pack four 2-bit channel selectors into one value; selector `a` occupies
 * bits 0-1, `b` bits 2-3, `c` bits 4-5 and `d` bits 6-7.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
/* Extract the 2-bit selector stored at position `idx` of a packed swizzle. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Named packed swizzles; NOOP and XYZW are the same identity mapping. */
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YXYX      BRW_SWIZZLE4(1,0,1,0)
#define BRW_SWIZZLE_XZXZ      BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_YWYW      BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
#define BRW_SWIZZLE_WZWZ      BRW_SWIZZLE4(3,2,3,2)
#define BRW_SWIZZLE_WZYX      BRW_SWIZZLE4(3,2,1,0)
#define BRW_SWIZZLE_XXZZ      BRW_SWIZZLE4(0,0,2,2)
#define BRW_SWIZZLE_YYWW      BRW_SWIZZLE4(1,1,3,3)
#define BRW_SWIZZLE_YXWZ      BRW_SWIZZLE4(1,0,3,2)

/* Shift the identity swizzle right/left by `comp` selector positions
 * (2 bits each).  Note the results are not masked to 8 bits.
 */
#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
97 
98 static inline bool
brw_is_single_value_swizzle(unsigned swiz)99 brw_is_single_value_swizzle(unsigned swiz)
100 {
101    return (swiz == BRW_SWIZZLE_XXXX ||
102            swiz == BRW_SWIZZLE_YYYY ||
103            swiz == BRW_SWIZZLE_ZZZZ ||
104            swiz == BRW_SWIZZLE_WWWW);
105 }
106 
/**
 * Compose two swizzles: the result is what you get by applying \p swz0 to
 * the output of \p swz1.  Arguments are ordered like function composition,
 * so component i of the result is swz1[swz0[i]].
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned sel[4];

   /* Look each outer selector up in the inner swizzle. */
   for (unsigned chan = 0; chan < 4; chan++)
      sel[chan] = BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, chan));

   return BRW_SWIZZLE4(sel[0], sel[1], sel[2], sel[3]);
}
121 
/**
 * Build an identity swizzle restricted to the channels enabled in \p mask.
 * Provided \p mask is non-zero, the result only references enabled
 * channels.  For any instruction OP and any mask,
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * is equivalent to the swizzle-free form:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Selector used for disabled channels: initially the lowest enabled
    * channel, afterwards the most recent enabled channel seen.
    */
   unsigned fill = (mask ? ffs(mask) - 1 : 0);
   unsigned packed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
         fill = chan;

      /* Each selector occupies 2 bits, channel 0 in the low bits. */
      packed |= fill << (chan * 2);
   }

   return packed;
}
146 
147 uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);
148 
/* Size in bytes (8 * 4 = 32) of one register unit as counted by brw_reg::nr;
 * byte_offset() carries sub-register offsets into nr in multiples of this.
 */
#define REG_SIZE (8*4)
150 
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 */
typedef struct brw_reg {
   /* Type/file/modifier bitfields (32 bits in total).  `bits` overlays all
    * of them so they can be compared as a single word, as brw_regs_equal()
    * does.
    */
   union {
      struct {
         enum brw_reg_type type:5;
         enum brw_reg_file file:3;
         unsigned negate:1;             /* source only */
         unsigned abs:1;                /* source only */
         unsigned address_mode:1;       /* relative addressing, hopefully! */
         unsigned pad0:15;
         unsigned subnr:6;              /* :1 in align16 */
      };
      uint32_t bits;
   };

   /** Offset from the start of the virtual register in bytes. */
   uint16_t offset;

   /** Register region horizontal stride of virtual registers */
   uint8_t stride;

   /* Either the register number plus region description, or (for
    * immediates) the literal value.  The scalar members overlay the
    * struct; brw_regs_equal() compares the whole thing through `u64`.
    */
   union {
      struct {
         unsigned nr;
         unsigned swizzle:8;      /* src only, align16 only */
         unsigned writemask:4;    /* dest only, align16 only */
         int  indirect_offset:10; /* relative addressing offset */
         unsigned vstride:4;      /* source only */
         unsigned width:3;        /* src only, align1 only */
         unsigned hstride:2;      /* align1 only */

         /**
          * Does this register represent a scalar value?
          *
          * Registers are allocated in SIMD8 parcels, but may be used to
          * represent convergent (i.e., scalar) values. As a destination, it
          * is written as SIMD8. As a source, it may be read as <8,8,1> in
          * SIMD8 instructions or <0,1,0> on other execution sizes.
          *
          * If the value represents a vector (e.g., a convergent load_uniform
          * of a vec4), it will be stored as multiple SIMD8 registers.
          */
         unsigned is_scalar:1;
      };

      double df;
      uint64_t u64;
      int64_t d64;
      float f;
      int   d;
      unsigned ud;
   };

#ifdef __cplusplus
   /* TODO: Remove this constructor to make this type a POD.  Need
    * to make sure that rest of compiler doesn't rely on type or
    * stride of BAD_FILE registers.
    */
   /* Default state: everything zeroed, then a UD-typed, stride-1 register
    * in BAD_FILE.
    */
   brw_reg() {
      memset((void*)this, 0, sizeof(*this));
      this->type = BRW_TYPE_UD;
      this->stride = 1;
      this->file = BAD_FILE;
   }

   bool equals(const brw_reg &r) const;
   bool negative_equals(const brw_reg &r) const;
   bool is_contiguous() const;

   bool is_zero() const;
   bool is_one() const;
   bool is_negative_one() const;
   bool is_null() const;
   bool is_accumulator() const;
   bool is_address() const;

   unsigned address_slot(unsigned byte_offset) const;

   /**
    * Return the size in bytes of a single logical component of the
    * register assuming the given execution width.
    */
   unsigned component_size(unsigned width) const;
#endif /* __cplusplus */
} brw_reg;
241 
242 static inline unsigned
phys_file(const struct brw_reg reg)243 phys_file(const struct brw_reg reg)
244 {
245    switch (reg.file) {
246    case ARF:
247    case FIXED_GRF:
248    case IMM:
249       return reg.file;
250 
251    case ADDRESS:
252       return ARF;
253 
254    default:
255       unreachable("register type should have been lowered");
256    }
257 }
258 
259 static inline unsigned
phys_nr(const struct intel_device_info * devinfo,const struct brw_reg reg)260 phys_nr(const struct intel_device_info *devinfo, const struct brw_reg reg)
261 {
262    if (devinfo->ver >= 20) {
263       if (reg.file == FIXED_GRF)
264          return reg.nr / 2;
265       else if (reg.file == ADDRESS)
266          return BRW_ARF_ADDRESS;
267       else if (reg.file == ARF &&
268                reg.nr >= BRW_ARF_ACCUMULATOR &&
269                reg.nr < BRW_ARF_FLAG)
270          return BRW_ARF_ACCUMULATOR + (reg.nr - BRW_ARF_ACCUMULATOR) / 2;
271       else
272          return reg.nr;
273    } else if (reg.file == ADDRESS) {
274       return BRW_ARF_ADDRESS;
275    } else {
276       return reg.nr;
277    }
278 }
279 
280 static inline unsigned
phys_subnr(const struct intel_device_info * devinfo,const struct brw_reg reg)281 phys_subnr(const struct intel_device_info *devinfo, const struct brw_reg reg)
282 {
283    if (devinfo->ver >= 20) {
284       if (reg.file == FIXED_GRF ||
285           (reg.file == ARF &&
286            reg.nr >= BRW_ARF_ACCUMULATOR &&
287            reg.nr < BRW_ARF_FLAG))
288          return (reg.nr & 1) * REG_SIZE + reg.subnr;
289       else
290          return reg.subnr;
291    } else {
292       return reg.subnr;
293    }
294 }
295 
296 static inline bool
brw_regs_equal(const struct brw_reg * a,const struct brw_reg * b)297 brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
298 {
299    return a->bits   == b->bits &&
300           a->u64    == b->u64 &&
301           a->offset == b->offset &&
302           a->stride == b->stride;
303 }
304 
305 static inline bool
brw_regs_negative_equal(const struct brw_reg * a,const struct brw_reg * b)306 brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
307 {
308    if (a->file == IMM) {
309       if (a->bits != b->bits)
310          return false;
311 
312       switch ((enum brw_reg_type) a->type) {
313       case BRW_TYPE_UQ:
314       case BRW_TYPE_Q:
315          return a->d64 == -b->d64;
316       case BRW_TYPE_DF:
317          return a->df == -b->df;
318       case BRW_TYPE_UD:
319       case BRW_TYPE_D:
320          return a->d == -b->d;
321       case BRW_TYPE_F:
322          return a->f == -b->f;
323       case BRW_TYPE_VF:
324          /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
325           * of -0).  There are occasions where 0 or -0 is used and the exact
326           * bit pattern is desired.  At the very least, changing this to allow
327           * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
328           */
329          return a->ud == (b->ud ^ 0x80808080);
330       case BRW_TYPE_UW:
331       case BRW_TYPE_W:
332       case BRW_TYPE_UV:
333       case BRW_TYPE_V:
334       case BRW_TYPE_HF:
335          /* FINISHME: Implement support for these types once there is
336           * something in the compiler that can generate them.  Until then,
337           * they cannot be tested.
338           */
339          return false;
340       case BRW_TYPE_UB:
341       case BRW_TYPE_B:
342       default:
343          unreachable("not reached");
344       }
345    } else {
346       struct brw_reg tmp = *a;
347 
348       tmp.negate = !tmp.negate;
349 
350       return brw_regs_equal(&tmp, b);
351    }
352 }
353 
354 static inline enum brw_reg_type
get_exec_type(const enum brw_reg_type type)355 get_exec_type(const enum brw_reg_type type)
356 {
357    switch (type) {
358    case BRW_TYPE_B:
359    case BRW_TYPE_V:
360       return BRW_TYPE_W;
361    case BRW_TYPE_UB:
362    case BRW_TYPE_UV:
363       return BRW_TYPE_UW;
364    case BRW_TYPE_VF:
365       return BRW_TYPE_F;
366    default:
367       return type;
368    }
369 }
370 
371 /**
372  * Return an integer type of the requested size and signedness.
373  */
374 static inline enum brw_reg_type
brw_int_type(unsigned sz,bool is_signed)375 brw_int_type(unsigned sz, bool is_signed)
376 {
377    switch (sz) {
378    case 1:
379       return (is_signed ? BRW_TYPE_B : BRW_TYPE_UB);
380    case 2:
381       return (is_signed ? BRW_TYPE_W : BRW_TYPE_UW);
382    case 4:
383       return (is_signed ? BRW_TYPE_D : BRW_TYPE_UD);
384    case 8:
385       return (is_signed ? BRW_TYPE_Q : BRW_TYPE_UQ);
386    default:
387       unreachable("Not reached.");
388    }
389 }
390 
391 /**
392  * Construct a brw_reg.
393  * \param file      one of the BRW_x_REGISTER_FILE values
394  * \param nr        register number/index
395  * \param subnr     register sub number
396  * \param negate    register negate modifier
397  * \param abs       register abs modifier
398  * \param type      one of BRW_TYPE_x
399  * \param vstride   one of BRW_VERTICAL_STRIDE_x
400  * \param width     one of BRW_WIDTH_x
401  * \param hstride   one of BRW_HORIZONTAL_STRIDE_x
402  * \param swizzle   one of BRW_SWIZZLE_x
403  * \param writemask WRITEMASK_X/Y/Z/W bitfield
404  */
405 static inline struct brw_reg
brw_make_reg(enum brw_reg_file file,unsigned nr,unsigned subnr,unsigned negate,unsigned abs,enum brw_reg_type type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)406 brw_make_reg(enum brw_reg_file file,
407              unsigned nr,
408              unsigned subnr,
409              unsigned negate,
410              unsigned abs,
411              enum brw_reg_type type,
412              unsigned vstride,
413              unsigned width,
414              unsigned hstride,
415              unsigned swizzle,
416              unsigned writemask)
417 {
418    struct brw_reg reg;
419    if (file == FIXED_GRF)
420       assert(nr < XE3_MAX_GRF);
421    else if (file == ARF)
422       assert(nr <= BRW_ARF_TIMESTAMP);
423 
424    reg.type = type;
425    reg.file = file;
426    reg.negate = negate;
427    reg.abs = abs;
428    reg.address_mode = BRW_ADDRESS_DIRECT;
429    reg.pad0 = 0;
430    reg.subnr = subnr * brw_type_size_bytes(type);
431    reg.nr = nr;
432 
433    /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
434     * set swizzle and writemask to W, as the lower bits of subnr will
435     * be lost when converted to align16.  This is probably too much to
436     * keep track of as you'd want it adjusted by suboffset(), etc.
437     * Perhaps fix up when converting to align16?
438     */
439    reg.swizzle = swizzle;
440    reg.writemask = writemask;
441    reg.indirect_offset = 0;
442    reg.vstride = vstride;
443    reg.width = width;
444    reg.hstride = hstride;
445    reg.is_scalar = 0;
446 
447    reg.offset = 0;
448    reg.stride = 1;
449    if (file == IMM &&
450        type != BRW_TYPE_V &&
451        type != BRW_TYPE_UV &&
452        type != BRW_TYPE_VF) {
453       reg.stride = 0;
454    }
455 
456    return reg;
457 }
458 
459 /** Construct float[16] register */
460 static inline struct brw_reg
brw_vec16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)461 brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
462 {
463    return brw_make_reg(file,
464                   nr,
465                   subnr,
466                   0,
467                   0,
468                   BRW_TYPE_F,
469                   BRW_VERTICAL_STRIDE_16,
470                   BRW_WIDTH_16,
471                   BRW_HORIZONTAL_STRIDE_1,
472                   BRW_SWIZZLE_XYZW,
473                   WRITEMASK_XYZW);
474 }
475 
476 /** Construct float[8] register */
477 static inline struct brw_reg
brw_vec8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)478 brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
479 {
480    return brw_make_reg(file,
481                   nr,
482                   subnr,
483                   0,
484                   0,
485                   BRW_TYPE_F,
486                   BRW_VERTICAL_STRIDE_8,
487                   BRW_WIDTH_8,
488                   BRW_HORIZONTAL_STRIDE_1,
489                   BRW_SWIZZLE_XYZW,
490                   WRITEMASK_XYZW);
491 }
492 
493 /** Construct float[4] register */
494 static inline struct brw_reg
brw_vec4_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)495 brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
496 {
497    return brw_make_reg(file,
498                   nr,
499                   subnr,
500                   0,
501                   0,
502                   BRW_TYPE_F,
503                   BRW_VERTICAL_STRIDE_4,
504                   BRW_WIDTH_4,
505                   BRW_HORIZONTAL_STRIDE_1,
506                   BRW_SWIZZLE_XYZW,
507                   WRITEMASK_XYZW);
508 }
509 
510 /** Construct float[2] register */
511 static inline struct brw_reg
brw_vec2_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)512 brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
513 {
514    return brw_make_reg(file,
515                   nr,
516                   subnr,
517                   0,
518                   0,
519                   BRW_TYPE_F,
520                   BRW_VERTICAL_STRIDE_2,
521                   BRW_WIDTH_2,
522                   BRW_HORIZONTAL_STRIDE_1,
523                   BRW_SWIZZLE_XYXY,
524                   WRITEMASK_XY);
525 }
526 
527 /** Construct float[1] register */
528 static inline struct brw_reg
brw_vec1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)529 brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
530 {
531    return brw_make_reg(file,
532                   nr,
533                   subnr,
534                   0,
535                   0,
536                   BRW_TYPE_F,
537                   BRW_VERTICAL_STRIDE_0,
538                   BRW_WIDTH_1,
539                   BRW_HORIZONTAL_STRIDE_0,
540                   BRW_SWIZZLE_XXXX,
541                   WRITEMASK_X);
542 }
543 
544 static inline struct brw_reg
brw_vecn_reg(unsigned width,enum brw_reg_file file,unsigned nr,unsigned subnr)545 brw_vecn_reg(unsigned width, enum brw_reg_file file,
546              unsigned nr, unsigned subnr)
547 {
548    switch (width) {
549    case 1:
550       return brw_vec1_reg(file, nr, subnr);
551    case 2:
552       return brw_vec2_reg(file, nr, subnr);
553    case 4:
554       return brw_vec4_reg(file, nr, subnr);
555    case 8:
556       return brw_vec8_reg(file, nr, subnr);
557    case 16:
558       return brw_vec16_reg(file, nr, subnr);
559    default:
560       unreachable("Invalid register width");
561    }
562 }
563 
564 static inline struct brw_reg
retype(struct brw_reg reg,enum brw_reg_type type)565 retype(struct brw_reg reg, enum brw_reg_type type)
566 {
567    reg.type = type;
568    return reg;
569 }
570 
571 static inline struct brw_reg
firsthalf(struct brw_reg reg)572 firsthalf(struct brw_reg reg)
573 {
574    return reg;
575 }
576 
577 static inline struct brw_reg
sechalf(struct brw_reg reg)578 sechalf(struct brw_reg reg)
579 {
580    if (reg.vstride)
581       reg.nr++;
582    return reg;
583 }
584 
585 static inline struct brw_reg
offset(struct brw_reg reg,unsigned delta)586 offset(struct brw_reg reg, unsigned delta)
587 {
588    reg.nr += delta;
589    return reg;
590 }
591 
592 
593 static inline struct brw_reg
byte_offset(struct brw_reg reg,unsigned bytes)594 byte_offset(struct brw_reg reg, unsigned bytes)
595 {
596    switch (reg.file) {
597    case BAD_FILE:
598       break;
599    case VGRF:
600    case ATTR:
601    case UNIFORM:
602       reg.offset += bytes;
603       break;
604    case ADDRESS:
605    case ARF:
606    case FIXED_GRF: {
607       const unsigned suboffset = reg.subnr + bytes;
608       reg.nr += suboffset / REG_SIZE;
609       reg.subnr = suboffset % REG_SIZE;
610       break;
611    }
612    case IMM:
613    default:
614       assert(bytes == 0);
615    }
616    return reg;
617 }
618 
619 static inline struct brw_reg
suboffset(struct brw_reg reg,unsigned delta)620 suboffset(struct brw_reg reg, unsigned delta)
621 {
622    return byte_offset(reg, delta * brw_type_size_bytes(reg.type));
623 }
624 
625 /** Construct unsigned word[16] register */
626 static inline struct brw_reg
brw_uw16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)627 brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
628 {
629    return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_TYPE_UW), subnr);
630 }
631 
632 /** Construct unsigned word[8] register */
633 static inline struct brw_reg
brw_uw8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)634 brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
635 {
636    return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_TYPE_UW), subnr);
637 }
638 
639 /** Construct unsigned word[1] register */
640 static inline struct brw_reg
brw_uw1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)641 brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
642 {
643    return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_TYPE_UW), subnr);
644 }
645 
646 static inline struct brw_reg
brw_ud8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)647 brw_ud8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
648 {
649    return retype(brw_vec8_reg(file, nr, subnr), BRW_TYPE_UD);
650 }
651 
652 static inline struct brw_reg
brw_ud1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)653 brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
654 {
655    return retype(brw_vec1_reg(file, nr, subnr), BRW_TYPE_UD);
656 }
657 
658 static inline struct brw_reg
brw_imm_reg(enum brw_reg_type type)659 brw_imm_reg(enum brw_reg_type type)
660 {
661    return brw_make_reg(IMM,
662                   0,
663                   0,
664                   0,
665                   0,
666                   type,
667                   BRW_VERTICAL_STRIDE_0,
668                   BRW_WIDTH_1,
669                   BRW_HORIZONTAL_STRIDE_0,
670                   0,
671                   0);
672 }
673 
674 /** Construct float immediate register */
675 static inline struct brw_reg
brw_imm_df(double df)676 brw_imm_df(double df)
677 {
678    struct brw_reg imm = brw_imm_reg(BRW_TYPE_DF);
679    imm.df = df;
680    return imm;
681 }
682 
683 static inline struct brw_reg
brw_imm_u64(uint64_t u64)684 brw_imm_u64(uint64_t u64)
685 {
686    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
687    imm.u64 = u64;
688    return imm;
689 }
690 
691 static inline struct brw_reg
brw_imm_f(float f)692 brw_imm_f(float f)
693 {
694    struct brw_reg imm = brw_imm_reg(BRW_TYPE_F);
695    imm.f = f;
696    return imm;
697 }
698 
699 /** Construct int64_t immediate register */
700 static inline struct brw_reg
brw_imm_q(int64_t q)701 brw_imm_q(int64_t q)
702 {
703    struct brw_reg imm = brw_imm_reg(BRW_TYPE_Q);
704    imm.d64 = q;
705    return imm;
706 }
707 
708 /** Construct int64_t immediate register */
709 static inline struct brw_reg
brw_imm_uq(uint64_t uq)710 brw_imm_uq(uint64_t uq)
711 {
712    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
713    imm.u64 = uq;
714    return imm;
715 }
716 
717 /** Construct integer immediate register */
718 static inline struct brw_reg
brw_imm_d(int d)719 brw_imm_d(int d)
720 {
721    struct brw_reg imm = brw_imm_reg(BRW_TYPE_D);
722    imm.d = d;
723    return imm;
724 }
725 
726 /** Construct uint immediate register */
727 static inline struct brw_reg
brw_imm_ud(unsigned ud)728 brw_imm_ud(unsigned ud)
729 {
730    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UD);
731    imm.ud = ud;
732    return imm;
733 }
734 
735 /** Construct ushort immediate register */
736 static inline struct brw_reg
brw_imm_uw(uint16_t uw)737 brw_imm_uw(uint16_t uw)
738 {
739    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UW);
740    imm.ud = uw | ((uint32_t)uw << 16);
741    return imm;
742 }
743 
744 /** Construct short immediate register */
745 static inline struct brw_reg
brw_imm_w(int16_t w)746 brw_imm_w(int16_t w)
747 {
748    struct brw_reg imm = brw_imm_reg(BRW_TYPE_W);
749    imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
750    return imm;
751 }
752 
753 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
754  * numbers alias with _V and _VF below:
755  */
756 
757 /** Construct vector of eight signed half-byte values */
758 static inline struct brw_reg
brw_imm_v(unsigned v)759 brw_imm_v(unsigned v)
760 {
761    struct brw_reg imm = brw_imm_reg(BRW_TYPE_V);
762    imm.ud = v;
763    return imm;
764 }
765 
766 /** Construct vector of eight unsigned half-byte values */
767 static inline struct brw_reg
brw_imm_uv(unsigned uv)768 brw_imm_uv(unsigned uv)
769 {
770    struct brw_reg imm = brw_imm_reg(BRW_TYPE_UV);
771    imm.ud = uv;
772    return imm;
773 }
774 
775 /** Construct vector of four 8-bit float values */
776 static inline struct brw_reg
brw_imm_vf(unsigned v)777 brw_imm_vf(unsigned v)
778 {
779    struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
780    imm.ud = v;
781    return imm;
782 }
783 
784 static inline struct brw_reg
brw_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)785 brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
786 {
787    struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
788    imm.vstride = BRW_VERTICAL_STRIDE_0;
789    imm.width = BRW_WIDTH_4;
790    imm.hstride = BRW_HORIZONTAL_STRIDE_1;
791    imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
792    return imm;
793 }
794 
795 
796 static inline struct brw_reg
brw_address(struct brw_reg reg)797 brw_address(struct brw_reg reg)
798 {
799    return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
800 }
801 
802 /** Construct float[1] general-purpose register */
803 static inline struct brw_reg
brw_vec1_grf(unsigned nr,unsigned subnr)804 brw_vec1_grf(unsigned nr, unsigned subnr)
805 {
806    return brw_vec1_reg(FIXED_GRF, nr, subnr);
807 }
808 
809 static inline struct brw_reg
xe2_vec1_grf(unsigned nr,unsigned subnr)810 xe2_vec1_grf(unsigned nr, unsigned subnr)
811 {
812    return brw_vec1_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
813 }
814 
815 /** Construct float[2] general-purpose register */
816 static inline struct brw_reg
brw_vec2_grf(unsigned nr,unsigned subnr)817 brw_vec2_grf(unsigned nr, unsigned subnr)
818 {
819    return brw_vec2_reg(FIXED_GRF, nr, subnr);
820 }
821 
822 static inline struct brw_reg
xe2_vec2_grf(unsigned nr,unsigned subnr)823 xe2_vec2_grf(unsigned nr, unsigned subnr)
824 {
825    return brw_vec2_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
826 }
827 
828 /** Construct float[4] general-purpose register */
829 static inline struct brw_reg
brw_vec4_grf(unsigned nr,unsigned subnr)830 brw_vec4_grf(unsigned nr, unsigned subnr)
831 {
832    return brw_vec4_reg(FIXED_GRF, nr, subnr);
833 }
834 
835 static inline struct brw_reg
xe2_vec4_grf(unsigned nr,unsigned subnr)836 xe2_vec4_grf(unsigned nr, unsigned subnr)
837 {
838    return brw_vec4_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
839 }
840 
841 /** Construct float[8] general-purpose register */
842 static inline struct brw_reg
brw_vec8_grf(unsigned nr,unsigned subnr)843 brw_vec8_grf(unsigned nr, unsigned subnr)
844 {
845    return brw_vec8_reg(FIXED_GRF, nr, subnr);
846 }
847 
848 static inline struct brw_reg
xe2_vec8_grf(unsigned nr,unsigned subnr)849 xe2_vec8_grf(unsigned nr, unsigned subnr)
850 {
851    return brw_vec8_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
852 }
853 
854 /** Construct float[16] general-purpose register */
855 static inline struct brw_reg
brw_vec16_grf(unsigned nr,unsigned subnr)856 brw_vec16_grf(unsigned nr, unsigned subnr)
857 {
858    return brw_vec16_reg(FIXED_GRF, nr, subnr);
859 }
860 
861 static inline struct brw_reg
xe2_vec16_grf(unsigned nr,unsigned subnr)862 xe2_vec16_grf(unsigned nr, unsigned subnr)
863 {
864    return brw_vec16_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
865 }
866 
867 static inline struct brw_reg
brw_vecn_grf(unsigned width,unsigned nr,unsigned subnr)868 brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
869 {
870    return brw_vecn_reg(width, FIXED_GRF, nr, subnr);
871 }
872 
873 static inline struct brw_reg
xe2_vecn_grf(unsigned width,unsigned nr,unsigned subnr)874 xe2_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
875 {
876    return brw_vecn_reg(width, FIXED_GRF, nr + subnr / 8, subnr % 8);
877 }
878 
879 static inline struct brw_reg
brw_uw1_grf(unsigned nr,unsigned subnr)880 brw_uw1_grf(unsigned nr, unsigned subnr)
881 {
882    return brw_uw1_reg(FIXED_GRF, nr, subnr);
883 }
884 
885 static inline struct brw_reg
brw_uw8_grf(unsigned nr,unsigned subnr)886 brw_uw8_grf(unsigned nr, unsigned subnr)
887 {
888    return brw_uw8_reg(FIXED_GRF, nr, subnr);
889 }
890 
891 static inline struct brw_reg
brw_uw16_grf(unsigned nr,unsigned subnr)892 brw_uw16_grf(unsigned nr, unsigned subnr)
893 {
894    return brw_uw16_reg(FIXED_GRF, nr, subnr);
895 }
896 
897 static inline struct brw_reg
brw_ud8_grf(unsigned nr,unsigned subnr)898 brw_ud8_grf(unsigned nr, unsigned subnr)
899 {
900    return brw_ud8_reg(FIXED_GRF, nr, subnr);
901 }
902 
903 static inline struct brw_reg
brw_ud1_grf(unsigned nr,unsigned subnr)904 brw_ud1_grf(unsigned nr, unsigned subnr)
905 {
906    return brw_ud1_reg(FIXED_GRF, nr, subnr);
907 }
908 
909 
910 /** Construct null register (usually used for setting condition codes) */
911 static inline struct brw_reg
brw_null_reg(void)912 brw_null_reg(void)
913 {
914    return brw_vec8_reg(ARF, BRW_ARF_NULL, 0);
915 }
916 
917 static inline struct brw_reg
brw_null_vec(unsigned width)918 brw_null_vec(unsigned width)
919 {
920    return brw_vecn_reg(width, ARF, BRW_ARF_NULL, 0);
921 }
922 
923 static inline struct brw_reg
brw_address_reg(unsigned subnr)924 brw_address_reg(unsigned subnr)
925 {
926    return brw_uw1_reg(ADDRESS, 0, subnr);
927 }
928 
929 static inline struct brw_reg
brw_tdr_reg(void)930 brw_tdr_reg(void)
931 {
932    return brw_uw1_reg(ARF, BRW_ARF_TDR, 0);
933 }
934 
935 /* If/else instructions break in align16 mode if writemask & swizzle
936  * aren't xyzw.  This goes against the convention for other scalar
937  * regs:
938  */
939 static inline struct brw_reg
brw_ip_reg(void)940 brw_ip_reg(void)
941 {
942    return brw_make_reg(ARF,
943                   BRW_ARF_IP,
944                   0,
945                   0,
946                   0,
947                   BRW_TYPE_UD,
948                   BRW_VERTICAL_STRIDE_4, /* ? */
949                   BRW_WIDTH_1,
950                   BRW_HORIZONTAL_STRIDE_0,
951                   BRW_SWIZZLE_XYZW, /* NOTE! */
952                   WRITEMASK_XYZW); /* NOTE! */
953 }
954 
955 static inline struct brw_reg
brw_notification_reg(void)956 brw_notification_reg(void)
957 {
958    return brw_make_reg(ARF,
959                   BRW_ARF_NOTIFICATION_COUNT,
960                   0,
961                   0,
962                   0,
963                   BRW_TYPE_UD,
964                   BRW_VERTICAL_STRIDE_0,
965                   BRW_WIDTH_1,
966                   BRW_HORIZONTAL_STRIDE_0,
967                   BRW_SWIZZLE_XXXX,
968                   WRITEMASK_X);
969 }
970 
971 static inline struct brw_reg
brw_cr0_reg(unsigned subnr)972 brw_cr0_reg(unsigned subnr)
973 {
974    return brw_ud1_reg(ARF, BRW_ARF_CONTROL, subnr);
975 }
976 
977 static inline struct brw_reg
brw_sr0_reg(unsigned subnr)978 brw_sr0_reg(unsigned subnr)
979 {
980    return brw_ud1_reg(ARF, BRW_ARF_STATE, subnr);
981 }
982 
983 static inline struct brw_reg
brw_acc_reg(unsigned width)984 brw_acc_reg(unsigned width)
985 {
986    return brw_vecn_reg(width, ARF,
987                        BRW_ARF_ACCUMULATOR, 0);
988 }
989 
990 static inline struct brw_reg
brw_flag_reg(int reg,int subreg)991 brw_flag_reg(int reg, int subreg)
992 {
993    return brw_uw1_reg(ARF,
994                       BRW_ARF_FLAG + reg, subreg);
995 }
996 
997 static inline struct brw_reg
brw_flag_subreg(unsigned subreg)998 brw_flag_subreg(unsigned subreg)
999 {
1000    return brw_uw1_reg(ARF,
1001                       BRW_ARF_FLAG + subreg / 2, subreg % 2);
1002 }
1003 
1004 /**
1005  * Return the mask register present in Gfx4-5, or the related register present
1006  * in Gfx7.5 and later hardware referred to as "channel enable" register in
1007  * the documentation.
1008  */
1009 static inline struct brw_reg
brw_mask_reg(unsigned subnr)1010 brw_mask_reg(unsigned subnr)
1011 {
1012    return brw_uw1_reg(ARF, BRW_ARF_MASK, subnr);
1013 }
1014 
1015 static inline struct brw_reg
brw_vmask_reg()1016 brw_vmask_reg()
1017 {
1018    return brw_sr0_reg(3);
1019 }
1020 
1021 static inline struct brw_reg
brw_dmask_reg()1022 brw_dmask_reg()
1023 {
1024    return brw_sr0_reg(2);
1025 }
1026 
1027 static inline struct brw_reg
brw_vgrf(unsigned nr,enum brw_reg_type type)1028 brw_vgrf(unsigned nr, enum brw_reg_type type)
1029 {
1030    struct brw_reg reg = {};
1031    reg.file = VGRF;
1032    reg.nr = nr;
1033    reg.type = type;
1034    reg.stride = 1;
1035    return reg;
1036 }
1037 
1038 static inline struct brw_reg
brw_attr_reg(unsigned nr,enum brw_reg_type type)1039 brw_attr_reg(unsigned nr, enum brw_reg_type type)
1040 {
1041    struct brw_reg reg = {};
1042    reg.file = ATTR;
1043    reg.nr = nr;
1044    reg.type = type;
1045    reg.stride = 1;
1046    return reg;
1047 }
1048 
1049 static inline struct brw_reg
brw_uniform_reg(unsigned nr,enum brw_reg_type type)1050 brw_uniform_reg(unsigned nr, enum brw_reg_type type)
1051 {
1052    struct brw_reg reg = {};
1053    reg.file = UNIFORM;
1054    reg.nr = nr;
1055    reg.type = type;
1056    reg.stride = 0;
1057    return reg;
1058 }
1059 
/* Map 0 and the powers of two 1..32 to the codes 0..6 (0 -> 0, 1 -> 1,
 * 2 -> 2, 4 -> 3, 8 -> 4, 16 -> 5, 32 -> 6).  Any other value maps to 0.
 *
 * This is almost always called with a numeric constant argument, so it is
 * written as a flat chain of comparisons that is easy to evaluate at
 * compile time.
 */
static inline unsigned cvt(unsigned val)
{
   return val == 1  ? 1 :
          val == 2  ? 2 :
          val == 4  ? 3 :
          val == 8  ? 4 :
          val == 16 ? 5 :
          val == 32 ? 6 :
          0;
}
1076 
1077 static inline struct brw_reg
stride(struct brw_reg reg,unsigned vstride,unsigned width,unsigned hstride)1078 stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
1079 {
1080    reg.vstride = cvt(vstride);
1081    reg.width = cvt(width) - 1;
1082    reg.hstride = cvt(hstride);
1083    return reg;
1084 }
1085 
1086 /**
1087  * Multiply the vertical and horizontal stride of a register by the given
1088  * factor \a s.
1089  */
1090 static inline struct brw_reg
spread(struct brw_reg reg,unsigned s)1091 spread(struct brw_reg reg, unsigned s)
1092 {
1093    if (s) {
1094       assert(util_is_power_of_two_nonzero(s));
1095 
1096       if (reg.hstride)
1097          reg.hstride += cvt(s) - 1;
1098 
1099       if (reg.vstride)
1100          reg.vstride += cvt(s) - 1;
1101 
1102       return reg;
1103    } else {
1104       return stride(reg, 0, 1, 0);
1105    }
1106 }
1107 
1108 /**
1109  * Reinterpret each channel of register \p reg as a vector of values of the
1110  * given smaller type and take the i-th subcomponent from each.
1111  */
1112 static inline struct brw_reg
subscript(struct brw_reg reg,enum brw_reg_type type,unsigned i)1113 subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1114 {
1115    assert((i + 1) * brw_type_size_bytes(type) <= brw_type_size_bytes(reg.type));
1116 
1117    if (reg.file == ARF || reg.file == FIXED_GRF) {
1118       /* The stride is encoded inconsistently for fixed GRF and ARF registers
1119        * as the log2 of the actual vertical and horizontal strides.
1120        */
1121       const int delta = util_logbase2(brw_type_size_bytes(reg.type)) -
1122                         util_logbase2(brw_type_size_bytes(type));
1123       reg.hstride += (reg.hstride ? delta : 0);
1124       reg.vstride += (reg.vstride ? delta : 0);
1125 
1126    } else if (reg.file == IMM) {
1127       unsigned bit_size = brw_type_size_bits(type);
1128       reg.u64 >>= i * bit_size;
1129       reg.u64 &= BITFIELD64_MASK(bit_size);
1130       if (bit_size <= 16)
1131          reg.u64 |= reg.u64 << 16;
1132       return retype(reg, type);
1133    } else {
1134       reg.stride *= brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
1135    }
1136 
1137    return byte_offset(retype(reg, type), i * brw_type_size_bytes(type));
1138 }
1139 
1140 static inline struct brw_reg
vec16(struct brw_reg reg)1141 vec16(struct brw_reg reg)
1142 {
1143    return stride(reg, 16,16,1);
1144 }
1145 
1146 static inline struct brw_reg
vec8(struct brw_reg reg)1147 vec8(struct brw_reg reg)
1148 {
1149    return stride(reg, 8,8,1);
1150 }
1151 
1152 static inline struct brw_reg
vec4(struct brw_reg reg)1153 vec4(struct brw_reg reg)
1154 {
1155    return stride(reg, 4,4,1);
1156 }
1157 
1158 static inline struct brw_reg
vec2(struct brw_reg reg)1159 vec2(struct brw_reg reg)
1160 {
1161    return stride(reg, 2,2,1);
1162 }
1163 
1164 static inline struct brw_reg
vec1(struct brw_reg reg)1165 vec1(struct brw_reg reg)
1166 {
1167    return stride(reg, 0,1,0);
1168 }
1169 
1170 
1171 static inline struct brw_reg
get_element(struct brw_reg reg,unsigned elt)1172 get_element(struct brw_reg reg, unsigned elt)
1173 {
1174    return vec1(suboffset(reg, elt));
1175 }
1176 
1177 static inline struct brw_reg
get_element_ud(struct brw_reg reg,unsigned elt)1178 get_element_ud(struct brw_reg reg, unsigned elt)
1179 {
1180    return vec1(suboffset(retype(reg, BRW_TYPE_UD), elt));
1181 }
1182 
1183 static inline struct brw_reg
get_element_d(struct brw_reg reg,unsigned elt)1184 get_element_d(struct brw_reg reg, unsigned elt)
1185 {
1186    return vec1(suboffset(retype(reg, BRW_TYPE_D), elt));
1187 }
1188 
1189 static inline struct brw_reg
brw_swizzle(struct brw_reg reg,unsigned swz)1190 brw_swizzle(struct brw_reg reg, unsigned swz)
1191 {
1192    if (reg.file == IMM)
1193       reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1194    else
1195       reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1196 
1197    return reg;
1198 }
1199 
1200 static inline struct brw_reg
brw_writemask(struct brw_reg reg,unsigned mask)1201 brw_writemask(struct brw_reg reg, unsigned mask)
1202 {
1203    assert(reg.file != IMM);
1204    reg.writemask &= mask;
1205    return reg;
1206 }
1207 
1208 static inline struct brw_reg
brw_set_writemask(struct brw_reg reg,unsigned mask)1209 brw_set_writemask(struct brw_reg reg, unsigned mask)
1210 {
1211    assert(reg.file != IMM);
1212    reg.writemask = mask;
1213    return reg;
1214 }
1215 
/**
 * Return a mask with the low \p n bits set (e.g. 4 -> 0xf).
 *
 * The shift is done on an unsigned constant so that it never overflows a
 * signed int; \p n must still be smaller than the bit width of unsigned.
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   return (1u << n) - 1u;
}
1221 
/**
 * Return a mask of \p n consecutive bits starting at bit
 * \p first_component (e.g. n = 2, first_component = 1 -> 0x6).
 *
 * The components must fit in four channels.  The shift is done on an
 * unsigned constant so that it never overflows a signed int.
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);
   return ((1u << n) - 1u) << first_component;
}
1228 
1229 static inline struct brw_reg
negate(struct brw_reg reg)1230 negate(struct brw_reg reg)
1231 {
1232    reg.negate ^= 1;
1233    return reg;
1234 }
1235 
1236 static inline struct brw_reg
brw_abs(struct brw_reg reg)1237 brw_abs(struct brw_reg reg)
1238 {
1239    reg.abs = 1;
1240    reg.negate = 0;
1241    return reg;
1242 }
1243 
1244 /************************************************************************/
1245 
1246 static inline struct brw_reg
brw_vec1_indirect(unsigned subnr,int offset)1247 brw_vec1_indirect(unsigned subnr, int offset)
1248 {
1249    struct brw_reg reg =  brw_vec1_grf(0, 0);
1250    reg.subnr = subnr;
1251    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1252    reg.indirect_offset = offset;
1253    return reg;
1254 }
1255 
1256 static inline struct brw_reg
brw_VxH_indirect(unsigned subnr,int offset)1257 brw_VxH_indirect(unsigned subnr, int offset)
1258 {
1259    struct brw_reg reg = brw_vec1_grf(0, 0);
1260    reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1261    reg.subnr = subnr;
1262    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1263    reg.indirect_offset = offset;
1264    return reg;
1265 }
1266 
1267 static inline bool
region_matches(struct brw_reg reg,enum brw_vertical_stride v,enum brw_width w,enum brw_horizontal_stride h)1268 region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1269                enum brw_width w, enum brw_horizontal_stride h)
1270 {
1271    return reg.vstride == v &&
1272           reg.width == w &&
1273           reg.hstride == h;
1274 }
1275 
/**
 * True when \p reg has vertical stride 0, width 1 and horizontal stride 0,
 * as checked by region_matches().
 */
#define has_scalar_region(reg) \
   region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
                  BRW_HORIZONTAL_STRIDE_0)
1279 
1280 /**
1281  * Return the size in bytes per data element of register \p reg on the
1282  * corresponding register file.
1283  */
1284 static inline unsigned
element_sz(struct brw_reg reg)1285 element_sz(struct brw_reg reg)
1286 {
1287    if (reg.file == IMM || has_scalar_region(reg)) {
1288       return brw_type_size_bytes(reg.type);
1289 
1290    } else if (reg.width == BRW_WIDTH_1 &&
1291               reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
1292       assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
1293       return brw_type_size_bytes(reg.type) << (reg.vstride - 1);
1294 
1295    } else {
1296       assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
1297       assert(reg.vstride == reg.hstride + reg.width);
1298       return brw_type_size_bytes(reg.type) << (reg.hstride - 1);
1299    }
1300 }
1301 
/* brw_packed_float.c */
/* NOTE(review): presumably these convert between a float and the packed
 * 8-bit "VF" immediate encoding -- confirm against brw_packed_float.c.
 */
int brw_float_to_vf(float f);
float brw_vf_to_float(unsigned char vf);

/* Helpers that take an immediate register by pointer and return a bool.
 * NOTE(review): presumably they modify *reg in place and the return value
 * reports whether the operation could be applied -- confirm at the
 * definitions, which are not part of this header.
 */
bool brw_reg_saturate_immediate(brw_reg *reg);
bool brw_reg_negate_immediate(brw_reg *reg);
bool brw_reg_abs_immediate(brw_reg *reg);
1309 
1310 #ifdef __cplusplus
1311 }
1312 #endif
1313 
1314 #ifdef __cplusplus
1315 
1316 static inline brw_reg
horiz_offset(const brw_reg & reg,unsigned delta)1317 horiz_offset(const brw_reg &reg, unsigned delta)
1318 {
1319    switch (reg.file) {
1320    case BAD_FILE:
1321    case UNIFORM:
1322    case IMM:
1323       /* These only have a single component that is implicitly splatted.  A
1324        * horizontal offset should be a harmless no-op.
1325        * XXX - Handle vector immediates correctly.
1326        */
1327       return reg;
1328    case VGRF:
1329    case ATTR:
1330       return byte_offset(reg, delta * reg.stride * brw_type_size_bytes(reg.type));
1331    case ADDRESS:
1332    case ARF:
1333    case FIXED_GRF:
1334       if (reg.is_null()) {
1335          return reg;
1336       } else {
1337          const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1338          const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1339          const unsigned width = 1 << reg.width;
1340 
1341          if (delta % width == 0) {
1342             return byte_offset(reg, delta / width * vstride * brw_type_size_bytes(reg.type));
1343          } else {
1344             assert(vstride == hstride * width);
1345             return byte_offset(reg, delta * hstride * brw_type_size_bytes(reg.type));
1346          }
1347       }
1348    }
1349    unreachable("Invalid register file");
1350 }
1351 
1352 static inline brw_reg
offset(brw_reg reg,unsigned width,unsigned delta)1353 offset(brw_reg reg, unsigned width, unsigned delta)
1354 {
1355    switch (reg.file) {
1356    case BAD_FILE:
1357       break;
1358    case ADDRESS:
1359    case ARF:
1360    case FIXED_GRF:
1361    case VGRF:
1362    case ATTR:
1363    case UNIFORM:
1364       return byte_offset(reg, delta * reg.component_size(width));
1365    case IMM:
1366       assert(delta == 0);
1367    }
1368    return reg;
1369 }
1370 
1371 /**
1372  * Get the scalar channel of \p reg given by \p idx and replicate it to all
1373  * channels of the result.
1374  */
1375 static inline brw_reg
component(brw_reg reg,unsigned idx)1376 component(brw_reg reg, unsigned idx)
1377 {
1378    reg = horiz_offset(reg, idx);
1379    reg.stride = 0;
1380    if (reg.file == ARF || reg.file == FIXED_GRF) {
1381       reg.vstride = BRW_VERTICAL_STRIDE_0;
1382       reg.width = BRW_WIDTH_1;
1383       reg.hstride = BRW_HORIZONTAL_STRIDE_0;
1384    }
1385    return reg;
1386 }
1387 
1388 /**
1389  * Return an integer identifying the discrete address space a register is
1390  * contained in.  A register is by definition fully contained in the single
1391  * reg_space it belongs to, so two registers with different reg_space ids are
1392  * guaranteed not to overlap.  Most register files are a single reg_space of
1393  * its own, only the VGRF and ATTR files are composed of multiple discrete
1394  * address spaces, one for each allocation and input attribute respectively.
1395  */
1396 static inline uint32_t
reg_space(const brw_reg & r)1397 reg_space(const brw_reg &r)
1398 {
1399    return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
1400 }
1401 
1402 /**
1403  * Return the base offset in bytes of a register relative to the start of its
1404  * reg_space().
1405  */
1406 static inline unsigned
reg_offset(const brw_reg & r)1407 reg_offset(const brw_reg &r)
1408 {
1409    return (r.file == ADDRESS || r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
1410           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
1411           (r.file == ADDRESS || r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
1412 }
1413 
1414 /**
1415  * Return the amount of padding in bytes left unused between individual
1416  * components of register \p r due to a (horizontal) stride value greater than
1417  * one, or zero if components are tightly packed in the register file.
1418  */
1419 static inline unsigned
reg_padding(const brw_reg & r)1420 reg_padding(const brw_reg &r)
1421 {
1422    const unsigned stride = ((r.file != ADDRESS &&
1423                              r.file != ARF &&
1424                              r.file != FIXED_GRF) ? r.stride :
1425                             r.hstride == 0 ? 0 :
1426                             1 << (r.hstride - 1));
1427    return (MAX2(1, stride) - 1) * brw_type_size_bytes(r.type);
1428 }
1429 
1430 /**
1431  * Return whether the register region starting at \p r and spanning \p dr
1432  * bytes could potentially overlap the register region starting at \p s and
1433  * spanning \p ds bytes.
1434  */
1435 static inline bool
regions_overlap(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1436 regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1437 {
1438    if (r.file != s.file)
1439       return false;
1440 
1441    if (r.file == VGRF) {
1442       return r.nr == s.nr &&
1443              !(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
1444    } else {
1445       return !(reg_offset(r) + dr <= reg_offset(s) ||
1446                reg_offset(s) + ds <= reg_offset(r));
1447    }
1448 }
1449 
1450 /**
1451  * Check that the register region given by r [r.offset, r.offset + dr[
1452  * is fully contained inside the register region given by s
1453  * [s.offset, s.offset + ds[.
1454  */
1455 static inline bool
region_contained_in(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1456 region_contained_in(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1457 {
1458    return reg_space(r) == reg_space(s) &&
1459           reg_offset(r) >= reg_offset(s) &&
1460           reg_offset(r) + dr <= reg_offset(s) + ds;
1461 }
1462 
1463 /**
1464  * Return whether the given register region is n-periodic, i.e. whether the
1465  * original region remains invariant after shifting it by \p n scalar
1466  * channels.
1467  */
1468 static inline bool
is_periodic(const brw_reg & reg,unsigned n)1469 is_periodic(const brw_reg &reg, unsigned n)
1470 {
1471    if (reg.file == BAD_FILE || reg.is_null()) {
1472       return true;
1473 
1474    } else if (reg.file == IMM) {
1475       const unsigned period = (reg.type == BRW_TYPE_UV ||
1476                                reg.type == BRW_TYPE_V ? 8 :
1477                                reg.type == BRW_TYPE_VF ? 4 :
1478                                1);
1479       return n % period == 0;
1480 
1481    } else if (reg.file == ADDRESS || reg.file == ARF || reg.file == FIXED_GRF) {
1482       const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
1483                                reg.vstride == 0 ? 1 << reg.width :
1484                                ~0);
1485       return n % period == 0;
1486 
1487    } else {
1488       return reg.stride == 0;
1489    }
1490 }
1491 
1492 static inline bool
is_uniform(const brw_reg & reg)1493 is_uniform(const brw_reg &reg)
1494 {
1495    return is_periodic(reg, 1);
1496 }
1497 
1498 /**
1499  * Get the specified 8-component quarter of a register.
1500  */
1501 static inline brw_reg
quarter(const brw_reg & reg,unsigned idx)1502 quarter(const brw_reg &reg, unsigned idx)
1503 {
1504    assert(idx < 4);
1505    return horiz_offset(reg, 8 * idx);
1506 }
1507 
1508 static inline brw_reg
horiz_stride(brw_reg reg,unsigned s)1509 horiz_stride(brw_reg reg, unsigned s)
1510 {
1511    reg.stride *= s;
1512    return reg;
1513 }
1514 
/* File-local, default-initialized register constant.
 * NOTE(review): presumably the placeholder for "no register" -- confirm
 * against the default constructor of brw_reg.
 */
static const brw_reg reg_undef;
1516 
1517 /*
1518  * Return the stride between channels of the specified register in
1519  * byte units, or ~0u if the region cannot be represented with a
1520  * single one-dimensional stride.
1521  */
1522 static inline unsigned
byte_stride(const brw_reg & reg)1523 byte_stride(const brw_reg &reg)
1524 {
1525    switch (reg.file) {
1526    case BAD_FILE:
1527    case UNIFORM:
1528    case IMM:
1529    case VGRF:
1530    case ATTR:
1531       return reg.stride * brw_type_size_bytes(reg.type);
1532    case ADDRESS:
1533    case ARF:
1534    case FIXED_GRF:
1535       if (reg.is_null()) {
1536          return 0;
1537       } else {
1538          const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1539          const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1540          const unsigned width = 1 << reg.width;
1541 
1542          if (width == 1) {
1543             return vstride * brw_type_size_bytes(reg.type);
1544          } else if (hstride * width == vstride) {
1545             return hstride * brw_type_size_bytes(reg.type);
1546          } else {
1547             return ~0u;
1548          }
1549       }
1550    default:
1551       unreachable("Invalid register file");
1552    }
1553 }
1554 
1555 #endif /* __cplusplus */
1556