1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 /** @file
33 *
34 * This file defines struct brw_reg, which is our representation for EU
35 * registers. They're not a hardware specific format, just an abstraction
36 * that intends to capture the full flexibility of the hardware registers.
37 *
38 * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39 * the abstract brw_reg type into the actual hardware instruction encoding.
40 */
41
42 #pragma once
43
44 #include <stdbool.h>
45 #include "util/compiler.h"
46 #include "util/glheader.h"
47 #include "util/macros.h"
48 #include "util/rounding.h"
49 #include "util/u_math.h"
50 #include "brw_eu_defines.h"
51 #include "brw_reg_type.h"
52
53 #ifdef __cplusplus
54 extern "C" {
55 #endif
56
57 struct intel_device_info;
58
/**
 * Size of general purpose register space in REG_SIZE units.
 *
 * XE3_MAX_GRF is the bound brw_make_reg() asserts FIXED_GRF register numbers
 * against.  NOTE(review): the smaller limits presumably apply to older
 * hardware generations -- confirm against their users.
 */
#define BRW_MAX_GRF 128
#define XE2_MAX_GRF 256
#define XE3_MAX_GRF 512
63
/**
 * BRW hardware swizzles.
 * Only defines XYZW to ensure it can be contained in 2 bits
 */
#define BRW_SWIZZLE_X 0
#define BRW_SWIZZLE_Y 1
#define BRW_SWIZZLE_Z 2
#define BRW_SWIZZLE_W 3

/* Pack four 2-bit channel selectors into one value: a occupies bits 1:0,
 * b bits 3:2, c bits 5:4 and d bits 7:6.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
/* Extract the 2-bit channel selector number idx from a packed swizzle. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Commonly used packed swizzles.  NOOP and XYZW are the same value. */
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YXYX BRW_SWIZZLE4(1,0,1,0)
#define BRW_SWIZZLE_XZXZ BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_YWYW BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3)
#define BRW_SWIZZLE_WZWZ BRW_SWIZZLE4(3,2,3,2)
#define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0)
#define BRW_SWIZZLE_XXZZ BRW_SWIZZLE4(0,0,2,2)
#define BRW_SWIZZLE_YYWW BRW_SWIZZLE4(1,1,3,3)
#define BRW_SWIZZLE_YXWZ BRW_SWIZZLE4(1,0,3,2)

/* The identity swizzle shifted down (INPUT) or up (OUTPUT) by comp selector
 * positions.  Neither result is masked, so the OUTPUT form can have bits set
 * above bit 7 when comp is non-zero.
 */
#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
97
/**
 * Return true if \p swiz replicates one channel to all four positions,
 * i.e. it is one of XXXX, YYYY, ZZZZ or WWWW.
 */
static inline bool
brw_is_single_value_swizzle(unsigned swiz)
{
   /* Broadcast the first selector and see whether that reproduces the
    * whole swizzle.
    */
   const unsigned first = BRW_GET_SWZ(swiz, 0);

   return swiz == BRW_SWIZZLE4(first, first, first, first);
}
106
/**
 * Compute the swizzle obtained from the application of \p swz0 on the result
 * of \p swz1.  The argument ordering is expected to match function
 * composition.
 *
 * Each output selector is found by looking up, in \p swz1, the channel that
 * \p swz0 selects for that position.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned picked = BRW_GET_SWZ(swz0, chan);

      composed |= BRW_GET_SWZ(swz1, picked) << (chan * 2);
   }

   return composed;
}
121
/**
 * Construct an identity swizzle for the set of enabled channels given by \p
 * mask.  The result will only reference channels enabled in the provided \p
 * mask, assuming that \p mask is non-zero.  The constructed swizzle will
 * satisfy the property that for any instruction OP and any mask:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * will be equivalent to the same instruction without swizzle:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 *
 * Enabled channels select themselves; a disabled channel repeats the most
 * recent enabled channel below it, or the lowest enabled channel if there is
 * none below.
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Selector used for disabled channels; starts at the lowest set bit. */
   unsigned fallback = 0;
   if (mask)
      fallback = ffs(mask) - 1;

   unsigned swizzle = 0;
   for (unsigned chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
         fallback = chan;

      swizzle |= fallback << (chan * 2);
   }

   return swizzle;
}
146
/* Defined out of line.  NOTE(review): presumably applies the swizzle \p swz
 * to the components packed in the immediate \p x of the given type --
 * confirm against the definition.
 */
uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);

/* Size of one register in bytes: byte_offset() uses it to split a byte count
 * into whole registers plus a sub-register remainder.
 */
#define REG_SIZE (8*4)
150
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 */
typedef struct brw_reg {
   union {
      struct {
         enum brw_reg_type type:5;
         enum brw_reg_file file:3;
         unsigned negate:1;             /* source only */
         unsigned abs:1;                /* source only */
         unsigned address_mode:1;       /* relative addressing, hopefully! */
         unsigned pad0:15;
         /* Stored in bytes: brw_make_reg() scales it by the type size and
          * byte_offset() adds byte counts to it.
          */
         unsigned subnr:6;              /* :1 in align16 */
      };
      /* The bitfields above viewed as one word, so that brw_regs_equal()
       * can compare them all at once.
       */
      uint32_t bits;
   };

   /** Offset from the start of the virtual register in bytes. */
   uint16_t offset;

   /** Register region horizontal stride of virtual registers */
   uint8_t stride;

   union {
      struct {
         unsigned nr;
         unsigned swizzle:8;            /* src only, align16 only */
         unsigned writemask:4;          /* dest only, align16 only */
         int  indirect_offset:10;       /* relative addressing offset */
         unsigned vstride:4;            /* source only */
         unsigned width:3;              /* src only, align1 only */
         unsigned hstride:2;            /* align1 only */

         /**
          * Does this register represent a scalar value?
          *
          * Registers are allocated in SIMD8 parcels, but may be used to
          * represent convergent (i.e., scalar) values. As a destination, it
          * is written as SIMD8. As a source, it may be read as <8,8,1> in
          * SIMD8 instructions or <0,1,0> on other execution sizes.
          *
          * If the value represents a vector (e.g., a convergent load_uniform
          * of a vec4), it will be stored as multiple SIMD8 registers.
          */
         unsigned is_scalar:1;
      };

      /* Immediate payload, written by the brw_imm_*() constructors.  These
       * members share storage with the register description above;
       * brw_regs_equal() compares the whole union through u64.
       */
      double df;
      uint64_t u64;
      int64_t d64;
      float f;
      int   d;
      unsigned ud;
   };

#ifdef __cplusplus
   /* TODO: Remove this constructor to make this type a POD.  Need
    * to make sure that rest of compiler doesn't rely on type or
    * stride of BAD_FILE registers.
    *
    * Zero-fills the object, then sets type UD, stride 1 and file BAD_FILE.
    */
   brw_reg() {
      memset((void*)this, 0, sizeof(*this));
      this->type = BRW_TYPE_UD;
      this->stride = 1;
      this->file = BAD_FILE;
   }

   /* The member functions below are only declared here; their definitions
    * live elsewhere.
    */
   bool equals(const brw_reg &r) const;
   bool negative_equals(const brw_reg &r) const;
   bool is_contiguous() const;

   bool is_zero() const;
   bool is_one() const;
   bool is_negative_one() const;
   bool is_null() const;
   bool is_accumulator() const;
   bool is_address() const;

   unsigned address_slot(unsigned byte_offset) const;

   /**
    * Return the size in bytes of a single logical component of the
    * register assuming the given execution width.
    */
   unsigned component_size(unsigned width) const;
#endif /* __cplusplus */
} brw_reg;
241
242 static inline unsigned
phys_file(const struct brw_reg reg)243 phys_file(const struct brw_reg reg)
244 {
245 switch (reg.file) {
246 case ARF:
247 case FIXED_GRF:
248 case IMM:
249 return reg.file;
250
251 case ADDRESS:
252 return ARF;
253
254 default:
255 unreachable("register type should have been lowered");
256 }
257 }
258
259 static inline unsigned
phys_nr(const struct intel_device_info * devinfo,const struct brw_reg reg)260 phys_nr(const struct intel_device_info *devinfo, const struct brw_reg reg)
261 {
262 if (devinfo->ver >= 20) {
263 if (reg.file == FIXED_GRF)
264 return reg.nr / 2;
265 else if (reg.file == ADDRESS)
266 return BRW_ARF_ADDRESS;
267 else if (reg.file == ARF &&
268 reg.nr >= BRW_ARF_ACCUMULATOR &&
269 reg.nr < BRW_ARF_FLAG)
270 return BRW_ARF_ACCUMULATOR + (reg.nr - BRW_ARF_ACCUMULATOR) / 2;
271 else
272 return reg.nr;
273 } else if (reg.file == ADDRESS) {
274 return BRW_ARF_ADDRESS;
275 } else {
276 return reg.nr;
277 }
278 }
279
280 static inline unsigned
phys_subnr(const struct intel_device_info * devinfo,const struct brw_reg reg)281 phys_subnr(const struct intel_device_info *devinfo, const struct brw_reg reg)
282 {
283 if (devinfo->ver >= 20) {
284 if (reg.file == FIXED_GRF ||
285 (reg.file == ARF &&
286 reg.nr >= BRW_ARF_ACCUMULATOR &&
287 reg.nr < BRW_ARF_FLAG))
288 return (reg.nr & 1) * REG_SIZE + reg.subnr;
289 else
290 return reg.subnr;
291 } else {
292 return reg.subnr;
293 }
294 }
295
296 static inline bool
brw_regs_equal(const struct brw_reg * a,const struct brw_reg * b)297 brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
298 {
299 return a->bits == b->bits &&
300 a->u64 == b->u64 &&
301 a->offset == b->offset &&
302 a->stride == b->stride;
303 }
304
305 static inline bool
brw_regs_negative_equal(const struct brw_reg * a,const struct brw_reg * b)306 brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
307 {
308 if (a->file == IMM) {
309 if (a->bits != b->bits)
310 return false;
311
312 switch ((enum brw_reg_type) a->type) {
313 case BRW_TYPE_UQ:
314 case BRW_TYPE_Q:
315 return a->d64 == -b->d64;
316 case BRW_TYPE_DF:
317 return a->df == -b->df;
318 case BRW_TYPE_UD:
319 case BRW_TYPE_D:
320 return a->d == -b->d;
321 case BRW_TYPE_F:
322 return a->f == -b->f;
323 case BRW_TYPE_VF:
324 /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
325 * of -0). There are occasions where 0 or -0 is used and the exact
326 * bit pattern is desired. At the very least, changing this to allow
327 * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
328 */
329 return a->ud == (b->ud ^ 0x80808080);
330 case BRW_TYPE_UW:
331 case BRW_TYPE_W:
332 case BRW_TYPE_UV:
333 case BRW_TYPE_V:
334 case BRW_TYPE_HF:
335 /* FINISHME: Implement support for these types once there is
336 * something in the compiler that can generate them. Until then,
337 * they cannot be tested.
338 */
339 return false;
340 case BRW_TYPE_UB:
341 case BRW_TYPE_B:
342 default:
343 unreachable("not reached");
344 }
345 } else {
346 struct brw_reg tmp = *a;
347
348 tmp.negate = !tmp.negate;
349
350 return brw_regs_equal(&tmp, b);
351 }
352 }
353
354 static inline enum brw_reg_type
get_exec_type(const enum brw_reg_type type)355 get_exec_type(const enum brw_reg_type type)
356 {
357 switch (type) {
358 case BRW_TYPE_B:
359 case BRW_TYPE_V:
360 return BRW_TYPE_W;
361 case BRW_TYPE_UB:
362 case BRW_TYPE_UV:
363 return BRW_TYPE_UW;
364 case BRW_TYPE_VF:
365 return BRW_TYPE_F;
366 default:
367 return type;
368 }
369 }
370
371 /**
372 * Return an integer type of the requested size and signedness.
373 */
374 static inline enum brw_reg_type
brw_int_type(unsigned sz,bool is_signed)375 brw_int_type(unsigned sz, bool is_signed)
376 {
377 switch (sz) {
378 case 1:
379 return (is_signed ? BRW_TYPE_B : BRW_TYPE_UB);
380 case 2:
381 return (is_signed ? BRW_TYPE_W : BRW_TYPE_UW);
382 case 4:
383 return (is_signed ? BRW_TYPE_D : BRW_TYPE_UD);
384 case 8:
385 return (is_signed ? BRW_TYPE_Q : BRW_TYPE_UQ);
386 default:
387 unreachable("Not reached.");
388 }
389 }
390
391 /**
392 * Construct a brw_reg.
393 * \param file one of the BRW_x_REGISTER_FILE values
394 * \param nr register number/index
395 * \param subnr register sub number
396 * \param negate register negate modifier
397 * \param abs register abs modifier
398 * \param type one of BRW_TYPE_x
399 * \param vstride one of BRW_VERTICAL_STRIDE_x
400 * \param width one of BRW_WIDTH_x
401 * \param hstride one of BRW_HORIZONTAL_STRIDE_x
402 * \param swizzle one of BRW_SWIZZLE_x
403 * \param writemask WRITEMASK_X/Y/Z/W bitfield
404 */
405 static inline struct brw_reg
brw_make_reg(enum brw_reg_file file,unsigned nr,unsigned subnr,unsigned negate,unsigned abs,enum brw_reg_type type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)406 brw_make_reg(enum brw_reg_file file,
407 unsigned nr,
408 unsigned subnr,
409 unsigned negate,
410 unsigned abs,
411 enum brw_reg_type type,
412 unsigned vstride,
413 unsigned width,
414 unsigned hstride,
415 unsigned swizzle,
416 unsigned writemask)
417 {
418 struct brw_reg reg;
419 if (file == FIXED_GRF)
420 assert(nr < XE3_MAX_GRF);
421 else if (file == ARF)
422 assert(nr <= BRW_ARF_TIMESTAMP);
423
424 reg.type = type;
425 reg.file = file;
426 reg.negate = negate;
427 reg.abs = abs;
428 reg.address_mode = BRW_ADDRESS_DIRECT;
429 reg.pad0 = 0;
430 reg.subnr = subnr * brw_type_size_bytes(type);
431 reg.nr = nr;
432
433 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
434 * set swizzle and writemask to W, as the lower bits of subnr will
435 * be lost when converted to align16. This is probably too much to
436 * keep track of as you'd want it adjusted by suboffset(), etc.
437 * Perhaps fix up when converting to align16?
438 */
439 reg.swizzle = swizzle;
440 reg.writemask = writemask;
441 reg.indirect_offset = 0;
442 reg.vstride = vstride;
443 reg.width = width;
444 reg.hstride = hstride;
445 reg.is_scalar = 0;
446
447 reg.offset = 0;
448 reg.stride = 1;
449 if (file == IMM &&
450 type != BRW_TYPE_V &&
451 type != BRW_TYPE_UV &&
452 type != BRW_TYPE_VF) {
453 reg.stride = 0;
454 }
455
456 return reg;
457 }
458
459 /** Construct float[16] register */
460 static inline struct brw_reg
brw_vec16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)461 brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
462 {
463 return brw_make_reg(file,
464 nr,
465 subnr,
466 0,
467 0,
468 BRW_TYPE_F,
469 BRW_VERTICAL_STRIDE_16,
470 BRW_WIDTH_16,
471 BRW_HORIZONTAL_STRIDE_1,
472 BRW_SWIZZLE_XYZW,
473 WRITEMASK_XYZW);
474 }
475
476 /** Construct float[8] register */
477 static inline struct brw_reg
brw_vec8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)478 brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
479 {
480 return brw_make_reg(file,
481 nr,
482 subnr,
483 0,
484 0,
485 BRW_TYPE_F,
486 BRW_VERTICAL_STRIDE_8,
487 BRW_WIDTH_8,
488 BRW_HORIZONTAL_STRIDE_1,
489 BRW_SWIZZLE_XYZW,
490 WRITEMASK_XYZW);
491 }
492
493 /** Construct float[4] register */
494 static inline struct brw_reg
brw_vec4_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)495 brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
496 {
497 return brw_make_reg(file,
498 nr,
499 subnr,
500 0,
501 0,
502 BRW_TYPE_F,
503 BRW_VERTICAL_STRIDE_4,
504 BRW_WIDTH_4,
505 BRW_HORIZONTAL_STRIDE_1,
506 BRW_SWIZZLE_XYZW,
507 WRITEMASK_XYZW);
508 }
509
510 /** Construct float[2] register */
511 static inline struct brw_reg
brw_vec2_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)512 brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
513 {
514 return brw_make_reg(file,
515 nr,
516 subnr,
517 0,
518 0,
519 BRW_TYPE_F,
520 BRW_VERTICAL_STRIDE_2,
521 BRW_WIDTH_2,
522 BRW_HORIZONTAL_STRIDE_1,
523 BRW_SWIZZLE_XYXY,
524 WRITEMASK_XY);
525 }
526
527 /** Construct float[1] register */
528 static inline struct brw_reg
brw_vec1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)529 brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
530 {
531 return brw_make_reg(file,
532 nr,
533 subnr,
534 0,
535 0,
536 BRW_TYPE_F,
537 BRW_VERTICAL_STRIDE_0,
538 BRW_WIDTH_1,
539 BRW_HORIZONTAL_STRIDE_0,
540 BRW_SWIZZLE_XXXX,
541 WRITEMASK_X);
542 }
543
544 static inline struct brw_reg
brw_vecn_reg(unsigned width,enum brw_reg_file file,unsigned nr,unsigned subnr)545 brw_vecn_reg(unsigned width, enum brw_reg_file file,
546 unsigned nr, unsigned subnr)
547 {
548 switch (width) {
549 case 1:
550 return brw_vec1_reg(file, nr, subnr);
551 case 2:
552 return brw_vec2_reg(file, nr, subnr);
553 case 4:
554 return brw_vec4_reg(file, nr, subnr);
555 case 8:
556 return brw_vec8_reg(file, nr, subnr);
557 case 16:
558 return brw_vec16_reg(file, nr, subnr);
559 default:
560 unreachable("Invalid register width");
561 }
562 }
563
564 static inline struct brw_reg
retype(struct brw_reg reg,enum brw_reg_type type)565 retype(struct brw_reg reg, enum brw_reg_type type)
566 {
567 reg.type = type;
568 return reg;
569 }
570
571 static inline struct brw_reg
firsthalf(struct brw_reg reg)572 firsthalf(struct brw_reg reg)
573 {
574 return reg;
575 }
576
577 static inline struct brw_reg
sechalf(struct brw_reg reg)578 sechalf(struct brw_reg reg)
579 {
580 if (reg.vstride)
581 reg.nr++;
582 return reg;
583 }
584
585 static inline struct brw_reg
offset(struct brw_reg reg,unsigned delta)586 offset(struct brw_reg reg, unsigned delta)
587 {
588 reg.nr += delta;
589 return reg;
590 }
591
592
593 static inline struct brw_reg
byte_offset(struct brw_reg reg,unsigned bytes)594 byte_offset(struct brw_reg reg, unsigned bytes)
595 {
596 switch (reg.file) {
597 case BAD_FILE:
598 break;
599 case VGRF:
600 case ATTR:
601 case UNIFORM:
602 reg.offset += bytes;
603 break;
604 case ADDRESS:
605 case ARF:
606 case FIXED_GRF: {
607 const unsigned suboffset = reg.subnr + bytes;
608 reg.nr += suboffset / REG_SIZE;
609 reg.subnr = suboffset % REG_SIZE;
610 break;
611 }
612 case IMM:
613 default:
614 assert(bytes == 0);
615 }
616 return reg;
617 }
618
619 static inline struct brw_reg
suboffset(struct brw_reg reg,unsigned delta)620 suboffset(struct brw_reg reg, unsigned delta)
621 {
622 return byte_offset(reg, delta * brw_type_size_bytes(reg.type));
623 }
624
625 /** Construct unsigned word[16] register */
626 static inline struct brw_reg
brw_uw16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)627 brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
628 {
629 return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_TYPE_UW), subnr);
630 }
631
632 /** Construct unsigned word[8] register */
633 static inline struct brw_reg
brw_uw8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)634 brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
635 {
636 return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_TYPE_UW), subnr);
637 }
638
639 /** Construct unsigned word[1] register */
640 static inline struct brw_reg
brw_uw1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)641 brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
642 {
643 return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_TYPE_UW), subnr);
644 }
645
646 static inline struct brw_reg
brw_ud8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)647 brw_ud8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
648 {
649 return retype(brw_vec8_reg(file, nr, subnr), BRW_TYPE_UD);
650 }
651
652 static inline struct brw_reg
brw_ud1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)653 brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
654 {
655 return retype(brw_vec1_reg(file, nr, subnr), BRW_TYPE_UD);
656 }
657
658 static inline struct brw_reg
brw_imm_reg(enum brw_reg_type type)659 brw_imm_reg(enum brw_reg_type type)
660 {
661 return brw_make_reg(IMM,
662 0,
663 0,
664 0,
665 0,
666 type,
667 BRW_VERTICAL_STRIDE_0,
668 BRW_WIDTH_1,
669 BRW_HORIZONTAL_STRIDE_0,
670 0,
671 0);
672 }
673
674 /** Construct float immediate register */
675 static inline struct brw_reg
brw_imm_df(double df)676 brw_imm_df(double df)
677 {
678 struct brw_reg imm = brw_imm_reg(BRW_TYPE_DF);
679 imm.df = df;
680 return imm;
681 }
682
683 static inline struct brw_reg
brw_imm_u64(uint64_t u64)684 brw_imm_u64(uint64_t u64)
685 {
686 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
687 imm.u64 = u64;
688 return imm;
689 }
690
691 static inline struct brw_reg
brw_imm_f(float f)692 brw_imm_f(float f)
693 {
694 struct brw_reg imm = brw_imm_reg(BRW_TYPE_F);
695 imm.f = f;
696 return imm;
697 }
698
699 /** Construct int64_t immediate register */
700 static inline struct brw_reg
brw_imm_q(int64_t q)701 brw_imm_q(int64_t q)
702 {
703 struct brw_reg imm = brw_imm_reg(BRW_TYPE_Q);
704 imm.d64 = q;
705 return imm;
706 }
707
708 /** Construct int64_t immediate register */
709 static inline struct brw_reg
brw_imm_uq(uint64_t uq)710 brw_imm_uq(uint64_t uq)
711 {
712 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
713 imm.u64 = uq;
714 return imm;
715 }
716
717 /** Construct integer immediate register */
718 static inline struct brw_reg
brw_imm_d(int d)719 brw_imm_d(int d)
720 {
721 struct brw_reg imm = brw_imm_reg(BRW_TYPE_D);
722 imm.d = d;
723 return imm;
724 }
725
726 /** Construct uint immediate register */
727 static inline struct brw_reg
brw_imm_ud(unsigned ud)728 brw_imm_ud(unsigned ud)
729 {
730 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UD);
731 imm.ud = ud;
732 return imm;
733 }
734
735 /** Construct ushort immediate register */
736 static inline struct brw_reg
brw_imm_uw(uint16_t uw)737 brw_imm_uw(uint16_t uw)
738 {
739 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UW);
740 imm.ud = uw | ((uint32_t)uw << 16);
741 return imm;
742 }
743
744 /** Construct short immediate register */
745 static inline struct brw_reg
brw_imm_w(int16_t w)746 brw_imm_w(int16_t w)
747 {
748 struct brw_reg imm = brw_imm_reg(BRW_TYPE_W);
749 imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
750 return imm;
751 }
752
753 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
754 * numbers alias with _V and _VF below:
755 */
756
757 /** Construct vector of eight signed half-byte values */
758 static inline struct brw_reg
brw_imm_v(unsigned v)759 brw_imm_v(unsigned v)
760 {
761 struct brw_reg imm = brw_imm_reg(BRW_TYPE_V);
762 imm.ud = v;
763 return imm;
764 }
765
766 /** Construct vector of eight unsigned half-byte values */
767 static inline struct brw_reg
brw_imm_uv(unsigned uv)768 brw_imm_uv(unsigned uv)
769 {
770 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UV);
771 imm.ud = uv;
772 return imm;
773 }
774
775 /** Construct vector of four 8-bit float values */
776 static inline struct brw_reg
brw_imm_vf(unsigned v)777 brw_imm_vf(unsigned v)
778 {
779 struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
780 imm.ud = v;
781 return imm;
782 }
783
784 static inline struct brw_reg
brw_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)785 brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
786 {
787 struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
788 imm.vstride = BRW_VERTICAL_STRIDE_0;
789 imm.width = BRW_WIDTH_4;
790 imm.hstride = BRW_HORIZONTAL_STRIDE_1;
791 imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
792 return imm;
793 }
794
795
796 static inline struct brw_reg
brw_address(struct brw_reg reg)797 brw_address(struct brw_reg reg)
798 {
799 return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
800 }
801
802 /** Construct float[1] general-purpose register */
803 static inline struct brw_reg
brw_vec1_grf(unsigned nr,unsigned subnr)804 brw_vec1_grf(unsigned nr, unsigned subnr)
805 {
806 return brw_vec1_reg(FIXED_GRF, nr, subnr);
807 }
808
809 static inline struct brw_reg
xe2_vec1_grf(unsigned nr,unsigned subnr)810 xe2_vec1_grf(unsigned nr, unsigned subnr)
811 {
812 return brw_vec1_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
813 }
814
815 /** Construct float[2] general-purpose register */
816 static inline struct brw_reg
brw_vec2_grf(unsigned nr,unsigned subnr)817 brw_vec2_grf(unsigned nr, unsigned subnr)
818 {
819 return brw_vec2_reg(FIXED_GRF, nr, subnr);
820 }
821
822 static inline struct brw_reg
xe2_vec2_grf(unsigned nr,unsigned subnr)823 xe2_vec2_grf(unsigned nr, unsigned subnr)
824 {
825 return brw_vec2_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
826 }
827
828 /** Construct float[4] general-purpose register */
829 static inline struct brw_reg
brw_vec4_grf(unsigned nr,unsigned subnr)830 brw_vec4_grf(unsigned nr, unsigned subnr)
831 {
832 return brw_vec4_reg(FIXED_GRF, nr, subnr);
833 }
834
835 static inline struct brw_reg
xe2_vec4_grf(unsigned nr,unsigned subnr)836 xe2_vec4_grf(unsigned nr, unsigned subnr)
837 {
838 return brw_vec4_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
839 }
840
841 /** Construct float[8] general-purpose register */
842 static inline struct brw_reg
brw_vec8_grf(unsigned nr,unsigned subnr)843 brw_vec8_grf(unsigned nr, unsigned subnr)
844 {
845 return brw_vec8_reg(FIXED_GRF, nr, subnr);
846 }
847
848 static inline struct brw_reg
xe2_vec8_grf(unsigned nr,unsigned subnr)849 xe2_vec8_grf(unsigned nr, unsigned subnr)
850 {
851 return brw_vec8_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
852 }
853
854 /** Construct float[16] general-purpose register */
855 static inline struct brw_reg
brw_vec16_grf(unsigned nr,unsigned subnr)856 brw_vec16_grf(unsigned nr, unsigned subnr)
857 {
858 return brw_vec16_reg(FIXED_GRF, nr, subnr);
859 }
860
861 static inline struct brw_reg
xe2_vec16_grf(unsigned nr,unsigned subnr)862 xe2_vec16_grf(unsigned nr, unsigned subnr)
863 {
864 return brw_vec16_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
865 }
866
867 static inline struct brw_reg
brw_vecn_grf(unsigned width,unsigned nr,unsigned subnr)868 brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
869 {
870 return brw_vecn_reg(width, FIXED_GRF, nr, subnr);
871 }
872
873 static inline struct brw_reg
xe2_vecn_grf(unsigned width,unsigned nr,unsigned subnr)874 xe2_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
875 {
876 return brw_vecn_reg(width, FIXED_GRF, nr + subnr / 8, subnr % 8);
877 }
878
879 static inline struct brw_reg
brw_uw1_grf(unsigned nr,unsigned subnr)880 brw_uw1_grf(unsigned nr, unsigned subnr)
881 {
882 return brw_uw1_reg(FIXED_GRF, nr, subnr);
883 }
884
885 static inline struct brw_reg
brw_uw8_grf(unsigned nr,unsigned subnr)886 brw_uw8_grf(unsigned nr, unsigned subnr)
887 {
888 return brw_uw8_reg(FIXED_GRF, nr, subnr);
889 }
890
891 static inline struct brw_reg
brw_uw16_grf(unsigned nr,unsigned subnr)892 brw_uw16_grf(unsigned nr, unsigned subnr)
893 {
894 return brw_uw16_reg(FIXED_GRF, nr, subnr);
895 }
896
897 static inline struct brw_reg
brw_ud8_grf(unsigned nr,unsigned subnr)898 brw_ud8_grf(unsigned nr, unsigned subnr)
899 {
900 return brw_ud8_reg(FIXED_GRF, nr, subnr);
901 }
902
903 static inline struct brw_reg
brw_ud1_grf(unsigned nr,unsigned subnr)904 brw_ud1_grf(unsigned nr, unsigned subnr)
905 {
906 return brw_ud1_reg(FIXED_GRF, nr, subnr);
907 }
908
909
910 /** Construct null register (usually used for setting condition codes) */
911 static inline struct brw_reg
brw_null_reg(void)912 brw_null_reg(void)
913 {
914 return brw_vec8_reg(ARF, BRW_ARF_NULL, 0);
915 }
916
917 static inline struct brw_reg
brw_null_vec(unsigned width)918 brw_null_vec(unsigned width)
919 {
920 return brw_vecn_reg(width, ARF, BRW_ARF_NULL, 0);
921 }
922
923 static inline struct brw_reg
brw_address_reg(unsigned subnr)924 brw_address_reg(unsigned subnr)
925 {
926 return brw_uw1_reg(ADDRESS, 0, subnr);
927 }
928
929 static inline struct brw_reg
brw_tdr_reg(void)930 brw_tdr_reg(void)
931 {
932 return brw_uw1_reg(ARF, BRW_ARF_TDR, 0);
933 }
934
935 /* If/else instructions break in align16 mode if writemask & swizzle
936 * aren't xyzw. This goes against the convention for other scalar
937 * regs:
938 */
939 static inline struct brw_reg
brw_ip_reg(void)940 brw_ip_reg(void)
941 {
942 return brw_make_reg(ARF,
943 BRW_ARF_IP,
944 0,
945 0,
946 0,
947 BRW_TYPE_UD,
948 BRW_VERTICAL_STRIDE_4, /* ? */
949 BRW_WIDTH_1,
950 BRW_HORIZONTAL_STRIDE_0,
951 BRW_SWIZZLE_XYZW, /* NOTE! */
952 WRITEMASK_XYZW); /* NOTE! */
953 }
954
955 static inline struct brw_reg
brw_notification_reg(void)956 brw_notification_reg(void)
957 {
958 return brw_make_reg(ARF,
959 BRW_ARF_NOTIFICATION_COUNT,
960 0,
961 0,
962 0,
963 BRW_TYPE_UD,
964 BRW_VERTICAL_STRIDE_0,
965 BRW_WIDTH_1,
966 BRW_HORIZONTAL_STRIDE_0,
967 BRW_SWIZZLE_XXXX,
968 WRITEMASK_X);
969 }
970
971 static inline struct brw_reg
brw_cr0_reg(unsigned subnr)972 brw_cr0_reg(unsigned subnr)
973 {
974 return brw_ud1_reg(ARF, BRW_ARF_CONTROL, subnr);
975 }
976
977 static inline struct brw_reg
brw_sr0_reg(unsigned subnr)978 brw_sr0_reg(unsigned subnr)
979 {
980 return brw_ud1_reg(ARF, BRW_ARF_STATE, subnr);
981 }
982
983 static inline struct brw_reg
brw_acc_reg(unsigned width)984 brw_acc_reg(unsigned width)
985 {
986 return brw_vecn_reg(width, ARF,
987 BRW_ARF_ACCUMULATOR, 0);
988 }
989
990 static inline struct brw_reg
brw_flag_reg(int reg,int subreg)991 brw_flag_reg(int reg, int subreg)
992 {
993 return brw_uw1_reg(ARF,
994 BRW_ARF_FLAG + reg, subreg);
995 }
996
997 static inline struct brw_reg
brw_flag_subreg(unsigned subreg)998 brw_flag_subreg(unsigned subreg)
999 {
1000 return brw_uw1_reg(ARF,
1001 BRW_ARF_FLAG + subreg / 2, subreg % 2);
1002 }
1003
1004 /**
1005 * Return the mask register present in Gfx4-5, or the related register present
1006 * in Gfx7.5 and later hardware referred to as "channel enable" register in
1007 * the documentation.
1008 */
1009 static inline struct brw_reg
brw_mask_reg(unsigned subnr)1010 brw_mask_reg(unsigned subnr)
1011 {
1012 return brw_uw1_reg(ARF, BRW_ARF_MASK, subnr);
1013 }
1014
1015 static inline struct brw_reg
brw_vmask_reg()1016 brw_vmask_reg()
1017 {
1018 return brw_sr0_reg(3);
1019 }
1020
1021 static inline struct brw_reg
brw_dmask_reg()1022 brw_dmask_reg()
1023 {
1024 return brw_sr0_reg(2);
1025 }
1026
1027 static inline struct brw_reg
brw_vgrf(unsigned nr,enum brw_reg_type type)1028 brw_vgrf(unsigned nr, enum brw_reg_type type)
1029 {
1030 struct brw_reg reg = {};
1031 reg.file = VGRF;
1032 reg.nr = nr;
1033 reg.type = type;
1034 reg.stride = 1;
1035 return reg;
1036 }
1037
1038 static inline struct brw_reg
brw_attr_reg(unsigned nr,enum brw_reg_type type)1039 brw_attr_reg(unsigned nr, enum brw_reg_type type)
1040 {
1041 struct brw_reg reg = {};
1042 reg.file = ATTR;
1043 reg.nr = nr;
1044 reg.type = type;
1045 reg.stride = 1;
1046 return reg;
1047 }
1048
1049 static inline struct brw_reg
brw_uniform_reg(unsigned nr,enum brw_reg_type type)1050 brw_uniform_reg(unsigned nr, enum brw_reg_type type)
1051 {
1052 struct brw_reg reg = {};
1053 reg.file = UNIFORM;
1054 reg.nr = nr;
1055 reg.type = type;
1056 reg.stride = 0;
1057 return reg;
1058 }
1059
/* Encode a stride or width count: 0 stays 0 and the powers of two from 1 to
 * 32 map to log2(val) + 1.  Any other value yields 0.
 *
 * This is almost always called with a numeric constant argument, so it is
 * kept as a single constant-foldable expression.
 */
static inline unsigned cvt(unsigned val)
{
   return val ==  1 ? 1 :
          val ==  2 ? 2 :
          val ==  4 ? 3 :
          val ==  8 ? 4 :
          val == 16 ? 5 :
          val == 32 ? 6 : 0;
}
1076
1077 static inline struct brw_reg
stride(struct brw_reg reg,unsigned vstride,unsigned width,unsigned hstride)1078 stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
1079 {
1080 reg.vstride = cvt(vstride);
1081 reg.width = cvt(width) - 1;
1082 reg.hstride = cvt(hstride);
1083 return reg;
1084 }
1085
1086 /**
1087 * Multiply the vertical and horizontal stride of a register by the given
1088 * factor \a s.
1089 */
1090 static inline struct brw_reg
spread(struct brw_reg reg,unsigned s)1091 spread(struct brw_reg reg, unsigned s)
1092 {
1093 if (s) {
1094 assert(util_is_power_of_two_nonzero(s));
1095
1096 if (reg.hstride)
1097 reg.hstride += cvt(s) - 1;
1098
1099 if (reg.vstride)
1100 reg.vstride += cvt(s) - 1;
1101
1102 return reg;
1103 } else {
1104 return stride(reg, 0, 1, 0);
1105 }
1106 }
1107
1108 /**
1109 * Reinterpret each channel of register \p reg as a vector of values of the
1110 * given smaller type and take the i-th subcomponent from each.
1111 */
1112 static inline struct brw_reg
subscript(struct brw_reg reg,enum brw_reg_type type,unsigned i)1113 subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1114 {
1115 assert((i + 1) * brw_type_size_bytes(type) <= brw_type_size_bytes(reg.type));
1116
1117 if (reg.file == ARF || reg.file == FIXED_GRF) {
1118 /* The stride is encoded inconsistently for fixed GRF and ARF registers
1119 * as the log2 of the actual vertical and horizontal strides.
1120 */
1121 const int delta = util_logbase2(brw_type_size_bytes(reg.type)) -
1122 util_logbase2(brw_type_size_bytes(type));
1123 reg.hstride += (reg.hstride ? delta : 0);
1124 reg.vstride += (reg.vstride ? delta : 0);
1125
1126 } else if (reg.file == IMM) {
1127 unsigned bit_size = brw_type_size_bits(type);
1128 reg.u64 >>= i * bit_size;
1129 reg.u64 &= BITFIELD64_MASK(bit_size);
1130 if (bit_size <= 16)
1131 reg.u64 |= reg.u64 << 16;
1132 return retype(reg, type);
1133 } else {
1134 reg.stride *= brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
1135 }
1136
1137 return byte_offset(retype(reg, type), i * brw_type_size_bytes(type));
1138 }
1139
1140 static inline struct brw_reg
vec16(struct brw_reg reg)1141 vec16(struct brw_reg reg)
1142 {
1143 return stride(reg, 16,16,1);
1144 }
1145
1146 static inline struct brw_reg
vec8(struct brw_reg reg)1147 vec8(struct brw_reg reg)
1148 {
1149 return stride(reg, 8,8,1);
1150 }
1151
1152 static inline struct brw_reg
vec4(struct brw_reg reg)1153 vec4(struct brw_reg reg)
1154 {
1155 return stride(reg, 4,4,1);
1156 }
1157
1158 static inline struct brw_reg
vec2(struct brw_reg reg)1159 vec2(struct brw_reg reg)
1160 {
1161 return stride(reg, 2,2,1);
1162 }
1163
1164 static inline struct brw_reg
vec1(struct brw_reg reg)1165 vec1(struct brw_reg reg)
1166 {
1167 return stride(reg, 0,1,0);
1168 }
1169
1170
1171 static inline struct brw_reg
get_element(struct brw_reg reg,unsigned elt)1172 get_element(struct brw_reg reg, unsigned elt)
1173 {
1174 return vec1(suboffset(reg, elt));
1175 }
1176
1177 static inline struct brw_reg
get_element_ud(struct brw_reg reg,unsigned elt)1178 get_element_ud(struct brw_reg reg, unsigned elt)
1179 {
1180 return vec1(suboffset(retype(reg, BRW_TYPE_UD), elt));
1181 }
1182
1183 static inline struct brw_reg
get_element_d(struct brw_reg reg,unsigned elt)1184 get_element_d(struct brw_reg reg, unsigned elt)
1185 {
1186 return vec1(suboffset(retype(reg, BRW_TYPE_D), elt));
1187 }
1188
1189 static inline struct brw_reg
brw_swizzle(struct brw_reg reg,unsigned swz)1190 brw_swizzle(struct brw_reg reg, unsigned swz)
1191 {
1192 if (reg.file == IMM)
1193 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1194 else
1195 reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1196
1197 return reg;
1198 }
1199
1200 static inline struct brw_reg
brw_writemask(struct brw_reg reg,unsigned mask)1201 brw_writemask(struct brw_reg reg, unsigned mask)
1202 {
1203 assert(reg.file != IMM);
1204 reg.writemask &= mask;
1205 return reg;
1206 }
1207
1208 static inline struct brw_reg
brw_set_writemask(struct brw_reg reg,unsigned mask)1209 brw_set_writemask(struct brw_reg reg, unsigned mask)
1210 {
1211 assert(reg.file != IMM);
1212 reg.writemask = mask;
1213 return reg;
1214 }
1215
/**
 * Return a mask with the \p n least significant bits set, e.g. 0x7 for
 * \p n == 3.
 *
 * The shift is done on an unsigned constant so that the expression stays
 * well defined for every \p n below the width of unsigned (a signed
 * `1 << 31` would overflow int).
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   return (1u << n) - 1;
}
1221
/**
 * Return a mask of \p n consecutive bits whose lowest set bit is bit
 * \p first_component, e.g. n == 2, first_component == 1 gives 0x6.
 *
 * The selected range must fit in four components.  The check is written
 * without adding the two arguments so that it cannot be defeated by
 * unsigned wrap-around, and the shifts are unsigned so that no signed
 * shift is involved.
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(n <= 4 && first_component <= 4 - n);

   return ((1u << n) - 1) << first_component;
}
1228
1229 static inline struct brw_reg
negate(struct brw_reg reg)1230 negate(struct brw_reg reg)
1231 {
1232 reg.negate ^= 1;
1233 return reg;
1234 }
1235
1236 static inline struct brw_reg
brw_abs(struct brw_reg reg)1237 brw_abs(struct brw_reg reg)
1238 {
1239 reg.abs = 1;
1240 reg.negate = 0;
1241 return reg;
1242 }
1243
1244 /************************************************************************/
1245
1246 static inline struct brw_reg
brw_vec1_indirect(unsigned subnr,int offset)1247 brw_vec1_indirect(unsigned subnr, int offset)
1248 {
1249 struct brw_reg reg = brw_vec1_grf(0, 0);
1250 reg.subnr = subnr;
1251 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1252 reg.indirect_offset = offset;
1253 return reg;
1254 }
1255
1256 static inline struct brw_reg
brw_VxH_indirect(unsigned subnr,int offset)1257 brw_VxH_indirect(unsigned subnr, int offset)
1258 {
1259 struct brw_reg reg = brw_vec1_grf(0, 0);
1260 reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1261 reg.subnr = subnr;
1262 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1263 reg.indirect_offset = offset;
1264 return reg;
1265 }
1266
1267 static inline bool
region_matches(struct brw_reg reg,enum brw_vertical_stride v,enum brw_width w,enum brw_horizontal_stride h)1268 region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1269 enum brw_width w, enum brw_horizontal_stride h)
1270 {
1271 return reg.vstride == v &&
1272 reg.width == w &&
1273 reg.hstride == h;
1274 }
1275
/**
 * True when \p reg has vertical stride 0, width 1 and horizontal stride 0,
 * as checked by region_matches().
 */
#define has_scalar_region(reg) \
   region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
                  BRW_HORIZONTAL_STRIDE_0)
1279
1280 /**
1281 * Return the size in bytes per data element of register \p reg on the
1282 * corresponding register file.
1283 */
1284 static inline unsigned
element_sz(struct brw_reg reg)1285 element_sz(struct brw_reg reg)
1286 {
1287 if (reg.file == IMM || has_scalar_region(reg)) {
1288 return brw_type_size_bytes(reg.type);
1289
1290 } else if (reg.width == BRW_WIDTH_1 &&
1291 reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
1292 assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
1293 return brw_type_size_bytes(reg.type) << (reg.vstride - 1);
1294
1295 } else {
1296 assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
1297 assert(reg.vstride == reg.hstride + reg.width);
1298 return brw_type_size_bytes(reg.type) << (reg.hstride - 1);
1299 }
1300 }
1301
/* brw_packed_float.c */

/* NOTE(review): going by the names, these convert between a float and an
 * 8-bit packed "VF" value and back.  The meaning of the int returned by
 * brw_float_to_vf() (for instance for inputs that cannot be represented) is
 * not visible from this header; check brw_packed_float.c.
 */
int brw_float_to_vf(float f);
float brw_vf_to_float(unsigned char vf);

/* Helpers that take the register by pointer, so they can modify it in
 * place.  NOTE(review): presumably they apply the saturate / negate / abs
 * modifier to an immediate value and use the bool result to report whether
 * that was possible; confirm against the definitions.
 */
bool brw_reg_saturate_immediate(brw_reg *reg);
bool brw_reg_negate_immediate(brw_reg *reg);
bool brw_reg_abs_immediate(brw_reg *reg);
1309
1310 #ifdef __cplusplus
1311 }
1312 #endif
1313
1314 #ifdef __cplusplus
1315
1316 static inline brw_reg
horiz_offset(const brw_reg & reg,unsigned delta)1317 horiz_offset(const brw_reg ®, unsigned delta)
1318 {
1319 switch (reg.file) {
1320 case BAD_FILE:
1321 case UNIFORM:
1322 case IMM:
1323 /* These only have a single component that is implicitly splatted. A
1324 * horizontal offset should be a harmless no-op.
1325 * XXX - Handle vector immediates correctly.
1326 */
1327 return reg;
1328 case VGRF:
1329 case ATTR:
1330 return byte_offset(reg, delta * reg.stride * brw_type_size_bytes(reg.type));
1331 case ADDRESS:
1332 case ARF:
1333 case FIXED_GRF:
1334 if (reg.is_null()) {
1335 return reg;
1336 } else {
1337 const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1338 const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1339 const unsigned width = 1 << reg.width;
1340
1341 if (delta % width == 0) {
1342 return byte_offset(reg, delta / width * vstride * brw_type_size_bytes(reg.type));
1343 } else {
1344 assert(vstride == hstride * width);
1345 return byte_offset(reg, delta * hstride * brw_type_size_bytes(reg.type));
1346 }
1347 }
1348 }
1349 unreachable("Invalid register file");
1350 }
1351
1352 static inline brw_reg
offset(brw_reg reg,unsigned width,unsigned delta)1353 offset(brw_reg reg, unsigned width, unsigned delta)
1354 {
1355 switch (reg.file) {
1356 case BAD_FILE:
1357 break;
1358 case ADDRESS:
1359 case ARF:
1360 case FIXED_GRF:
1361 case VGRF:
1362 case ATTR:
1363 case UNIFORM:
1364 return byte_offset(reg, delta * reg.component_size(width));
1365 case IMM:
1366 assert(delta == 0);
1367 }
1368 return reg;
1369 }
1370
1371 /**
1372 * Get the scalar channel of \p reg given by \p idx and replicate it to all
1373 * channels of the result.
1374 */
1375 static inline brw_reg
component(brw_reg reg,unsigned idx)1376 component(brw_reg reg, unsigned idx)
1377 {
1378 reg = horiz_offset(reg, idx);
1379 reg.stride = 0;
1380 if (reg.file == ARF || reg.file == FIXED_GRF) {
1381 reg.vstride = BRW_VERTICAL_STRIDE_0;
1382 reg.width = BRW_WIDTH_1;
1383 reg.hstride = BRW_HORIZONTAL_STRIDE_0;
1384 }
1385 return reg;
1386 }
1387
1388 /**
1389 * Return an integer identifying the discrete address space a register is
1390 * contained in. A register is by definition fully contained in the single
1391 * reg_space it belongs to, so two registers with different reg_space ids are
1392 * guaranteed not to overlap. Most register files are a single reg_space of
1393 * its own, only the VGRF and ATTR files are composed of multiple discrete
1394 * address spaces, one for each allocation and input attribute respectively.
1395 */
1396 static inline uint32_t
reg_space(const brw_reg & r)1397 reg_space(const brw_reg &r)
1398 {
1399 return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
1400 }
1401
1402 /**
1403 * Return the base offset in bytes of a register relative to the start of its
1404 * reg_space().
1405 */
1406 static inline unsigned
reg_offset(const brw_reg & r)1407 reg_offset(const brw_reg &r)
1408 {
1409 return (r.file == ADDRESS || r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
1410 (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
1411 (r.file == ADDRESS || r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
1412 }
1413
1414 /**
1415 * Return the amount of padding in bytes left unused between individual
1416 * components of register \p r due to a (horizontal) stride value greater than
1417 * one, or zero if components are tightly packed in the register file.
1418 */
1419 static inline unsigned
reg_padding(const brw_reg & r)1420 reg_padding(const brw_reg &r)
1421 {
1422 const unsigned stride = ((r.file != ADDRESS &&
1423 r.file != ARF &&
1424 r.file != FIXED_GRF) ? r.stride :
1425 r.hstride == 0 ? 0 :
1426 1 << (r.hstride - 1));
1427 return (MAX2(1, stride) - 1) * brw_type_size_bytes(r.type);
1428 }
1429
1430 /**
1431 * Return whether the register region starting at \p r and spanning \p dr
1432 * bytes could potentially overlap the register region starting at \p s and
1433 * spanning \p ds bytes.
1434 */
1435 static inline bool
regions_overlap(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1436 regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1437 {
1438 if (r.file != s.file)
1439 return false;
1440
1441 if (r.file == VGRF) {
1442 return r.nr == s.nr &&
1443 !(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
1444 } else {
1445 return !(reg_offset(r) + dr <= reg_offset(s) ||
1446 reg_offset(s) + ds <= reg_offset(r));
1447 }
1448 }
1449
1450 /**
1451 * Check that the register region given by r [r.offset, r.offset + dr[
1452 * is fully contained inside the register region given by s
1453 * [s.offset, s.offset + ds[.
1454 */
1455 static inline bool
region_contained_in(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1456 region_contained_in(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1457 {
1458 return reg_space(r) == reg_space(s) &&
1459 reg_offset(r) >= reg_offset(s) &&
1460 reg_offset(r) + dr <= reg_offset(s) + ds;
1461 }
1462
1463 /**
1464 * Return whether the given register region is n-periodic, i.e. whether the
1465 * original region remains invariant after shifting it by \p n scalar
1466 * channels.
1467 */
1468 static inline bool
is_periodic(const brw_reg & reg,unsigned n)1469 is_periodic(const brw_reg ®, unsigned n)
1470 {
1471 if (reg.file == BAD_FILE || reg.is_null()) {
1472 return true;
1473
1474 } else if (reg.file == IMM) {
1475 const unsigned period = (reg.type == BRW_TYPE_UV ||
1476 reg.type == BRW_TYPE_V ? 8 :
1477 reg.type == BRW_TYPE_VF ? 4 :
1478 1);
1479 return n % period == 0;
1480
1481 } else if (reg.file == ADDRESS || reg.file == ARF || reg.file == FIXED_GRF) {
1482 const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
1483 reg.vstride == 0 ? 1 << reg.width :
1484 ~0);
1485 return n % period == 0;
1486
1487 } else {
1488 return reg.stride == 0;
1489 }
1490 }
1491
1492 static inline bool
is_uniform(const brw_reg & reg)1493 is_uniform(const brw_reg ®)
1494 {
1495 return is_periodic(reg, 1);
1496 }
1497
1498 /**
1499 * Get the specified 8-component quarter of a register.
1500 */
1501 static inline brw_reg
quarter(const brw_reg & reg,unsigned idx)1502 quarter(const brw_reg ®, unsigned idx)
1503 {
1504 assert(idx < 4);
1505 return horiz_offset(reg, 8 * idx);
1506 }
1507
1508 static inline brw_reg
horiz_stride(brw_reg reg,unsigned s)1509 horiz_stride(brw_reg reg, unsigned s)
1510 {
1511 reg.stride *= s;
1512 return reg;
1513 }
1514
/* Constant register with no explicit initializer, so its contents are
 * whatever brw_reg's default constructor produces.
 * NOTE(review): presumably used as an "undefined" placeholder; confirm.
 */
static const brw_reg reg_undef;
1516
1517 /*
1518 * Return the stride between channels of the specified register in
1519 * byte units, or ~0u if the region cannot be represented with a
1520 * single one-dimensional stride.
1521 */
1522 static inline unsigned
byte_stride(const brw_reg & reg)1523 byte_stride(const brw_reg ®)
1524 {
1525 switch (reg.file) {
1526 case BAD_FILE:
1527 case UNIFORM:
1528 case IMM:
1529 case VGRF:
1530 case ATTR:
1531 return reg.stride * brw_type_size_bytes(reg.type);
1532 case ADDRESS:
1533 case ARF:
1534 case FIXED_GRF:
1535 if (reg.is_null()) {
1536 return 0;
1537 } else {
1538 const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1539 const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1540 const unsigned width = 1 << reg.width;
1541
1542 if (width == 1) {
1543 return vstride * brw_type_size_bytes(reg.type);
1544 } else if (hstride * width == vstride) {
1545 return hstride * brw_type_size_bytes(reg.type);
1546 } else {
1547 return ~0u;
1548 }
1549 }
1550 default:
1551 unreachable("Invalid register file");
1552 }
1553 }
1554
1555 #endif /* __cplusplus */
1556