1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 /** @file
33 *
34 * This file defines struct brw_reg, which is our representation for EU
35 * registers. They're not a hardware specific format, just an abstraction
36 * that intends to capture the full flexibility of the hardware registers.
37 *
38 * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39 * the abstract brw_reg type into the actual hardware instruction encoding.
40 */
41
42 #pragma once
43
44 #include <stdbool.h>
45 #include "util/compiler.h"
46 #include "util/glheader.h"
47 #include "util/macros.h"
48 #include "util/rounding.h"
49 #include "util/u_math.h"
50 #include "brw_eu_defines.h"
51 #include "brw_reg_type.h"
52
53 #ifdef __cplusplus
54 extern "C" {
55 #endif
56
57 struct intel_device_info;
58
/** Size of general purpose register space in REG_SIZE units */
#define BRW_MAX_GRF 128
/* Larger bound on FIXED_GRF register numbers, checked by the assert in
 * brw_make_reg(). NOTE(review): presumably the Xe2+ register space expressed
 * in the same REG_SIZE units -- confirm.
 */
#define XE2_MAX_GRF 256
62
/**
 * BRW hardware swizzles.
 * Only defines XYZW to ensure it can be contained in 2 bits
 */
#define BRW_SWIZZLE_X 0
#define BRW_SWIZZLE_Y 1
#define BRW_SWIZZLE_Z 2
#define BRW_SWIZZLE_W 3

/* Pack four 2-bit channel selectors into one swizzle value: \a a lands in
 * bits 1:0, \a b in bits 3:2, \a c in bits 5:4 and \a d in bits 7:6.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
/* Extract the 2-bit selector stored at position \a idx of swizzle \a swz. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Named swizzles; the letters list the selectors from position 0 to 3. */
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YXYX BRW_SWIZZLE4(1,0,1,0)
#define BRW_SWIZZLE_XZXZ BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_YWYW BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3)
#define BRW_SWIZZLE_WZWZ BRW_SWIZZLE4(3,2,3,2)
#define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0)
#define BRW_SWIZZLE_XXZZ BRW_SWIZZLE4(0,0,2,2)
#define BRW_SWIZZLE_YYWW BRW_SWIZZLE4(1,1,3,3)
#define BRW_SWIZZLE_YXWZ BRW_SWIZZLE4(1,0,3,2)

/* The identity swizzle shifted right (INPUT) or left (OUTPUT) by \a comp
 * selector positions. Neither result is masked back to 8 bits.
 */
#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
96
97 static inline bool
brw_is_single_value_swizzle(unsigned swiz)98 brw_is_single_value_swizzle(unsigned swiz)
99 {
100 return (swiz == BRW_SWIZZLE_XXXX ||
101 swiz == BRW_SWIZZLE_YYYY ||
102 swiz == BRW_SWIZZLE_ZZZZ ||
103 swiz == BRW_SWIZZLE_WWWW);
104 }
105
/**
 * Compose two swizzles: position i of the result holds the selector that
 * \p swz1 stores at the position named by selector i of \p swz0. In other
 * words \p swz0 is applied on the result of \p swz1, matching the argument
 * order of function composition.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned sel = BRW_GET_SWZ(swz0, chan);

      /* Each selector is 2 bits wide, so position chan starts at bit 2*chan. */
      composed |= BRW_GET_SWZ(swz1, sel) << (2 * chan);
   }

   return composed;
}
120
/**
 * Build an identity swizzle restricted to the channels enabled in \p mask.
 *
 * Enabled channels select themselves. A disabled channel repeats the most
 * recent enabled channel below it, or the lowest enabled channel of \p mask
 * when there is none below. Provided \p mask is non-zero, the result only
 * references enabled channels, so for any instruction OP:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * is equivalent to the same instruction without the swizzle:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Selector used for disabled channels seen before any enabled one. */
   unsigned fallback = 0;
   if (mask)
      fallback = ffs(mask) - 1;

   unsigned swizzle = 0;
   for (unsigned chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
         fallback = chan;

      /* Same packing as BRW_SWIZZLE4(): 2 bits per position. */
      swizzle |= fallback << (2 * chan);
   }

   return swizzle;
}
145
/* Declared here, defined outside this header.
 * NOTE(review): presumably applies swizzle \p swz to the packed immediate
 * \p x of type \p type and returns the rearranged value -- confirm against
 * the definition.
 */
uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);
147
/* Number of bytes covered by one step of brw_reg::nr: byte_offset() carries
 * subnr over into nr every REG_SIZE bytes.
 */
#define REG_SIZE (8*4)
149
150 /* These aren't hardware structs, just something useful for us to pass around:
151 *
152 * Align1 operation has a lot of control over input ranges. Used in
153 * WM programs to implement shaders decomposed into "channel serial"
154 * or "structure of array" form:
155 */
typedef struct brw_reg {
   /* Header word: register type, file and modifiers. The bit-field widths
    * add up to 32 and the same storage can be read as one value through
    * \c bits, which is how brw_regs_equal() compares it.
    */
   union {
      struct {
         enum brw_reg_type type:5;
         enum brw_reg_file file:3;
         unsigned negate:1;             /* source only */
         unsigned abs:1;                /* source only */
         unsigned address_mode:1;       /* relative addressing, hopefully! */
         unsigned pad0:16;              /* padding; brw_make_reg() writes 0 here */
         unsigned subnr:5;              /* :1 in align16 */
      };
      uint32_t bits;
   };

   /** Offset from the start of the virtual register in bytes. */
   uint16_t offset;

   /** Register region horizontal stride of virtual registers */
   uint8_t stride;

   /* Either a register region description (the anonymous struct) or an
    * immediate value (df/u64/d64/f/d/ud). All of them share storage, and
    * brw_regs_equal() compares the whole union through \c u64.
    */
   union {
      struct {
         unsigned nr;
         unsigned swizzle:8;            /* src only, align16 only */
         unsigned writemask:4;          /* dest only, align16 only */
         int indirect_offset:10;        /* relative addressing offset */
         unsigned vstride:4;            /* source only */
         unsigned width:3;              /* src only, align1 only */
         unsigned hstride:2;            /* align1 only */

         /**
          * Does this register represent a scalar value?
          *
          * Registers are allocated in SIMD8 parcels, but may be used to
          * represent convergent (i.e., scalar) values. As a destination, it
          * is written as SIMD8. As a source, it may be read as <8,8,1> in
          * SIMD8 instructions or <0,1,0> on other execution sizes.
          *
          * If the value represents a vector (e.g., a convergent load_uniform
          * of a vec4), it will be stored as multiple SIMD8 registers.
          */
         unsigned is_scalar:1;
      };

      double df;
      uint64_t u64;
      int64_t d64;
      float f;
      int d;
      unsigned ud;
   };

#ifdef __cplusplus
   /* TODO: Remove this constructor to make this type a POD. Need
    * to make sure that rest of compiler doesn't rely on type or
    * stride of BAD_FILE registers.
    */
   /* C++ only: zero the whole object, then default to a BAD_FILE register of
    * type UD with stride 1.
    */
   brw_reg() {
      memset((void*)this, 0, sizeof(*this));
      this->type = BRW_TYPE_UD;
      this->stride = 1;
      this->file = BAD_FILE;
   }

   /* The member functions below are only declared here; their definitions
    * live outside this header.
    */
   bool equals(const brw_reg &r) const;
   bool negative_equals(const brw_reg &r) const;
   bool is_contiguous() const;

   bool is_zero() const;
   bool is_one() const;
   bool is_negative_one() const;
   bool is_null() const;
   bool is_accumulator() const;
   bool is_address() const;

   unsigned address_slot(unsigned byte_offset) const;

   /**
    * Return the size in bytes of a single logical component of the
    * register assuming the given execution width.
    */
   unsigned component_size(unsigned width) const;
#endif /* __cplusplus */
} brw_reg;
240
241 static inline unsigned
phys_file(const struct brw_reg reg)242 phys_file(const struct brw_reg reg)
243 {
244 switch (reg.file) {
245 case ARF:
246 case FIXED_GRF:
247 case IMM:
248 return reg.file;
249
250 case ADDRESS:
251 return ARF;
252
253 default:
254 unreachable("register type should have been lowered");
255 }
256 }
257
258 static inline unsigned
phys_nr(const struct intel_device_info * devinfo,const struct brw_reg reg)259 phys_nr(const struct intel_device_info *devinfo, const struct brw_reg reg)
260 {
261 if (devinfo->ver >= 20) {
262 if (reg.file == FIXED_GRF)
263 return reg.nr / 2;
264 else if (reg.file == ADDRESS)
265 return BRW_ARF_ADDRESS;
266 else if (reg.file == ARF &&
267 reg.nr >= BRW_ARF_ACCUMULATOR &&
268 reg.nr < BRW_ARF_FLAG)
269 return BRW_ARF_ACCUMULATOR + (reg.nr - BRW_ARF_ACCUMULATOR) / 2;
270 else
271 return reg.nr;
272 } else if (reg.file == ADDRESS) {
273 return BRW_ARF_ADDRESS;
274 } else {
275 return reg.nr;
276 }
277 }
278
279 static inline unsigned
phys_subnr(const struct intel_device_info * devinfo,const struct brw_reg reg)280 phys_subnr(const struct intel_device_info *devinfo, const struct brw_reg reg)
281 {
282 if (devinfo->ver >= 20) {
283 if (reg.file == FIXED_GRF ||
284 (reg.file == ARF &&
285 reg.nr >= BRW_ARF_ACCUMULATOR &&
286 reg.nr < BRW_ARF_FLAG))
287 return (reg.nr & 1) * REG_SIZE + reg.subnr;
288 else
289 return reg.subnr;
290 } else {
291 return reg.subnr;
292 }
293 }
294
295 static inline bool
brw_regs_equal(const struct brw_reg * a,const struct brw_reg * b)296 brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
297 {
298 return a->bits == b->bits &&
299 a->u64 == b->u64 &&
300 a->offset == b->offset &&
301 a->stride == b->stride;
302 }
303
304 static inline bool
brw_regs_negative_equal(const struct brw_reg * a,const struct brw_reg * b)305 brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
306 {
307 if (a->file == IMM) {
308 if (a->bits != b->bits)
309 return false;
310
311 switch ((enum brw_reg_type) a->type) {
312 case BRW_TYPE_UQ:
313 case BRW_TYPE_Q:
314 return a->d64 == -b->d64;
315 case BRW_TYPE_DF:
316 return a->df == -b->df;
317 case BRW_TYPE_UD:
318 case BRW_TYPE_D:
319 return a->d == -b->d;
320 case BRW_TYPE_F:
321 return a->f == -b->f;
322 case BRW_TYPE_VF:
323 /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
324 * of -0). There are occasions where 0 or -0 is used and the exact
325 * bit pattern is desired. At the very least, changing this to allow
326 * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
327 */
328 return a->ud == (b->ud ^ 0x80808080);
329 case BRW_TYPE_UW:
330 case BRW_TYPE_W:
331 case BRW_TYPE_UV:
332 case BRW_TYPE_V:
333 case BRW_TYPE_HF:
334 /* FINISHME: Implement support for these types once there is
335 * something in the compiler that can generate them. Until then,
336 * they cannot be tested.
337 */
338 return false;
339 case BRW_TYPE_UB:
340 case BRW_TYPE_B:
341 default:
342 unreachable("not reached");
343 }
344 } else {
345 struct brw_reg tmp = *a;
346
347 tmp.negate = !tmp.negate;
348
349 return brw_regs_equal(&tmp, b);
350 }
351 }
352
353 static inline enum brw_reg_type
get_exec_type(const enum brw_reg_type type)354 get_exec_type(const enum brw_reg_type type)
355 {
356 switch (type) {
357 case BRW_TYPE_B:
358 case BRW_TYPE_V:
359 return BRW_TYPE_W;
360 case BRW_TYPE_UB:
361 case BRW_TYPE_UV:
362 return BRW_TYPE_UW;
363 case BRW_TYPE_VF:
364 return BRW_TYPE_F;
365 default:
366 return type;
367 }
368 }
369
370 /**
371 * Return an integer type of the requested size and signedness.
372 */
373 static inline enum brw_reg_type
brw_int_type(unsigned sz,bool is_signed)374 brw_int_type(unsigned sz, bool is_signed)
375 {
376 switch (sz) {
377 case 1:
378 return (is_signed ? BRW_TYPE_B : BRW_TYPE_UB);
379 case 2:
380 return (is_signed ? BRW_TYPE_W : BRW_TYPE_UW);
381 case 4:
382 return (is_signed ? BRW_TYPE_D : BRW_TYPE_UD);
383 case 8:
384 return (is_signed ? BRW_TYPE_Q : BRW_TYPE_UQ);
385 default:
386 unreachable("Not reached.");
387 }
388 }
389
390 /**
391 * Construct a brw_reg.
392 * \param file one of the BRW_x_REGISTER_FILE values
393 * \param nr register number/index
394 * \param subnr register sub number
395 * \param negate register negate modifier
396 * \param abs register abs modifier
397 * \param type one of BRW_TYPE_x
398 * \param vstride one of BRW_VERTICAL_STRIDE_x
399 * \param width one of BRW_WIDTH_x
400 * \param hstride one of BRW_HORIZONTAL_STRIDE_x
401 * \param swizzle one of BRW_SWIZZLE_x
402 * \param writemask WRITEMASK_X/Y/Z/W bitfield
403 */
404 static inline struct brw_reg
brw_make_reg(enum brw_reg_file file,unsigned nr,unsigned subnr,unsigned negate,unsigned abs,enum brw_reg_type type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)405 brw_make_reg(enum brw_reg_file file,
406 unsigned nr,
407 unsigned subnr,
408 unsigned negate,
409 unsigned abs,
410 enum brw_reg_type type,
411 unsigned vstride,
412 unsigned width,
413 unsigned hstride,
414 unsigned swizzle,
415 unsigned writemask)
416 {
417 struct brw_reg reg;
418 if (file == FIXED_GRF)
419 assert(nr < XE2_MAX_GRF);
420 else if (file == ARF)
421 assert(nr <= BRW_ARF_TIMESTAMP);
422
423 reg.type = type;
424 reg.file = file;
425 reg.negate = negate;
426 reg.abs = abs;
427 reg.address_mode = BRW_ADDRESS_DIRECT;
428 reg.pad0 = 0;
429 reg.subnr = subnr * brw_type_size_bytes(type);
430 reg.nr = nr;
431
432 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
433 * set swizzle and writemask to W, as the lower bits of subnr will
434 * be lost when converted to align16. This is probably too much to
435 * keep track of as you'd want it adjusted by suboffset(), etc.
436 * Perhaps fix up when converting to align16?
437 */
438 reg.swizzle = swizzle;
439 reg.writemask = writemask;
440 reg.indirect_offset = 0;
441 reg.vstride = vstride;
442 reg.width = width;
443 reg.hstride = hstride;
444 reg.is_scalar = 0;
445
446 reg.offset = 0;
447 reg.stride = 1;
448 if (file == IMM &&
449 type != BRW_TYPE_V &&
450 type != BRW_TYPE_UV &&
451 type != BRW_TYPE_VF) {
452 reg.stride = 0;
453 }
454
455 return reg;
456 }
457
458 /** Construct float[16] register */
459 static inline struct brw_reg
brw_vec16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)460 brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
461 {
462 return brw_make_reg(file,
463 nr,
464 subnr,
465 0,
466 0,
467 BRW_TYPE_F,
468 BRW_VERTICAL_STRIDE_16,
469 BRW_WIDTH_16,
470 BRW_HORIZONTAL_STRIDE_1,
471 BRW_SWIZZLE_XYZW,
472 WRITEMASK_XYZW);
473 }
474
475 /** Construct float[8] register */
476 static inline struct brw_reg
brw_vec8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)477 brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
478 {
479 return brw_make_reg(file,
480 nr,
481 subnr,
482 0,
483 0,
484 BRW_TYPE_F,
485 BRW_VERTICAL_STRIDE_8,
486 BRW_WIDTH_8,
487 BRW_HORIZONTAL_STRIDE_1,
488 BRW_SWIZZLE_XYZW,
489 WRITEMASK_XYZW);
490 }
491
492 /** Construct float[4] register */
493 static inline struct brw_reg
brw_vec4_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)494 brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
495 {
496 return brw_make_reg(file,
497 nr,
498 subnr,
499 0,
500 0,
501 BRW_TYPE_F,
502 BRW_VERTICAL_STRIDE_4,
503 BRW_WIDTH_4,
504 BRW_HORIZONTAL_STRIDE_1,
505 BRW_SWIZZLE_XYZW,
506 WRITEMASK_XYZW);
507 }
508
509 /** Construct float[2] register */
510 static inline struct brw_reg
brw_vec2_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)511 brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
512 {
513 return brw_make_reg(file,
514 nr,
515 subnr,
516 0,
517 0,
518 BRW_TYPE_F,
519 BRW_VERTICAL_STRIDE_2,
520 BRW_WIDTH_2,
521 BRW_HORIZONTAL_STRIDE_1,
522 BRW_SWIZZLE_XYXY,
523 WRITEMASK_XY);
524 }
525
526 /** Construct float[1] register */
527 static inline struct brw_reg
brw_vec1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)528 brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
529 {
530 return brw_make_reg(file,
531 nr,
532 subnr,
533 0,
534 0,
535 BRW_TYPE_F,
536 BRW_VERTICAL_STRIDE_0,
537 BRW_WIDTH_1,
538 BRW_HORIZONTAL_STRIDE_0,
539 BRW_SWIZZLE_XXXX,
540 WRITEMASK_X);
541 }
542
543 static inline struct brw_reg
brw_vecn_reg(unsigned width,enum brw_reg_file file,unsigned nr,unsigned subnr)544 brw_vecn_reg(unsigned width, enum brw_reg_file file,
545 unsigned nr, unsigned subnr)
546 {
547 switch (width) {
548 case 1:
549 return brw_vec1_reg(file, nr, subnr);
550 case 2:
551 return brw_vec2_reg(file, nr, subnr);
552 case 4:
553 return brw_vec4_reg(file, nr, subnr);
554 case 8:
555 return brw_vec8_reg(file, nr, subnr);
556 case 16:
557 return brw_vec16_reg(file, nr, subnr);
558 default:
559 unreachable("Invalid register width");
560 }
561 }
562
563 static inline struct brw_reg
retype(struct brw_reg reg,enum brw_reg_type type)564 retype(struct brw_reg reg, enum brw_reg_type type)
565 {
566 reg.type = type;
567 return reg;
568 }
569
570 static inline struct brw_reg
firsthalf(struct brw_reg reg)571 firsthalf(struct brw_reg reg)
572 {
573 return reg;
574 }
575
576 static inline struct brw_reg
sechalf(struct brw_reg reg)577 sechalf(struct brw_reg reg)
578 {
579 if (reg.vstride)
580 reg.nr++;
581 return reg;
582 }
583
584 static inline struct brw_reg
offset(struct brw_reg reg,unsigned delta)585 offset(struct brw_reg reg, unsigned delta)
586 {
587 reg.nr += delta;
588 return reg;
589 }
590
591
592 static inline struct brw_reg
byte_offset(struct brw_reg reg,unsigned bytes)593 byte_offset(struct brw_reg reg, unsigned bytes)
594 {
595 switch (reg.file) {
596 case BAD_FILE:
597 break;
598 case VGRF:
599 case ATTR:
600 case UNIFORM:
601 reg.offset += bytes;
602 break;
603 case ADDRESS:
604 case ARF:
605 case FIXED_GRF: {
606 const unsigned suboffset = reg.subnr + bytes;
607 reg.nr += suboffset / REG_SIZE;
608 reg.subnr = suboffset % REG_SIZE;
609 break;
610 }
611 case IMM:
612 default:
613 assert(bytes == 0);
614 }
615 return reg;
616 }
617
618 static inline struct brw_reg
suboffset(struct brw_reg reg,unsigned delta)619 suboffset(struct brw_reg reg, unsigned delta)
620 {
621 return byte_offset(reg, delta * brw_type_size_bytes(reg.type));
622 }
623
624 /** Construct unsigned word[16] register */
625 static inline struct brw_reg
brw_uw16_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)626 brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
627 {
628 return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_TYPE_UW), subnr);
629 }
630
631 /** Construct unsigned word[8] register */
632 static inline struct brw_reg
brw_uw8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)633 brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
634 {
635 return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_TYPE_UW), subnr);
636 }
637
638 /** Construct unsigned word[1] register */
639 static inline struct brw_reg
brw_uw1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)640 brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
641 {
642 return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_TYPE_UW), subnr);
643 }
644
645 static inline struct brw_reg
brw_ud8_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)646 brw_ud8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
647 {
648 return retype(brw_vec8_reg(file, nr, subnr), BRW_TYPE_UD);
649 }
650
651 static inline struct brw_reg
brw_ud1_reg(enum brw_reg_file file,unsigned nr,unsigned subnr)652 brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
653 {
654 return retype(brw_vec1_reg(file, nr, subnr), BRW_TYPE_UD);
655 }
656
657 static inline struct brw_reg
brw_imm_reg(enum brw_reg_type type)658 brw_imm_reg(enum brw_reg_type type)
659 {
660 return brw_make_reg(IMM,
661 0,
662 0,
663 0,
664 0,
665 type,
666 BRW_VERTICAL_STRIDE_0,
667 BRW_WIDTH_1,
668 BRW_HORIZONTAL_STRIDE_0,
669 0,
670 0);
671 }
672
673 /** Construct float immediate register */
674 static inline struct brw_reg
brw_imm_df(double df)675 brw_imm_df(double df)
676 {
677 struct brw_reg imm = brw_imm_reg(BRW_TYPE_DF);
678 imm.df = df;
679 return imm;
680 }
681
682 static inline struct brw_reg
brw_imm_u64(uint64_t u64)683 brw_imm_u64(uint64_t u64)
684 {
685 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
686 imm.u64 = u64;
687 return imm;
688 }
689
690 static inline struct brw_reg
brw_imm_f(float f)691 brw_imm_f(float f)
692 {
693 struct brw_reg imm = brw_imm_reg(BRW_TYPE_F);
694 imm.f = f;
695 return imm;
696 }
697
698 /** Construct int64_t immediate register */
699 static inline struct brw_reg
brw_imm_q(int64_t q)700 brw_imm_q(int64_t q)
701 {
702 struct brw_reg imm = brw_imm_reg(BRW_TYPE_Q);
703 imm.d64 = q;
704 return imm;
705 }
706
707 /** Construct int64_t immediate register */
708 static inline struct brw_reg
brw_imm_uq(uint64_t uq)709 brw_imm_uq(uint64_t uq)
710 {
711 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UQ);
712 imm.u64 = uq;
713 return imm;
714 }
715
716 /** Construct integer immediate register */
717 static inline struct brw_reg
brw_imm_d(int d)718 brw_imm_d(int d)
719 {
720 struct brw_reg imm = brw_imm_reg(BRW_TYPE_D);
721 imm.d = d;
722 return imm;
723 }
724
725 /** Construct uint immediate register */
726 static inline struct brw_reg
brw_imm_ud(unsigned ud)727 brw_imm_ud(unsigned ud)
728 {
729 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UD);
730 imm.ud = ud;
731 return imm;
732 }
733
734 /** Construct ushort immediate register */
735 static inline struct brw_reg
brw_imm_uw(uint16_t uw)736 brw_imm_uw(uint16_t uw)
737 {
738 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UW);
739 imm.ud = uw | ((uint32_t)uw << 16);
740 return imm;
741 }
742
743 /** Construct short immediate register */
744 static inline struct brw_reg
brw_imm_w(int16_t w)745 brw_imm_w(int16_t w)
746 {
747 struct brw_reg imm = brw_imm_reg(BRW_TYPE_W);
748 imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
749 return imm;
750 }
751
752 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
753 * numbers alias with _V and _VF below:
754 */
755
756 /** Construct vector of eight signed half-byte values */
757 static inline struct brw_reg
brw_imm_v(unsigned v)758 brw_imm_v(unsigned v)
759 {
760 struct brw_reg imm = brw_imm_reg(BRW_TYPE_V);
761 imm.ud = v;
762 return imm;
763 }
764
765 /** Construct vector of eight unsigned half-byte values */
766 static inline struct brw_reg
brw_imm_uv(unsigned uv)767 brw_imm_uv(unsigned uv)
768 {
769 struct brw_reg imm = brw_imm_reg(BRW_TYPE_UV);
770 imm.ud = uv;
771 return imm;
772 }
773
774 /** Construct vector of four 8-bit float values */
775 static inline struct brw_reg
brw_imm_vf(unsigned v)776 brw_imm_vf(unsigned v)
777 {
778 struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
779 imm.ud = v;
780 return imm;
781 }
782
783 static inline struct brw_reg
brw_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)784 brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
785 {
786 struct brw_reg imm = brw_imm_reg(BRW_TYPE_VF);
787 imm.vstride = BRW_VERTICAL_STRIDE_0;
788 imm.width = BRW_WIDTH_4;
789 imm.hstride = BRW_HORIZONTAL_STRIDE_1;
790 imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
791 return imm;
792 }
793
794
795 static inline struct brw_reg
brw_address(struct brw_reg reg)796 brw_address(struct brw_reg reg)
797 {
798 return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
799 }
800
801 /** Construct float[1] general-purpose register */
802 static inline struct brw_reg
brw_vec1_grf(unsigned nr,unsigned subnr)803 brw_vec1_grf(unsigned nr, unsigned subnr)
804 {
805 return brw_vec1_reg(FIXED_GRF, nr, subnr);
806 }
807
808 static inline struct brw_reg
xe2_vec1_grf(unsigned nr,unsigned subnr)809 xe2_vec1_grf(unsigned nr, unsigned subnr)
810 {
811 return brw_vec1_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
812 }
813
814 /** Construct float[2] general-purpose register */
815 static inline struct brw_reg
brw_vec2_grf(unsigned nr,unsigned subnr)816 brw_vec2_grf(unsigned nr, unsigned subnr)
817 {
818 return brw_vec2_reg(FIXED_GRF, nr, subnr);
819 }
820
821 static inline struct brw_reg
xe2_vec2_grf(unsigned nr,unsigned subnr)822 xe2_vec2_grf(unsigned nr, unsigned subnr)
823 {
824 return brw_vec2_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
825 }
826
827 /** Construct float[4] general-purpose register */
828 static inline struct brw_reg
brw_vec4_grf(unsigned nr,unsigned subnr)829 brw_vec4_grf(unsigned nr, unsigned subnr)
830 {
831 return brw_vec4_reg(FIXED_GRF, nr, subnr);
832 }
833
834 static inline struct brw_reg
xe2_vec4_grf(unsigned nr,unsigned subnr)835 xe2_vec4_grf(unsigned nr, unsigned subnr)
836 {
837 return brw_vec4_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
838 }
839
840 /** Construct float[8] general-purpose register */
841 static inline struct brw_reg
brw_vec8_grf(unsigned nr,unsigned subnr)842 brw_vec8_grf(unsigned nr, unsigned subnr)
843 {
844 return brw_vec8_reg(FIXED_GRF, nr, subnr);
845 }
846
847 static inline struct brw_reg
xe2_vec8_grf(unsigned nr,unsigned subnr)848 xe2_vec8_grf(unsigned nr, unsigned subnr)
849 {
850 return brw_vec8_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
851 }
852
853 /** Construct float[16] general-purpose register */
854 static inline struct brw_reg
brw_vec16_grf(unsigned nr,unsigned subnr)855 brw_vec16_grf(unsigned nr, unsigned subnr)
856 {
857 return brw_vec16_reg(FIXED_GRF, nr, subnr);
858 }
859
860 static inline struct brw_reg
xe2_vec16_grf(unsigned nr,unsigned subnr)861 xe2_vec16_grf(unsigned nr, unsigned subnr)
862 {
863 return brw_vec16_reg(FIXED_GRF, 2 * nr + subnr / 8, subnr % 8);
864 }
865
866 static inline struct brw_reg
brw_vecn_grf(unsigned width,unsigned nr,unsigned subnr)867 brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
868 {
869 return brw_vecn_reg(width, FIXED_GRF, nr, subnr);
870 }
871
872 static inline struct brw_reg
xe2_vecn_grf(unsigned width,unsigned nr,unsigned subnr)873 xe2_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
874 {
875 return brw_vecn_reg(width, FIXED_GRF, nr + subnr / 8, subnr % 8);
876 }
877
878 static inline struct brw_reg
brw_uw1_grf(unsigned nr,unsigned subnr)879 brw_uw1_grf(unsigned nr, unsigned subnr)
880 {
881 return brw_uw1_reg(FIXED_GRF, nr, subnr);
882 }
883
884 static inline struct brw_reg
brw_uw8_grf(unsigned nr,unsigned subnr)885 brw_uw8_grf(unsigned nr, unsigned subnr)
886 {
887 return brw_uw8_reg(FIXED_GRF, nr, subnr);
888 }
889
890 static inline struct brw_reg
brw_uw16_grf(unsigned nr,unsigned subnr)891 brw_uw16_grf(unsigned nr, unsigned subnr)
892 {
893 return brw_uw16_reg(FIXED_GRF, nr, subnr);
894 }
895
896 static inline struct brw_reg
brw_ud8_grf(unsigned nr,unsigned subnr)897 brw_ud8_grf(unsigned nr, unsigned subnr)
898 {
899 return brw_ud8_reg(FIXED_GRF, nr, subnr);
900 }
901
902 static inline struct brw_reg
brw_ud1_grf(unsigned nr,unsigned subnr)903 brw_ud1_grf(unsigned nr, unsigned subnr)
904 {
905 return brw_ud1_reg(FIXED_GRF, nr, subnr);
906 }
907
908
909 /** Construct null register (usually used for setting condition codes) */
910 static inline struct brw_reg
brw_null_reg(void)911 brw_null_reg(void)
912 {
913 return brw_vec8_reg(ARF, BRW_ARF_NULL, 0);
914 }
915
916 static inline struct brw_reg
brw_null_vec(unsigned width)917 brw_null_vec(unsigned width)
918 {
919 return brw_vecn_reg(width, ARF, BRW_ARF_NULL, 0);
920 }
921
922 static inline struct brw_reg
brw_address_reg(unsigned subnr)923 brw_address_reg(unsigned subnr)
924 {
925 return brw_uw1_reg(ADDRESS, 0, subnr);
926 }
927
928 static inline struct brw_reg
brw_tdr_reg(void)929 brw_tdr_reg(void)
930 {
931 return brw_uw1_reg(ARF, BRW_ARF_TDR, 0);
932 }
933
934 /* If/else instructions break in align16 mode if writemask & swizzle
935 * aren't xyzw. This goes against the convention for other scalar
936 * regs:
937 */
938 static inline struct brw_reg
brw_ip_reg(void)939 brw_ip_reg(void)
940 {
941 return brw_make_reg(ARF,
942 BRW_ARF_IP,
943 0,
944 0,
945 0,
946 BRW_TYPE_UD,
947 BRW_VERTICAL_STRIDE_4, /* ? */
948 BRW_WIDTH_1,
949 BRW_HORIZONTAL_STRIDE_0,
950 BRW_SWIZZLE_XYZW, /* NOTE! */
951 WRITEMASK_XYZW); /* NOTE! */
952 }
953
954 static inline struct brw_reg
brw_notification_reg(void)955 brw_notification_reg(void)
956 {
957 return brw_make_reg(ARF,
958 BRW_ARF_NOTIFICATION_COUNT,
959 0,
960 0,
961 0,
962 BRW_TYPE_UD,
963 BRW_VERTICAL_STRIDE_0,
964 BRW_WIDTH_1,
965 BRW_HORIZONTAL_STRIDE_0,
966 BRW_SWIZZLE_XXXX,
967 WRITEMASK_X);
968 }
969
970 static inline struct brw_reg
brw_cr0_reg(unsigned subnr)971 brw_cr0_reg(unsigned subnr)
972 {
973 return brw_ud1_reg(ARF, BRW_ARF_CONTROL, subnr);
974 }
975
976 static inline struct brw_reg
brw_sr0_reg(unsigned subnr)977 brw_sr0_reg(unsigned subnr)
978 {
979 return brw_ud1_reg(ARF, BRW_ARF_STATE, subnr);
980 }
981
982 static inline struct brw_reg
brw_acc_reg(unsigned width)983 brw_acc_reg(unsigned width)
984 {
985 return brw_vecn_reg(width, ARF,
986 BRW_ARF_ACCUMULATOR, 0);
987 }
988
989 static inline struct brw_reg
brw_flag_reg(int reg,int subreg)990 brw_flag_reg(int reg, int subreg)
991 {
992 return brw_uw1_reg(ARF,
993 BRW_ARF_FLAG + reg, subreg);
994 }
995
996 static inline struct brw_reg
brw_flag_subreg(unsigned subreg)997 brw_flag_subreg(unsigned subreg)
998 {
999 return brw_uw1_reg(ARF,
1000 BRW_ARF_FLAG + subreg / 2, subreg % 2);
1001 }
1002
1003 /**
1004 * Return the mask register present in Gfx4-5, or the related register present
1005 * in Gfx7.5 and later hardware referred to as "channel enable" register in
1006 * the documentation.
1007 */
1008 static inline struct brw_reg
brw_mask_reg(unsigned subnr)1009 brw_mask_reg(unsigned subnr)
1010 {
1011 return brw_uw1_reg(ARF, BRW_ARF_MASK, subnr);
1012 }
1013
1014 static inline struct brw_reg
brw_vmask_reg()1015 brw_vmask_reg()
1016 {
1017 return brw_sr0_reg(3);
1018 }
1019
1020 static inline struct brw_reg
brw_dmask_reg()1021 brw_dmask_reg()
1022 {
1023 return brw_sr0_reg(2);
1024 }
1025
1026 static inline struct brw_reg
brw_vgrf(unsigned nr,enum brw_reg_type type)1027 brw_vgrf(unsigned nr, enum brw_reg_type type)
1028 {
1029 struct brw_reg reg = {};
1030 reg.file = VGRF;
1031 reg.nr = nr;
1032 reg.type = type;
1033 reg.stride = 1;
1034 return reg;
1035 }
1036
1037 static inline struct brw_reg
brw_attr_reg(unsigned nr,enum brw_reg_type type)1038 brw_attr_reg(unsigned nr, enum brw_reg_type type)
1039 {
1040 struct brw_reg reg = {};
1041 reg.file = ATTR;
1042 reg.nr = nr;
1043 reg.type = type;
1044 reg.stride = 1;
1045 return reg;
1046 }
1047
1048 static inline struct brw_reg
brw_uniform_reg(unsigned nr,enum brw_reg_type type)1049 brw_uniform_reg(unsigned nr, enum brw_reg_type type)
1050 {
1051 struct brw_reg reg = {};
1052 reg.file = UNIFORM;
1053 reg.nr = nr;
1054 reg.type = type;
1055 reg.stride = 0;
1056 return reg;
1057 }
1058
/* Encode a stride/width count: 0 stays 0 and the powers of two 1, 2, 4, 8,
 * 16 and 32 map to 1 through 6. Any other value yields 0.
 *
 * This is almost always called with a numeric constant argument, so it is
 * kept as a plain chain of comparisons that is easy to evaluate at compile
 * time.
 */
static inline unsigned
cvt(unsigned val)
{
   return val == 1  ? 1 :
          val == 2  ? 2 :
          val == 4  ? 3 :
          val == 8  ? 4 :
          val == 16 ? 5 :
          val == 32 ? 6 :
                      0;
}
1075
1076 static inline struct brw_reg
stride(struct brw_reg reg,unsigned vstride,unsigned width,unsigned hstride)1077 stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
1078 {
1079 reg.vstride = cvt(vstride);
1080 reg.width = cvt(width) - 1;
1081 reg.hstride = cvt(hstride);
1082 return reg;
1083 }
1084
1085 /**
1086 * Multiply the vertical and horizontal stride of a register by the given
1087 * factor \a s.
1088 */
1089 static inline struct brw_reg
spread(struct brw_reg reg,unsigned s)1090 spread(struct brw_reg reg, unsigned s)
1091 {
1092 if (s) {
1093 assert(util_is_power_of_two_nonzero(s));
1094
1095 if (reg.hstride)
1096 reg.hstride += cvt(s) - 1;
1097
1098 if (reg.vstride)
1099 reg.vstride += cvt(s) - 1;
1100
1101 return reg;
1102 } else {
1103 return stride(reg, 0, 1, 0);
1104 }
1105 }
1106
1107 /**
1108 * Reinterpret each channel of register \p reg as a vector of values of the
1109 * given smaller type and take the i-th subcomponent from each.
1110 */
1111 static inline struct brw_reg
subscript(struct brw_reg reg,enum brw_reg_type type,unsigned i)1112 subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1113 {
1114 assert((i + 1) * brw_type_size_bytes(type) <= brw_type_size_bytes(reg.type));
1115
1116 if (reg.file == ARF || reg.file == FIXED_GRF) {
1117 /* The stride is encoded inconsistently for fixed GRF and ARF registers
1118 * as the log2 of the actual vertical and horizontal strides.
1119 */
1120 const int delta = util_logbase2(brw_type_size_bytes(reg.type)) -
1121 util_logbase2(brw_type_size_bytes(type));
1122 reg.hstride += (reg.hstride ? delta : 0);
1123 reg.vstride += (reg.vstride ? delta : 0);
1124
1125 } else if (reg.file == IMM) {
1126 unsigned bit_size = brw_type_size_bits(type);
1127 reg.u64 >>= i * bit_size;
1128 reg.u64 &= BITFIELD64_MASK(bit_size);
1129 if (bit_size <= 16)
1130 reg.u64 |= reg.u64 << 16;
1131 return retype(reg, type);
1132 } else {
1133 reg.stride *= brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
1134 }
1135
1136 return byte_offset(retype(reg, type), i * brw_type_size_bytes(type));
1137 }
1138
1139 static inline struct brw_reg
vec16(struct brw_reg reg)1140 vec16(struct brw_reg reg)
1141 {
1142 return stride(reg, 16,16,1);
1143 }
1144
1145 static inline struct brw_reg
vec8(struct brw_reg reg)1146 vec8(struct brw_reg reg)
1147 {
1148 return stride(reg, 8,8,1);
1149 }
1150
1151 static inline struct brw_reg
vec4(struct brw_reg reg)1152 vec4(struct brw_reg reg)
1153 {
1154 return stride(reg, 4,4,1);
1155 }
1156
1157 static inline struct brw_reg
vec2(struct brw_reg reg)1158 vec2(struct brw_reg reg)
1159 {
1160 return stride(reg, 2,2,1);
1161 }
1162
1163 static inline struct brw_reg
vec1(struct brw_reg reg)1164 vec1(struct brw_reg reg)
1165 {
1166 return stride(reg, 0,1,0);
1167 }
1168
1169
1170 static inline struct brw_reg
get_element(struct brw_reg reg,unsigned elt)1171 get_element(struct brw_reg reg, unsigned elt)
1172 {
1173 return vec1(suboffset(reg, elt));
1174 }
1175
1176 static inline struct brw_reg
get_element_ud(struct brw_reg reg,unsigned elt)1177 get_element_ud(struct brw_reg reg, unsigned elt)
1178 {
1179 return vec1(suboffset(retype(reg, BRW_TYPE_UD), elt));
1180 }
1181
1182 static inline struct brw_reg
get_element_d(struct brw_reg reg,unsigned elt)1183 get_element_d(struct brw_reg reg, unsigned elt)
1184 {
1185 return vec1(suboffset(retype(reg, BRW_TYPE_D), elt));
1186 }
1187
1188 static inline struct brw_reg
brw_swizzle(struct brw_reg reg,unsigned swz)1189 brw_swizzle(struct brw_reg reg, unsigned swz)
1190 {
1191 if (reg.file == IMM)
1192 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1193 else
1194 reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1195
1196 return reg;
1197 }
1198
1199 static inline struct brw_reg
brw_writemask(struct brw_reg reg,unsigned mask)1200 brw_writemask(struct brw_reg reg, unsigned mask)
1201 {
1202 assert(reg.file != IMM);
1203 reg.writemask &= mask;
1204 return reg;
1205 }
1206
1207 static inline struct brw_reg
brw_set_writemask(struct brw_reg reg,unsigned mask)1208 brw_set_writemask(struct brw_reg reg, unsigned mask)
1209 {
1210 assert(reg.file != IMM);
1211 reg.writemask = mask;
1212 return reg;
1213 }
1214
/**
 * Return a mask with the low \p n bits set.
 *
 * \param n  number of low bits to set; must be less than 32.
 * \return (1 << n) - 1, computed in unsigned arithmetic.
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   /* Shifting by the full width of the type is undefined. */
   assert(n < 32);

   /* Use an unsigned literal: a signed "1 << 31" overflows int. */
   return (1u << n) - 1u;
}
1220
/**
 * Return a mask of \p n consecutive bits starting at bit \p first_component.
 *
 * \param n                number of bits to set.
 * \param first_component  index of the lowest bit to set.
 *
 * The selected bits must fit in the low four bits
 * (first_component + n <= 4, checked by assertion).
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);

   /* Unsigned literals keep the whole computation in unsigned arithmetic,
    * matching the return type.
    */
   return ((1u << n) - 1u) << first_component;
}
1227
1228 static inline struct brw_reg
negate(struct brw_reg reg)1229 negate(struct brw_reg reg)
1230 {
1231 reg.negate ^= 1;
1232 return reg;
1233 }
1234
1235 static inline struct brw_reg
brw_abs(struct brw_reg reg)1236 brw_abs(struct brw_reg reg)
1237 {
1238 reg.abs = 1;
1239 reg.negate = 0;
1240 return reg;
1241 }
1242
1243 /************************************************************************/
1244
1245 static inline struct brw_reg
brw_vec1_indirect(unsigned subnr,int offset)1246 brw_vec1_indirect(unsigned subnr, int offset)
1247 {
1248 struct brw_reg reg = brw_vec1_grf(0, 0);
1249 reg.subnr = subnr;
1250 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1251 reg.indirect_offset = offset;
1252 return reg;
1253 }
1254
1255 static inline struct brw_reg
brw_VxH_indirect(unsigned subnr,int offset)1256 brw_VxH_indirect(unsigned subnr, int offset)
1257 {
1258 struct brw_reg reg = brw_vec1_grf(0, 0);
1259 reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1260 reg.subnr = subnr;
1261 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1262 reg.indirect_offset = offset;
1263 return reg;
1264 }
1265
1266 static inline bool
region_matches(struct brw_reg reg,enum brw_vertical_stride v,enum brw_width w,enum brw_horizontal_stride h)1267 region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1268 enum brw_width w, enum brw_horizontal_stride h)
1269 {
1270 return reg.vstride == v &&
1271 reg.width == w &&
1272 reg.hstride == h;
1273 }
1274
/* True when \p reg has the scalar region <0;1,0>: vertical stride 0,
 * width 1 and horizontal stride 0.
 */
#define has_scalar_region(reg) \
   region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
                  BRW_HORIZONTAL_STRIDE_0)
1278
1279 /**
1280 * Return the size in bytes per data element of register \p reg on the
1281 * corresponding register file.
1282 */
1283 static inline unsigned
element_sz(struct brw_reg reg)1284 element_sz(struct brw_reg reg)
1285 {
1286 if (reg.file == IMM || has_scalar_region(reg)) {
1287 return brw_type_size_bytes(reg.type);
1288
1289 } else if (reg.width == BRW_WIDTH_1 &&
1290 reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
1291 assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
1292 return brw_type_size_bytes(reg.type) << (reg.vstride - 1);
1293
1294 } else {
1295 assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
1296 assert(reg.vstride == reg.hstride + reg.width);
1297 return brw_type_size_bytes(reg.type) << (reg.hstride - 1);
1298 }
1299 }
1300
/* brw_packed_float.c */

/* Conversions between float and the packed "VF" representation.
 * NOTE(review): the encoding, and whether brw_float_to_vf() signals
 * unrepresentable values through its int return, is defined in
 * brw_packed_float.c; confirm there before relying on it.
 */
int brw_float_to_vf(float f);
float brw_vf_to_float(unsigned char vf);

/* In-place modifiers for immediate registers.
 * NOTE(review): presumably each applies the named operation to the immediate
 * held in *reg and returns whether it could be applied; the definitions are
 * not part of this header, so verify against the implementation.
 */
bool brw_reg_saturate_immediate(brw_reg *reg);
bool brw_reg_negate_immediate(brw_reg *reg);
bool brw_reg_abs_immediate(brw_reg *reg);
1308
1309 #ifdef __cplusplus
1310 }
1311 #endif
1312
1313 #ifdef __cplusplus
1314
1315 static inline brw_reg
horiz_offset(const brw_reg & reg,unsigned delta)1316 horiz_offset(const brw_reg ®, unsigned delta)
1317 {
1318 switch (reg.file) {
1319 case BAD_FILE:
1320 case UNIFORM:
1321 case IMM:
1322 /* These only have a single component that is implicitly splatted. A
1323 * horizontal offset should be a harmless no-op.
1324 * XXX - Handle vector immediates correctly.
1325 */
1326 return reg;
1327 case VGRF:
1328 case ATTR:
1329 return byte_offset(reg, delta * reg.stride * brw_type_size_bytes(reg.type));
1330 case ADDRESS:
1331 case ARF:
1332 case FIXED_GRF:
1333 if (reg.is_null()) {
1334 return reg;
1335 } else {
1336 const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1337 const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1338 const unsigned width = 1 << reg.width;
1339
1340 if (delta % width == 0) {
1341 return byte_offset(reg, delta / width * vstride * brw_type_size_bytes(reg.type));
1342 } else {
1343 assert(vstride == hstride * width);
1344 return byte_offset(reg, delta * hstride * brw_type_size_bytes(reg.type));
1345 }
1346 }
1347 }
1348 unreachable("Invalid register file");
1349 }
1350
1351 static inline brw_reg
offset(brw_reg reg,unsigned width,unsigned delta)1352 offset(brw_reg reg, unsigned width, unsigned delta)
1353 {
1354 switch (reg.file) {
1355 case BAD_FILE:
1356 break;
1357 case ADDRESS:
1358 case ARF:
1359 case FIXED_GRF:
1360 case VGRF:
1361 case ATTR:
1362 case UNIFORM:
1363 return byte_offset(reg, delta * reg.component_size(width));
1364 case IMM:
1365 assert(delta == 0);
1366 }
1367 return reg;
1368 }
1369
1370 /**
1371 * Get the scalar channel of \p reg given by \p idx and replicate it to all
1372 * channels of the result.
1373 */
1374 static inline brw_reg
component(brw_reg reg,unsigned idx)1375 component(brw_reg reg, unsigned idx)
1376 {
1377 reg = horiz_offset(reg, idx);
1378 reg.stride = 0;
1379 if (reg.file == ARF || reg.file == FIXED_GRF) {
1380 reg.vstride = BRW_VERTICAL_STRIDE_0;
1381 reg.width = BRW_WIDTH_1;
1382 reg.hstride = BRW_HORIZONTAL_STRIDE_0;
1383 }
1384 return reg;
1385 }
1386
1387 /**
1388 * Return an integer identifying the discrete address space a register is
1389 * contained in. A register is by definition fully contained in the single
1390 * reg_space it belongs to, so two registers with different reg_space ids are
1391 * guaranteed not to overlap. Most register files are a single reg_space of
1392 * its own, only the VGRF and ATTR files are composed of multiple discrete
1393 * address spaces, one for each allocation and input attribute respectively.
1394 */
1395 static inline uint32_t
reg_space(const brw_reg & r)1396 reg_space(const brw_reg &r)
1397 {
1398 return r.file << 16 | (r.file == VGRF || r.file == ATTR ? r.nr : 0);
1399 }
1400
1401 /**
1402 * Return the base offset in bytes of a register relative to the start of its
1403 * reg_space().
1404 */
1405 static inline unsigned
reg_offset(const brw_reg & r)1406 reg_offset(const brw_reg &r)
1407 {
1408 return (r.file == ADDRESS || r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
1409 (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
1410 (r.file == ADDRESS || r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
1411 }
1412
1413 /**
1414 * Return the amount of padding in bytes left unused between individual
1415 * components of register \p r due to a (horizontal) stride value greater than
1416 * one, or zero if components are tightly packed in the register file.
1417 */
1418 static inline unsigned
reg_padding(const brw_reg & r)1419 reg_padding(const brw_reg &r)
1420 {
1421 const unsigned stride = ((r.file != ADDRESS &&
1422 r.file != ARF &&
1423 r.file != FIXED_GRF) ? r.stride :
1424 r.hstride == 0 ? 0 :
1425 1 << (r.hstride - 1));
1426 return (MAX2(1, stride) - 1) * brw_type_size_bytes(r.type);
1427 }
1428
1429 /**
1430 * Return whether the register region starting at \p r and spanning \p dr
1431 * bytes could potentially overlap the register region starting at \p s and
1432 * spanning \p ds bytes.
1433 */
1434 static inline bool
regions_overlap(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1435 regions_overlap(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1436 {
1437 if (r.file != s.file)
1438 return false;
1439
1440 if (r.file == VGRF) {
1441 return r.nr == s.nr &&
1442 !(r.offset + dr <= s.offset || s.offset + ds <= r.offset);
1443 } else {
1444 return !(reg_offset(r) + dr <= reg_offset(s) ||
1445 reg_offset(s) + ds <= reg_offset(r));
1446 }
1447 }
1448
1449 /**
1450 * Check that the register region given by r [r.offset, r.offset + dr[
1451 * is fully contained inside the register region given by s
1452 * [s.offset, s.offset + ds[.
1453 */
1454 static inline bool
region_contained_in(const brw_reg & r,unsigned dr,const brw_reg & s,unsigned ds)1455 region_contained_in(const brw_reg &r, unsigned dr, const brw_reg &s, unsigned ds)
1456 {
1457 return reg_space(r) == reg_space(s) &&
1458 reg_offset(r) >= reg_offset(s) &&
1459 reg_offset(r) + dr <= reg_offset(s) + ds;
1460 }
1461
1462 /**
1463 * Return whether the given register region is n-periodic, i.e. whether the
1464 * original region remains invariant after shifting it by \p n scalar
1465 * channels.
1466 */
1467 static inline bool
is_periodic(const brw_reg & reg,unsigned n)1468 is_periodic(const brw_reg ®, unsigned n)
1469 {
1470 if (reg.file == BAD_FILE || reg.is_null()) {
1471 return true;
1472
1473 } else if (reg.file == IMM) {
1474 const unsigned period = (reg.type == BRW_TYPE_UV ||
1475 reg.type == BRW_TYPE_V ? 8 :
1476 reg.type == BRW_TYPE_VF ? 4 :
1477 1);
1478 return n % period == 0;
1479
1480 } else if (reg.file == ADDRESS || reg.file == ARF || reg.file == FIXED_GRF) {
1481 const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
1482 reg.vstride == 0 ? 1 << reg.width :
1483 ~0);
1484 return n % period == 0;
1485
1486 } else {
1487 return reg.stride == 0;
1488 }
1489 }
1490
1491 static inline bool
is_uniform(const brw_reg & reg)1492 is_uniform(const brw_reg ®)
1493 {
1494 return is_periodic(reg, 1);
1495 }
1496
1497 /**
1498 * Get the specified 8-component quarter of a register.
1499 */
1500 static inline brw_reg
quarter(const brw_reg & reg,unsigned idx)1501 quarter(const brw_reg ®, unsigned idx)
1502 {
1503 assert(idx < 4);
1504 return horiz_offset(reg, 8 * idx);
1505 }
1506
1507 static inline brw_reg
horiz_stride(brw_reg reg,unsigned s)1508 horiz_stride(brw_reg reg, unsigned s)
1509 {
1510 reg.stride *= s;
1511 return reg;
1512 }
1513
/* Default-constructed register constant.
 * NOTE(review): presumably stands for "no register" (BAD_FILE) through
 * brw_reg's default constructor; the constructor is not visible in this
 * part of the header, so confirm.
 */
static const brw_reg reg_undef;
1515
1516 /*
1517 * Return the stride between channels of the specified register in
1518 * byte units, or ~0u if the region cannot be represented with a
1519 * single one-dimensional stride.
1520 */
1521 static inline unsigned
byte_stride(const brw_reg & reg)1522 byte_stride(const brw_reg ®)
1523 {
1524 switch (reg.file) {
1525 case BAD_FILE:
1526 case UNIFORM:
1527 case IMM:
1528 case VGRF:
1529 case ATTR:
1530 return reg.stride * brw_type_size_bytes(reg.type);
1531 case ADDRESS:
1532 case ARF:
1533 case FIXED_GRF:
1534 if (reg.is_null()) {
1535 return 0;
1536 } else {
1537 const unsigned hstride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
1538 const unsigned vstride = reg.vstride ? 1 << (reg.vstride - 1) : 0;
1539 const unsigned width = 1 << reg.width;
1540
1541 if (width == 1) {
1542 return vstride * brw_type_size_bytes(reg.type);
1543 } else if (hstride * width == vstride) {
1544 return hstride * brw_type_size_bytes(reg.type);
1545 } else {
1546 return ~0u;
1547 }
1548 }
1549 default:
1550 unreachable("Invalid register file");
1551 }
1552 }
1553
1554 #endif /* __cplusplus */
1555