1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 /** @file elk_reg.h
33 *
34 * This file defines struct elk_reg, which is our representation for EU
35 * registers. They're not a hardware specific format, just an abstraction
36 * that intends to capture the full flexibility of the hardware registers.
37 *
38 * The elk_eu_emit.c layer's elk_set_dest/elk_set_src[01] functions encode
39 * the abstract elk_reg type into the actual hardware instruction encoding.
40 */
41
42 #ifndef ELK_REG_H
43 #define ELK_REG_H
44
45 #include <stdbool.h>
46 #include "util/compiler.h"
47 #include "util/glheader.h"
48 #include "util/macros.h"
49 #include "util/rounding.h"
50 #include "util/u_math.h"
51 #include "elk_eu_defines.h"
52 #include "elk_reg_type.h"
53
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57
58 struct intel_device_info;
59
/** Size of general purpose register space in REG_SIZE units */
#define ELK_MAX_GRF 128
/**
 * Exclusive upper bound on GRF numbers accepted by the assertion in
 * elk_reg().  NOTE(review): presumably the Xe2 GRF count expressed in the
 * same REG_SIZE units -- confirm.
 */
#define XE2_MAX_GRF 256

/**
 * First GRF used for the MRF hack.
 *
 * On gfx7, MRFs are no longer used, and contiguous GRFs are used instead.  We
 * haven't converted our compiler to be aware of this, so it asks for MRFs and
 * elk_eu_emit.c quietly converts them to be accesses of the top GRFs.  The
 * register allocators have to be careful of this to avoid corrupting the "MRF"s
 * with actual GRF allocations.
 */
#define GFX7_MRF_HACK_START 112
74
/**
 * Hardware swizzle component selectors.
 *
 * Only X, Y, Z and W are defined, so that every selector fits in 2 bits
 * (see ELK_SWIZZLE4() and ELK_GET_SWZ()).
 */
#define ELK_SWIZZLE_X 0
#define ELK_SWIZZLE_Y 1
#define ELK_SWIZZLE_Z 2
#define ELK_SWIZZLE_W 3
83
/**
 * Number of message register file registers: 24 when \p gen is 6, 16
 * otherwise.
 *
 * The argument is parenthesized so that expression arguments (e.g. a
 * conditional or a bitwise-or) are compared as a whole.
 */
#define ELK_MAX_MRF(gen) ((gen) == 6 ? 24 : 16)
86
/* Pack four 2-bit component selectors into one value: a occupies bits 1:0,
 * b bits 3:2, c bits 5:4 and d bits 7:6.
 */
#define ELK_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
/* Extract the 2-bit selector stored for channel idx of a packed swizzle. */
#define ELK_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Named swizzles; the four letters give the selectors for channels 0..3. */
#define ELK_SWIZZLE_NOOP ELK_SWIZZLE4(0,1,2,3)
#define ELK_SWIZZLE_XYZW ELK_SWIZZLE4(0,1,2,3)
#define ELK_SWIZZLE_XXXX ELK_SWIZZLE4(0,0,0,0)
#define ELK_SWIZZLE_YYYY ELK_SWIZZLE4(1,1,1,1)
#define ELK_SWIZZLE_ZZZZ ELK_SWIZZLE4(2,2,2,2)
#define ELK_SWIZZLE_WWWW ELK_SWIZZLE4(3,3,3,3)
#define ELK_SWIZZLE_XYXY ELK_SWIZZLE4(0,1,0,1)
#define ELK_SWIZZLE_YXYX ELK_SWIZZLE4(1,0,1,0)
#define ELK_SWIZZLE_XZXZ ELK_SWIZZLE4(0,2,0,2)
#define ELK_SWIZZLE_YZXW ELK_SWIZZLE4(1,2,0,3)
#define ELK_SWIZZLE_YWYW ELK_SWIZZLE4(1,3,1,3)
#define ELK_SWIZZLE_ZXYW ELK_SWIZZLE4(2,0,1,3)
#define ELK_SWIZZLE_ZWZW ELK_SWIZZLE4(2,3,2,3)
#define ELK_SWIZZLE_WZWZ ELK_SWIZZLE4(3,2,3,2)
#define ELK_SWIZZLE_WZYX ELK_SWIZZLE4(3,2,1,0)
#define ELK_SWIZZLE_XXZZ ELK_SWIZZLE4(0,0,2,2)
#define ELK_SWIZZLE_YYWW ELK_SWIZZLE4(1,1,3,3)
#define ELK_SWIZZLE_YXWZ ELK_SWIZZLE4(1,0,3,2)

/* The identity swizzle shifted right (INPUT) or left (OUTPUT) by comp
 * selector positions, i.e. by comp*2 bits.  The result is not masked to
 * 8 bits here.
 */
#define ELK_SWZ_COMP_INPUT(comp) (ELK_SWIZZLE_XYZW >> ((comp)*2))
#define ELK_SWZ_COMP_OUTPUT(comp) (ELK_SWIZZLE_XYZW << ((comp)*2))
111
/**
 * Return true when \p swiz replicates one component into all four channels,
 * i.e. it equals ELK_SWIZZLE_XXXX, _YYYY, _ZZZZ or _WWWW.
 */
static inline bool
elk_is_single_value_swizzle(unsigned swiz)
{
   /* Broadcast the selector of channel 0 and compare against the input. */
   const unsigned first = ELK_GET_SWZ(swiz, 0);

   return swiz == ELK_SWIZZLE4(first, first, first, first);
}
120
/**
 * Compose two swizzles: the returned swizzle is equivalent to applying
 * \p swz0 on the result of \p swz1.  The argument order follows the usual
 * function-composition convention.
 */
static inline unsigned
elk_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      /* swz0 picks which channel of swz1's output feeds this channel. */
      const unsigned inner = ELK_GET_SWZ(swz0, chan);

      composed |= ELK_GET_SWZ(swz1, inner) << (chan * 2);
   }

   return composed;
}
135
/**
 * Shuffle the bits of \p mask through swizzle \p swz (the image of the
 * mask): bit i of the result is set when the mask bit selected by channel i
 * of \p swz is set.
 */
static inline unsigned
elk_apply_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned shuffled = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned src = ELK_GET_SWZ(swz, chan);

      shuffled |= ((mask >> src) & 1u) << chan;
   }

   return shuffled;
}
152
/**
 * Shuffle the bits of \p mask through the inverse of swizzle \p swz (the
 * preimage of the mask).  Useful to find out which components of a swizzled
 * source are read, given the instruction writemask.
 */
static inline unsigned
elk_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned preimage = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      /* An enabled channel marks the component its selector points at. */
      const unsigned enabled = (mask >> chan) & 1u;

      preimage |= enabled << ELK_GET_SWZ(swz, chan);
   }

   return preimage;
}
170
/**
 * Build an identity swizzle restricted to the channels enabled in \p mask.
 * Provided \p mask is non-zero, the result only references enabled channels.
 * For any instruction OP and any mask,
 *
 *    elk_OP(p, elk_writemask(dst, mask),
 *           elk_swizzle(src, elk_swizzle_for_mask(mask)));
 *
 * is equivalent to the unswizzled form:
 *
 *    elk_OP(p, elk_writemask(dst, mask), src);
 */
static inline unsigned
elk_swizzle_for_mask(unsigned mask)
{
   /* Disabled channels repeat the most recent enabled one; channels that
    * precede the first enabled channel use that first enabled channel.
    */
   unsigned fill = (mask ? ffs(mask) - 1 : 0);
   unsigned packed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
         fill = chan;

      packed |= fill << (chan * 2);
   }

   return packed;
}
195
/**
 * Build an identity swizzle covering only the first \p n components of a
 * vector.  When just a subset of a vec4 is used we avoid referencing the
 * remaining channels, since doing so would tell optimization passes that
 * those channels are live.
 */
static inline unsigned
elk_swizzle_for_size(unsigned n)
{
   /* Low n bits set: channels 0 .. n-1 enabled. */
   const unsigned low_channels = (1 << n) - 1;

   return elk_swizzle_for_mask(low_channels);
}
207
/**
 * Converse of elk_swizzle_for_mask(): return the mask of components that
 * the swizzle \p swz accesses.
 */
static inline unsigned
elk_mask_for_swizzle(unsigned swz)
{
   /* Preimage of a mask with every channel enabled. */
   const unsigned every_channel = ~0u;

   return elk_apply_inv_swizzle_to_mask(swz, every_channel);
}
217
/* Declared here, defined in another translation unit.
 * NOTE(review): presumably applies swizzle swz to the immediate bits x
 * interpreted as type -- confirm against the definition.
 */
uint32_t elk_swizzle_immediate(enum elk_reg_type type, uint32_t x, unsigned swz);

/* Bytes per register: byte_offset() converts between register numbers and
 * byte offsets using this value (8 * 4 = 32).
 */
#define REG_SIZE (8*4)
221
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 */
struct elk_reg {
   /* First word: the bitfields below (their widths sum to 32) overlaid with
    * `bits`, which elk_regs_equal() uses to compare them all at once.
    */
   union {
      struct {
         enum elk_reg_type type:4;
         enum elk_reg_file file:3;      /* :2 hardware format */
         unsigned negate:1;             /* source only */
         unsigned abs:1;                /* source only */
         unsigned address_mode:1;       /* relative addressing, hopefully! */
         unsigned pad0:17;              /* zeroed by elk_reg() */
         unsigned subnr:5;              /* :1 in align16 */
      };
      uint32_t bits;
   };

   /* Second part: either register addressing/region fields, or an immediate
    * value of one of the types below.  elk_regs_equal() compares it through
    * `u64`.
    */
   union {
      struct {
         unsigned nr;
         unsigned swizzle:8;          /* src only, align16 only */
         unsigned writemask:4;        /* dest only, align16 only  */
         int  indirect_offset:10;     /* relative addressing offset */
         unsigned vstride:4;          /* source only */
         unsigned width:3;            /* src only, align1 only */
         unsigned hstride:2;          /* align1 only */
         unsigned pad1:1;             /* zeroed by elk_reg() */
      };

      /* Immediate payloads, see the elk_imm_*() constructors. */
      double df;
      uint64_t u64;
      int64_t d64;
      float f;
      int   d;
      unsigned ud;
   };
};
262
263 static inline unsigned
phys_nr(const struct intel_device_info * devinfo,const struct elk_reg reg)264 phys_nr(const struct intel_device_info *devinfo, const struct elk_reg reg)
265 {
266 if (devinfo->ver >= 20) {
267 if (reg.file == ELK_GENERAL_REGISTER_FILE)
268 return reg.nr / 2;
269 else if (reg.file == ELK_ARCHITECTURE_REGISTER_FILE &&
270 reg.nr >= ELK_ARF_ACCUMULATOR &&
271 reg.nr < ELK_ARF_FLAG)
272 return ELK_ARF_ACCUMULATOR + (reg.nr - ELK_ARF_ACCUMULATOR) / 2;
273 else
274 return reg.nr;
275 } else {
276 return reg.nr;
277 }
278 }
279
280 static inline unsigned
phys_subnr(const struct intel_device_info * devinfo,const struct elk_reg reg)281 phys_subnr(const struct intel_device_info *devinfo, const struct elk_reg reg)
282 {
283 if (devinfo->ver >= 20) {
284 if (reg.file == ELK_GENERAL_REGISTER_FILE ||
285 (reg.file == ELK_ARCHITECTURE_REGISTER_FILE &&
286 reg.nr >= ELK_ARF_ACCUMULATOR &&
287 reg.nr < ELK_ARF_FLAG))
288 return (reg.nr & 1) * REG_SIZE + reg.subnr;
289 else
290 return reg.subnr;
291 } else {
292 return reg.subnr;
293 }
294 }
295
296 static inline bool
elk_regs_equal(const struct elk_reg * a,const struct elk_reg * b)297 elk_regs_equal(const struct elk_reg *a, const struct elk_reg *b)
298 {
299 return a->bits == b->bits && a->u64 == b->u64;
300 }
301
302 static inline bool
elk_regs_negative_equal(const struct elk_reg * a,const struct elk_reg * b)303 elk_regs_negative_equal(const struct elk_reg *a, const struct elk_reg *b)
304 {
305 if (a->file == IMM) {
306 if (a->bits != b->bits)
307 return false;
308
309 switch ((enum elk_reg_type) a->type) {
310 case ELK_REGISTER_TYPE_UQ:
311 case ELK_REGISTER_TYPE_Q:
312 return a->d64 == -b->d64;
313 case ELK_REGISTER_TYPE_DF:
314 return a->df == -b->df;
315 case ELK_REGISTER_TYPE_UD:
316 case ELK_REGISTER_TYPE_D:
317 return a->d == -b->d;
318 case ELK_REGISTER_TYPE_F:
319 return a->f == -b->f;
320 case ELK_REGISTER_TYPE_VF:
321 /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
322 * of -0). There are occasions where 0 or -0 is used and the exact
323 * bit pattern is desired. At the very least, changing this to allow
324 * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
325 */
326 return a->ud == (b->ud ^ 0x80808080);
327 case ELK_REGISTER_TYPE_UW:
328 case ELK_REGISTER_TYPE_W:
329 case ELK_REGISTER_TYPE_UV:
330 case ELK_REGISTER_TYPE_V:
331 case ELK_REGISTER_TYPE_HF:
332 /* FINISHME: Implement support for these types once there is
333 * something in the compiler that can generate them. Until then,
334 * they cannot be tested.
335 */
336 return false;
337 case ELK_REGISTER_TYPE_UB:
338 case ELK_REGISTER_TYPE_B:
339 case ELK_REGISTER_TYPE_NF:
340 default:
341 unreachable("not reached");
342 }
343 } else {
344 struct elk_reg tmp = *a;
345
346 tmp.negate = !tmp.negate;
347
348 return elk_regs_equal(&tmp, b);
349 }
350 }
351
/* Packed description of an indirect (address-register relative) access.
 * The three fields add up to 32 bits.
 * NOTE(review): field meanings are inferred from their names -- confirm
 * against the users of this struct.
 */
struct elk_indirect {
   unsigned addr_subnr:4;   /* presumably the address sub-register number */
   int addr_offset:10;      /* signed offset, same width as elk_reg::indirect_offset */
   unsigned pad:18;
};
357
358
359 static inline unsigned
type_sz(unsigned type)360 type_sz(unsigned type)
361 {
362 switch(type) {
363 case ELK_REGISTER_TYPE_UQ:
364 case ELK_REGISTER_TYPE_Q:
365 case ELK_REGISTER_TYPE_DF:
366 case ELK_REGISTER_TYPE_NF:
367 return 8;
368 case ELK_REGISTER_TYPE_UD:
369 case ELK_REGISTER_TYPE_D:
370 case ELK_REGISTER_TYPE_F:
371 case ELK_REGISTER_TYPE_VF:
372 return 4;
373 case ELK_REGISTER_TYPE_UW:
374 case ELK_REGISTER_TYPE_W:
375 case ELK_REGISTER_TYPE_HF:
376 /* [U]V components are 4-bit, but HW unpacks them to 16-bit (2 bytes) */
377 case ELK_REGISTER_TYPE_UV:
378 case ELK_REGISTER_TYPE_V:
379 return 2;
380 case ELK_REGISTER_TYPE_UB:
381 case ELK_REGISTER_TYPE_B:
382 return 1;
383 default:
384 unreachable("not reached");
385 }
386 }
387
388 static inline enum elk_reg_type
get_exec_type(const enum elk_reg_type type)389 get_exec_type(const enum elk_reg_type type)
390 {
391 switch (type) {
392 case ELK_REGISTER_TYPE_B:
393 case ELK_REGISTER_TYPE_V:
394 return ELK_REGISTER_TYPE_W;
395 case ELK_REGISTER_TYPE_UB:
396 case ELK_REGISTER_TYPE_UV:
397 return ELK_REGISTER_TYPE_UW;
398 case ELK_REGISTER_TYPE_VF:
399 return ELK_REGISTER_TYPE_F;
400 default:
401 return type;
402 }
403 }
404
405 /**
406 * Return an integer type of the requested size and signedness.
407 */
408 static inline enum elk_reg_type
elk_int_type(unsigned sz,bool is_signed)409 elk_int_type(unsigned sz, bool is_signed)
410 {
411 switch (sz) {
412 case 1:
413 return (is_signed ? ELK_REGISTER_TYPE_B : ELK_REGISTER_TYPE_UB);
414 case 2:
415 return (is_signed ? ELK_REGISTER_TYPE_W : ELK_REGISTER_TYPE_UW);
416 case 4:
417 return (is_signed ? ELK_REGISTER_TYPE_D : ELK_REGISTER_TYPE_UD);
418 case 8:
419 return (is_signed ? ELK_REGISTER_TYPE_Q : ELK_REGISTER_TYPE_UQ);
420 default:
421 unreachable("Not reached.");
422 }
423 }
424
425 /**
426 * Construct a elk_reg.
427 * \param file one of the ELK_x_REGISTER_FILE values
428 * \param nr register number/index
429 * \param subnr register sub number
430 * \param negate register negate modifier
431 * \param abs register abs modifier
432 * \param type one of ELK_REGISTER_TYPE_x
433 * \param vstride one of ELK_VERTICAL_STRIDE_x
434 * \param width one of ELK_WIDTH_x
435 * \param hstride one of ELK_HORIZONTAL_STRIDE_x
436 * \param swizzle one of ELK_SWIZZLE_x
437 * \param writemask WRITEMASK_X/Y/Z/W bitfield
438 */
439 static inline struct elk_reg
elk_reg(enum elk_reg_file file,unsigned nr,unsigned subnr,unsigned negate,unsigned abs,enum elk_reg_type type,unsigned vstride,unsigned width,unsigned hstride,unsigned swizzle,unsigned writemask)440 elk_reg(enum elk_reg_file file,
441 unsigned nr,
442 unsigned subnr,
443 unsigned negate,
444 unsigned abs,
445 enum elk_reg_type type,
446 unsigned vstride,
447 unsigned width,
448 unsigned hstride,
449 unsigned swizzle,
450 unsigned writemask)
451 {
452 struct elk_reg reg;
453 if (file == ELK_GENERAL_REGISTER_FILE)
454 assert(nr < XE2_MAX_GRF);
455 else if (file == ELK_ARCHITECTURE_REGISTER_FILE)
456 assert(nr <= ELK_ARF_TIMESTAMP);
457 /* Asserting on the MRF register number requires to know the hardware gen
458 * (gfx6 has 24 MRF registers), which we don't know here, so we assert
459 * for that in the generators and in elk_eu_emit.c
460 */
461
462 reg.type = type;
463 reg.file = file;
464 reg.negate = negate;
465 reg.abs = abs;
466 reg.address_mode = ELK_ADDRESS_DIRECT;
467 reg.pad0 = 0;
468 reg.subnr = subnr * type_sz(type);
469 reg.nr = nr;
470
471 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
472 * set swizzle and writemask to W, as the lower bits of subnr will
473 * be lost when converted to align16. This is probably too much to
474 * keep track of as you'd want it adjusted by suboffset(), etc.
475 * Perhaps fix up when converting to align16?
476 */
477 reg.swizzle = swizzle;
478 reg.writemask = writemask;
479 reg.indirect_offset = 0;
480 reg.vstride = vstride;
481 reg.width = width;
482 reg.hstride = hstride;
483 reg.pad1 = 0;
484 return reg;
485 }
486
487 /** Construct float[16] register */
488 static inline struct elk_reg
elk_vec16_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)489 elk_vec16_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
490 {
491 return elk_reg(file,
492 nr,
493 subnr,
494 0,
495 0,
496 ELK_REGISTER_TYPE_F,
497 ELK_VERTICAL_STRIDE_16,
498 ELK_WIDTH_16,
499 ELK_HORIZONTAL_STRIDE_1,
500 ELK_SWIZZLE_XYZW,
501 WRITEMASK_XYZW);
502 }
503
504 /** Construct float[8] register */
505 static inline struct elk_reg
elk_vec8_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)506 elk_vec8_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
507 {
508 return elk_reg(file,
509 nr,
510 subnr,
511 0,
512 0,
513 ELK_REGISTER_TYPE_F,
514 ELK_VERTICAL_STRIDE_8,
515 ELK_WIDTH_8,
516 ELK_HORIZONTAL_STRIDE_1,
517 ELK_SWIZZLE_XYZW,
518 WRITEMASK_XYZW);
519 }
520
521 /** Construct float[4] register */
522 static inline struct elk_reg
elk_vec4_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)523 elk_vec4_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
524 {
525 return elk_reg(file,
526 nr,
527 subnr,
528 0,
529 0,
530 ELK_REGISTER_TYPE_F,
531 ELK_VERTICAL_STRIDE_4,
532 ELK_WIDTH_4,
533 ELK_HORIZONTAL_STRIDE_1,
534 ELK_SWIZZLE_XYZW,
535 WRITEMASK_XYZW);
536 }
537
538 /** Construct float[2] register */
539 static inline struct elk_reg
elk_vec2_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)540 elk_vec2_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
541 {
542 return elk_reg(file,
543 nr,
544 subnr,
545 0,
546 0,
547 ELK_REGISTER_TYPE_F,
548 ELK_VERTICAL_STRIDE_2,
549 ELK_WIDTH_2,
550 ELK_HORIZONTAL_STRIDE_1,
551 ELK_SWIZZLE_XYXY,
552 WRITEMASK_XY);
553 }
554
555 /** Construct float[1] register */
556 static inline struct elk_reg
elk_vec1_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)557 elk_vec1_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
558 {
559 return elk_reg(file,
560 nr,
561 subnr,
562 0,
563 0,
564 ELK_REGISTER_TYPE_F,
565 ELK_VERTICAL_STRIDE_0,
566 ELK_WIDTH_1,
567 ELK_HORIZONTAL_STRIDE_0,
568 ELK_SWIZZLE_XXXX,
569 WRITEMASK_X);
570 }
571
572 static inline struct elk_reg
elk_vecn_reg(unsigned width,enum elk_reg_file file,unsigned nr,unsigned subnr)573 elk_vecn_reg(unsigned width, enum elk_reg_file file,
574 unsigned nr, unsigned subnr)
575 {
576 switch (width) {
577 case 1:
578 return elk_vec1_reg(file, nr, subnr);
579 case 2:
580 return elk_vec2_reg(file, nr, subnr);
581 case 4:
582 return elk_vec4_reg(file, nr, subnr);
583 case 8:
584 return elk_vec8_reg(file, nr, subnr);
585 case 16:
586 return elk_vec16_reg(file, nr, subnr);
587 default:
588 unreachable("Invalid register width");
589 }
590 }
591
592 static inline struct elk_reg
retype(struct elk_reg reg,enum elk_reg_type type)593 retype(struct elk_reg reg, enum elk_reg_type type)
594 {
595 reg.type = type;
596 return reg;
597 }
598
599 static inline struct elk_reg
firsthalf(struct elk_reg reg)600 firsthalf(struct elk_reg reg)
601 {
602 return reg;
603 }
604
605 static inline struct elk_reg
sechalf(struct elk_reg reg)606 sechalf(struct elk_reg reg)
607 {
608 if (reg.vstride)
609 reg.nr++;
610 return reg;
611 }
612
613 static inline struct elk_reg
offset(struct elk_reg reg,unsigned delta)614 offset(struct elk_reg reg, unsigned delta)
615 {
616 reg.nr += delta;
617 return reg;
618 }
619
620
621 static inline struct elk_reg
byte_offset(struct elk_reg reg,unsigned bytes)622 byte_offset(struct elk_reg reg, unsigned bytes)
623 {
624 unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
625 reg.nr = newoffset / REG_SIZE;
626 reg.subnr = newoffset % REG_SIZE;
627 return reg;
628 }
629
630 static inline struct elk_reg
suboffset(struct elk_reg reg,unsigned delta)631 suboffset(struct elk_reg reg, unsigned delta)
632 {
633 return byte_offset(reg, delta * type_sz(reg.type));
634 }
635
636 /** Construct unsigned word[16] register */
637 static inline struct elk_reg
elk_uw16_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)638 elk_uw16_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
639 {
640 return suboffset(retype(elk_vec16_reg(file, nr, 0), ELK_REGISTER_TYPE_UW), subnr);
641 }
642
643 /** Construct unsigned word[8] register */
644 static inline struct elk_reg
elk_uw8_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)645 elk_uw8_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
646 {
647 return suboffset(retype(elk_vec8_reg(file, nr, 0), ELK_REGISTER_TYPE_UW), subnr);
648 }
649
650 /** Construct unsigned word[1] register */
651 static inline struct elk_reg
elk_uw1_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)652 elk_uw1_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
653 {
654 return suboffset(retype(elk_vec1_reg(file, nr, 0), ELK_REGISTER_TYPE_UW), subnr);
655 }
656
657 static inline struct elk_reg
elk_ud8_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)658 elk_ud8_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
659 {
660 return retype(elk_vec8_reg(file, nr, subnr), ELK_REGISTER_TYPE_UD);
661 }
662
663 static inline struct elk_reg
elk_ud1_reg(enum elk_reg_file file,unsigned nr,unsigned subnr)664 elk_ud1_reg(enum elk_reg_file file, unsigned nr, unsigned subnr)
665 {
666 return retype(elk_vec1_reg(file, nr, subnr), ELK_REGISTER_TYPE_UD);
667 }
668
669 static inline struct elk_reg
elk_imm_reg(enum elk_reg_type type)670 elk_imm_reg(enum elk_reg_type type)
671 {
672 return elk_reg(ELK_IMMEDIATE_VALUE,
673 0,
674 0,
675 0,
676 0,
677 type,
678 ELK_VERTICAL_STRIDE_0,
679 ELK_WIDTH_1,
680 ELK_HORIZONTAL_STRIDE_0,
681 0,
682 0);
683 }
684
685 /** Construct float immediate register */
686 static inline struct elk_reg
elk_imm_df(double df)687 elk_imm_df(double df)
688 {
689 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_DF);
690 imm.df = df;
691 return imm;
692 }
693
694 static inline struct elk_reg
elk_imm_u64(uint64_t u64)695 elk_imm_u64(uint64_t u64)
696 {
697 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_UQ);
698 imm.u64 = u64;
699 return imm;
700 }
701
702 static inline struct elk_reg
elk_imm_f(float f)703 elk_imm_f(float f)
704 {
705 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_F);
706 imm.f = f;
707 return imm;
708 }
709
710 /** Construct int64_t immediate register */
711 static inline struct elk_reg
elk_imm_q(int64_t q)712 elk_imm_q(int64_t q)
713 {
714 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_Q);
715 imm.d64 = q;
716 return imm;
717 }
718
719 /** Construct int64_t immediate register */
720 static inline struct elk_reg
elk_imm_uq(uint64_t uq)721 elk_imm_uq(uint64_t uq)
722 {
723 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_UQ);
724 imm.u64 = uq;
725 return imm;
726 }
727
728 /** Construct integer immediate register */
729 static inline struct elk_reg
elk_imm_d(int d)730 elk_imm_d(int d)
731 {
732 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_D);
733 imm.d = d;
734 return imm;
735 }
736
737 /** Construct uint immediate register */
738 static inline struct elk_reg
elk_imm_ud(unsigned ud)739 elk_imm_ud(unsigned ud)
740 {
741 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_UD);
742 imm.ud = ud;
743 return imm;
744 }
745
746 /** Construct ushort immediate register */
747 static inline struct elk_reg
elk_imm_uw(uint16_t uw)748 elk_imm_uw(uint16_t uw)
749 {
750 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_UW);
751 imm.ud = uw | (uw << 16);
752 return imm;
753 }
754
755 /** Construct short immediate register */
756 static inline struct elk_reg
elk_imm_w(int16_t w)757 elk_imm_w(int16_t w)
758 {
759 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_W);
760 imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
761 return imm;
762 }
763
764 /* elk_imm_b and elk_imm_ub aren't supported by hardware - the type
765 * numbers alias with _V and _VF below:
766 */
767
768 /** Construct vector of eight signed half-byte values */
769 static inline struct elk_reg
elk_imm_v(unsigned v)770 elk_imm_v(unsigned v)
771 {
772 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_V);
773 imm.ud = v;
774 return imm;
775 }
776
777 /** Construct vector of eight unsigned half-byte values */
778 static inline struct elk_reg
elk_imm_uv(unsigned uv)779 elk_imm_uv(unsigned uv)
780 {
781 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_UV);
782 imm.ud = uv;
783 return imm;
784 }
785
786 /** Construct vector of four 8-bit float values */
787 static inline struct elk_reg
elk_imm_vf(unsigned v)788 elk_imm_vf(unsigned v)
789 {
790 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_VF);
791 imm.ud = v;
792 return imm;
793 }
794
795 static inline struct elk_reg
elk_imm_vf4(unsigned v0,unsigned v1,unsigned v2,unsigned v3)796 elk_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
797 {
798 struct elk_reg imm = elk_imm_reg(ELK_REGISTER_TYPE_VF);
799 imm.vstride = ELK_VERTICAL_STRIDE_0;
800 imm.width = ELK_WIDTH_4;
801 imm.hstride = ELK_HORIZONTAL_STRIDE_1;
802 imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
803 return imm;
804 }
805
806
807 static inline struct elk_reg
elk_address(struct elk_reg reg)808 elk_address(struct elk_reg reg)
809 {
810 return elk_imm_uw(reg.nr * REG_SIZE + reg.subnr);
811 }
812
813 /** Construct float[1] general-purpose register */
814 static inline struct elk_reg
elk_vec1_grf(unsigned nr,unsigned subnr)815 elk_vec1_grf(unsigned nr, unsigned subnr)
816 {
817 return elk_vec1_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
818 }
819
820 static inline struct elk_reg
xe2_vec1_grf(unsigned nr,unsigned subnr)821 xe2_vec1_grf(unsigned nr, unsigned subnr)
822 {
823 return elk_vec1_reg(ELK_GENERAL_REGISTER_FILE, 2 * nr + subnr / 8, subnr % 8);
824 }
825
826 /** Construct float[2] general-purpose register */
827 static inline struct elk_reg
elk_vec2_grf(unsigned nr,unsigned subnr)828 elk_vec2_grf(unsigned nr, unsigned subnr)
829 {
830 return elk_vec2_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
831 }
832
833 static inline struct elk_reg
xe2_vec2_grf(unsigned nr,unsigned subnr)834 xe2_vec2_grf(unsigned nr, unsigned subnr)
835 {
836 return elk_vec2_reg(ELK_GENERAL_REGISTER_FILE, 2 * nr + subnr / 8, subnr % 8);
837 }
838
839 /** Construct float[4] general-purpose register */
840 static inline struct elk_reg
elk_vec4_grf(unsigned nr,unsigned subnr)841 elk_vec4_grf(unsigned nr, unsigned subnr)
842 {
843 return elk_vec4_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
844 }
845
846 static inline struct elk_reg
xe2_vec4_grf(unsigned nr,unsigned subnr)847 xe2_vec4_grf(unsigned nr, unsigned subnr)
848 {
849 return elk_vec4_reg(ELK_GENERAL_REGISTER_FILE, 2 * nr + subnr / 8, subnr % 8);
850 }
851
852 /** Construct float[8] general-purpose register */
853 static inline struct elk_reg
elk_vec8_grf(unsigned nr,unsigned subnr)854 elk_vec8_grf(unsigned nr, unsigned subnr)
855 {
856 return elk_vec8_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
857 }
858
859 static inline struct elk_reg
xe2_vec8_grf(unsigned nr,unsigned subnr)860 xe2_vec8_grf(unsigned nr, unsigned subnr)
861 {
862 return elk_vec8_reg(ELK_GENERAL_REGISTER_FILE, 2 * nr + subnr / 8, subnr % 8);
863 }
864
865 /** Construct float[16] general-purpose register */
866 static inline struct elk_reg
elk_vec16_grf(unsigned nr,unsigned subnr)867 elk_vec16_grf(unsigned nr, unsigned subnr)
868 {
869 return elk_vec16_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
870 }
871
872 static inline struct elk_reg
xe2_vec16_grf(unsigned nr,unsigned subnr)873 xe2_vec16_grf(unsigned nr, unsigned subnr)
874 {
875 return elk_vec16_reg(ELK_GENERAL_REGISTER_FILE, 2 * nr + subnr / 8, subnr % 8);
876 }
877
878 static inline struct elk_reg
elk_vecn_grf(unsigned width,unsigned nr,unsigned subnr)879 elk_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
880 {
881 return elk_vecn_reg(width, ELK_GENERAL_REGISTER_FILE, nr, subnr);
882 }
883
884 static inline struct elk_reg
xe2_vecn_grf(unsigned width,unsigned nr,unsigned subnr)885 xe2_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
886 {
887 return elk_vecn_reg(width, ELK_GENERAL_REGISTER_FILE, nr + subnr / 8, subnr % 8);
888 }
889
890 static inline struct elk_reg
elk_uw1_grf(unsigned nr,unsigned subnr)891 elk_uw1_grf(unsigned nr, unsigned subnr)
892 {
893 return elk_uw1_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
894 }
895
896 static inline struct elk_reg
elk_uw8_grf(unsigned nr,unsigned subnr)897 elk_uw8_grf(unsigned nr, unsigned subnr)
898 {
899 return elk_uw8_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
900 }
901
902 static inline struct elk_reg
elk_uw16_grf(unsigned nr,unsigned subnr)903 elk_uw16_grf(unsigned nr, unsigned subnr)
904 {
905 return elk_uw16_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
906 }
907
908 static inline struct elk_reg
elk_ud8_grf(unsigned nr,unsigned subnr)909 elk_ud8_grf(unsigned nr, unsigned subnr)
910 {
911 return elk_ud8_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
912 }
913
914 static inline struct elk_reg
elk_ud1_grf(unsigned nr,unsigned subnr)915 elk_ud1_grf(unsigned nr, unsigned subnr)
916 {
917 return elk_ud1_reg(ELK_GENERAL_REGISTER_FILE, nr, subnr);
918 }
919
920
921 /** Construct null register (usually used for setting condition codes) */
922 static inline struct elk_reg
elk_null_reg(void)923 elk_null_reg(void)
924 {
925 return elk_vec8_reg(ELK_ARCHITECTURE_REGISTER_FILE, ELK_ARF_NULL, 0);
926 }
927
928 static inline struct elk_reg
elk_null_vec(unsigned width)929 elk_null_vec(unsigned width)
930 {
931 return elk_vecn_reg(width, ELK_ARCHITECTURE_REGISTER_FILE, ELK_ARF_NULL, 0);
932 }
933
934 static inline struct elk_reg
elk_address_reg(unsigned subnr)935 elk_address_reg(unsigned subnr)
936 {
937 return elk_uw1_reg(ELK_ARCHITECTURE_REGISTER_FILE, ELK_ARF_ADDRESS, subnr);
938 }
939
940 static inline struct elk_reg
elk_tdr_reg(void)941 elk_tdr_reg(void)
942 {
943 return elk_uw1_reg(ELK_ARCHITECTURE_REGISTER_FILE, ELK_ARF_TDR, 0);
944 }
945
946 /* If/else instructions break in align16 mode if writemask & swizzle
947 * aren't xyzw. This goes against the convention for other scalar
948 * regs:
949 */
950 static inline struct elk_reg
elk_ip_reg(void)951 elk_ip_reg(void)
952 {
953 return elk_reg(ELK_ARCHITECTURE_REGISTER_FILE,
954 ELK_ARF_IP,
955 0,
956 0,
957 0,
958 ELK_REGISTER_TYPE_UD,
959 ELK_VERTICAL_STRIDE_4, /* ? */
960 ELK_WIDTH_1,
961 ELK_HORIZONTAL_STRIDE_0,
962 ELK_SWIZZLE_XYZW, /* NOTE! */
963 WRITEMASK_XYZW); /* NOTE! */
964 }
965
966 static inline struct elk_reg
elk_notification_reg(void)967 elk_notification_reg(void)
968 {
969 return elk_reg(ELK_ARCHITECTURE_REGISTER_FILE,
970 ELK_ARF_NOTIFICATION_COUNT,
971 0,
972 0,
973 0,
974 ELK_REGISTER_TYPE_UD,
975 ELK_VERTICAL_STRIDE_0,
976 ELK_WIDTH_1,
977 ELK_HORIZONTAL_STRIDE_0,
978 ELK_SWIZZLE_XXXX,
979 WRITEMASK_X);
980 }
981
982 static inline struct elk_reg
elk_cr0_reg(unsigned subnr)983 elk_cr0_reg(unsigned subnr)
984 {
985 return elk_ud1_reg(ELK_ARCHITECTURE_REGISTER_FILE, ELK_ARF_CONTROL, subnr);
986 }
987
988 static inline struct elk_reg
elk_sr0_reg(unsigned subnr)989 elk_sr0_reg(unsigned subnr)
990 {
991 return elk_ud1_reg(ELK_ARCHITECTURE_REGISTER_FILE, ELK_ARF_STATE, subnr);
992 }
993
994 static inline struct elk_reg
elk_acc_reg(unsigned width)995 elk_acc_reg(unsigned width)
996 {
997 return elk_vecn_reg(width, ELK_ARCHITECTURE_REGISTER_FILE,
998 ELK_ARF_ACCUMULATOR, 0);
999 }
1000
1001 static inline struct elk_reg
elk_flag_reg(int reg,int subreg)1002 elk_flag_reg(int reg, int subreg)
1003 {
1004 return elk_uw1_reg(ELK_ARCHITECTURE_REGISTER_FILE,
1005 ELK_ARF_FLAG + reg, subreg);
1006 }
1007
1008 static inline struct elk_reg
elk_flag_subreg(unsigned subreg)1009 elk_flag_subreg(unsigned subreg)
1010 {
1011 return elk_uw1_reg(ELK_ARCHITECTURE_REGISTER_FILE,
1012 ELK_ARF_FLAG + subreg / 2, subreg % 2);
1013 }
1014
1015 /**
1016 * Return the mask register present in Gfx4-5, or the related register present
1017 * in Gfx7.5 and later hardware referred to as "channel enable" register in
1018 * the documentation.
1019 */
1020 static inline struct elk_reg
elk_mask_reg(unsigned subnr)1021 elk_mask_reg(unsigned subnr)
1022 {
1023 return elk_uw1_reg(ELK_ARCHITECTURE_REGISTER_FILE, ELK_ARF_MASK, subnr);
1024 }
1025
1026 static inline struct elk_reg
elk_vmask_reg()1027 elk_vmask_reg()
1028 {
1029 return elk_sr0_reg(3);
1030 }
1031
1032 static inline struct elk_reg
elk_dmask_reg()1033 elk_dmask_reg()
1034 {
1035 return elk_sr0_reg(2);
1036 }
1037
1038 static inline struct elk_reg
elk_mask_stack_reg(unsigned subnr)1039 elk_mask_stack_reg(unsigned subnr)
1040 {
1041 return suboffset(retype(elk_vec16_reg(ELK_ARCHITECTURE_REGISTER_FILE,
1042 ELK_ARF_MASK_STACK, 0),
1043 ELK_REGISTER_TYPE_UB), subnr);
1044 }
1045
1046 static inline struct elk_reg
elk_mask_stack_depth_reg(unsigned subnr)1047 elk_mask_stack_depth_reg(unsigned subnr)
1048 {
1049 return elk_uw1_reg(ELK_ARCHITECTURE_REGISTER_FILE,
1050 ELK_ARF_MASK_STACK_DEPTH, subnr);
1051 }
1052
1053 static inline struct elk_reg
elk_message_reg(unsigned nr)1054 elk_message_reg(unsigned nr)
1055 {
1056 return elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, nr, 0);
1057 }
1058
1059 static inline struct elk_reg
elk_uvec_mrf(unsigned width,unsigned nr,unsigned subnr)1060 elk_uvec_mrf(unsigned width, unsigned nr, unsigned subnr)
1061 {
1062 return retype(elk_vecn_reg(width, ELK_MESSAGE_REGISTER_FILE, nr, subnr),
1063 ELK_REGISTER_TYPE_UD);
1064 }
1065
/* Translate an element count or stride into its encoded form: 0 stays 0
 * and a power of two 2^k (up to 32) becomes k + 1.  Any other value is
 * encoded as 0.
 *
 * Callers nearly always pass a numeric constant, so this is kept as a
 * plain switch that is trivial to fold at compile time.
 */
static inline unsigned cvt(unsigned val)
{
   unsigned encoded;

   switch (val) {
   case 1:
      encoded = 1;
      break;
   case 2:
      encoded = 2;
      break;
   case 4:
      encoded = 3;
      break;
   case 8:
      encoded = 4;
      break;
   case 16:
      encoded = 5;
      break;
   case 32:
      encoded = 6;
      break;
   default:
      /* Covers 0 as well as every unsupported value. */
      encoded = 0;
      break;
   }

   return encoded;
}
1082
1083 static inline struct elk_reg
stride(struct elk_reg reg,unsigned vstride,unsigned width,unsigned hstride)1084 stride(struct elk_reg reg, unsigned vstride, unsigned width, unsigned hstride)
1085 {
1086 reg.vstride = cvt(vstride);
1087 reg.width = cvt(width) - 1;
1088 reg.hstride = cvt(hstride);
1089 return reg;
1090 }
1091
1092 /**
1093 * Multiply the vertical and horizontal stride of a register by the given
1094 * factor \a s.
1095 */
1096 static inline struct elk_reg
spread(struct elk_reg reg,unsigned s)1097 spread(struct elk_reg reg, unsigned s)
1098 {
1099 if (s) {
1100 assert(util_is_power_of_two_nonzero(s));
1101
1102 if (reg.hstride)
1103 reg.hstride += cvt(s) - 1;
1104
1105 if (reg.vstride)
1106 reg.vstride += cvt(s) - 1;
1107
1108 return reg;
1109 } else {
1110 return stride(reg, 0, 1, 0);
1111 }
1112 }
1113
1114 /**
1115 * Reinterpret each channel of register \p reg as a vector of values of the
1116 * given smaller type and take the i-th subcomponent from each.
1117 */
1118 static inline struct elk_reg
subscript(struct elk_reg reg,enum elk_reg_type type,unsigned i)1119 subscript(struct elk_reg reg, enum elk_reg_type type, unsigned i)
1120 {
1121 unsigned scale = type_sz(reg.type) / type_sz(type);
1122 assert(scale >= 1 && i < scale);
1123
1124 if (reg.file == IMM) {
1125 unsigned bit_size = type_sz(type) * 8;
1126 reg.u64 >>= i * bit_size;
1127 reg.u64 &= BITFIELD64_MASK(bit_size);
1128 if (bit_size <= 16)
1129 reg.u64 |= reg.u64 << 16;
1130 return retype(reg, type);
1131 }
1132
1133 return suboffset(retype(spread(reg, scale), type), i);
1134 }
1135
1136 static inline struct elk_reg
vec16(struct elk_reg reg)1137 vec16(struct elk_reg reg)
1138 {
1139 return stride(reg, 16,16,1);
1140 }
1141
1142 static inline struct elk_reg
vec8(struct elk_reg reg)1143 vec8(struct elk_reg reg)
1144 {
1145 return stride(reg, 8,8,1);
1146 }
1147
1148 static inline struct elk_reg
vec4(struct elk_reg reg)1149 vec4(struct elk_reg reg)
1150 {
1151 return stride(reg, 4,4,1);
1152 }
1153
1154 static inline struct elk_reg
vec2(struct elk_reg reg)1155 vec2(struct elk_reg reg)
1156 {
1157 return stride(reg, 2,2,1);
1158 }
1159
1160 static inline struct elk_reg
vec1(struct elk_reg reg)1161 vec1(struct elk_reg reg)
1162 {
1163 return stride(reg, 0,1,0);
1164 }
1165
1166
1167 static inline struct elk_reg
get_element(struct elk_reg reg,unsigned elt)1168 get_element(struct elk_reg reg, unsigned elt)
1169 {
1170 return vec1(suboffset(reg, elt));
1171 }
1172
1173 static inline struct elk_reg
get_element_ud(struct elk_reg reg,unsigned elt)1174 get_element_ud(struct elk_reg reg, unsigned elt)
1175 {
1176 return vec1(suboffset(retype(reg, ELK_REGISTER_TYPE_UD), elt));
1177 }
1178
1179 static inline struct elk_reg
get_element_d(struct elk_reg reg,unsigned elt)1180 get_element_d(struct elk_reg reg, unsigned elt)
1181 {
1182 return vec1(suboffset(retype(reg, ELK_REGISTER_TYPE_D), elt));
1183 }
1184
1185 static inline struct elk_reg
elk_swizzle(struct elk_reg reg,unsigned swz)1186 elk_swizzle(struct elk_reg reg, unsigned swz)
1187 {
1188 if (reg.file == ELK_IMMEDIATE_VALUE)
1189 reg.ud = elk_swizzle_immediate(reg.type, reg.ud, swz);
1190 else
1191 reg.swizzle = elk_compose_swizzle(swz, reg.swizzle);
1192
1193 return reg;
1194 }
1195
1196 static inline struct elk_reg
elk_writemask(struct elk_reg reg,unsigned mask)1197 elk_writemask(struct elk_reg reg, unsigned mask)
1198 {
1199 assert(reg.file != ELK_IMMEDIATE_VALUE);
1200 reg.writemask &= mask;
1201 return reg;
1202 }
1203
1204 static inline struct elk_reg
elk_set_writemask(struct elk_reg reg,unsigned mask)1205 elk_set_writemask(struct elk_reg reg, unsigned mask)
1206 {
1207 assert(reg.file != ELK_IMMEDIATE_VALUE);
1208 reg.writemask = mask;
1209 return reg;
1210 }
1211
/**
 * Return a writemask with the lowest \p n channel bits set, e.g. n == 2
 * yields 0x3 and n == 4 yields 0xf.
 *
 * The shift operates on an unsigned constant so the whole computation is
 * carried out in the (unsigned) return type rather than in signed int.
 */
static inline unsigned
elk_writemask_for_size(unsigned n)
{
   return (1u << n) - 1u;
}
1217
/**
 * Return a writemask covering \p n consecutive channels starting at channel
 * \p first_component.  The selected range must fit in the four channels.
 */
static inline unsigned
elk_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);

   /* n low bits set, then moved up to the first requested channel. */
   const unsigned low_bits = (1 << n) - 1;

   return low_bits << first_component;
}
1224
1225 static inline struct elk_reg
negate(struct elk_reg reg)1226 negate(struct elk_reg reg)
1227 {
1228 reg.negate ^= 1;
1229 return reg;
1230 }
1231
1232 static inline struct elk_reg
elk_abs(struct elk_reg reg)1233 elk_abs(struct elk_reg reg)
1234 {
1235 reg.abs = 1;
1236 reg.negate = 0;
1237 return reg;
1238 }
1239
1240 /************************************************************************/
1241
1242 static inline struct elk_reg
elk_vec4_indirect(unsigned subnr,int offset)1243 elk_vec4_indirect(unsigned subnr, int offset)
1244 {
1245 struct elk_reg reg = elk_vec4_grf(0, 0);
1246 reg.subnr = subnr;
1247 reg.address_mode = ELK_ADDRESS_REGISTER_INDIRECT_REGISTER;
1248 reg.indirect_offset = offset;
1249 return reg;
1250 }
1251
1252 static inline struct elk_reg
elk_vec1_indirect(unsigned subnr,int offset)1253 elk_vec1_indirect(unsigned subnr, int offset)
1254 {
1255 struct elk_reg reg = elk_vec1_grf(0, 0);
1256 reg.subnr = subnr;
1257 reg.address_mode = ELK_ADDRESS_REGISTER_INDIRECT_REGISTER;
1258 reg.indirect_offset = offset;
1259 return reg;
1260 }
1261
1262 static inline struct elk_reg
elk_VxH_indirect(unsigned subnr,int offset)1263 elk_VxH_indirect(unsigned subnr, int offset)
1264 {
1265 struct elk_reg reg = elk_vec1_grf(0, 0);
1266 reg.vstride = ELK_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1267 reg.subnr = subnr;
1268 reg.address_mode = ELK_ADDRESS_REGISTER_INDIRECT_REGISTER;
1269 reg.indirect_offset = offset;
1270 return reg;
1271 }
1272
1273 static inline struct elk_reg
deref_4f(struct elk_indirect ptr,int offset)1274 deref_4f(struct elk_indirect ptr, int offset)
1275 {
1276 return elk_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1277 }
1278
1279 static inline struct elk_reg
deref_1f(struct elk_indirect ptr,int offset)1280 deref_1f(struct elk_indirect ptr, int offset)
1281 {
1282 return elk_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1283 }
1284
1285 static inline struct elk_reg
deref_4b(struct elk_indirect ptr,int offset)1286 deref_4b(struct elk_indirect ptr, int offset)
1287 {
1288 return retype(deref_4f(ptr, offset), ELK_REGISTER_TYPE_B);
1289 }
1290
1291 static inline struct elk_reg
deref_1uw(struct elk_indirect ptr,int offset)1292 deref_1uw(struct elk_indirect ptr, int offset)
1293 {
1294 return retype(deref_1f(ptr, offset), ELK_REGISTER_TYPE_UW);
1295 }
1296
1297 static inline struct elk_reg
deref_1d(struct elk_indirect ptr,int offset)1298 deref_1d(struct elk_indirect ptr, int offset)
1299 {
1300 return retype(deref_1f(ptr, offset), ELK_REGISTER_TYPE_D);
1301 }
1302
1303 static inline struct elk_reg
deref_1ud(struct elk_indirect ptr,int offset)1304 deref_1ud(struct elk_indirect ptr, int offset)
1305 {
1306 return retype(deref_1f(ptr, offset), ELK_REGISTER_TYPE_UD);
1307 }
1308
1309 static inline struct elk_reg
get_addr_reg(struct elk_indirect ptr)1310 get_addr_reg(struct elk_indirect ptr)
1311 {
1312 return elk_address_reg(ptr.addr_subnr);
1313 }
1314
1315 static inline struct elk_indirect
elk_indirect_offset(struct elk_indirect ptr,int offset)1316 elk_indirect_offset(struct elk_indirect ptr, int offset)
1317 {
1318 ptr.addr_offset += offset;
1319 return ptr;
1320 }
1321
1322 static inline struct elk_indirect
elk_indirect(unsigned addr_subnr,int offset)1323 elk_indirect(unsigned addr_subnr, int offset)
1324 {
1325 struct elk_indirect ptr;
1326 ptr.addr_subnr = addr_subnr;
1327 ptr.addr_offset = offset;
1328 ptr.pad = 0;
1329 return ptr;
1330 }
1331
1332 static inline bool
region_matches(struct elk_reg reg,enum elk_vertical_stride v,enum elk_width w,enum elk_horizontal_stride h)1333 region_matches(struct elk_reg reg, enum elk_vertical_stride v,
1334 enum elk_width w, enum elk_horizontal_stride h)
1335 {
1336 return reg.vstride == v &&
1337 reg.width == w &&
1338 reg.hstride == h;
1339 }
1340
/**
 * True when \p reg uses the scalar region, i.e. vertical stride 0, width 1
 * and horizontal stride 0.
 */
#define has_scalar_region(reg)                          \
   region_matches((reg), ELK_VERTICAL_STRIDE_0,         \
                  ELK_WIDTH_1, ELK_HORIZONTAL_STRIDE_0)
1344
1345 /**
1346 * Return the size in bytes per data element of register \p reg on the
1347 * corresponding register file.
1348 */
1349 static inline unsigned
element_sz(struct elk_reg reg)1350 element_sz(struct elk_reg reg)
1351 {
1352 if (reg.file == ELK_IMMEDIATE_VALUE || has_scalar_region(reg)) {
1353 return type_sz(reg.type);
1354
1355 } else if (reg.width == ELK_WIDTH_1 &&
1356 reg.hstride == ELK_HORIZONTAL_STRIDE_0) {
1357 assert(reg.vstride != ELK_VERTICAL_STRIDE_0);
1358 return type_sz(reg.type) << (reg.vstride - 1);
1359
1360 } else {
1361 assert(reg.hstride != ELK_HORIZONTAL_STRIDE_0);
1362 assert(reg.vstride == reg.hstride + reg.width);
1363 return type_sz(reg.type) << (reg.hstride - 1);
1364 }
1365 }
1366
/* elk_packed_float.c
 *
 * NOTE(review): judging by the names and signatures, these convert between
 * a float and an 8-bit packed "VF" encoding (one returns the encoding as an
 * int, the other takes it as an unsigned char).  Confirm the exact format,
 * and what elk_float_to_vf() returns for unrepresentable values, against
 * elk_packed_float.c.
 */
int elk_float_to_vf(float f);
float elk_vf_to_float(unsigned char vf);
1370
1371 #ifdef __cplusplus
1372 }
1373 #endif
1374
1375 #endif
1376