/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef MI_BUILDER_H
#define MI_BUILDER_H

#include "dev/intel_device_info.h"
#include "genxml/genX_bits.h"
#include "util/bitscan.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#ifndef MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
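
/* For example, a user that wants to keep the top two HW GPRs for its own use
 * could lower the limit before including this header (a hypothetical usage
 * sketch, not taken from any particular driver):
 *
 *    #define MI_BUILDER_NUM_ALLOC_GPRS 14
 *    #include "mi_builder.h"
 *
 * GPRs 14 and 15 are then never handed out by mi_new_gpr().
 */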

/** These must be defined by the user of the builder
 *
 * void *__gen_get_batch_dwords(__gen_user_data *user_data,
 *                              unsigned num_dwords);
 *
 * __gen_address_type
 * __gen_address_offset(__gen_address_type addr, uint64_t offset);
 *
 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning, batch chaining, or some other
 * mechanism for ensuring batch pointers remain valid during building is
 * required. The following function must also be defined; it returns an
 * address in canonical form:
 *
 * __gen_address_type
 * __gen_get_batch_address(__gen_user_data *user_data, void *location);
 *
 * Also, __gen_combine_address must accept a location value of NULL and return
 * a fully valid 64-bit address.
 */
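
/* A minimal sketch of these hooks, assuming a hypothetical driver batch
 * struct backed by a linear dword array. The names (example_batch, map,
 * next_dword) are illustrative only, not part of any real driver:
 *
 *    #define __gen_user_data struct example_batch
 *    #define __gen_address_type uint64_t
 *
 *    static void *
 *    __gen_get_batch_dwords(struct example_batch *batch, unsigned num_dwords)
 *    {
 *       uint32_t *dw = batch->map + batch->next_dword;
 *       batch->next_dword += num_dwords;
 *       return dw;
 *    }
 *
 *    static uint64_t
 *    __gen_address_offset(uint64_t addr, uint64_t offset)
 *    {
 *       return addr + offset;
 *    }
 */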

/*
 * Start of the actual MI builder
 */

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#define mi_builder_pack(b, cmd, dst, name)                              \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

#define mi_builder_emit(b, cmd, name) \
   mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
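
/* Typical usage pattern (a sketch; MI_NOOP exists on all generations): the
 * macro allocates batch dwords, gives you a named struct to fill in, and
 * packs it into the batch when the block closes.
 *
 *    mi_builder_emit(b, GENX(MI_NOOP), noop) {
 *       noop.IdentificationNumber = 0x42;  // hypothetical field use
 *    }
 */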


enum mi_value_type {
   MI_VALUE_TYPE_IMM,
   MI_VALUE_TYPE_MEM32,
   MI_VALUE_TYPE_MEM64,
   MI_VALUE_TYPE_REG32,
   MI_VALUE_TYPE_REG64,
};

struct mi_value {
   enum mi_value_type type;

   union {
      uint64_t imm;
      __gen_address_type addr;
      uint32_t reg;
   };

#if GFX_VERx10 >= 75
   bool invert;
#endif
};

struct mi_reg_num {
   uint32_t num;
#if GFX_VER >= 11
   bool cs;
#endif
};

static inline struct mi_reg_num
mi_adjust_reg_num(uint32_t reg)
{
#if GFX_VER >= 11
   bool cs = reg >= 0x2000 && reg < 0x4000;
   return (struct mi_reg_num) {
      .num = reg - (cs ? 0x2000 : 0),
      .cs = cs,
   };
#else
   return (struct mi_reg_num) { .num = reg, };
#endif
}

#if GFX_VER >= 9
#define MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define MI_BUILDER_MAX_MATH_DWORDS 64
#endif

struct mi_builder {
   const struct intel_device_info *devinfo;
   __gen_user_data *user_data;

#if GFX_VERx10 >= 75
   uint32_t gprs;
   uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];

   unsigned num_math_dwords;
   uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
#endif
};

static inline void
mi_builder_init(struct mi_builder *b,
                const struct intel_device_info *devinfo,
                __gen_user_data *user_data)
{
   memset(b, 0, sizeof(*b));
   b->devinfo = devinfo;
   b->user_data = user_data;

#if GFX_VERx10 >= 75
   b->gprs = 0;
   b->num_math_dwords = 0;
#endif
}

static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   if (b->num_math_dwords == 0)
      return;

   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}

#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16

#if GFX_VERx10 >= 75

static inline bool
mi_value_is_reg(struct mi_value val)
{
   return val.type == MI_VALUE_TYPE_REG32 ||
          val.type == MI_VALUE_TYPE_REG64;
}

static inline bool
mi_value_is_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    _MI_BUILDER_NUM_HW_GPRS * 8;
}

static inline bool
_mi_value_is_allocated_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    MI_BUILDER_NUM_ALLOC_GPRS * 8;
}

static inline uint32_t
_mi_value_as_gpr(struct mi_value val)
{
   assert(mi_value_is_gpr(val));
   assert(val.reg % 8 == 0);
   return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
}

static inline struct mi_value
mi_new_gpr(struct mi_builder *b)
{
   unsigned gpr = ffs(~b->gprs) - 1;
   assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
   assert(b->gpr_refs[gpr] == 0);
   b->gprs |= (1u << gpr);
   b->gpr_refs[gpr] = 1;

   return (struct mi_value) {
      .type = MI_VALUE_TYPE_REG64,
      .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
   };
}
#endif /* GFX_VERx10 >= 75 */

/** Take a reference to a mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations. All mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume. In particular, if you pass the same value into a
 * single mi_* math function twice (say to add a number to itself), you
 * are responsible for calling mi_value_ref() to get a second reference
 * because the mi_* math function will consume it twice.
 */
static inline struct mi_value
mi_value_ref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] < UINT8_MAX);
      b->gpr_refs[gpr]++;
   }
#endif /* GFX_VERx10 >= 75 */

   return val;
}

/** Drop a reference to a mi_value
 *
 * See also mi_value_ref.
 */
static inline void
mi_value_unref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] > 0);
      if (--b->gpr_refs[gpr] == 0)
         b->gprs &= ~(1u << gpr);
   }
#endif /* GFX_VERx10 >= 75 */
}
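
/* Example of the reference-counting rule above (a sketch; "x" is assumed to
 * be a live mi_value): doubling a value passes the same reference to
 * mi_iadd() twice, so a second reference must be taken first.
 *
 *    struct mi_value doubled = mi_iadd(b, x, mi_value_ref(b, x));
 *
 * Both references are consumed by mi_iadd(); "doubled" is a fresh reference
 * the caller must eventually consume (e.g. with mi_store()).
 */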

static inline struct mi_value
mi_imm(uint64_t imm)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_IMM,
      .imm = imm,
   };
}

static inline struct mi_value
mi_reg32(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG32,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_reg64(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG64,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_mem32(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM32,
      .addr = addr,
   };
}

static inline struct mi_value
mi_mem64(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM64,
      .addr = addr,
   };
}

static inline struct mi_value
mi_value_half(struct mi_value value, bool top_32_bits)
{
   switch (value.type) {
   case MI_VALUE_TYPE_IMM:
      if (top_32_bits)
         value.imm >>= 32;
      else
         value.imm &= 0xffffffffu;
      return value;

   case MI_VALUE_TYPE_MEM32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_MEM64:
      if (top_32_bits)
         value.addr = __gen_address_offset(value.addr, 4);
      value.type = MI_VALUE_TYPE_MEM32;
      return value;

   case MI_VALUE_TYPE_REG32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_REG64:
      if (top_32_bits)
         value.reg += 4;
      value.type = MI_VALUE_TYPE_REG32;
      return value;
   }

   unreachable("Invalid mi_value type");
}

static inline void
_mi_copy_no_unref(struct mi_builder *b,
                  struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   mi_builder_flush_math(b);

   switch (dst.type) {
   case MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case MI_VALUE_TYPE_MEM64:
   case MI_VALUE_TYPE_REG64:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         if (dst.type == MI_VALUE_TYPE_REG64) {
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                              GENX(MI_LOAD_REGISTER_IMM_length) + 2);
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
               lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
                                 GENX(MI_LOAD_REGISTER_IMM_length_bias);
#if GFX_VER >= 11
               lri.AddCSMMIOStartOffset = reg.cs;
#endif
            }
            dw[1] = reg.num;
            dw[2] = src.imm;
            dw[3] = reg.num + 4;
            dw[4] = src.imm >> 32;
         } else {
#if GFX_VER >= 8
            assert(dst.type == MI_VALUE_TYPE_MEM64);
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                              GENX(MI_STORE_DATA_IMM_length) + 1);
            mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
               sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
                                 GENX(MI_STORE_DATA_IMM_length_bias);
               sdm.StoreQword = true;
               sdm.Address = dst.addr;
            }
            dw[3] = src.imm;
            dw[4] = src.imm >> 32;
#else
            _mi_copy_no_unref(b, mi_value_half(dst, false),
                                 mi_value_half(src, false));
            _mi_copy_no_unref(b, mi_value_half(dst, true),
                                 mi_value_half(src, true));
#endif
         }
         break;
      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_MEM32:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_imm(0));
         break;
      case MI_VALUE_TYPE_REG64:
      case MI_VALUE_TYPE_MEM64:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_value_half(src, true));
         break;
      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GFX_VER >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 8
         mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GFX_VERx10 == 75
         {
            struct mi_value tmp = mi_new_gpr(b);
            _mi_copy_no_unref(b, tmp, src);
            _mi_copy_no_unref(b, dst, tmp);
            mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
         mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
            srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            srm.AddCSMMIOStartOffset = reg.cs;
#endif
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lri.RegisterOffset = reg.num;
#if GFX_VER >= 11
            lri.AddCSMMIOStartOffset = reg.cs;
#endif
            lri.DataDWord = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 7
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lrm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            lrm.AddCSMMIOStartOffset = reg.cs;
#endif
            lrm.MemoryAddress = src.addr;
         }
#else
         unreachable("Cannot do mem -> reg copy on SNB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
#if GFX_VERx10 >= 75
         if (src.reg != dst.reg) {
            mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
               lrr.SourceRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetSource = reg.cs;
#endif
               reg = mi_adjust_reg_num(dst.reg);
               lrr.DestinationRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetDestination = reg.cs;
#endif
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   default:
      unreachable("Invalid mi_value type");
   }
}

#if GFX_VERx10 >= 75
static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src);
#endif

/** Store the value in src to the value represented by dst
 *
 * If the bit sizes of src and dst mismatch, this function does an unsigned
 * integer cast. If src has more bits than dst, it takes the bottom bits. If
 * src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   src = mi_resolve_invert(b, src);
#endif
   _mi_copy_no_unref(b, dst, src);
   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
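
/* Usage sketch: store an immediate to a 32-bit memory location and copy a
 * 64-bit register into memory ("addr" is an assumed __gen_address_type; the
 * register offset is purely an example):
 *
 *    mi_store(b, mi_mem32(addr), mi_imm(42));
 *    mi_store(b, mi_mem64(__gen_address_offset(addr, 8)), mi_reg64(0x2358));
 */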

static inline void
mi_memset(struct mi_builder *b, __gen_address_type dst,
          uint32_t value, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memset operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
                  mi_imm(value));
   }
}

/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
static inline void
mi_memcpy(struct mi_builder *b, __gen_address_type dst,
          __gen_address_type src, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
      struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
#if GFX_VERx10 >= 75
      mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general purpose register for command streamer
       * commands. Therefore, we use an alternate temporary register.
       */
      struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
      mi_store(b, tmp_reg, src_val);
      mi_store(b, dst_val, tmp_reg);
#endif
   }
}

/*
 * MI_MATH Section. Only available on Haswell+
 */

#if GFX_VERx10 >= 75

/**
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst. Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
   assert(!dst.invert && !src.invert);

   mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == MI_VALUE_TYPE_MEM64 ||
          dst.type == MI_VALUE_TYPE_MEM32);

   if (src.type != MI_VALUE_TYPE_REG32 &&
       src.type != MI_VALUE_TYPE_REG64) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == MI_VALUE_TYPE_MEM64) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
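
/* Usage sketch, assuming the caller has already loaded MI_PREDICATE_RESULT
 * (e.g. via an MI_PREDICATE sequence) so the predicate reflects the desired
 * condition; "addr" and "value" are assumed values:
 *
 *    ... set up MI_PREDICATE_RESULT ...
 *    mi_store_if(b, mi_mem64(addr), value);
 *
 * The store only lands when the predicate is satisfied.
 */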

static inline void
_mi_builder_push_math(struct mi_builder *b,
                      const uint32_t *dwords,
                      unsigned num_dwords)
{
   assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
   if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
      mi_builder_flush_math(b);

   memcpy(&b->math_dwords[b->num_math_dwords],
          dwords, num_dwords * sizeof(*dwords));
   b->num_math_dwords += num_dwords;
}

static inline uint32_t
_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
{
   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
      .Operand2 = operand2,
      .Operand1 = operand1,
      .ALUOpcode = opcode,
   };

   uint32_t dw;
   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);

   return dw;
}

static inline struct mi_value
mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
{
   if (mi_value_is_gpr(val))
      return val;

   /* Save off the invert flag because it makes copy() grumpy */
   bool invert = val.invert;
   val.invert = false;

   struct mi_value tmp = mi_new_gpr(b);
   _mi_copy_no_unref(b, tmp, val);
   tmp.invert = invert;

   return tmp;
}

static inline uint64_t
mi_value_to_u64(struct mi_value val)
{
   assert(val.type == MI_VALUE_TYPE_IMM);
   return val.invert ? ~val.imm : val.imm;
}

static inline uint32_t
_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
{
   if (val->type == MI_VALUE_TYPE_IMM &&
       (val->imm == 0 || val->imm == UINT64_MAX)) {
      uint64_t imm = val->invert ? ~val->imm : val->imm;
      return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
   } else {
      *val = mi_value_to_gpr(b, *val);
      return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
                          src, _mi_value_as_gpr(*val));
   }
}

static inline struct mi_value
mi_math_binop(struct mi_builder *b, uint32_t opcode,
              struct mi_value src0, struct mi_value src1,
              uint32_t store_op, uint32_t store_src)
{
   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[4];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
   dw[2] = _mi_pack_alu(opcode, 0, 0);
   dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
   _mi_builder_push_math(b, dw, 4);

   mi_value_unref(b, src0);
   mi_value_unref(b, src1);

   return dst;
}

static inline struct mi_value
mi_inot(struct mi_builder *b, struct mi_value val)
{
   if (val.type == MI_VALUE_TYPE_IMM)
      return mi_imm(~mi_value_to_u64(val));

   val.invert = !val.invert;
   return val;
}

static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src)
{
   if (!src.invert)
      return src;

   assert(src.type != MI_VALUE_TYPE_IMM);
   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_ADD, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd_imm(struct mi_builder *b,
            struct mi_value src, uint64_t N)
{
   if (N == 0)
      return src;

   return mi_iadd(b, src, mi_imm(N));
}

static inline struct mi_value
mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "equal" by subtracting and storing the zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "not equal" by subtracting and storing the inverse zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "less than" by subtracting and storing the carry bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_CF);
}

static inline struct mi_value
mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "greater than or equal" by subtracting and storing the inverse
    * carry bit
    */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STOREINV, MI_ALU_CF);
}

static inline struct mi_value
mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_AND, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_nz(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_z(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ior(struct mi_builder *b,
       struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_OR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

#if GFX_VERx10 >= 125
static inline struct mi_value
mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHL, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
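   /* Decompose the shift into its set bits; e.g. a shift of 12 is emitted as
    * a shift by 8 followed by a shift by 4.
    */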
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ushr(b, res, mi_imm(1 << bit));
   }

   return res;
}

static inline struct mi_value
mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SAR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishr(b, res, mi_imm(1 << bit));
   }

   return res;
}
#endif /* if GFX_VERx10 >= 125 */

static inline struct mi_value
mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) * N);

   if (N == 0) {
      mi_value_unref(b, src);
      return mi_imm(0);
   }

   if (N == 1)
      return src;

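   /* The MI ALU has no multiply, so use a left-to-right double-and-add over
    * the bits of N; e.g. N = 6 (0b110) becomes res = src; res += res;
    * res += src; res += res.
    */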
   src = mi_value_to_gpr(b, src);

   struct mi_value res = mi_value_ref(b, src);

   unsigned top_bit = 31 - __builtin_clz(N);
   for (int i = top_bit - 1; i >= 0; i--) {
      res = mi_iadd(b, res, mi_value_ref(b, res));
      if (N & (1 << i))
         res = mi_iadd(b, res, mi_value_ref(b, src));
   }

   mi_value_unref(b, src);

   return res;
}

static inline struct mi_value
mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) << shift);

   struct mi_value res = mi_value_to_gpr(b, src);

#if GFX_VERx10 >= 125
   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishl(b, res, mi_imm(1 << bit));
   }
#else
   for (unsigned i = 0; i < shift; i++)
      res = mi_iadd(b, res, mi_value_ref(b, res));
#endif

   return res;
}

static inline struct mi_value
mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
    * of the result.
    */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);

   if (shift > 32) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, mi_value_half(tmp, false),
                           mi_value_half(src, true));
      _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
      mi_value_unref(b, src);
      src = tmp;
      shift -= 32;
   }
   assert(shift <= 32);
   struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
   struct mi_value dst = mi_new_gpr(b);
   _mi_copy_no_unref(b, mi_value_half(dst, false),
                        mi_value_half(tmp, true));
   _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
   mi_value_unref(b, tmp);
   return dst;
}

static inline struct mi_value
mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
{
   if (N.type == MI_VALUE_TYPE_IMM) {
      assert(mi_value_to_u64(N) <= UINT32_MAX);
      return mi_imm(mi_value_to_u64(N) / D);
   }

   /* We implicitly assume that N is only a 32-bit value */
   if (D == 0) {
      /* This is invalid but we should do something */
      return mi_imm(0);
   } else if (util_is_power_of_two_or_zero(D)) {
      return mi_ushr32_imm(b, N, util_logbase2(D));
   } else {
      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
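      /* util_compute_fast_udiv_info yields a multiplier M plus pre/post
       * shifts (and an optional increment) such that, for all 32-bit N,
       * N / D == ((N >> pre_shift) * M) >> (32 + post_shift), turning the
       * division into a multiply and a few shifts.
       */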
      assert(m.multiplier <= UINT32_MAX);

      if (m.pre_shift)
         N = mi_ushr32_imm(b, N, m.pre_shift);

      /* Do the 32x32 multiply into gpr0 */
      N = mi_imul_imm(b, N, m.multiplier);

      if (m.increment)
         N = mi_iadd(b, N, mi_imm(m.multiplier));

      N = mi_ushr32_imm(b, N, 32);

      if (m.post_shift)
         N = mi_ushr32_imm(b, N, m.post_shift);

      return N;
   }
}

#endif /* MI_MATH section */

/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
#if MI_BUILDER_CAN_WRITE_BATCH

struct mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};

static inline struct mi_address_token
mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
{
   mi_builder_flush_math(b);

   assert(addr_reg.type == MI_VALUE_TYPE_REG64);

   struct mi_address_token token = {};

   for (unsigned i = 0; i < 2; i++) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   mi_value_unref(b, addr_reg);
   return token;
}

static inline void
mi_self_mod_barrier(struct mi_builder *b)
{
   /* First make sure all the memory writes from previous modifying commands
    * have landed. We want to do this before going through the CS cache,
    * otherwise we could be fetching memory that hasn't been written to yet.
    */
   mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
      pc.CommandStreamerStallEnable = true;
   }
   /* Documentation says Gfx11+ should be able to invalidate the command
    * cache, but experiments show it doesn't work properly, so for now just
    * step over the CS prefetch.
    */
   for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
      mi_builder_emit(b, GENX(MI_NOOP), noop);
}

static inline void
_mi_resolve_address_token(struct mi_builder *b,
                          struct mi_address_token token,
                          void *batch_location)
{
   __gen_address_type addr = __gen_get_batch_address(b->user_data,
                                                     batch_location);
   uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
                                                  addr, 0);
   *(token.ptrs[0]) = addr_addr_u64;
   *(token.ptrs[1]) = addr_addr_u64 + 4;
}

#endif /* MI_BUILDER_CAN_WRITE_BATCH */

#if GFX_VERx10 >= 125

/*
 * Indirect load/store. Only available on XE_HP+
 */

MUST_CHECK static inline struct mi_value
mi_load_mem64_offset(struct mi_builder *b,
                     __gen_address_type addr, struct mi_value offset)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);

   return dst;
}

static inline void
mi_store_mem64_offset(struct mi_builder *b,
                      __gen_address_type addr, struct mi_value offset,
                      struct mi_value data)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   data = mi_value_to_gpr(b, mi_resolve_invert(b, data));

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);
   mi_value_unref(b, data);

   /* This is the only math case which has side-effects outside of regular
    * registers, so flush the math afterwards to avoid confusing anyone.
    */
   mi_builder_flush_math(b);
}
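
/* Usage sketch: read a 64-bit element out of an array in memory whose index
 * is itself computed on the GPU ("base" and "idx" are assumed values):
 *
 *    struct mi_value byte_off = mi_ishl_imm(b, idx, 3);  // idx * 8
 *    struct mi_value elem = mi_load_mem64_offset(b, base, byte_off);
 */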

/*
 * Control-flow Section. Only available on XE_HP+
 */

struct _mi_goto {
   bool predicated;
   void *mi_bbs;
};

struct mi_goto_target {
   bool placed;
   unsigned num_gotos;
   struct _mi_goto gotos[8];
   __gen_address_type addr;
};

#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})

#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418

static inline void
mi_goto_if(struct mi_builder *b, struct mi_value cond,
           struct mi_goto_target *t)
{
   /* First, set up the predicate, if any */
   bool predicated;
   if (cond.type == MI_VALUE_TYPE_IMM) {
      /* If it's an immediate, the goto either doesn't happen or happens
       * unconditionally.
       */
      if (mi_value_to_u64(cond) == 0)
         return;

      assert(mi_value_to_u64(cond) == ~0ull);
      predicated = false;
   } else if (mi_value_is_reg(cond) &&
              cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
      /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
       * provided us with
       */
      assert(cond.type == MI_VALUE_TYPE_REG32);
      predicated = true;
   } else {
      mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
      predicated = true;
   }

   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPOnResultClear;
      }
   }
   if (t->placed) {
      mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.PredicationEnable = predicated;
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = t->addr;
      }
   } else {
      assert(t->num_gotos < ARRAY_SIZE(t->gotos));
      struct _mi_goto g = {
         .predicated = predicated,
         .mi_bbs = __gen_get_batch_dwords(b->user_data,
                                          GENX(MI_BATCH_BUFFER_START_length)),
      };
      memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
      t->gotos[t->num_gotos++] = g;
   }
   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPNever;
      }
   }
}

static inline void
mi_goto(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_goto_if(b, mi_imm(-1), t);
}

static inline void
mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
      sp.PredicateEnable = NOOPNever;
      t->addr = __gen_get_batch_address(b->user_data, _dst);
   }
   t->placed = true;

   struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
   bbs.AddressSpaceIndicator = ASI_PPGTT;
   bbs.BatchBufferStartAddress = t->addr;

   for (unsigned i = 0; i < t->num_gotos; i++) {
      bbs.PredicationEnable = t->gotos[i].predicated;
      GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
   }
}

static inline struct mi_goto_target
mi_goto_target_init_and_place(struct mi_builder *b)
{
   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_target(b, &t);
   return t;
}

#define mi_loop(b) \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \
        __continue = mi_goto_target_init_and_place(b); !__break.placed; \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))

#define mi_break(b) mi_goto(b, &__break)
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
#define mi_continue(b) mi_goto(b, &__continue)
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)
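
/* Loop usage sketch: count a GPU-side value down to zero ("count" is an
 * assumed mi_value living in a GPR; note the extra reference taken because
 * mi_z() consumes its source):
 *
 *    mi_loop(b) {
 *       mi_break_if(b, mi_z(b, mi_value_ref(b, count)));
 *       count = mi_iadd(b, count, mi_imm(-1));
 *       ... commands emitted here run once per iteration ...
 *    }
 */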

#endif /* GFX_VERx10 >= 125 */

#endif /* MI_BUILDER_H */