1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef MI_BUILDER_H
25 #define MI_BUILDER_H
26
27 #include "dev/intel_device_info.h"
28 #include "genxml/genX_bits.h"
29 #include "util/bitscan.h"
30 #include "util/fast_idiv_by_const.h"
31 #include "util/u_math.h"
32
#ifndef MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API (by defining it before this point is
 * reached) so that it can reserve some GPRs at the top end for its own use.
 * It sizes the per-GPR reference count array in struct mi_builder.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
41
42 /** These must be defined by the user of the builder
43 *
44 * void *__gen_get_batch_dwords(__gen_user_data *user_data,
45 * unsigned num_dwords);
46 *
47 * __gen_address_type
48 * __gen_address_offset(__gen_address_type addr, uint64_t offset);
49 *
50 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning, as well as batch chaining or some
 * other mechanism for ensuring batch pointers remain valid during building,
 * is required. The following function must also be defined; it returns an
 * address in canonical form:
56 *
57 * __gen_address_type
58 * __gen_get_batch_address(__gen_user_data *user_data, void *location);
59 *
60 * Also, __gen_combine_address must accept a location value of NULL and return
61 * a fully valid 64-bit address.
62 */
63
64 /*
65 * Start of the actual MI builder
66 */
67
/* Token-pasting helpers that derive the per-command identifiers
 * (<cmd>_length, <cmd>_header, <cmd>_pack) from a command name.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
71
/* Pack command `cmd` into the dwords at `dst`.
 *
 * Expands to a for-loop header, so it must be followed by a statement or
 * block.  It declares a `struct cmd` called `name` initialized with the
 * command's header, runs the body once so the caller can fill in fields, and
 * then calls the command's pack function to write `name` to `dst`.  The loop
 * ends because _dst is reset to NULL after packing; if `dst` is NULL to begin
 * with, the body is skipped and nothing is packed.
 */
#define mi_builder_pack(b, cmd, dst, name)                              \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)
77
/* Like mi_builder_pack(), but the destination comes from
 * __gen_get_batch_dwords(), asked for the command's <cmd>_length dwords.
 */
#define mi_builder_emit(b, cmd, name) \
   mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
80
81
/** Kind of storage a struct mi_value refers to */
enum mi_value_type {
   MI_VALUE_TYPE_IMM,   /* Constant held in mi_value::imm */
   MI_VALUE_TYPE_MEM32, /* 32-bit value in memory at mi_value::addr */
   MI_VALUE_TYPE_MEM64, /* 64-bit value in memory at mi_value::addr */
   MI_VALUE_TYPE_REG32, /* 32-bit register at offset mi_value::reg */
   MI_VALUE_TYPE_REG64, /* 64-bit register; the top half is at reg + 4 */
};
89
/** A value the MI builder can read from or write to
 *
 * The type field selects which member of the anonymous union is meaningful.
 */
struct mi_value {
   /* Discriminator for the union below */
   enum mi_value_type type;

   union {
      /* MI_VALUE_TYPE_IMM: the constant itself */
      uint64_t imm;
      /* MI_VALUE_TYPE_MEM32 / MI_VALUE_TYPE_MEM64: memory address */
      __gen_address_type addr;
      /* MI_VALUE_TYPE_REG32 / MI_VALUE_TYPE_REG64: register offset */
      uint32_t reg;
   };

#if GFX_VERx10 >= 75
   /* When set, the value stands for the bitwise complement of what it refers
    * to (see mi_inot() and mi_value_to_u64()).
    */
   bool invert;
#endif
};
103
/** A register offset in the form it is programmed into MI commands
 *
 * Produced by mi_adjust_reg_num().
 */
struct mi_reg_num {
   /* Register offset to write into the command */
   uint32_t num;
#if GFX_VER >= 11
   /* Set when num was rebased from the 0x2000..0x3fff range; callers copy it
    * into the command's AddCSMMIOStartOffset* field.
    */
   bool cs;
#endif
};
110
111 static inline struct mi_reg_num
mi_adjust_reg_num(uint32_t reg)112 mi_adjust_reg_num(uint32_t reg)
113 {
114 #if GFX_VER >= 11
115 bool cs = reg >= 0x2000 && reg < 0x4000;
116 return (struct mi_reg_num) {
117 .num = reg - (cs ? 0x2000 : 0),
118 .cs = cs,
119 };
120 #else
121 return (struct mi_reg_num) { .num = reg, };
122 #endif
123 }
124
/* Capacity, in dwords, of the mi_builder::math_dwords queue that holds ALU
 * instructions until mi_builder_flush_math() emits them.
 */
#if GFX_VER >= 9
#define MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define MI_BUILDER_MAX_MATH_DWORDS 64
#endif
130
/** State of an MI command builder; set up with mi_builder_init() */
struct mi_builder {
   const struct intel_device_info *devinfo;
   /* Opaque handle forwarded to __gen_get_batch_dwords() and to the genxml
    * pack functions.
    */
   __gen_user_data *user_data;

#if GFX_VERx10 >= 75
   /* Bitmask of allocated GPRs: bit N is set while GPR N is in use */
   uint32_t gprs;
   /* Per-GPR reference counts; a GPR is freed when its count drops to zero */
   uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];

   /* ALU instruction dwords queued for the next MI_MATH command, and how
    * many of them are currently valid.
    */
   unsigned num_math_dwords;
   uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
#endif

#if GFX_VERx10 >= 125
   /* MOCS value written into emitted MI_MATH commands; see
    * mi_builder_set_mocs().
    */
   uint32_t mocs;
#endif
};
147
148 static inline void
mi_builder_init(struct mi_builder * b,const struct intel_device_info * devinfo,__gen_user_data * user_data)149 mi_builder_init(struct mi_builder *b,
150 const struct intel_device_info *devinfo,
151 __gen_user_data *user_data)
152 {
153 memset(b, 0, sizeof(*b));
154 b->devinfo = devinfo;
155 b->user_data = user_data;
156
157 #if GFX_VERx10 >= 75
158 b->gprs = 0;
159 b->num_math_dwords = 0;
160 #endif
161 }
162
/** Emit any queued ALU instructions as a single MI_MATH command
 *
 * Does nothing when the queue is empty (and compiles to nothing before
 * GFX_VERx10 75).  The queue is empty again on return.
 */
static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   const unsigned num_alu_dwords = b->num_math_dwords;
   if (num_alu_dwords == 0)
      return;

   /* Room for the packed MI_MATH dword plus the queued ALU dwords */
   const unsigned total_dwords = 1 + num_alu_dwords;
   uint32_t *batch = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                        total_dwords);
   mi_builder_pack(b, GENX(MI_MATH), batch, math) {
#if GFX_VERx10 >= 125
      math.MOCS = b->mocs;
#endif
      math.DWordLength = total_dwords - GENX(MI_MATH_length_bias);
   }

   /* The ALU instructions follow from dword 1 onwards */
   memcpy(&batch[1], b->math_dwords, num_alu_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}
182
183 /**
184 * Set mocs index to mi_build
185 *
186 * This is required when a MI_MATH instruction will be emitted and
187 * the code is used in GFX 12.5 or newer.
188 */
189 static inline void
mi_builder_set_mocs(UNUSED struct mi_builder * b,UNUSED uint32_t mocs)190 mi_builder_set_mocs(UNUSED struct mi_builder *b, UNUSED uint32_t mocs)
191 {
192 #if GFX_VERx10 >= 125
193 if (b->mocs != 0 && b->mocs != mocs)
194 mi_builder_flush_math(b);
195 b->mocs = mocs;
196 #endif
197 }
198
/* Register offset of GPR 0; GPR n lives at base + n * 8 (see mi_new_gpr) */
#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16
202
203 #if GFX_VERx10 >= 75
204
205 static inline bool
mi_value_is_reg(struct mi_value val)206 mi_value_is_reg(struct mi_value val)
207 {
208 return val.type == MI_VALUE_TYPE_REG32 ||
209 val.type == MI_VALUE_TYPE_REG64;
210 }
211
212 static inline bool
mi_value_is_gpr(struct mi_value val)213 mi_value_is_gpr(struct mi_value val)
214 {
215 return mi_value_is_reg(val) &&
216 val.reg >= _MI_BUILDER_GPR_BASE &&
217 val.reg < _MI_BUILDER_GPR_BASE +
218 _MI_BUILDER_NUM_HW_GPRS * 8;
219 }
220
221 static inline bool
_mi_value_is_allocated_gpr(struct mi_value val)222 _mi_value_is_allocated_gpr(struct mi_value val)
223 {
224 return mi_value_is_reg(val) &&
225 val.reg >= _MI_BUILDER_GPR_BASE &&
226 val.reg < _MI_BUILDER_GPR_BASE +
227 MI_BUILDER_NUM_ALLOC_GPRS * 8;
228 }
229
230 static inline uint32_t
_mi_value_as_gpr(struct mi_value val)231 _mi_value_as_gpr(struct mi_value val)
232 {
233 assert(mi_value_is_gpr(val));
234 /* Some of the GRL metakernels will generate 64bit value in a GP register,
235 * then use only half of that as the last operation on that value. So allow
236 * unref on part of a GP register.
237 */
238 assert(val.reg % 4 == 0);
239 return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
240 }
241
242 static inline struct mi_value
mi_new_gpr(struct mi_builder * b)243 mi_new_gpr(struct mi_builder *b)
244 {
245 unsigned gpr = ffs(~b->gprs) - 1;
246 assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
247 assert(b->gpr_refs[gpr] == 0);
248 b->gprs |= (1u << gpr);
249 b->gpr_refs[gpr] = 1;
250
251 return (struct mi_value) {
252 .type = MI_VALUE_TYPE_REG64,
253 .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
254 };
255 }
256
257 static inline struct mi_value
mi_reserve_gpr(struct mi_builder * b,unsigned gpr)258 mi_reserve_gpr(struct mi_builder *b, unsigned gpr)
259 {
260 assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
261 assert(!(b->gprs & (1 << gpr)));
262 assert(b->gpr_refs[gpr] == 0);
263 b->gprs |= (1u << gpr);
264 b->gpr_refs[gpr] = 128; /* Enough that we won't unref it */
265
266 return (struct mi_value) {
267 .type = MI_VALUE_TYPE_REG64,
268 .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
269 };
270 }
271 #endif /* GFX_VERx10 >= 75 */
272
273 /** Take a reference to a mi_value
274 *
275 * The MI builder uses reference counting to automatically free ALU GPRs for
276 * re-use in calculations. All mi_* math functions consume the reference
277 * they are handed for each source and return a reference to a value which the
278 * caller must consume. In particular, if you pas the same value into a
279 * single mi_* math function twice (say to add a number to itself), you
280 * are responsible for calling mi_value_ref() to get a second reference
281 * because the mi_* math function will consume it twice.
282 */
283 static inline void
mi_value_add_refs(struct mi_builder * b,struct mi_value val,unsigned num_refs)284 mi_value_add_refs(struct mi_builder *b, struct mi_value val, unsigned num_refs)
285 {
286 #if GFX_VERx10 >= 75
287 if (_mi_value_is_allocated_gpr(val)) {
288 unsigned gpr = _mi_value_as_gpr(val);
289 assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
290 assert(b->gprs & (1u << gpr));
291 assert(b->gpr_refs[gpr] < UINT8_MAX);
292 b->gpr_refs[gpr] += num_refs;
293 }
294 #endif /* GFX_VERx10 >= 75 */
295 }
296
297 static inline struct mi_value
mi_value_ref(struct mi_builder * b,struct mi_value val)298 mi_value_ref(struct mi_builder *b, struct mi_value val)
299 {
300 mi_value_add_refs(b, val, 1);
301 return val;
302 }
303
304
305 /** Drop a reference to a mi_value
306 *
307 * See also mi_value_ref.
308 */
309 static inline void
mi_value_unref(struct mi_builder * b,struct mi_value val)310 mi_value_unref(struct mi_builder *b, struct mi_value val)
311 {
312 #if GFX_VERx10 >= 75
313 if (_mi_value_is_allocated_gpr(val)) {
314 unsigned gpr = _mi_value_as_gpr(val);
315 assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
316 assert(b->gprs & (1u << gpr));
317 assert(b->gpr_refs[gpr] > 0);
318 if (--b->gpr_refs[gpr] == 0)
319 b->gprs &= ~(1u << gpr);
320 }
321 #endif /* GFX_VERx10 >= 75 */
322 }
323
324 static inline struct mi_value
mi_imm(uint64_t imm)325 mi_imm(uint64_t imm)
326 {
327 return (struct mi_value) {
328 .type = MI_VALUE_TYPE_IMM,
329 .imm = imm,
330 };
331 }
332
333 static inline struct mi_value
mi_reg32(uint32_t reg)334 mi_reg32(uint32_t reg)
335 {
336 struct mi_value val = {
337 .type = MI_VALUE_TYPE_REG32,
338 .reg = reg,
339 };
340 #if GFX_VERx10 >= 75
341 assert(!_mi_value_is_allocated_gpr(val));
342 #endif
343 return val;
344 }
345
346 static inline struct mi_value
mi_reg64(uint32_t reg)347 mi_reg64(uint32_t reg)
348 {
349 struct mi_value val = {
350 .type = MI_VALUE_TYPE_REG64,
351 .reg = reg,
352 };
353 #if GFX_VERx10 >= 75
354 assert(!_mi_value_is_allocated_gpr(val));
355 #endif
356 return val;
357 }
358
359 static inline struct mi_value
mi_mem32(__gen_address_type addr)360 mi_mem32(__gen_address_type addr)
361 {
362 return (struct mi_value) {
363 .type = MI_VALUE_TYPE_MEM32,
364 .addr = addr,
365 };
366 }
367
368 static inline struct mi_value
mi_mem64(__gen_address_type addr)369 mi_mem64(__gen_address_type addr)
370 {
371 return (struct mi_value) {
372 .type = MI_VALUE_TYPE_MEM64,
373 .addr = addr,
374 };
375 }
376
377 static inline struct mi_value
mi_value_half(struct mi_value value,bool top_32_bits)378 mi_value_half(struct mi_value value, bool top_32_bits)
379 {
380 switch (value.type) {
381 case MI_VALUE_TYPE_IMM:
382 if (top_32_bits)
383 value.imm >>= 32;
384 else
385 value.imm &= 0xffffffffu;
386 return value;
387
388 case MI_VALUE_TYPE_MEM32:
389 assert(!top_32_bits);
390 return value;
391
392 case MI_VALUE_TYPE_MEM64:
393 if (top_32_bits)
394 value.addr = __gen_address_offset(value.addr, 4);
395 value.type = MI_VALUE_TYPE_MEM32;
396 return value;
397
398 case MI_VALUE_TYPE_REG32:
399 assert(!top_32_bits);
400 return value;
401
402 case MI_VALUE_TYPE_REG64:
403 if (top_32_bits)
404 value.reg += 4;
405 value.type = MI_VALUE_TYPE_REG32;
406 return value;
407 }
408
409 unreachable("Invalid mi_value type");
410 }
411
/** Emit the commands that copy src into dst, without touching ref counts
 *
 * Any queued math is flushed first.  Neither value may have its invert flag
 * set.  A 64-bit destination is handled either with one wide command (for
 * an immediate source) or by recursing on its two 32-bit halves; a 32-bit
 * source is zero-extended into a 64-bit destination.  A 32-bit destination
 * reads from the source's own address/register offset, i.e. the same
 * location mi_value_half(src, false) refers to.
 *
 * Which combinations are available depends on the hardware generation; the
 * unsupported ones hit unreachable().
 */
static inline void
_mi_copy_no_unref(struct mi_builder *b,
                  struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   mi_builder_flush_math(b);

   switch (dst.type) {
   case MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case MI_VALUE_TYPE_MEM64:
   case MI_VALUE_TYPE_REG64:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         if (dst.type == MI_VALUE_TYPE_REG64) {
            /* One MI_LOAD_REGISTER_IMM grown by two dwords so that it holds
             * two (register, value) pairs: the low half, then the high half
             * at reg + 4.
             */
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                              GENX(MI_LOAD_REGISTER_IMM_length) + 2);
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
               lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
                                 GENX(MI_LOAD_REGISTER_IMM_length_bias);
#if GFX_VER >= 11
               lri.AddCSMMIOStartOffset = reg.cs;
#endif
            }
            /* The pairs are written by hand after the header is packed */
            dw[1] = reg.num;
            dw[2] = src.imm;
            dw[3] = reg.num + 4;
            dw[4] = src.imm >> 32;
         } else {
#if GFX_VER >= 8
            assert(dst.type == MI_VALUE_TYPE_MEM64);
            /* One MI_STORE_DATA_IMM grown by a dword with StoreQword set;
             * the two immediate dwords are filled in by hand below.
             */
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                              GENX(MI_STORE_DATA_IMM_length) + 1);
            mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
               sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
                                 GENX(MI_STORE_DATA_IMM_length_bias);
               sdm.StoreQword = true;
               sdm.Address = dst.addr;
            }
            dw[3] = src.imm;
            dw[4] = src.imm >> 32;
#else
            /* No qword store here: write the two halves separately */
            _mi_copy_no_unref(b, mi_value_half(dst, false),
                                 mi_value_half(src, false));
            _mi_copy_no_unref(b, mi_value_half(dst, true),
                                 mi_value_half(src, true));
#endif
         }
         break;
      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_MEM32:
         /* 32-bit source: copy it into the low half and zero the high half */
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_imm(0));
         break;
      case MI_VALUE_TYPE_REG64:
      case MI_VALUE_TYPE_MEM64:
         /* 64-bit source: copy the low and high halves one after the other */
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_value_half(src, true));
         break;
      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         /* imm -> mem: MI_STORE_DATA_IMM */
         mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GFX_VER >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 8
         /* mem -> mem: MI_COPY_MEM_MEM */
         mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GFX_VERx10 == 75
         {
            /* mem -> mem by bouncing through a temporary GPR */
            struct mi_value tmp = mi_new_gpr(b);
            _mi_copy_no_unref(b, tmp, src);
            _mi_copy_no_unref(b, dst, tmp);
            mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
         /* reg -> mem: MI_STORE_REGISTER_MEM */
         mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
            srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            srm.AddCSMMIOStartOffset = reg.cs;
#endif
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         /* imm -> reg: MI_LOAD_REGISTER_IMM */
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lri.RegisterOffset = reg.num;
#if GFX_VER >= 11
            lri.AddCSMMIOStartOffset = reg.cs;
#endif
            lri.DataDWord = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 7
         /* mem -> reg: MI_LOAD_REGISTER_MEM */
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lrm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            lrm.AddCSMMIOStartOffset = reg.cs;
#endif
            lrm.MemoryAddress = src.addr;
         }
#else
         unreachable("Cannot load do mem -> reg copy on SNB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
#if GFX_VERx10 >= 75
         /* reg -> reg: MI_LOAD_REGISTER_REG; a copy onto itself emits
          * nothing.
          */
         if (src.reg != dst.reg) {
            mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
               lrr.SourceRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetSource = reg.cs;
#endif
               reg = mi_adjust_reg_num(dst.reg);
               lrr.DestinationRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetDestination = reg.cs;
#endif
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   default:
      unreachable("Invalid mi_value type");
   }
}
595
#if GFX_VERx10 >= 75
/* Forward declaration: mi_store() calls this before its definition, which
 * sits further down in the MI_MATH section.
 */
static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src);
#endif
600
601 /** Store the value in src to the value represented by dst
602 *
603 * If the bit size of src and dst mismatch, this function does an unsigned
604 * integer cast. If src has more bits than dst, it takes the bottom bits. If
605 * src has fewer bits then dst, it fills the top bits with zeros.
606 *
607 * This function consumes one reference for each of src and dst.
608 */
609 static inline void
mi_store(struct mi_builder * b,struct mi_value dst,struct mi_value src)610 mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
611 {
612 #if GFX_VERx10 >= 75
613 src = mi_resolve_invert(b, src);
614 #endif
615 _mi_copy_no_unref(b, dst, src);
616 mi_value_unref(b, src);
617 mi_value_unref(b, dst);
618 }
619
620 static inline void
mi_memset(struct mi_builder * b,__gen_address_type dst,uint32_t value,uint32_t size)621 mi_memset(struct mi_builder *b, __gen_address_type dst,
622 uint32_t value, uint32_t size)
623 {
624 #if GFX_VERx10 >= 75
625 assert(b->num_math_dwords == 0);
626 #endif
627
628 /* This memset operates in units of dwords. */
629 assert(size % 4 == 0);
630
631 for (uint32_t i = 0; i < size; i += 4) {
632 mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
633 mi_imm(value));
634 }
635 }
636
637 /* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
638 static inline void
mi_memcpy(struct mi_builder * b,__gen_address_type dst,__gen_address_type src,uint32_t size)639 mi_memcpy(struct mi_builder *b, __gen_address_type dst,
640 __gen_address_type src, uint32_t size)
641 {
642 #if GFX_VERx10 >= 75
643 assert(b->num_math_dwords == 0);
644 #endif
645
646 /* This memcpy operates in units of dwords. */
647 assert(size % 4 == 0);
648
649 for (uint32_t i = 0; i < size; i += 4) {
650 struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
651 struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
652 #if GFX_VERx10 >= 75
653 mi_store(b, dst_val, src_val);
654 #else
655 /* IVB does not have a general purpose register for command streamer
656 * commands. Therefore, we use an alternate temporary register.
657 */
658 struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
659 mi_store(b, tmp_reg, src_val);
660 mi_store(b, dst_val, tmp_reg);
661 #endif
662 }
663 }
664
665 /*
666 * MI_MATH Section. Only available on Haswell+
667 */
668
669 #if GFX_VERx10 >= 75
670
671 /**
672 * Perform a predicated store (assuming the condition is already loaded
673 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
674 * location specified by dst. Non-memory destinations are not supported.
675 *
676 * This function consumes one reference for each of src and dst.
677 */
678 static inline void
mi_store_if(struct mi_builder * b,struct mi_value dst,struct mi_value src)679 mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
680 {
681 assert(!dst.invert && !src.invert);
682
683 mi_builder_flush_math(b);
684
685 /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
686 * destination to be memory, and resolve the source to a temporary
687 * register if it isn't in one already.
688 */
689 assert(dst.type == MI_VALUE_TYPE_MEM64 ||
690 dst.type == MI_VALUE_TYPE_MEM32);
691
692 if (src.type != MI_VALUE_TYPE_REG32 &&
693 src.type != MI_VALUE_TYPE_REG64) {
694 struct mi_value tmp = mi_new_gpr(b);
695 _mi_copy_no_unref(b, tmp, src);
696 src = tmp;
697 }
698
699 if (dst.type == MI_VALUE_TYPE_MEM64) {
700 mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
701 struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
702 srm.RegisterAddress = reg.num;
703 #if GFX_VER >= 11
704 srm.AddCSMMIOStartOffset = reg.cs;
705 #endif
706 srm.MemoryAddress = dst.addr;
707 srm.PredicateEnable = true;
708 }
709 mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
710 struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
711 srm.RegisterAddress = reg.num;
712 #if GFX_VER >= 11
713 srm.AddCSMMIOStartOffset = reg.cs;
714 #endif
715 srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
716 srm.PredicateEnable = true;
717 }
718 } else {
719 mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
720 struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
721 srm.RegisterAddress = reg.num;
722 #if GFX_VER >= 11
723 srm.AddCSMMIOStartOffset = reg.cs;
724 #endif
725 srm.MemoryAddress = dst.addr;
726 srm.PredicateEnable = true;
727 }
728 }
729
730 mi_value_unref(b, src);
731 mi_value_unref(b, dst);
732 }
733
734 static inline void
_mi_builder_push_math(struct mi_builder * b,const uint32_t * dwords,unsigned num_dwords)735 _mi_builder_push_math(struct mi_builder *b,
736 const uint32_t *dwords,
737 unsigned num_dwords)
738 {
739 assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
740 if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
741 mi_builder_flush_math(b);
742
743 memcpy(&b->math_dwords[b->num_math_dwords],
744 dwords, num_dwords * sizeof(*dwords));
745 b->num_math_dwords += num_dwords;
746 }
747
748 static inline uint32_t
_mi_pack_alu(uint32_t opcode,uint32_t operand1,uint32_t operand2)749 _mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
750 {
751 struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
752 .Operand2 = operand2,
753 .Operand1 = operand1,
754 .ALUOpcode = opcode,
755 };
756
757 uint32_t dw;
758 GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
759
760 return dw;
761 }
762
763 static inline struct mi_value
mi_value_to_gpr(struct mi_builder * b,struct mi_value val)764 mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
765 {
766 if (mi_value_is_gpr(val))
767 return val;
768
769 /* Save off the invert flag because it makes copy() grumpy */
770 bool invert = val.invert;
771 val.invert = false;
772
773 struct mi_value tmp = mi_new_gpr(b);
774 _mi_copy_no_unref(b, tmp, val);
775 tmp.invert = invert;
776
777 return tmp;
778 }
779
780 static inline uint64_t
mi_value_to_u64(struct mi_value val)781 mi_value_to_u64(struct mi_value val)
782 {
783 assert(val.type == MI_VALUE_TYPE_IMM);
784 return val.invert ? ~val.imm : val.imm;
785 }
786
787 static inline uint32_t
_mi_math_load_src(struct mi_builder * b,unsigned src,struct mi_value * val)788 _mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
789 {
790 if (val->type == MI_VALUE_TYPE_IMM &&
791 (val->imm == 0 || val->imm == UINT64_MAX)) {
792 uint64_t imm = val->invert ? ~val->imm : val->imm;
793 return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
794 } else {
795 *val = mi_value_to_gpr(b, *val);
796 return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
797 src, _mi_value_as_gpr(*val));
798 }
799 }
800
801 static inline struct mi_value
mi_math_binop(struct mi_builder * b,uint32_t opcode,struct mi_value src0,struct mi_value src1,uint32_t store_op,uint32_t store_src)802 mi_math_binop(struct mi_builder *b, uint32_t opcode,
803 struct mi_value src0, struct mi_value src1,
804 uint32_t store_op, uint32_t store_src)
805 {
806 struct mi_value dst = mi_new_gpr(b);
807
808 uint32_t dw[4];
809 dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
810 dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
811 dw[2] = _mi_pack_alu(opcode, 0, 0);
812 dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
813 _mi_builder_push_math(b, dw, 4);
814
815 mi_value_unref(b, src0);
816 mi_value_unref(b, src1);
817
818 return dst;
819 }
820
821 static inline struct mi_value
mi_inot(struct mi_builder * b,struct mi_value val)822 mi_inot(struct mi_builder *b, struct mi_value val)
823 {
824 if (val.type == MI_VALUE_TYPE_IMM)
825 return mi_imm(~mi_value_to_u64(val));
826
827 val.invert = !val.invert;
828 return val;
829 }
830
831 static inline struct mi_value
mi_resolve_invert(struct mi_builder * b,struct mi_value src)832 mi_resolve_invert(struct mi_builder *b, struct mi_value src)
833 {
834 if (!src.invert)
835 return src;
836
837 assert(src.type != MI_VALUE_TYPE_IMM);
838 return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
839 MI_ALU_STORE, MI_ALU_ACCU);
840 }
841
842 static inline struct mi_value
mi_iadd(struct mi_builder * b,struct mi_value src0,struct mi_value src1)843 mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
844 {
845 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
846 return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));
847
848 return mi_math_binop(b, MI_ALU_ADD, src0, src1,
849 MI_ALU_STORE, MI_ALU_ACCU);
850 }
851
852 static inline struct mi_value
mi_iadd_imm(struct mi_builder * b,struct mi_value src,uint64_t N)853 mi_iadd_imm(struct mi_builder *b,
854 struct mi_value src, uint64_t N)
855 {
856 if (N == 0)
857 return src;
858
859 return mi_iadd(b, src, mi_imm(N));
860 }
861
862 static inline struct mi_value
mi_isub(struct mi_builder * b,struct mi_value src0,struct mi_value src1)863 mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
864 {
865 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
866 return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));
867
868 return mi_math_binop(b, MI_ALU_SUB, src0, src1,
869 MI_ALU_STORE, MI_ALU_ACCU);
870 }
871
872 static inline struct mi_value
mi_ieq(struct mi_builder * b,struct mi_value src0,struct mi_value src1)873 mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
874 {
875 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
876 return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);
877
878 /* Compute "equal" by subtracting and storing the zero bit */
879 return mi_math_binop(b, MI_ALU_SUB, src0, src1,
880 MI_ALU_STORE, MI_ALU_ZF);
881 }
882
883 static inline struct mi_value
mi_ine(struct mi_builder * b,struct mi_value src0,struct mi_value src1)884 mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
885 {
886 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
887 return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);
888
889 /* Compute "not equal" by subtracting and storing the inverse zero bit */
890 return mi_math_binop(b, MI_ALU_SUB, src0, src1,
891 MI_ALU_STOREINV, MI_ALU_ZF);
892 }
893
894 static inline struct mi_value
mi_ult(struct mi_builder * b,struct mi_value src0,struct mi_value src1)895 mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
896 {
897 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
898 return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);
899
900 /* Compute "less than" by subtracting and storing the carry bit */
901 return mi_math_binop(b, MI_ALU_SUB, src0, src1,
902 MI_ALU_STORE, MI_ALU_CF);
903 }
904
905 static inline struct mi_value
mi_uge(struct mi_builder * b,struct mi_value src0,struct mi_value src1)906 mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
907 {
908 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
909 return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);
910
911 /* Compute "less than" by subtracting and storing the carry bit */
912 return mi_math_binop(b, MI_ALU_SUB, src0, src1,
913 MI_ALU_STOREINV, MI_ALU_CF);
914 }
915
916 static inline struct mi_value
mi_iand(struct mi_builder * b,struct mi_value src0,struct mi_value src1)917 mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
918 {
919 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
920 return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));
921
922 return mi_math_binop(b, MI_ALU_AND, src0, src1,
923 MI_ALU_STORE, MI_ALU_ACCU);
924 }
925
926 static inline struct mi_value
mi_nz(struct mi_builder * b,struct mi_value src)927 mi_nz(struct mi_builder *b, struct mi_value src)
928 {
929 if (src.type == MI_VALUE_TYPE_IMM)
930 return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);
931
932 return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
933 MI_ALU_STOREINV, MI_ALU_ZF);
934 }
935
936 static inline struct mi_value
mi_z(struct mi_builder * b,struct mi_value src)937 mi_z(struct mi_builder *b, struct mi_value src)
938 {
939 if (src.type == MI_VALUE_TYPE_IMM)
940 return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);
941
942 return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
943 MI_ALU_STORE, MI_ALU_ZF);
944 }
945
946 static inline struct mi_value
mi_ior(struct mi_builder * b,struct mi_value src0,struct mi_value src1)947 mi_ior(struct mi_builder *b,
948 struct mi_value src0, struct mi_value src1)
949 {
950 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
951 return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));
952
953 return mi_math_binop(b, MI_ALU_OR, src0, src1,
954 MI_ALU_STORE, MI_ALU_ACCU);
955 }
956
957 #if GFX_VERx10 >= 125
958 static inline struct mi_value
mi_ishl(struct mi_builder * b,struct mi_value src0,struct mi_value src1)959 mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
960 {
961 if (src1.type == MI_VALUE_TYPE_IMM) {
962 assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
963 assert(mi_value_to_u64(src1) <= 32);
964 }
965
966 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
967 return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));
968
969 return mi_math_binop(b, MI_ALU_SHL, src0, src1,
970 MI_ALU_STORE, MI_ALU_ACCU);
971 }
972
973 static inline struct mi_value
mi_ushr(struct mi_builder * b,struct mi_value src0,struct mi_value src1)974 mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
975 {
976 if (src1.type == MI_VALUE_TYPE_IMM) {
977 assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
978 assert(mi_value_to_u64(src1) <= 32);
979 }
980
981 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
982 return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));
983
984 return mi_math_binop(b, MI_ALU_SHR, src0, src1,
985 MI_ALU_STORE, MI_ALU_ACCU);
986 }
987
988 static inline struct mi_value
mi_ushr_imm(struct mi_builder * b,struct mi_value src,uint32_t shift)989 mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
990 {
991 if (shift == 0)
992 return src;
993
994 if (shift >= 64)
995 return mi_imm(0);
996
997 if (src.type == MI_VALUE_TYPE_IMM)
998 return mi_imm(mi_value_to_u64(src) >> shift);
999
1000 struct mi_value res = mi_value_to_gpr(b, src);
1001
1002 /* Annoyingly, we only have power-of-two shifts */
1003 while (shift) {
1004 int bit = u_bit_scan(&shift);
1005 assert(bit <= 5);
1006 res = mi_ushr(b, res, mi_imm(1ULL << bit));
1007 }
1008
1009 return res;
1010 }
1011
1012 static inline struct mi_value
mi_ishr(struct mi_builder * b,struct mi_value src0,struct mi_value src1)1013 mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
1014 {
1015 if (src1.type == MI_VALUE_TYPE_IMM) {
1016 assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
1017 assert(mi_value_to_u64(src1) <= 32);
1018 }
1019
1020 if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
1021 return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));
1022
1023 return mi_math_binop(b, MI_ALU_SAR, src0, src1,
1024 MI_ALU_STORE, MI_ALU_ACCU);
1025 }
1026
1027 static inline struct mi_value
mi_ishr_imm(struct mi_builder * b,struct mi_value src,uint32_t shift)1028 mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
1029 {
1030 if (shift == 0)
1031 return src;
1032
1033 if (shift >= 64)
1034 return mi_imm(0);
1035
1036 if (src.type == MI_VALUE_TYPE_IMM)
1037 return mi_imm((int64_t)mi_value_to_u64(src) >> shift);
1038
1039 struct mi_value res = mi_value_to_gpr(b, src);
1040
1041 /* Annoyingly, we only have power-of-two shifts */
1042 while (shift) {
1043 int bit = u_bit_scan(&shift);
1044 assert(bit <= 5);
1045 res = mi_ishr(b, res, mi_imm(1 << bit));
1046 }
1047
1048 return res;
1049 }
1050 #endif /* if GFX_VERx10 >= 125 */
1051
1052 static inline struct mi_value
mi_imul_imm(struct mi_builder * b,struct mi_value src,uint32_t N)1053 mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
1054 {
1055 if (src.type == MI_VALUE_TYPE_IMM)
1056 return mi_imm(mi_value_to_u64(src) * N);
1057
1058 if (N == 0) {
1059 mi_value_unref(b, src);
1060 return mi_imm(0);
1061 }
1062
1063 if (N == 1)
1064 return src;
1065
1066 src = mi_value_to_gpr(b, src);
1067
1068 struct mi_value res = mi_value_ref(b, src);
1069
1070 unsigned top_bit = 31 - __builtin_clz(N);
1071 for (int i = top_bit - 1; i >= 0; i--) {
1072 res = mi_iadd(b, res, mi_value_ref(b, res));
1073 if (N & (1 << i))
1074 res = mi_iadd(b, res, mi_value_ref(b, src));
1075 }
1076
1077 mi_value_unref(b, src);
1078
1079 return res;
1080 }
1081
1082 static inline struct mi_value
mi_ishl_imm(struct mi_builder * b,struct mi_value src,uint32_t shift)1083 mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
1084 {
1085 if (shift == 0)
1086 return src;
1087
1088 if (shift >= 64)
1089 return mi_imm(0);
1090
1091 if (src.type == MI_VALUE_TYPE_IMM)
1092 return mi_imm(mi_value_to_u64(src) << shift);
1093
1094 struct mi_value res = mi_value_to_gpr(b, src);
1095
1096 #if GFX_VERx10 >= 125
1097 /* Annoyingly, we only have power-of-two shifts */
1098 while (shift) {
1099 int bit = u_bit_scan(&shift);
1100 assert(bit <= 5);
1101 res = mi_ishl(b, res, mi_imm(1 << bit));
1102 }
1103 #else
1104 for (unsigned i = 0; i < shift; i++)
1105 res = mi_iadd(b, res, mi_value_ref(b, res));
1106 #endif
1107
1108 return res;
1109 }
1110
1111 static inline struct mi_value
mi_ushr32_imm(struct mi_builder * b,struct mi_value src,uint32_t shift)1112 mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
1113 {
1114 if (shift == 0)
1115 return src;
1116
1117 if (shift >= 64)
1118 return mi_imm(0);
1119
1120 /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
1121 * of the result.
1122 */
1123 if (src.type == MI_VALUE_TYPE_IMM)
1124 return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);
1125
1126 if (shift > 32) {
1127 struct mi_value tmp = mi_new_gpr(b);
1128 _mi_copy_no_unref(b, mi_value_half(tmp, false),
1129 mi_value_half(src, true));
1130 _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
1131 mi_value_unref(b, src);
1132 src = tmp;
1133 shift -= 32;
1134 }
1135 assert(shift <= 32);
1136 struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
1137 struct mi_value dst = mi_new_gpr(b);
1138 _mi_copy_no_unref(b, mi_value_half(dst, false),
1139 mi_value_half(tmp, true));
1140 _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
1141 mi_value_unref(b, tmp);
1142 return dst;
1143 }
1144
1145 static inline struct mi_value
mi_udiv32_imm(struct mi_builder * b,struct mi_value N,uint32_t D)1146 mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
1147 {
1148 if (N.type == MI_VALUE_TYPE_IMM) {
1149 assert(mi_value_to_u64(N) <= UINT32_MAX);
1150 return mi_imm(mi_value_to_u64(N) / D);
1151 }
1152
1153 /* We implicitly assume that N is only a 32-bit value */
1154 if (D == 0) {
1155 /* This is invalid but we should do something */
1156 return mi_imm(0);
1157 } else if (util_is_power_of_two_or_zero(D)) {
1158 return mi_ushr32_imm(b, N, util_logbase2(D));
1159 } else {
1160 struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
1161 assert(m.multiplier <= UINT32_MAX);
1162
1163 if (m.pre_shift)
1164 N = mi_ushr32_imm(b, N, m.pre_shift);
1165
1166 /* Do the 32x32 multiply into gpr0 */
1167 N = mi_imul_imm(b, N, m.multiplier);
1168
1169 if (m.increment)
1170 N = mi_iadd(b, N, mi_imm(m.multiplier));
1171
1172 N = mi_ushr32_imm(b, N, 32);
1173
1174 if (m.post_shift)
1175 N = mi_ushr32_imm(b, N, m.post_shift);
1176
1177 return N;
1178 }
1179 }
1180
1181 #endif /* MI_MATH section */
1182
1183 /* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
1184 #if MI_BUILDER_CAN_WRITE_BATCH
1185
/**
 * Handle returned by mi_store_address() and consumed by
 * _mi_resolve_address_token().
 */
struct mi_address_token {
   /* Pointers to the address fields, inside the batch, of the two
    * MI_STORE_REGISTER_MEM commands emitted by mi_store_address().
    */
   uint64_t *ptrs[2];
};
1190
1191 static inline struct mi_address_token
mi_store_address(struct mi_builder * b,struct mi_value addr_reg)1192 mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
1193 {
1194 mi_builder_flush_math(b);
1195
1196 assert(addr_reg.type == MI_VALUE_TYPE_REG64);
1197
1198 struct mi_address_token token = {};
1199
1200 for (unsigned i = 0; i < 2; i++) {
1201 mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
1202 srm.RegisterAddress = addr_reg.reg + (i * 4);
1203
1204 const unsigned addr_dw =
1205 GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
1206 token.ptrs[i] = (void *)_dst + addr_dw;
1207 }
1208 }
1209
1210 mi_value_unref(b, addr_reg);
1211 return token;
1212 }
1213
1214 static inline void
mi_self_mod_barrier(struct mi_builder * b,unsigned cs_prefetch_size)1215 mi_self_mod_barrier(struct mi_builder *b, unsigned cs_prefetch_size)
1216 {
1217 /* First make sure all the memory writes from previous modifying commands
1218 * have landed. We want to do this before going through the CS cache,
1219 * otherwise we could be fetching memory that hasn't been written to yet.
1220 */
1221 mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
1222 pc.CommandStreamerStallEnable = true;
1223 }
1224 /* Documentation says Gfx11+ should be able to invalidate the command cache
1225 * but experiment show it doesn't work properly, so for now just get over
1226 * the CS prefetch.
1227 */
1228 for (uint32_t i = 0; i < (cs_prefetch_size / 4); i++)
1229 mi_builder_emit(b, GENX(MI_NOOP), noop);
1230 }
1231
1232 static inline void
_mi_resolve_address_token(struct mi_builder * b,struct mi_address_token token,void * batch_location)1233 _mi_resolve_address_token(struct mi_builder *b,
1234 struct mi_address_token token,
1235 void *batch_location)
1236 {
1237 __gen_address_type addr = __gen_get_batch_address(b->user_data,
1238 batch_location);
1239 uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
1240 addr, 0);
1241 *(token.ptrs[0]) = addr_addr_u64;
1242 *(token.ptrs[1]) = addr_addr_u64 + 4;
1243 }
1244
1245 #endif /* MI_BUILDER_CAN_WRITE_BATCH */
1246
1247 #if GFX_VERx10 >= 125
1248
1249 /*
1250 * Indirect load/store. Only available on XE_HP+
1251 */
1252
1253 MUST_CHECK static inline struct mi_value
mi_load_mem64_offset(struct mi_builder * b,__gen_address_type addr,struct mi_value offset)1254 mi_load_mem64_offset(struct mi_builder *b,
1255 __gen_address_type addr, struct mi_value offset)
1256 {
1257 uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
1258 struct mi_value addr_val = mi_imm(addr_u64);
1259
1260 struct mi_value dst = mi_new_gpr(b);
1261
1262 uint32_t dw[5];
1263 dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
1264 dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
1265 dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
1266 dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
1267 dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
1268 _mi_builder_push_math(b, dw, 5);
1269
1270 mi_value_unref(b, addr_val);
1271 mi_value_unref(b, offset);
1272
1273 return dst;
1274 }
1275
1276 static inline void
mi_store_mem64_offset(struct mi_builder * b,__gen_address_type addr,struct mi_value offset,struct mi_value data)1277 mi_store_mem64_offset(struct mi_builder *b,
1278 __gen_address_type addr, struct mi_value offset,
1279 struct mi_value data)
1280 {
1281 uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
1282 struct mi_value addr_val = mi_imm(addr_u64);
1283
1284 data = mi_value_to_gpr(b, mi_resolve_invert(b, data));
1285
1286 uint32_t dw[5];
1287 dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
1288 dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
1289 dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
1290 dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
1291 dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
1292 _mi_builder_push_math(b, dw, 5);
1293
1294 mi_value_unref(b, addr_val);
1295 mi_value_unref(b, offset);
1296 mi_value_unref(b, data);
1297
1298 /* This is the only math case which has side-effects outside of regular
1299 * registers to flush math afterwards so we don't confuse anyone.
1300 */
1301 mi_builder_flush_math(b);
1302 }
1303
1304 /*
1305 * Control-flow Section. Only available on XE_HP+
1306 */
1307
/** A goto emitted before its target was placed. */
struct _mi_goto {
   /* Whether the jump is predicated. */
   bool predicated;

   /* Batch space reserved for the MI_BATCH_BUFFER_START of this goto; it is
    * packed once the target gets placed.
    */
   void *mi_bbs;
};
1312
1313 struct mi_goto_target {
1314 bool placed;
1315 unsigned num_gotos;
1316 struct _mi_goto gotos[8];
1317 __gen_address_type addr;
1318 };
1319
/* Initial state of a goto target: not placed, no pending gotos. */
#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})

/* Register number of MI_PREDICATE_RESULT, as passed to mi_reg32(). */
#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418
1323
1324 static inline void
mi_goto_if(struct mi_builder * b,struct mi_value cond,struct mi_goto_target * t)1325 mi_goto_if(struct mi_builder *b, struct mi_value cond,
1326 struct mi_goto_target *t)
1327 {
1328 /* First, set up the predicate, if any */
1329 bool predicated;
1330 if (cond.type == MI_VALUE_TYPE_IMM) {
1331 /* If it's an immediate, the goto either doesn't happen or happens
1332 * unconditionally.
1333 */
1334 if (mi_value_to_u64(cond) == 0)
1335 return;
1336
1337 assert(mi_value_to_u64(cond) == ~0ull);
1338 predicated = false;
1339 } else if (mi_value_is_reg(cond) &&
1340 cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
1341 /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
1342 * provided us with
1343 */
1344 assert(cond.type == MI_VALUE_TYPE_REG32);
1345 predicated = true;
1346 } else {
1347 mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
1348 predicated = true;
1349 }
1350
1351 if (predicated) {
1352 mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
1353 sp.PredicateEnable = NOOPOnResultClear;
1354 }
1355 }
1356 if (t->placed) {
1357 mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
1358 bbs.PredicationEnable = predicated;
1359 bbs.AddressSpaceIndicator = ASI_PPGTT;
1360 bbs.BatchBufferStartAddress = t->addr;
1361 }
1362 } else {
1363 assert(t->num_gotos < ARRAY_SIZE(t->gotos));
1364 struct _mi_goto g = {
1365 .predicated = predicated,
1366 .mi_bbs = __gen_get_batch_dwords(b->user_data,
1367 GENX(MI_BATCH_BUFFER_START_length)),
1368 };
1369 memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
1370 t->gotos[t->num_gotos++] = g;
1371 }
1372 if (predicated) {
1373 mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
1374 sp.PredicateEnable = NOOPNever;
1375 }
1376 }
1377 }
1378
1379 static inline void
mi_goto(struct mi_builder * b,struct mi_goto_target * t)1380 mi_goto(struct mi_builder *b, struct mi_goto_target *t)
1381 {
1382 mi_goto_if(b, mi_imm(-1), t);
1383 }
1384
1385 static inline void
mi_goto_target(struct mi_builder * b,struct mi_goto_target * t)1386 mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
1387 {
1388 mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
1389 sp.PredicateEnable = NOOPNever;
1390 t->addr = __gen_get_batch_address(b->user_data, _dst);
1391 }
1392 t->placed = true;
1393
1394 struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
1395 bbs.AddressSpaceIndicator = ASI_PPGTT;
1396 bbs.BatchBufferStartAddress = t->addr;
1397
1398 for (unsigned i = 0; i < t->num_gotos; i++) {
1399 bbs.PredicationEnable = t->gotos[i].predicated;
1400 GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
1401 }
1402 }
1403
1404 static inline struct mi_goto_target
mi_goto_target_init_and_place(struct mi_builder * b)1405 mi_goto_target_init_and_place(struct mi_builder *b)
1406 {
1407 struct mi_goto_target t = MI_GOTO_TARGET_INIT;
1408 mi_goto_target(b, &t);
1409 return t;
1410 }
1411
/**
 * Loop construct: the statement following mi_loop(b) is the loop body.
 *
 * Two targets are declared: __continue, placed where the loop starts, and
 * __break, still unplaced.  On the C side the body runs exactly once; the
 * increment step then emits the jump back to __continue and places __break,
 * which ends the for statement.
 */
#define mi_loop(b)                                                        \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT,              \
                              __continue = mi_goto_target_init_and_place(b); \
        !__break.placed;                                                  \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))

/* Leave the enclosing mi_loop(), always or only when cond is true. */
#define mi_break(b)                 mi_goto(b, &__break)
#define mi_break_if(b, cond)        mi_goto_if(b, cond, &__break)

/* Jump back to the start of the enclosing mi_loop(), always or only when
 * cond is true.
 */
#define mi_continue(b)              mi_goto(b, &__continue)
#define mi_continue_if(b, cond)     mi_goto_if(b, cond, &__continue)
1421
1422 #endif /* GFX_VERx10 >= 125 */
1423
1424 #endif /* MI_BUILDER_H */
1425