/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef MI_BUILDER_H
#define MI_BUILDER_H

#include "dev/intel_device_info.h"
#include "genxml/genX_bits.h"
#include "util/bitscan.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#ifndef MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif

/** These must be defined by the user of the builder
 *
 * void *__gen_get_batch_dwords(__gen_user_data *user_data,
 *                              unsigned num_dwords);
 *
 * __gen_address_type
 * __gen_address_offset(__gen_address_type addr, uint64_t offset);
 *
 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning, batch chaining, or some other
 * mechanism for ensuring batch pointers remain valid during building is
 * required.  The following function must also be defined; it returns an
 * address in canonical form:
 *
 * __gen_address_type
 * __gen_get_batch_address(__gen_user_data *user_data, void *location);
 *
 * Also, __gen_combine_address must accept a location value of NULL and return
 * a fully valid 64-bit address.
 */
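
/* As a hedged illustration only, a minimal driver might satisfy this
 * contract roughly as follows.  The "example_batch" type and its fields are
 * hypothetical and not part of this API:
 *
 *    struct example_batch { uint32_t *next; };
 *
 *    static void *
 *    __gen_get_batch_dwords(__gen_user_data *user_data, unsigned num_dwords)
 *    {
 *       struct example_batch *batch = (struct example_batch *)user_data;
 *       uint32_t *dw = batch->next;     // hand out space in the batch
 *       batch->next += num_dwords;      // and advance the write pointer
 *       return dw;
 *    }
 *
 *    static __gen_address_type
 *    __gen_address_offset(__gen_address_type addr, uint64_t offset)
 *    {
 *       addr.offset += offset;          // assumes an .offset field exists
 *       return addr;
 *    }
 */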

/*
 * Start of the actual MI builder
 */

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#define mi_builder_pack(b, cmd, dst, name)                          \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

#define mi_builder_emit(b, cmd, name)                               \
   mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)

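/* The pack macro runs its body exactly once with "name" as the command
 * struct, then packs it into the batch.  For example, a caller with a valid
 * builder "b" can emit one command and fill its fields like this (a sketch,
 * assuming the genxml MI_STORE_DATA_IMM definition for the target
 * generation and a caller-owned "addr"):
 *
 *    mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
 *       sdi.Address = addr;
 *       sdi.ImmediateData = 42;
 *    }
 */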

enum mi_value_type {
   MI_VALUE_TYPE_IMM,
   MI_VALUE_TYPE_MEM32,
   MI_VALUE_TYPE_MEM64,
   MI_VALUE_TYPE_REG32,
   MI_VALUE_TYPE_REG64,
};

struct mi_value {
   enum mi_value_type type;

   union {
      uint64_t imm;
      __gen_address_type addr;
      uint32_t reg;
   };

#if GFX_VERx10 >= 75
   bool invert;
#endif
};

struct mi_reg_num {
   uint32_t num;
#if GFX_VER >= 11
   bool cs;
#endif
};

static inline struct mi_reg_num
mi_adjust_reg_num(uint32_t reg)
{
#if GFX_VER >= 11
   bool cs = reg >= 0x2000 && reg < 0x4000;
   return (struct mi_reg_num) {
      .num = reg - (cs ? 0x2000 : 0),
      .cs = cs,
   };
#else
   return (struct mi_reg_num) { .num = reg, };
#endif
}
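
/* For instance, GPR0 lives at 0x2600 in the render engine's MMIO range, so
 * on GFX 11+ the helper above returns { .num = 0x600, .cs = true }, and the
 * command's AddCSMMIOStartOffset bit re-bases the register onto whichever
 * engine executes the batch.
 */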

#if GFX_VER >= 9
#define MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define MI_BUILDER_MAX_MATH_DWORDS 64
#endif

struct mi_builder {
   const struct intel_device_info *devinfo;
   __gen_user_data *user_data;

#if GFX_VERx10 >= 75
   uint32_t gprs;
   uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];

   unsigned num_math_dwords;
   uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
#endif

#if GFX_VERx10 >= 125
   uint32_t mocs;
#endif
};

static inline void
mi_builder_init(struct mi_builder *b,
                const struct intel_device_info *devinfo,
                __gen_user_data *user_data)
{
   memset(b, 0, sizeof(*b));
   b->devinfo = devinfo;
   b->user_data = user_data;

#if GFX_VERx10 >= 75
   b->gprs = 0;
   b->num_math_dwords = 0;
#endif
}

static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   if (b->num_math_dwords == 0)
      return;

   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   mi_builder_pack(b, GENX(MI_MATH), dw, math) {
#if GFX_VERx10 >= 125
      math.MOCS = b->mocs;
#endif
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}

/**
 * Set the MOCS index used by the builder
 *
 * This is required on GFX 12.5 and newer whenever an MI_MATH instruction
 * will be emitted, since MI_MATH carries a MOCS field there.
 */
static inline void
mi_builder_set_mocs(UNUSED struct mi_builder *b, UNUSED uint32_t mocs)
{
#if GFX_VERx10 >= 125
   if (b->mocs != 0 && b->mocs != mocs)
      mi_builder_flush_math(b);
   b->mocs = mocs;
#endif
}

#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16

#if GFX_VERx10 >= 75

static inline bool
mi_value_is_reg(struct mi_value val)
{
   return val.type == MI_VALUE_TYPE_REG32 ||
          val.type == MI_VALUE_TYPE_REG64;
}

static inline bool
mi_value_is_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    _MI_BUILDER_NUM_HW_GPRS * 8;
}

static inline bool
_mi_value_is_allocated_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    MI_BUILDER_NUM_ALLOC_GPRS * 8;
}

static inline uint32_t
_mi_value_as_gpr(struct mi_value val)
{
   assert(mi_value_is_gpr(val));
   /* Some of the GRL metakernels will generate a 64-bit value in a GP
    * register, then use only half of it in the last operation on that
    * value, so allow unref on part of a GP register.
    */
   assert(val.reg % 4 == 0);
   return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
}

static inline struct mi_value
mi_new_gpr(struct mi_builder *b)
{
   unsigned gpr = ffs(~b->gprs) - 1;
   assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
   assert(b->gpr_refs[gpr] == 0);
   b->gprs |= (1u << gpr);
   b->gpr_refs[gpr] = 1;

   return (struct mi_value) {
      .type = MI_VALUE_TYPE_REG64,
      .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
   };
}

static inline struct mi_value
mi_reserve_gpr(struct mi_builder *b, unsigned gpr)
{
   assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
   assert(!(b->gprs & (1 << gpr)));
   assert(b->gpr_refs[gpr] == 0);
   b->gprs |= (1u << gpr);
   b->gpr_refs[gpr] = 128; /* Enough that we won't unref it */

   return (struct mi_value) {
      .type = MI_VALUE_TYPE_REG64,
      .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
   };
}
#endif /* GFX_VERx10 >= 75 */

/** Take a reference to a mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations.  All mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume.  In particular, if you pass the same value into a
 * single mi_* math function twice (say to add a number to itself), you
 * are responsible for calling mi_value_ref() to get a second reference
 * because the mi_* math function will consume it twice.
 */
static inline void
mi_value_add_refs(struct mi_builder *b, struct mi_value val, unsigned num_refs)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] < UINT8_MAX);
      b->gpr_refs[gpr] += num_refs;
   }
#endif /* GFX_VERx10 >= 75 */
}

static inline struct mi_value
mi_value_ref(struct mi_builder *b, struct mi_value val)
{
   mi_value_add_refs(b, val, 1);
   return val;
}
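
/* For example, doubling a value passes the same source twice, which consumes
 * two references, so the caller must take the extra one explicitly (a
 * sketch; "val" is assumed to already hold one reference):
 *
 *    struct mi_value doubled = mi_iadd(b, val, mi_value_ref(b, val));
 */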


/** Drop a reference to a mi_value
 *
 * See also mi_value_ref.
 */
static inline void
mi_value_unref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] > 0);
      if (--b->gpr_refs[gpr] == 0)
         b->gprs &= ~(1u << gpr);
   }
#endif /* GFX_VERx10 >= 75 */
}

static inline struct mi_value
mi_imm(uint64_t imm)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_IMM,
      .imm = imm,
   };
}

static inline struct mi_value
mi_reg32(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG32,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_reg64(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG64,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_mem32(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM32,
      .addr = addr,
   };
}

static inline struct mi_value
mi_mem64(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM64,
      .addr = addr,
   };
}

static inline struct mi_value
mi_value_half(struct mi_value value, bool top_32_bits)
{
   switch (value.type) {
   case MI_VALUE_TYPE_IMM:
      if (top_32_bits)
         value.imm >>= 32;
      else
         value.imm &= 0xffffffffu;
      return value;

   case MI_VALUE_TYPE_MEM32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_MEM64:
      if (top_32_bits)
         value.addr = __gen_address_offset(value.addr, 4);
      value.type = MI_VALUE_TYPE_MEM32;
      return value;

   case MI_VALUE_TYPE_REG32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_REG64:
      if (top_32_bits)
         value.reg += 4;
      value.type = MI_VALUE_TYPE_REG32;
      return value;
   }

   unreachable("Invalid mi_value type");
}

static inline void
_mi_copy_no_unref(struct mi_builder *b,
                  struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   mi_builder_flush_math(b);

   switch (dst.type) {
   case MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case MI_VALUE_TYPE_MEM64:
   case MI_VALUE_TYPE_REG64:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         if (dst.type == MI_VALUE_TYPE_REG64) {
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                              GENX(MI_LOAD_REGISTER_IMM_length) + 2);
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
               lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
                                 GENX(MI_LOAD_REGISTER_IMM_length_bias);
#if GFX_VER >= 11
               lri.AddCSMMIOStartOffset = reg.cs;
#endif
            }
            dw[1] = reg.num;
            dw[2] = src.imm;
            dw[3] = reg.num + 4;
            dw[4] = src.imm >> 32;
         } else {
#if GFX_VER >= 8
            assert(dst.type == MI_VALUE_TYPE_MEM64);
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                              GENX(MI_STORE_DATA_IMM_length) + 1);
            mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
               sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
                                 GENX(MI_STORE_DATA_IMM_length_bias);
               sdm.StoreQword = true;
               sdm.Address = dst.addr;
            }
            dw[3] = src.imm;
            dw[4] = src.imm >> 32;
#else
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_value_half(src, true));
#endif
         }
         break;
      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_MEM32:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_imm(0));
         break;
      case MI_VALUE_TYPE_REG64:
      case MI_VALUE_TYPE_MEM64:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_value_half(src, true));
         break;
      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GFX_VER >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 8
         mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GFX_VERx10 == 75
         {
            struct mi_value tmp = mi_new_gpr(b);
            _mi_copy_no_unref(b, tmp, src);
            _mi_copy_no_unref(b, dst, tmp);
            mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
         mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
            srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            srm.AddCSMMIOStartOffset = reg.cs;
#endif
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lri.RegisterOffset = reg.num;
#if GFX_VER >= 11
            lri.AddCSMMIOStartOffset = reg.cs;
#endif
            lri.DataDWord = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 7
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lrm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            lrm.AddCSMMIOStartOffset = reg.cs;
#endif
            lrm.MemoryAddress = src.addr;
         }
#else
         unreachable("Cannot do mem -> reg copy on SNB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
#if GFX_VERx10 >= 75
         if (src.reg != dst.reg) {
            mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
               lrr.SourceRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetSource = reg.cs;
#endif
               reg = mi_adjust_reg_num(dst.reg);
               lrr.DestinationRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetDestination = reg.cs;
#endif
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   default:
      unreachable("Invalid mi_value type");
   }
}

#if GFX_VERx10 >= 75
static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src);
#endif

/** Store the value in src to the value represented by dst
 *
 * If the bit sizes of src and dst mismatch, this function does an unsigned
 * integer cast.  If src has more bits than dst, it takes the bottom bits.  If
 * src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   src = mi_resolve_invert(b, src);
#endif
   _mi_copy_no_unref(b, dst, src);
   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
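
/* For example, copying a 64-bit value between two buffer locations and
 * writing a flag (a sketch; "dst_addr", "src_addr", and "flag_addr" are
 * assumed to be __gen_address_type values owned by the caller):
 *
 *    mi_store(b, mi_mem64(dst_addr), mi_mem64(src_addr));
 *    mi_store(b, mi_mem32(flag_addr), mi_imm(1));
 */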

static inline void
mi_memset(struct mi_builder *b, __gen_address_type dst,
          uint32_t value, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memset operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
                      mi_imm(value));
   }
}

/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
static inline void
mi_memcpy(struct mi_builder *b, __gen_address_type dst,
          __gen_address_type src, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
      struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
#if GFX_VERx10 >= 75
      mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general purpose register for command streamer
       * commands. Therefore, we use an alternate temporary register.
       */
      struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
      mi_store(b, tmp_reg, src_val);
      mi_store(b, dst_val, tmp_reg);
#endif
   }
}

/*
 * MI_MATH Section.  Only available on Haswell+
 */

#if GFX_VERx10 >= 75

/**
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst.  Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
   assert(!dst.invert && !src.invert);

   mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == MI_VALUE_TYPE_MEM64 ||
          dst.type == MI_VALUE_TYPE_MEM32);

   if (src.type != MI_VALUE_TYPE_REG32 &&
       src.type != MI_VALUE_TYPE_REG64) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == MI_VALUE_TYPE_MEM64) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
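
/* For example, once the client has loaded a comparison result into
 * MI_PREDICATE_RESULT, the store below only lands when the predicate is set
 * (a sketch; "result_addr" is an assumed __gen_address_type):
 *
 *    mi_store_if(b, mi_mem64(result_addr), mi_imm(1));
 */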

static inline void
_mi_builder_push_math(struct mi_builder *b,
                      const uint32_t *dwords,
                      unsigned num_dwords)
{
   assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
   if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
      mi_builder_flush_math(b);

   memcpy(&b->math_dwords[b->num_math_dwords],
          dwords, num_dwords * sizeof(*dwords));
   b->num_math_dwords += num_dwords;
}

static inline uint32_t
_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
{
   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
      .Operand2 = operand2,
      .Operand1 = operand1,
      .ALUOpcode = opcode,
   };

   uint32_t dw;
   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);

   return dw;
}

static inline struct mi_value
mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
{
   if (mi_value_is_gpr(val))
      return val;

   /* Save off the invert flag because _mi_copy_no_unref() asserts on it */
   bool invert = val.invert;
   val.invert = false;

   struct mi_value tmp = mi_new_gpr(b);
   _mi_copy_no_unref(b, tmp, val);
   tmp.invert = invert;

   return tmp;
}

static inline uint64_t
mi_value_to_u64(struct mi_value val)
{
   assert(val.type == MI_VALUE_TYPE_IMM);
   return val.invert ? ~val.imm : val.imm;
}

static inline uint32_t
_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
{
   if (val->type == MI_VALUE_TYPE_IMM &&
       (val->imm == 0 || val->imm == UINT64_MAX)) {
      uint64_t imm = val->invert ? ~val->imm : val->imm;
      return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
   } else {
      *val = mi_value_to_gpr(b, *val);
      return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
                          src, _mi_value_as_gpr(*val));
   }
}

static inline struct mi_value
mi_math_binop(struct mi_builder *b, uint32_t opcode,
              struct mi_value src0, struct mi_value src1,
              uint32_t store_op, uint32_t store_src)
{
   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[4];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
   dw[2] = _mi_pack_alu(opcode, 0, 0);
   dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
   _mi_builder_push_math(b, dw, 4);

   mi_value_unref(b, src0);
   mi_value_unref(b, src1);

   return dst;
}

static inline struct mi_value
mi_inot(struct mi_builder *b, struct mi_value val)
{
   if (val.type == MI_VALUE_TYPE_IMM)
      return mi_imm(~mi_value_to_u64(val));

   val.invert = !val.invert;
   return val;
}

static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src)
{
   if (!src.invert)
      return src;

   assert(src.type != MI_VALUE_TYPE_IMM);
   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_ADD, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}
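
/* Because each mi_* math helper consumes its source references and returns a
 * fresh GPR value, expressions compose directly.  For instance, computing
 * the sum of two memory locations and writing it back out (a sketch;
 * "addr_a", "addr_b", and "addr_out" are assumed __gen_address_type values):
 *
 *    struct mi_value sum = mi_iadd(b, mi_mem64(addr_a), mi_mem64(addr_b));
 *    mi_store(b, mi_mem64(addr_out), sum);
 */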

static inline struct mi_value
mi_iadd_imm(struct mi_builder *b,
                struct mi_value src, uint64_t N)
{
   if (N == 0)
      return src;

   return mi_iadd(b, src, mi_imm(N));
}

static inline struct mi_value
mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "equal" by subtracting and storing the zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "not equal" by subtracting and storing the inverse zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "less than" by subtracting and storing the carry bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STORE, MI_ALU_CF);
}

static inline struct mi_value
mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "greater than or equal" by subtracting and storing the
    * inverse carry bit
    */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STOREINV, MI_ALU_CF);
}

static inline struct mi_value
mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_AND, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_nz(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                           MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_z(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                           MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ior(struct mi_builder *b,
       struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_OR, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

#if GFX_VERx10 >= 125
static inline struct mi_value
mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHL, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHR, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ushr(b, res, mi_imm(1ULL << bit));
   }

   return res;
}

static inline struct mi_value
mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SAR, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishr(b, res, mi_imm(1 << bit));
   }

   return res;
}
#endif /* GFX_VERx10 >= 125 */

static inline struct mi_value
mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) * N);

   if (N == 0) {
      mi_value_unref(b, src);
      return mi_imm(0);
   }

   if (N == 1)
      return src;

   src = mi_value_to_gpr(b, src);

   struct mi_value res = mi_value_ref(b, src);

   unsigned top_bit = 31 - __builtin_clz(N);
   for (int i = top_bit - 1; i >= 0; i--) {
      res = mi_iadd(b, res, mi_value_ref(b, res));
      if (N & (1 << i))
         res = mi_iadd(b, res, mi_value_ref(b, src));
   }

   mi_value_unref(b, src);

   return res;
}
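
/* The loop above is binary (double-and-add) multiplication: scanning N from
 * its top set bit down, each step doubles the accumulator and adds src when
 * the corresponding bit of N is set.  For N = 5 (0b101): res starts at src,
 * doubles to 2*src, doubles to 4*src, then adds src for bit 0, giving 5*src
 * in three adds.
 */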

static inline struct mi_value
mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) << shift);

   struct mi_value res = mi_value_to_gpr(b, src);

#if GFX_VERx10 >= 125
   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishl(b, res, mi_imm(1 << bit));
   }
#else
   for (unsigned i = 0; i < shift; i++)
      res = mi_iadd(b, res, mi_value_ref(b, res));
#endif

   return res;
}

static inline struct mi_value
mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
    * of the result.
    */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);

   if (shift > 32) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, mi_value_half(tmp, false),
                               mi_value_half(src, true));
      _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
      mi_value_unref(b, src);
      src = tmp;
      shift -= 32;
   }
   assert(shift <= 32);
   struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
   struct mi_value dst = mi_new_gpr(b);
   _mi_copy_no_unref(b, mi_value_half(dst, false),
                            mi_value_half(tmp, true));
   _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
   mi_value_unref(b, tmp);
   return dst;
}

static inline struct mi_value
mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
{
   if (N.type == MI_VALUE_TYPE_IMM) {
      assert(mi_value_to_u64(N) <= UINT32_MAX);
      return mi_imm(mi_value_to_u64(N) / D);
   }

   /* We implicitly assume that N is only a 32-bit value */
   if (D == 0) {
      /* This is invalid but we should do something */
      return mi_imm(0);
   } else if (util_is_power_of_two_or_zero(D)) {
      return mi_ushr32_imm(b, N, util_logbase2(D));
   } else {
      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
      assert(m.multiplier <= UINT32_MAX);

      if (m.pre_shift)
         N = mi_ushr32_imm(b, N, m.pre_shift);

      /* Do the 32x32 multiply into a GPR */
      N = mi_imul_imm(b, N, m.multiplier);

      if (m.increment)
         N = mi_iadd(b, N, mi_imm(m.multiplier));

      N = mi_ushr32_imm(b, N, 32);

      if (m.post_shift)
         N = mi_ushr32_imm(b, N, m.post_shift);

      return N;
   }
}
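
/* The constant-division path above uses the usual multiply-by-reciprocal
 * identity.  As a sketch (the exact parameters come from
 * util_compute_fast_udiv_info()), for every 32-bit n,
 *
 *    n / D == (((n >> pre_shift) * M + (increment ? M : 0)) >> 32)
 *             >> post_shift
 *
 * e.g. for D = 3 one valid choice is M = 0xaaaaaaab with pre_shift = 0,
 * increment = 0, post_shift = 1, so n / 3 == (n * 0xaaaaaaab) >> 33.
 */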

#endif /* MI_MATH section */

/* This assumes addresses of strictly more than 32 bits (aka. Gfx8+). */
#if MI_BUILDER_CAN_WRITE_BATCH

struct mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};

static inline struct mi_address_token
mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
{
   mi_builder_flush_math(b);

   assert(addr_reg.type == MI_VALUE_TYPE_REG64);

   struct mi_address_token token = {};

   for (unsigned i = 0; i < 2; i++) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   mi_value_unref(b, addr_reg);
   return token;
}

static inline void
mi_self_mod_barrier(struct mi_builder *b, unsigned cs_prefetch_size)
{
   /* First make sure all the memory writes from previous modifying commands
    * have landed. We want to do this before going through the CS cache,
    * otherwise we could be fetching memory that hasn't been written to yet.
    */
   mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
      pc.CommandStreamerStallEnable = true;
   }
   /* Documentation says Gfx11+ should be able to invalidate the command
    * cache, but experiments show it doesn't work properly, so for now just
    * pad past the CS prefetch with NOOPs.
    */
   for (uint32_t i = 0; i < (cs_prefetch_size / 4); i++)
      mi_builder_emit(b, GENX(MI_NOOP), noop);
}

static inline void
_mi_resolve_address_token(struct mi_builder *b,
                          struct mi_address_token token,
                          void *batch_location)
{
   __gen_address_type addr = __gen_get_batch_address(b->user_data,
                                                    batch_location);
   uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
                                                  addr, 0);
   *(token.ptrs[0]) = addr_addr_u64;
   *(token.ptrs[1]) = addr_addr_u64 + 4;
}
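
/* A hedged sketch of the self-modifying-batch flow: emit the two SRMs with
 * placeholder destinations, then patch them once the target location in the
 * batch is known ("gpr" is assumed to hold the address value to write and
 * "location" a batch pointer the driver obtained separately):
 *
 *    struct mi_address_token token = mi_store_address(b, gpr);
 *    mi_self_mod_barrier(b, cs_prefetch_size);
 *    ...
 *    _mi_resolve_address_token(b, token, location);
 */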

#endif /* MI_BUILDER_CAN_WRITE_BATCH */

#if GFX_VERx10 >= 125

/*
 * Indirect load/store.  Only available on XE_HP+
 */

MUST_CHECK static inline struct mi_value
mi_load_mem64_offset(struct mi_builder *b,
                     __gen_address_type addr, struct mi_value offset)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);

   return dst;
}

static inline void
mi_store_mem64_offset(struct mi_builder *b,
                          __gen_address_type addr, struct mi_value offset,
                          struct mi_value data)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   data = mi_value_to_gpr(b, mi_resolve_invert(b, data));

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);
   mi_value_unref(b, data);

   /* This is the only math case which has side-effects outside of regular
    * registers, so flush the math afterwards to avoid confusing anyone.
    */
   mi_builder_flush_math(b);
}

/*
 * Control-flow Section.  Only available on XE_HP+
 */

struct _mi_goto {
   bool predicated;
   void *mi_bbs;
};

struct mi_goto_target {
   bool placed;
   unsigned num_gotos;
   struct _mi_goto gotos[8];
   __gen_address_type addr;
};

#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})

#define MI_BUILDER_MI_PREDICATE_RESULT_num  0x2418

static inline void
mi_goto_if(struct mi_builder *b, struct mi_value cond,
           struct mi_goto_target *t)
{
   /* First, set up the predicate, if any */
   bool predicated;
   if (cond.type == MI_VALUE_TYPE_IMM) {
      /* If it's an immediate, the goto either doesn't happen or happens
       * unconditionally.
       */
      if (mi_value_to_u64(cond) == 0)
         return;

      assert(mi_value_to_u64(cond) == ~0ull);
      predicated = false;
   } else if (mi_value_is_reg(cond) &&
              cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
      /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
       * provided us with
       */
      assert(cond.type == MI_VALUE_TYPE_REG32);
      predicated = true;
   } else {
      mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
      predicated = true;
   }

   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPOnResultClear;
      }
   }
   if (t->placed) {
      mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.PredicationEnable         = predicated;
         bbs.AddressSpaceIndicator     = ASI_PPGTT;
         bbs.BatchBufferStartAddress   = t->addr;
      }
   } else {
      assert(t->num_gotos < ARRAY_SIZE(t->gotos));
      struct _mi_goto g = {
         .predicated = predicated,
         .mi_bbs = __gen_get_batch_dwords(b->user_data,
                                          GENX(MI_BATCH_BUFFER_START_length)),
      };
      memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
      t->gotos[t->num_gotos++] = g;
   }
   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPNever;
      }
   }
}

static inline void
mi_goto(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_goto_if(b, mi_imm(-1), t);
}

static inline void
mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
      sp.PredicateEnable = NOOPNever;
      t->addr = __gen_get_batch_address(b->user_data, _dst);
   }
   t->placed = true;

   struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
   bbs.AddressSpaceIndicator     = ASI_PPGTT;
   bbs.BatchBufferStartAddress   = t->addr;

   for (unsigned i = 0; i < t->num_gotos; i++) {
      bbs.PredicationEnable = t->gotos[i].predicated;
      GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
   }
}

static inline struct mi_goto_target
mi_goto_target_init_and_place(struct mi_builder *b)
{
   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_target(b, &t);
   return t;
}

#define mi_loop(b) \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \
        __continue = mi_goto_target_init_and_place(b); !__break.placed; \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))

#define mi_break(b) mi_goto(b, &__break)
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
#define mi_continue(b) mi_goto(b, &__continue)
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)
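
/* Putting the loop helpers together, a GPU-side countdown might look like
 * this (a sketch; "count" is assumed to be a GPR-backed mi_value the caller
 * owns and keeps a reference to across iterations):
 *
 *    mi_loop(b) {
 *       mi_break_if(b, mi_z(b, mi_value_ref(b, count)));
 *       count = mi_isub(b, count, mi_imm(1));
 *    }
 */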

#endif /* GFX_VERx10 >= 125 */

#endif /* MI_BUILDER_H */