• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Collabora Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 #ifndef MME_BUILDER_H
6 #define MME_BUILDER_H
7 
8 #include "mme_value.h"
9 #include "mme_tu104.h"
10 #include "nv_device_info.h"
11 
12 #include "util/bitscan.h"
13 #include "util/enum_operators.h"
14 
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18 
/*
 * ALU operations accepted by the generic builder entry points
 * (mme_alu_to() and friends).  Values are sequential starting at zero.
 */
enum mme_alu_op {
   MME_ALU_OP_ADD = 0,
   MME_ALU_OP_ADDC,     /* high-half op used by mme_add64*() */
   MME_ALU_OP_SUB,
   MME_ALU_OP_SUBB,     /* high-half op used by mme_sub64*() */
   MME_ALU_OP_MUL,
   MME_ALU_OP_MULH,     /* high-half op used by the 32x32->64 multiplies */
   MME_ALU_OP_MULU,

   MME_ALU_OP_CLZ,

   MME_ALU_OP_SLL,
   MME_ALU_OP_SRL,
   MME_ALU_OP_SRA,

   MME_ALU_OP_AND,
   MME_ALU_OP_NAND,
   MME_ALU_OP_OR,
   MME_ALU_OP_XOR,

   MME_ALU_OP_SLT,
   MME_ALU_OP_SLTU,
   MME_ALU_OP_SLE,
   MME_ALU_OP_SLEU,
   MME_ALU_OP_SEQ,

   MME_ALU_OP_DREAD,
   MME_ALU_OP_DWRITE,   /* issued without a destination by mme_dwrite() */
};
43 
/*
 * Comparison operators passed to the start_if / end_while / exit_if
 * backends.  Each is combined with an if_true flag by the MME_DEF_*
 * generators below to obtain the negated forms.
 */
enum mme_cmp_op {
   MME_CMP_OP_LT = 0,
   MME_CMP_OP_LTU,
   MME_CMP_OP_LE,
   MME_CMP_OP_LEU,
   MME_CMP_OP_EQ,
};
51 
/* Kind of control-flow construct recorded in a struct mme_cf. */
enum mme_cf_type {
   MME_CF_TYPE_IF = 0,
   MME_CF_TYPE_LOOP,
   MME_CF_TYPE_WHILE,
};
57 
58 struct mme_cf {
59    enum mme_cf_type type;
60    uint16_t start_ip;
61 };
62 
63 struct mme_builder;
64 
65 #include "mme_tu104_builder.h"
66 #include "mme_fermi_builder.h"
67 
/*
 * Thresholds compared against nv_device_info::cls_eng3d to pick a backend:
 * values >= MME_CLS_TURING use the tu104 builder, values >= MME_CLS_FERMI
 * (and below Turing) use the fermi builder.
 */
#define MME_CLS_FERMI  0x9000
#define MME_CLS_TURING 0xC500
70 
71 struct mme_builder {
72    const struct nv_device_info *devinfo;
73    struct mme_reg_alloc reg_alloc;
74    union {
75       struct mme_tu104_builder tu104;
76       struct mme_fermi_builder fermi;
77    };
78 };
79 
80 static inline void
mme_builder_init(struct mme_builder * b,const struct nv_device_info * dev)81 mme_builder_init(struct mme_builder *b, const struct nv_device_info *dev)
82 {
83    memset(b, 0, sizeof(*b));
84    b->devinfo = dev;
85 
86    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
87       mme_tu104_builder_init(b);
88    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
89       mme_fermi_builder_init(b);
90    else
91       unreachable("Unsupported GPU class");
92 }
93 
94 static inline uint32_t *
mme_builder_finish(struct mme_builder * b,size_t * size_out)95 mme_builder_finish(struct mme_builder *b, size_t *size_out)
96 {
97    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
98       return mme_tu104_builder_finish(&b->tu104, size_out);
99    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
100       return mme_fermi_builder_finish(&b->fermi, size_out);
101    else
102       unreachable("Unsupported GPU class");
103 }
104 
105 static inline void
mme_builder_dump(struct mme_builder * b,FILE * fp)106 mme_builder_dump(struct mme_builder *b, FILE *fp)
107 {
108    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
109       mme_tu104_builder_dump(b, fp);
110    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
111       mme_fermi_builder_dump(b, fp);
112    else
113       unreachable("Unsupported GPU class");
114 }
115 
116 static inline struct mme_value
mme_alloc_reg(struct mme_builder * b)117 mme_alloc_reg(struct mme_builder *b)
118 {
119    return mme_reg_alloc_alloc(&b->reg_alloc);
120 }
121 
122 static inline void
mme_realloc_reg(struct mme_builder * b,struct mme_value value)123 mme_realloc_reg(struct mme_builder *b, struct mme_value value)
124 {
125    return mme_reg_alloc_realloc(&b->reg_alloc, value);
126 }
127 
128 static inline void
mme_free_reg(struct mme_builder * b,struct mme_value val)129 mme_free_reg(struct mme_builder *b, struct mme_value val)
130 {
131    mme_reg_alloc_free(&b->reg_alloc, val);
132 }
133 
134 static inline void
mme_free_reg64(struct mme_builder * b,struct mme_value64 val)135 mme_free_reg64(struct mme_builder *b, struct mme_value64 val)
136 {
137    mme_reg_alloc_free(&b->reg_alloc, val.lo);
138    mme_reg_alloc_free(&b->reg_alloc, val.hi);
139 }
140 
141 static inline void
mme_alu_to(struct mme_builder * b,struct mme_value dst,enum mme_alu_op op,struct mme_value x,struct mme_value y)142 mme_alu_to(struct mme_builder *b,
143            struct mme_value dst,
144            enum mme_alu_op op,
145            struct mme_value x,
146            struct mme_value y)
147 {
148    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
149       mme_tu104_alu_to(b, dst, op, x, y);
150    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
151       mme_fermi_alu_to(b, dst, op, x, y);
152    else
153       unreachable("Unsupported GPU class");
154 }
155 
156 static inline struct mme_value
mme_alu(struct mme_builder * b,enum mme_alu_op op,struct mme_value x,struct mme_value y)157 mme_alu(struct mme_builder *b,
158         enum mme_alu_op op,
159         struct mme_value x,
160         struct mme_value y)
161 {
162    struct mme_value dst = mme_alloc_reg(b);
163    mme_alu_to(b, dst, op, x, y);
164    return dst;
165 }
166 
167 static inline void
mme_alu_no_dst(struct mme_builder * b,enum mme_alu_op op,struct mme_value x,struct mme_value y)168 mme_alu_no_dst(struct mme_builder *b,
169                enum mme_alu_op op,
170                struct mme_value x,
171                struct mme_value y)
172 {
173    mme_alu_to(b, mme_zero(), op, x, y);
174 }
175 
176 static inline void
mme_alu64_to(struct mme_builder * b,struct mme_value64 dst,enum mme_alu_op op_lo,enum mme_alu_op op_hi,struct mme_value64 x,struct mme_value64 y)177 mme_alu64_to(struct mme_builder *b,
178              struct mme_value64 dst,
179              enum mme_alu_op op_lo,
180              enum mme_alu_op op_hi,
181              struct mme_value64 x,
182              struct mme_value64 y)
183 {
184    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
185       mme_tu104_alu64_to(b, dst, op_lo, op_hi, x, y);
186    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
187       mme_fermi_alu64_to(b, dst, op_lo, op_hi, x, y);
188    else
189       unreachable("Unsupported GPU class");
190 }
191 
192 static inline struct mme_value64
mme_alu64(struct mme_builder * b,enum mme_alu_op op_lo,enum mme_alu_op op_hi,struct mme_value64 x,struct mme_value64 y)193 mme_alu64(struct mme_builder *b,
194           enum mme_alu_op op_lo, enum mme_alu_op op_hi,
195           struct mme_value64 x, struct mme_value64 y)
196 {
197    struct mme_value64 dst = {
198       mme_alloc_reg(b),
199       mme_alloc_reg(b),
200    };
201    mme_alu64_to(b, dst, op_lo, op_hi, x, y);
202    return dst;
203 }
204 
/*
 * Generate mme_<op>_to() and mme_<op>() for a one-source ALU operation.
 * The second ALU source is always mme_zero().
 */
#define MME_DEF_ALU1(op, OP)                                \
static inline void                                          \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,  \
              struct mme_value x)                           \
{                                                           \
   const struct mme_value zero = mme_zero();                \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, zero);            \
}                                                           \
                                                            \
static inline struct mme_value                              \
mme_##op(struct mme_builder *b,                             \
         struct mme_value x)                                \
{                                                           \
   struct mme_value dst = mme_alloc_reg(b);                 \
   mme_##op##_to(b, dst, x);                                \
   return dst;                                              \
}
219 
/*
 * Generate mme_<op>_to() and mme_<op>() for a two-source ALU operation.
 * The value-returning form allocates its own destination.
 */
#define MME_DEF_ALU2(op, OP)                                \
static inline void                                          \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,  \
              struct mme_value x, struct mme_value y)       \
{                                                           \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, y);               \
}                                                           \
                                                            \
static inline struct mme_value                              \
mme_##op(struct mme_builder *b,                             \
         struct mme_value x, struct mme_value y)            \
{                                                           \
   struct mme_value dst = mme_alloc_reg(b);                 \
   mme_##op##_to(b, dst, x, y);                             \
   return dst;                                              \
}
234 
235 MME_DEF_ALU1(mov,    ADD);
236 MME_DEF_ALU2(add,    ADD);
237 MME_DEF_ALU2(sub,    SUB);
238 MME_DEF_ALU2(mul,    MUL);
239 MME_DEF_ALU1(clz,    CLZ);
240 MME_DEF_ALU2(sll,    SLL);
241 MME_DEF_ALU2(srl,    SRL);
242 MME_DEF_ALU2(sra,    SRA);
243 MME_DEF_ALU2(and,    AND);
244 MME_DEF_ALU2(nand,   NAND);
245 MME_DEF_ALU2(or,     OR);
246 MME_DEF_ALU2(xor,    XOR);
247 MME_DEF_ALU2(slt,    SLT);
248 MME_DEF_ALU2(sltu,   SLTU);
249 MME_DEF_ALU2(sle,    SLE);
250 MME_DEF_ALU2(sleu,   SLEU);
251 MME_DEF_ALU2(seq,    SEQ);
252 MME_DEF_ALU1(dread,  DREAD);
253 
254 #undef MME_DEF_ALU1
255 #undef MME_DEF_ALU2
256 
257 static inline void
mme_mov64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value64 x)258 mme_mov64_to(struct mme_builder *b, struct mme_value64 dst,
259              struct mme_value64 x)
260 {
261    mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
262 }
263 
264 static inline struct mme_value64
mme_mov64(struct mme_builder * b,struct mme_value64 x)265 mme_mov64(struct mme_builder *b, struct mme_value64 x)
266 {
267    return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
268 }
269 
270 static inline void
mme_add64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value64 x,struct mme_value64 y)271 mme_add64_to(struct mme_builder *b, struct mme_value64 dst,
272              struct mme_value64 x, struct mme_value64 y)
273 {
274    mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
275 }
276 
277 static inline struct mme_value64
mme_add64(struct mme_builder * b,struct mme_value64 x,struct mme_value64 y)278 mme_add64(struct mme_builder *b,
279           struct mme_value64 x, struct mme_value64 y)
280 {
281    return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
282 }
283 
284 static inline void
mme_sub64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value64 x,struct mme_value64 y)285 mme_sub64_to(struct mme_builder *b, struct mme_value64 dst,
286              struct mme_value64 x, struct mme_value64 y)
287 {
288    mme_alu64_to(b, dst, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
289 }
290 
291 static inline struct mme_value64
mme_sub64(struct mme_builder * b,struct mme_value64 x,struct mme_value64 y)292 mme_sub64(struct mme_builder *b,
293           struct mme_value64 x, struct mme_value64 y)
294 {
295    return mme_alu64(b, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
296 }
297 
298 static inline void
mme_imul_32x32_64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value x,struct mme_value y)299 mme_imul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
300                      struct mme_value x, struct mme_value y)
301 {
302    mme_alu64_to(b, dst, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
303                 mme_value64(x, mme_zero()),
304                 mme_value64(y, mme_zero()));
305 }
306 
307 static inline struct mme_value64
mme_imul_32x32_64(struct mme_builder * b,struct mme_value x,struct mme_value y)308 mme_imul_32x32_64(struct mme_builder *b,
309                   struct mme_value x, struct mme_value y)
310 {
311    return mme_alu64(b, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
312                     mme_value64(x, mme_zero()),
313                     mme_value64(y, mme_zero()));
314 }
315 
316 static inline void
mme_umul_32x32_64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value x,struct mme_value y)317 mme_umul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
318                      struct mme_value x, struct mme_value y)
319 {
320    mme_alu64_to(b, dst, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
321                 mme_value64(x, mme_zero()),
322                 mme_value64(y, mme_zero()));
323 }
324 
325 static inline struct mme_value64
mme_umul_32x32_64(struct mme_builder * b,struct mme_value x,struct mme_value y)326 mme_umul_32x32_64(struct mme_builder *b,
327                   struct mme_value x, struct mme_value y)
328 {
329    return mme_alu64(b, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
330                     mme_value64(x, mme_zero()),
331                     mme_value64(y, mme_zero()));
332 }
333 
334 static inline struct mme_value64
mme_mul64(struct mme_builder * b,struct mme_value64 x,struct mme_value64 y)335 mme_mul64(struct mme_builder *b,
336           struct mme_value64 x, struct mme_value64 y)
337 {
338    if (mme_is_zero(x.hi) && mme_is_zero(y.hi))
339       return mme_umul_32x32_64(b, x.lo, y.lo);
340 
341    struct mme_value64 dst = mme_umul_32x32_64(b, x.lo, y.lo);
342    struct mme_value tmp = mme_alloc_reg(b);
343 
344    mme_mul_to(b, tmp, x.lo, y.hi);
345    mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
346 
347    mme_mul_to(b, tmp, x.hi, y.lo);
348    mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
349 
350    mme_free_reg(b, tmp);
351 
352    return dst;
353 }
354 
355 static inline void
mme_bfe_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value pos,uint8_t bits)356 mme_bfe_to(struct mme_builder *b, struct mme_value dst,
357            struct mme_value x, struct mme_value pos, uint8_t bits)
358 {
359    if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
360       mme_srl_to(b, dst, x, pos);
361       mme_and_to(b, dst, dst, mme_imm(BITFIELD_MASK(bits)));
362    } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
363       mme_fermi_bfe_to(b, dst, x, pos, bits);
364    } else {
365       unreachable("Unsupported GPU class");
366    }
367 }
368 
369 static inline struct mme_value
mme_bfe(struct mme_builder * b,struct mme_value x,struct mme_value pos,uint8_t bits)370 mme_bfe(struct mme_builder *b,
371         struct mme_value x, struct mme_value pos, uint8_t bits)
372 {
373    struct mme_value dst = mme_alloc_reg(b);
374    mme_bfe_to(b, dst, x, pos, bits);
375    return dst;
376 }
377 
378 static inline void
mme_merge_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)379 mme_merge_to(struct mme_builder *b, struct mme_value dst,
380              struct mme_value x, struct mme_value y,
381              uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
382 {
383    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
384       mme_tu104_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
385   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
386       mme_fermi_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
387    else
388       unreachable("Unsupported GPU class");
389 }
390 
391 static inline struct mme_value
mme_merge(struct mme_builder * b,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)392 mme_merge(struct mme_builder *b,
393           struct mme_value x, struct mme_value y,
394           uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
395 {
396    struct mme_value dst = mme_alloc_reg(b);
397    mme_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
398    return dst;
399 }
400 
/*
 * Merge `val` into `x` in place: `x` is both destination and first source
 * of mme_merge_to(), with dst_pos = DRF_LO(FIELD), bits = DRF_BITS(FIELD)
 * and src_pos = 0.  Note that `x` is expanded twice.
 */
#define mme_set_field(b, x, FIELD, val)               \
   mme_merge_to((b), (x), (x), (val),                 \
                DRF_LO(FIELD), DRF_BITS(FIELD), 0)
403 
/*
 * Like mme_set_field(), with the value given as the immediate
 * FIELD##_##ENUM.  The definition ends on its last line; a trailing
 * backslash there would splice whatever follows into the macro.
 */
#define mme_set_field_enum(b, x, FIELD, ENUM) \
   mme_set_field(b, x, FIELD, mme_imm(FIELD##_##ENUM))
406 
407 static inline void
mme_state_arr_to(struct mme_builder * b,struct mme_value dst,uint16_t state,struct mme_value index)408 mme_state_arr_to(struct mme_builder *b, struct mme_value dst,
409                  uint16_t state, struct mme_value index)
410 {
411    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
412       mme_tu104_state_arr_to(b, dst, state, index);
413    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
414       mme_fermi_state_arr_to(b, dst, state, index);
415    else
416       unreachable("Unsupported GPU class");
417 }
418 
419 static inline void
mme_state_to(struct mme_builder * b,struct mme_value dst,uint16_t state)420 mme_state_to(struct mme_builder *b, struct mme_value dst,
421              uint16_t state)
422 {
423    mme_state_arr_to(b, dst, state, mme_zero());
424 }
425 
426 static inline struct mme_value
mme_state_arr(struct mme_builder * b,uint16_t state,struct mme_value index)427 mme_state_arr(struct mme_builder *b,
428               uint16_t state, struct mme_value index)
429 {
430    struct mme_value dst = mme_alloc_reg(b);
431    mme_state_arr_to(b, dst, state, index);
432    return dst;
433 }
434 
435 static inline struct mme_value
mme_state(struct mme_builder * b,uint16_t state)436 mme_state(struct mme_builder *b,
437           uint16_t state)
438 {
439    struct mme_value dst = mme_alloc_reg(b);
440    mme_state_to(b, dst, state);
441    return dst;
442 }
443 
444 static inline void
mme_dwrite(struct mme_builder * b,struct mme_value idx,struct mme_value val)445 mme_dwrite(struct mme_builder *b,
446            struct mme_value idx, struct mme_value val)
447 {
448    mme_alu_no_dst(b, MME_ALU_OP_DWRITE, idx, val);
449 }
450 
451 static inline void
mme_load_to(struct mme_builder * b,struct mme_value dst)452 mme_load_to(struct mme_builder *b, struct mme_value dst)
453 {
454    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
455       mme_tu104_load_to(b, dst);
456    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
457       mme_fermi_load_to(b, dst);
458    else
459       unreachable("Unsupported GPU class");
460 }
461 
462 static inline struct mme_value
mme_tu104_load(struct mme_builder * b)463 mme_tu104_load(struct mme_builder *b)
464 {
465    struct mme_value dst = mme_alloc_reg(b);
466    mme_tu104_load_to(b, dst);
467    return dst;
468 }
469 
470 static inline struct mme_value
mme_load(struct mme_builder * b)471 mme_load(struct mme_builder *b)
472 {
473    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
474       return mme_tu104_load(b);
475    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
476       return mme_fermi_load(b);
477    else
478       unreachable("Unsupported GPU class");
479 }
480 
481 static inline struct mme_value64
mme_load_addr64(struct mme_builder * b)482 mme_load_addr64(struct mme_builder *b)
483 {
484    struct mme_value hi = mme_load(b);
485    struct mme_value lo = mme_load(b);
486    return mme_value64(lo, hi);
487 }
488 
489 static inline void
mme_mthd_arr(struct mme_builder * b,uint16_t mthd,struct mme_value index)490 mme_mthd_arr(struct mme_builder *b, uint16_t mthd,
491              struct mme_value index)
492 {
493    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
494       mme_tu104_mthd(b, mthd, index);
495    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
496       mme_fermi_mthd_arr(b, mthd, index);
497    else
498       unreachable("Unsupported GPU class");
499 }
500 
501 static inline void
mme_mthd(struct mme_builder * b,uint16_t mthd)502 mme_mthd(struct mme_builder *b, uint16_t mthd)
503 {
504    mme_mthd_arr(b, mthd, mme_zero());
505 }
506 
507 static inline void
mme_emit(struct mme_builder * b,struct mme_value data)508 mme_emit(struct mme_builder *b,
509          struct mme_value data)
510 {
511    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
512       mme_tu104_emit(b, data);
513    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
514       mme_fermi_emit(b, data);
515    else
516       unreachable("Unsupported GPU class");
517 }
518 
519 static inline void
mme_emit_addr64(struct mme_builder * b,struct mme_value64 addr)520 mme_emit_addr64(struct mme_builder *b, struct mme_value64 addr)
521 {
522    mme_emit(b, addr.hi);
523    mme_emit(b, addr.lo);
524 }
525 
526 static inline void
mme_tu104_read_fifoed(struct mme_builder * b,struct mme_value64 addr,struct mme_value count)527 mme_tu104_read_fifoed(struct mme_builder *b,
528                       struct mme_value64 addr,
529                       struct mme_value count)
530 {
531    mme_mthd(b, 0x0550 /* NVC597_SET_MME_MEM_ADDRESS_A */);
532    mme_emit_addr64(b, addr);
533 
534    mme_mthd(b, 0x0560 /* NVC597_MME_DMA_READ_FIFOED */);
535    mme_emit(b, count);
536 
537    mme_tu104_load_barrier(b);
538 }
539 
540 static inline void
mme_start_loop(struct mme_builder * b,struct mme_value count)541 mme_start_loop(struct mme_builder *b, struct mme_value count)
542 {
543    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
544       mme_tu104_start_loop(b, count);
545    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
546       mme_fermi_start_loop(b, count);
547    else
548       unreachable("Unsupported GPU class");
549 }
550 
551 static inline void
mme_end_loop(struct mme_builder * b)552 mme_end_loop(struct mme_builder *b)
553 {
554    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
555       mme_tu104_end_loop(b);
556    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
557       mme_fermi_end_loop(b);
558    else
559       unreachable("Unsupported GPU class");
560 }
561 
/*
 * Scoped loop construct:
 *
 *    mme_loop(b, count) { ...builder calls... }
 *
 * The host-side `for` runs its body exactly once, bracketed by
 * mme_start_loop() and mme_end_loop().  mme_end_loop() lives in the
 * iteration expression, so a `break` out of the body skips it.
 */
#define mme_loop(b, count)                                  \
   for (bool run = (mme_start_loop((b), count), true);      \
        run;                                                \
        run = false, mme_end_loop(b))
565 
/*
 * Generate mme_start_if_<op>(), which forwards MME_CMP_OP_<OP>, the
 * `if_true` flag and both operands to the selected backend's start_if.
 */
#define MME_DEF_START_IF(op, OP, if_true)                         \
static inline void                                                \
mme_start_if_##op(struct mme_builder *b,                          \
                  struct mme_value x, struct mme_value y)         \
{                                                                 \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {                 \
      mme_tu104_start_if(b, MME_CMP_OP_##OP, if_true, x, y);      \
      return;                                                     \
   }                                                              \
   if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {                  \
      mme_fermi_start_if(b, MME_CMP_OP_##OP, if_true, x, y);      \
      return;                                                     \
   }                                                              \
   unreachable("Unsupported GPU class");                          \
}
578 
MME_DEF_START_IF(ilt,LT,true)579 MME_DEF_START_IF(ilt,   LT,  true)
580 MME_DEF_START_IF(ult,   LTU, true)
581 MME_DEF_START_IF(ile,   LE,  true)
582 MME_DEF_START_IF(ule,   LEU, true)
583 MME_DEF_START_IF(ieq,   EQ,  true)
584 MME_DEF_START_IF(ige,   LT,  false)
585 MME_DEF_START_IF(uge,   LTU, false)
586 MME_DEF_START_IF(igt,   LE,  false)
587 MME_DEF_START_IF(ugt,   LEU, false)
588 MME_DEF_START_IF(ine,   EQ,  false)
589 
590 #undef MME_DEF_START_IF
591 
592 static inline void
593 mme_end_if(struct mme_builder *b)
594 {
595    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
596       mme_tu104_end_if(b);
597    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
598       mme_fermi_end_if(b);
599    else
600       unreachable("Unsupported GPU class");
601 }
602 
/*
 * Scoped conditional construct:
 *
 *    mme_if(b, ieq, x, y) { ...builder calls... }
 *
 * `cmp` is pasted onto mme_start_if_, so it must be one of the suffixes
 * instantiated above (ilt, ult, ile, ...).  The host-side `for` runs its
 * body exactly once between the start and mme_end_if(); a `break` out of
 * the body skips mme_end_if().
 */
#define mme_if(b, cmp, x, y)                                   \
   for (bool run = (mme_start_if_##cmp((b), x, y), true);      \
        run;                                                   \
        run = false, mme_end_if(b))
606 
607 static inline void
mme_start_while(struct mme_builder * b)608 mme_start_while(struct mme_builder *b)
609 {
610    if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
611       mme_tu104_start_while(b);
612    else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
613       mme_fermi_start_while(b);
614    else
615       unreachable("Unsupported GPU class");
616 }
617 
/*
 * Generate mme_end_while_<op>(), which forwards MME_CMP_OP_<OP>, the
 * `if_true` flag and both operands to the selected backend's end_while.
 */
#define MME_DEF_END_WHILE(op, OP, if_true)                        \
static inline void                                                \
mme_end_while_##op(struct mme_builder *b,                         \
                   struct mme_value x, struct mme_value y)        \
{                                                                 \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {                 \
      mme_tu104_end_while(b, MME_CMP_OP_##OP, if_true, x, y);     \
      return;                                                     \
   }                                                              \
   if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {                  \
      mme_fermi_end_while(b, MME_CMP_OP_##OP, if_true, x, y);     \
      return;                                                     \
   }                                                              \
   unreachable("Unsupported GPU class");                          \
}
630 
MME_DEF_END_WHILE(ilt,LT,true)631 MME_DEF_END_WHILE(ilt,   LT,  true)
632 MME_DEF_END_WHILE(ult,   LTU, true)
633 MME_DEF_END_WHILE(ile,   LE,  true)
634 MME_DEF_END_WHILE(ule,   LEU, true)
635 MME_DEF_END_WHILE(ieq,   EQ,  true)
636 MME_DEF_END_WHILE(ige,   LT,  false)
637 MME_DEF_END_WHILE(uge,   LTU, false)
638 MME_DEF_END_WHILE(igt,   LE,  false)
639 MME_DEF_END_WHILE(ugt,   LEU, false)
640 MME_DEF_END_WHILE(ine,   EQ,  false)
641 
/*
 * Scoped while construct:
 *
 *    mme_while(b, ine, x, y) { ...builder calls... }
 *
 * mme_start_while() runs before the body; the comparison
 * mme_end_while_<cmp>(b, x, y) is emitted after it, so `x` and `y` are
 * evaluated once the body has been built.  A `break` out of the body
 * skips the end call.
 */
#define mme_while(b, cmp, x, y)                             \
   for (bool run = (mme_start_while(b), true);              \
        run;                                                \
        run = false, mme_end_while_##cmp((b), x, y))
645 
/*
 * Generate mme_exit_if_<op>().  Only the Turing-and-later backend is
 * dispatched to here; any lower class reaches unreachable().
 */
#define MME_DEF_EXIT(op, OP, if_true)                             \
static inline void                                                \
mme_exit_if_##op(struct mme_builder *b,                           \
                 struct mme_value x, struct mme_value y)          \
{                                                                 \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {                 \
      mme_tu104_exit_if(b, MME_CMP_OP_##OP, if_true, x, y);       \
      return;                                                     \
   }                                                              \
   unreachable("Unsupported GPU class");                          \
}
656 
657 MME_DEF_EXIT(ilt,   LT,  true)
658 MME_DEF_EXIT(ult,   LTU, true)
659 MME_DEF_EXIT(ile,   LE,  true)
660 MME_DEF_EXIT(ule,   LEU, true)
661 MME_DEF_EXIT(ieq,   EQ,  true)
662 MME_DEF_EXIT(ige,   LT,  false)
663 MME_DEF_EXIT(uge,   LTU, false)
664 MME_DEF_EXIT(igt,   LE,  false)
665 MME_DEF_EXIT(ugt,   LEU, false)
666 MME_DEF_EXIT(ine,   EQ,  false)
667 
668 #undef MME_DEF_EXIT
669 
/*
 * Convenience spelling: mme_exit_if(b, cmp, x, y) expands to
 * mme_exit_if_<cmp>(b, x, y), so `cmp` must be an instantiated suffix.
 */
#define mme_exit_if(b, cmp, x, y) mme_exit_if_##cmp((b), (x), (y))
672 
673 static inline void
674 mme_exit(struct mme_builder *b)
675 {
676    mme_exit_if_ieq(b, mme_zero(), mme_zero());
677 }
678 
679 #ifdef __cplusplus
680 }
681 #endif
682 
683 #endif /* MME_BUILDER_H */
684 
685