1 /*
2 * Copyright © 2022 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5 #ifndef MME_BUILDER_H
6 #define MME_BUILDER_H
7
8 #include "mme_value.h"
9 #include "mme_tu104.h"
10 #include "nv_device_info.h"
11
12 #include "util/bitscan.h"
13 #include "util/enum_operators.h"
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18
/* ALU opcodes understood by the builder; each backend lowers these to its
 * own hardware encoding.
 */
enum mme_alu_op {
   MME_ALU_OP_ADD,      /* 32-bit add */
   MME_ALU_OP_ADDC,     /* add with carry-in (high half of 64-bit adds) */
   MME_ALU_OP_SUB,      /* 32-bit subtract */
   MME_ALU_OP_SUBB,     /* subtract with borrow (high half of 64-bit subs) */
   MME_ALU_OP_MUL,      /* signed multiply, low 32 bits */
   MME_ALU_OP_MULH,     /* multiply, high 32 bits */
   MME_ALU_OP_MULU,     /* unsigned multiply, low 32 bits */
   MME_ALU_OP_CLZ,      /* count leading zeros */
   MME_ALU_OP_SLL,      /* shift left logical */
   MME_ALU_OP_SRL,      /* shift right logical */
   MME_ALU_OP_SRA,      /* shift right arithmetic */
   MME_ALU_OP_AND,
   MME_ALU_OP_NAND,
   MME_ALU_OP_OR,
   MME_ALU_OP_XOR,
   MME_ALU_OP_SLT,      /* set if less than (signed) */
   MME_ALU_OP_SLTU,     /* set if less than (unsigned) */
   MME_ALU_OP_SLE,      /* set if less or equal (signed) */
   MME_ALU_OP_SLEU,     /* set if less or equal (unsigned) */
   MME_ALU_OP_SEQ,      /* set if equal */
   MME_ALU_OP_DREAD,    /* indexed data read -- see mme_dread() below */
   MME_ALU_OP_DWRITE,   /* indexed data write -- see mme_dwrite() below */
};
43
/* Comparison ops used by the if/while/exit helpers.  Negated forms (GE, GT,
 * NE) are expressed by passing if_true = false to the backend.
 */
enum mme_cmp_op {
   MME_CMP_OP_LT,    /* signed less than */
   MME_CMP_OP_LTU,   /* unsigned less than */
   MME_CMP_OP_LE,    /* signed less or equal */
   MME_CMP_OP_LEU,   /* unsigned less or equal */
   MME_CMP_OP_EQ,    /* equal */
};
51
/* Kind of an open control-flow construct, tracked while building. */
enum mme_cf_type {
   MME_CF_TYPE_IF,
   MME_CF_TYPE_LOOP,
   MME_CF_TYPE_WHILE,
};
57
/* One entry of a control-flow stack: the construct type and the instruction
 * pointer at which it started.
 */
struct mme_cf {
   enum mme_cf_type type;
   uint16_t start_ip;
};
62
63 struct mme_builder;
64
65 #include "mme_tu104_builder.h"
66 #include "mme_fermi_builder.h"
67
68 #define MME_CLS_FERMI 0x9000
69 #define MME_CLS_TURING 0xc500
70
/* Top-level MME builder.  Dispatches to one of two backends based on
 * devinfo->cls_eng3d: Turing+ (tu104) or Fermi+ (fermi).  Only the matching
 * union member is ever used for a given device.
 */
struct mme_builder {
   const struct nv_device_info *devinfo;
   struct mme_reg_alloc reg_alloc;
   union {
      struct mme_tu104_builder tu104;
      struct mme_fermi_builder fermi;
   };
};
79
80 static inline void
mme_builder_init(struct mme_builder * b,const struct nv_device_info * dev)81 mme_builder_init(struct mme_builder *b, const struct nv_device_info *dev)
82 {
83 memset(b, 0, sizeof(*b));
84 b->devinfo = dev;
85
86 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
87 mme_tu104_builder_init(b);
88 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
89 mme_fermi_builder_init(b);
90 else
91 unreachable("Unsupported GPU class");
92 }
93
94 static inline uint32_t *
mme_builder_finish(struct mme_builder * b,size_t * size_out)95 mme_builder_finish(struct mme_builder *b, size_t *size_out)
96 {
97 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
98 return mme_tu104_builder_finish(&b->tu104, size_out);
99 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
100 return mme_fermi_builder_finish(&b->fermi, size_out);
101 else
102 unreachable("Unsupported GPU class");
103 }
104
105 static inline void
mme_builder_dump(struct mme_builder * b,FILE * fp)106 mme_builder_dump(struct mme_builder *b, FILE *fp)
107 {
108 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
109 mme_tu104_builder_dump(b, fp);
110 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
111 mme_fermi_builder_dump(b, fp);
112 else
113 unreachable("Unsupported GPU class");
114 }
115
/* Allocate a fresh MME register from the builder's register allocator. */
static inline struct mme_value
mme_alloc_reg(struct mme_builder *b)
{
   return mme_reg_alloc_alloc(&b->reg_alloc);
}
121
122 static inline void
mme_realloc_reg(struct mme_builder * b,struct mme_value value)123 mme_realloc_reg(struct mme_builder *b, struct mme_value value)
124 {
125 return mme_reg_alloc_realloc(&b->reg_alloc, value);
126 }
127
/* Return a register to the allocator.  The value must not be used again. */
static inline void
mme_free_reg(struct mme_builder *b, struct mme_value val)
{
   mme_reg_alloc_free(&b->reg_alloc, val);
}
133
/* Free both halves of a 64-bit (register-pair) value. */
static inline void
mme_free_reg64(struct mme_builder *b, struct mme_value64 val)
{
   mme_reg_alloc_free(&b->reg_alloc, val.lo);
   mme_reg_alloc_free(&b->reg_alloc, val.hi);
}
140
141 static inline void
mme_alu_to(struct mme_builder * b,struct mme_value dst,enum mme_alu_op op,struct mme_value x,struct mme_value y)142 mme_alu_to(struct mme_builder *b,
143 struct mme_value dst,
144 enum mme_alu_op op,
145 struct mme_value x,
146 struct mme_value y)
147 {
148 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
149 mme_tu104_alu_to(b, dst, op, x, y);
150 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
151 mme_fermi_alu_to(b, dst, op, x, y);
152 else
153 unreachable("Unsupported GPU class");
154 }
155
156 static inline struct mme_value
mme_alu(struct mme_builder * b,enum mme_alu_op op,struct mme_value x,struct mme_value y)157 mme_alu(struct mme_builder *b,
158 enum mme_alu_op op,
159 struct mme_value x,
160 struct mme_value y)
161 {
162 struct mme_value dst = mme_alloc_reg(b);
163 mme_alu_to(b, dst, op, x, y);
164 return dst;
165 }
166
/* Emit an ALU op purely for its side effects (e.g. DWRITE), discarding the
 * result by targeting the zero register.
 */
static inline void
mme_alu_no_dst(struct mme_builder *b,
               enum mme_alu_op op,
               struct mme_value x,
               struct mme_value y)
{
   mme_alu_to(b, mme_zero(), op, x, y);
}
175
176 static inline void
mme_alu64_to(struct mme_builder * b,struct mme_value64 dst,enum mme_alu_op op_lo,enum mme_alu_op op_hi,struct mme_value64 x,struct mme_value64 y)177 mme_alu64_to(struct mme_builder *b,
178 struct mme_value64 dst,
179 enum mme_alu_op op_lo,
180 enum mme_alu_op op_hi,
181 struct mme_value64 x,
182 struct mme_value64 y)
183 {
184 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
185 mme_tu104_alu64_to(b, dst, op_lo, op_hi, x, y);
186 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
187 mme_fermi_alu64_to(b, dst, op_lo, op_hi, x, y);
188 else
189 unreachable("Unsupported GPU class");
190 }
191
192 static inline struct mme_value64
mme_alu64(struct mme_builder * b,enum mme_alu_op op_lo,enum mme_alu_op op_hi,struct mme_value64 x,struct mme_value64 y)193 mme_alu64(struct mme_builder *b,
194 enum mme_alu_op op_lo, enum mme_alu_op op_hi,
195 struct mme_value64 x, struct mme_value64 y)
196 {
197 struct mme_value64 dst = {
198 mme_alloc_reg(b),
199 mme_alloc_reg(b),
200 };
201 mme_alu64_to(b, dst, op_lo, op_hi, x, y);
202 return dst;
203 }
204
/* Stamp out the unary ALU helpers mme_<op>_to() and mme_<op>().  The unused
 * second source is tied to the zero register.
 */
#define MME_DEF_ALU1(op, OP)                                         \
static inline void                                                   \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,           \
              struct mme_value x)                                    \
{                                                                    \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, mme_zero());               \
}                                                                    \
                                                                     \
static inline struct mme_value                                       \
mme_##op(struct mme_builder *b,                                      \
         struct mme_value x)                                         \
{                                                                    \
   return mme_alu(b, MME_ALU_OP_##OP, x, mme_zero());                \
}

/* Stamp out the binary ALU helpers mme_<op>_to() and mme_<op>(). */
#define MME_DEF_ALU2(op, OP)                                         \
static inline void                                                   \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,           \
              struct mme_value x, struct mme_value y)                \
{                                                                    \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, y);                        \
}                                                                    \
                                                                     \
static inline struct mme_value                                       \
mme_##op(struct mme_builder *b,                                      \
         struct mme_value x, struct mme_value y)                     \
{                                                                    \
   return mme_alu(b, MME_ALU_OP_##OP, x, y);                         \
}

/* mov is ADD with y == zero. */
MME_DEF_ALU1(mov, ADD);
MME_DEF_ALU2(add, ADD);
MME_DEF_ALU2(sub, SUB);
MME_DEF_ALU2(mul, MUL);
MME_DEF_ALU1(clz, CLZ);
MME_DEF_ALU2(sll, SLL);
MME_DEF_ALU2(srl, SRL);
MME_DEF_ALU2(sra, SRA);
MME_DEF_ALU2(and, AND);
MME_DEF_ALU2(nand, NAND);
MME_DEF_ALU2(or, OR);
MME_DEF_ALU2(xor, XOR);
MME_DEF_ALU2(slt, SLT);
MME_DEF_ALU2(sltu, SLTU);
MME_DEF_ALU2(sle, SLE);
MME_DEF_ALU2(sleu, SLEU);
MME_DEF_ALU2(seq, SEQ);
MME_DEF_ALU1(dread, DREAD);

#undef MME_DEF_ALU1
#undef MME_DEF_ALU2
256
/* 64-bit move: lo/hi halves are ADDed with an immediate zero. */
static inline void
mme_mov64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x)
{
   mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
}

/* 64-bit move into a freshly allocated register pair. */
static inline struct mme_value64
mme_mov64(struct mme_builder *b, struct mme_value64 x)
{
   return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
}

/* 64-bit add: ADD on the low half, ADDC (add-with-carry) on the high. */
static inline void
mme_add64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x, struct mme_value64 y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
}

static inline struct mme_value64
mme_add64(struct mme_builder *b,
          struct mme_value64 x, struct mme_value64 y)
{
   return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
}

/* 64-bit subtract: SUB on the low half, SUBB (borrow) on the high. */
static inline void
mme_sub64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x, struct mme_value64 y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
}

static inline struct mme_value64
mme_sub64(struct mme_builder *b,
          struct mme_value64 x, struct mme_value64 y)
{
   return mme_alu64(b, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
}
297
/* Signed 32x32 -> 64-bit multiply: MUL produces the low word, MULH the
 * high word.  Sources are widened with a zero high half.
 */
static inline void
mme_imul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
                     struct mme_value x, struct mme_value y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
                mme_value64(x, mme_zero()),
                mme_value64(y, mme_zero()));
}

static inline struct mme_value64
mme_imul_32x32_64(struct mme_builder *b,
                  struct mme_value x, struct mme_value y)
{
   return mme_alu64(b, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
                    mme_value64(x, mme_zero()),
                    mme_value64(y, mme_zero()));
}

/* Unsigned 32x32 -> 64-bit multiply (MULU low word, MULH high word). */
static inline void
mme_umul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
                     struct mme_value x, struct mme_value y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
                mme_value64(x, mme_zero()),
                mme_value64(y, mme_zero()));
}

static inline struct mme_value64
mme_umul_32x32_64(struct mme_builder *b,
                  struct mme_value x, struct mme_value y)
{
   return mme_alu64(b, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
                    mme_value64(x, mme_zero()),
                    mme_value64(y, mme_zero()));
}
333
334 static inline struct mme_value64
mme_mul64(struct mme_builder * b,struct mme_value64 x,struct mme_value64 y)335 mme_mul64(struct mme_builder *b,
336 struct mme_value64 x, struct mme_value64 y)
337 {
338 if (mme_is_zero(x.hi) && mme_is_zero(y.hi))
339 return mme_umul_32x32_64(b, x.lo, y.lo);
340
341 struct mme_value64 dst = mme_umul_32x32_64(b, x.lo, y.lo);
342 struct mme_value tmp = mme_alloc_reg(b);
343
344 mme_mul_to(b, tmp, x.lo, y.hi);
345 mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
346
347 mme_mul_to(b, tmp, x.hi, y.lo);
348 mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
349
350 mme_free_reg(b, tmp);
351
352 return dst;
353 }
354
355 static inline void
mme_bfe_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value pos,uint8_t bits)356 mme_bfe_to(struct mme_builder *b, struct mme_value dst,
357 struct mme_value x, struct mme_value pos, uint8_t bits)
358 {
359 if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
360 mme_srl_to(b, dst, x, pos);
361 mme_and_to(b, dst, dst, mme_imm(BITFIELD_MASK(bits)));
362 } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
363 mme_fermi_bfe_to(b, dst, x, pos, bits);
364 } else {
365 unreachable("Unsupported GPU class");
366 }
367 }
368
369 static inline struct mme_value
mme_bfe(struct mme_builder * b,struct mme_value x,struct mme_value pos,uint8_t bits)370 mme_bfe(struct mme_builder *b,
371 struct mme_value x, struct mme_value pos, uint8_t bits)
372 {
373 struct mme_value dst = mme_alloc_reg(b);
374 mme_bfe_to(b, dst, x, pos, bits);
375 return dst;
376 }
377
378 static inline void
mme_merge_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)379 mme_merge_to(struct mme_builder *b, struct mme_value dst,
380 struct mme_value x, struct mme_value y,
381 uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
382 {
383 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
384 mme_tu104_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
385 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
386 mme_fermi_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
387 else
388 unreachable("Unsupported GPU class");
389 }
390
391 static inline struct mme_value
mme_merge(struct mme_builder * b,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)392 mme_merge(struct mme_builder *b,
393 struct mme_value x, struct mme_value y,
394 uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
395 {
396 struct mme_value dst = mme_alloc_reg(b);
397 mme_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
398 return dst;
399 }
400
/* Overwrite the bits of FIELD (described by DRF_LO/DRF_BITS) in x with val,
 * in place.
 */
#define mme_set_field(b, x, FIELD, val) \
   mme_merge_to(b, x, x, val, DRF_LO(FIELD), DRF_BITS(FIELD), 0)
403
/* Set FIELD in x to the named hardware enum value FIELD_ENUM.
 *
 * Fix: the original definition ended with a stray `\` line continuation,
 * which silently made the following (blank) line part of the macro; any
 * code later placed directly after the #define would be swallowed into it.
 */
#define mme_set_field_enum(b, x, FIELD, ENUM) \
   mme_set_field(b, x, FIELD, mme_imm(FIELD##_##ENUM))
406
407 static inline void
mme_state_arr_to(struct mme_builder * b,struct mme_value dst,uint16_t state,struct mme_value index)408 mme_state_arr_to(struct mme_builder *b, struct mme_value dst,
409 uint16_t state, struct mme_value index)
410 {
411 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
412 mme_tu104_state_arr_to(b, dst, state, index);
413 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
414 mme_fermi_state_arr_to(b, dst, state, index);
415 else
416 unreachable("Unsupported GPU class");
417 }
418
/* Read shadow state at `state` (no index) into dst. */
static inline void
mme_state_to(struct mme_builder *b, struct mme_value dst,
             uint16_t state)
{
   mme_state_arr_to(b, dst, state, mme_zero());
}
425
/* Read shadow state at (state + index) into a freshly allocated register.
 * The caller owns the returned register.
 */
static inline struct mme_value
mme_state_arr(struct mme_builder *b,
              uint16_t state, struct mme_value index)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_state_arr_to(b, dst, state, index);
   return dst;
}

/* Read shadow state at `state` into a freshly allocated register. */
static inline struct mme_value
mme_state(struct mme_builder *b,
          uint16_t state)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_state_to(b, dst, state);
   return dst;
}
443
/* Indexed data write: store val at index idx via the DWRITE ALU op.  Pure
 * side effect, so the result goes to the zero register.
 */
static inline void
mme_dwrite(struct mme_builder *b,
           struct mme_value idx, struct mme_value val)
{
   mme_alu_no_dst(b, MME_ALU_OP_DWRITE, idx, val);
}
450
451 static inline void
mme_load_to(struct mme_builder * b,struct mme_value dst)452 mme_load_to(struct mme_builder *b, struct mme_value dst)
453 {
454 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
455 mme_tu104_load_to(b, dst);
456 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
457 mme_fermi_load_to(b, dst);
458 else
459 unreachable("Unsupported GPU class");
460 }
461
/* Turing-only load of the next macro parameter into a fresh register. */
static inline struct mme_value
mme_tu104_load(struct mme_builder *b)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_tu104_load_to(b, dst);
   return dst;
}

/* Pop the next macro parameter into a freshly allocated register.  The
 * caller owns the returned register.
 */
static inline struct mme_value
mme_load(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      return mme_tu104_load(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      return mme_fermi_load(b);
   else
      unreachable("Unsupported GPU class");
}
480
/* Pop a 64-bit address from the parameter stream.  Note the wire order:
 * the high word is passed first, then the low word.
 */
static inline struct mme_value64
mme_load_addr64(struct mme_builder *b)
{
   struct mme_value hi = mme_load(b);
   struct mme_value lo = mme_load(b);
   return mme_value64(lo, hi);
}
488
489 static inline void
mme_mthd_arr(struct mme_builder * b,uint16_t mthd,struct mme_value index)490 mme_mthd_arr(struct mme_builder *b, uint16_t mthd,
491 struct mme_value index)
492 {
493 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
494 mme_tu104_mthd(b, mthd, index);
495 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
496 mme_fermi_mthd_arr(b, mthd, index);
497 else
498 unreachable("Unsupported GPU class");
499 }
500
/* Select a method with no array index for subsequent mme_emit() calls. */
static inline void
mme_mthd(struct mme_builder *b, uint16_t mthd)
{
   mme_mthd_arr(b, mthd, mme_zero());
}
506
507 static inline void
mme_emit(struct mme_builder * b,struct mme_value data)508 mme_emit(struct mme_builder *b,
509 struct mme_value data)
510 {
511 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
512 mme_tu104_emit(b, data);
513 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
514 mme_fermi_emit(b, data);
515 else
516 unreachable("Unsupported GPU class");
517 }
518
/* Emit a 64-bit address, high word first (the order the hardware's
 * ADDRESS_A/ADDRESS_B method pairs expect).
 */
static inline void
mme_emit_addr64(struct mme_builder *b, struct mme_value64 addr)
{
   mme_emit(b, addr.hi);
   mme_emit(b, addr.lo);
}
525
/* Turing-only: kick off a DMA read of `count` dwords from `addr` into the
 * macro's data FIFO, then emit a load barrier so following loads observe
 * the fetched data.
 */
static inline void
mme_tu104_read_fifoed(struct mme_builder *b,
                      struct mme_value64 addr,
                      struct mme_value count)
{
   mme_mthd(b, 0x0550 /* NVC597_SET_MME_MEM_ADDRESS_A */);
   mme_emit_addr64(b, addr);

   mme_mthd(b, 0x0560 /* NVC597_MME_DMA_READ_FIFOED */);
   mme_emit(b, count);

   mme_tu104_load_barrier(b);
}
539
540 static inline void
mme_start_loop(struct mme_builder * b,struct mme_value count)541 mme_start_loop(struct mme_builder *b, struct mme_value count)
542 {
543 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
544 mme_tu104_start_loop(b, count);
545 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
546 mme_fermi_start_loop(b, count);
547 else
548 unreachable("Unsupported GPU class");
549 }
550
551 static inline void
mme_end_loop(struct mme_builder * b)552 mme_end_loop(struct mme_builder *b)
553 {
554 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
555 mme_tu104_end_loop(b);
556 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
557 mme_fermi_end_loop(b);
558 else
559 unreachable("Unsupported GPU class");
560 }
561
/* Structured loop sugar: `mme_loop(b, n) { ... }` brackets the body with
 * mme_start_loop()/mme_end_loop() using a one-shot for statement.
 */
#define mme_loop(b, count) \
   for (bool run = (mme_start_loop((b), count), true); run; \
        run = false, mme_end_loop(b))
565
/* Stamp out mme_start_if_<op>(): open a conditional block taken when
 * `x OP y` matches if_true.  Negated comparisons (ge/gt/ne) reuse the
 * lt/le/eq hardware ops with if_true = false.
 */
#define MME_DEF_START_IF(op, OP, if_true)                            \
static inline void                                                   \
mme_start_if_##op(struct mme_builder *b,                             \
                  struct mme_value x, struct mme_value y)            \
{                                                                    \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)                      \
      mme_tu104_start_if(b, MME_CMP_OP_##OP, if_true, x, y);         \
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)                  \
      mme_fermi_start_if(b, MME_CMP_OP_##OP, if_true, x, y);         \
   else                                                              \
      unreachable("Unsupported GPU class");                          \
}

MME_DEF_START_IF(ilt, LT, true)
MME_DEF_START_IF(ult, LTU, true)
MME_DEF_START_IF(ile, LE, true)
MME_DEF_START_IF(ule, LEU, true)
MME_DEF_START_IF(ieq, EQ, true)
MME_DEF_START_IF(ige, LT, false)
MME_DEF_START_IF(uge, LTU, false)
MME_DEF_START_IF(igt, LE, false)
MME_DEF_START_IF(ugt, LEU, false)
MME_DEF_START_IF(ine, EQ, false)

#undef MME_DEF_START_IF
591
592 static inline void
593 mme_end_if(struct mme_builder *b)
594 {
595 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
596 mme_tu104_end_if(b);
597 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
598 mme_fermi_end_if(b);
599 else
600 unreachable("Unsupported GPU class");
601 }
602
/* Structured if sugar: `mme_if(b, ieq, x, y) { ... }` brackets the body
 * with mme_start_if_<cmp>()/mme_end_if().
 */
#define mme_if(b, cmp, x, y) \
   for (bool run = (mme_start_if_##cmp((b), x, y), true); run; \
        run = false, mme_end_if(b))
606
607 static inline void
mme_start_while(struct mme_builder * b)608 mme_start_while(struct mme_builder *b)
609 {
610 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
611 mme_tu104_start_while(b);
612 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
613 mme_fermi_start_while(b);
614 else
615 unreachable("Unsupported GPU class");
616 }
617
618 #define MME_DEF_END_WHILE(op, OP, if_true) \
619 static inline void \
620 mme_end_while_##op(struct mme_builder *b, \
621 struct mme_value x, struct mme_value y) \
622 { \
623 if (b->devinfo->cls_eng3d >= MME_CLS_TURING) \
624 mme_tu104_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \
625 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) \
626 mme_fermi_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \
627 else \
628 unreachable("Unsupported GPU class"); \
629 }
630
MME_DEF_END_WHILE(ilt,LT,true)631 MME_DEF_END_WHILE(ilt, LT, true)
632 MME_DEF_END_WHILE(ult, LTU, true)
633 MME_DEF_END_WHILE(ile, LE, true)
634 MME_DEF_END_WHILE(ule, LEU, true)
635 MME_DEF_END_WHILE(ieq, EQ, true)
636 MME_DEF_END_WHILE(ige, LT, false)
637 MME_DEF_END_WHILE(uge, LTU, false)
638 MME_DEF_END_WHILE(igt, LE, false)
639 MME_DEF_END_WHILE(ugt, LEU, false)
640 MME_DEF_END_WHILE(ine, EQ, false)
641
642 #define mme_while(b, cmp, x, y) \
643 for (bool run = (mme_start_while(b), true); run; \
644 run = false, mme_end_while_##cmp((b), x, y))
645
/* Stamp out mme_exit_if_<op>(): abort macro execution when `x OP y`
 * matches if_true.  Only the Turing backend supports early exit; Fermi
 * falls into unreachable().
 */
#define MME_DEF_EXIT(op, OP, if_true)                                \
static inline void                                                   \
mme_exit_if_##op(struct mme_builder *b,                              \
                 struct mme_value x, struct mme_value y)             \
{                                                                    \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)                      \
      mme_tu104_exit_if(b, MME_CMP_OP_##OP, if_true, x, y);          \
   else                                                              \
      unreachable("Unsupported GPU class");                          \
}

MME_DEF_EXIT(ilt, LT, true)
MME_DEF_EXIT(ult, LTU, true)
MME_DEF_EXIT(ile, LE, true)
MME_DEF_EXIT(ule, LEU, true)
MME_DEF_EXIT(ieq, EQ, true)
MME_DEF_EXIT(ige, LT, false)
MME_DEF_EXIT(uge, LTU, false)
MME_DEF_EXIT(igt, LE, false)
MME_DEF_EXIT(ugt, LEU, false)
MME_DEF_EXIT(ine, EQ, false)

#undef MME_DEF_EXIT

/* Dispatch by comparison name, e.g. mme_exit_if(b, ieq, x, y). */
#define mme_exit_if(b, cmp, x, y) \
   mme_exit_if_##cmp(b, x, y)
672
/* Unconditional exit: zero == zero is always true, so the exit fires. */
static inline void
mme_exit(struct mme_builder *b)
{
   mme_exit_if_ieq(b, mme_zero(), mme_zero());
}
678
679 #ifdef __cplusplus
680 }
681 #endif
682
683 #endif /* MME_BUILDER_H */
684
685