1 /*
2 * Copyright © 2022 Mary Guillemard
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_runner.h"
6
7 #include "mme_fermi_sim.h"
8 /* for VOLTA_A */
9 #include "nvk_clc397.h"
10
11 class mme_fermi_sim_test : public ::testing::Test, public mme_hw_runner {
12 public:
13 mme_fermi_sim_test();
14 ~mme_fermi_sim_test();
15
16 void SetUp();
17 void test_macro(const mme_builder *b,
18 const std::vector<uint32_t>& macro,
19 const std::vector<uint32_t>& params);
20 };
21
mme_fermi_sim_test()22 mme_fermi_sim_test::mme_fermi_sim_test() :
23 ::testing::Test(),
24 mme_hw_runner()
25 { }
26
~mme_fermi_sim_test()27 mme_fermi_sim_test::~mme_fermi_sim_test()
28 { }
29
30 void
SetUp()31 mme_fermi_sim_test::SetUp()
32 {
33 ASSERT_TRUE(set_up_hw(FERMI_A, VOLTA_A));
34 }
35
36 void
test_macro(const mme_builder * b,const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)37 mme_fermi_sim_test::test_macro(const mme_builder *b,
38 const std::vector<uint32_t>& macro,
39 const std::vector<uint32_t>& params)
40 {
41 const uint32_t data_dwords = DATA_BO_SIZE / sizeof(uint32_t);
42
43 std::vector<mme_fermi_inst> insts(macro.size());
44 mme_fermi_decode(&insts[0], ¯o[0], macro.size());
45
46 /* First, make a copy of the data and simulate the macro */
47 std::vector<uint32_t> sim_data(data, data + (DATA_BO_SIZE / 4));
48 mme_fermi_sim_mem sim_mem = {
49 .addr = data_addr,
50 .data = &sim_data[0],
51 .size = DATA_BO_SIZE,
52 };
53 mme_fermi_sim(insts.size(), &insts[0],
54 params.size(), ¶ms[0],
55 1, &sim_mem);
56
57 run_macro(macro, params);
58
59 /* Check the results */
60 for (uint32_t i = 0; i < data_dwords; i++)
61 ASSERT_EQ(data[i], sim_data[i]);
62 }
63
64 static mme_fermi_reg
mme_fermi_value_as_reg(mme_value val)65 mme_fermi_value_as_reg(mme_value val)
66 {
67 assert(val.type == MME_VALUE_TYPE_REG);
68 return (mme_fermi_reg)(MME_FERMI_REG_ZERO + val.reg);
69 }
70
/* Smallest possible macro: store one immediate marker value at data_addr. */
TEST_F(mme_fermi_sim_test, sanity)
{
   const uint32_t marker = 0xc0ffee01;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_store_imm_addr(&b, data_addr, mme_imm(marker), false);

   auto macro = mme_builder_finish_vec(&b);

   /* This macro takes no parameters */
   const std::vector<uint32_t> no_params;
   test_macro(&b, macro, no_params);
}
85
/* Adds two loaded parameters with mme_add() and stores the result. */
TEST_F(mme_fermi_sim_test, add)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);
   mme_value total = mme_add(&b, lhs, rhs);
   mme_store_imm_addr(&b, data_addr, total, true);

   auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = {25, 138};

   test_macro(&b, macro, params);
}
104
/* Exercises mme_add() with immediate operands: each of three immediates is
 * added as the second operand, as the first operand, and to mme_zero().
 * The nine results land in consecutive dwords starting at data_addr.
 */
TEST_F(mme_fermi_sim_test, add_imm)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x = mme_load(&b);

   const uint32_t imms[] = {
      0x00000001,
      0xffffffff,
      0xffff8000,
   };

   uint32_t offset = 0;

   /* x + imm */
   for (const uint32_t imm : imms) {
      mme_value res = mme_add(&b, x, mme_imm(imm));
      mme_store_imm_addr(&b, data_addr + offset, res, true);
      offset += 4;
   }

   /* imm + x */
   for (const uint32_t imm : imms) {
      mme_value res = mme_add(&b, mme_imm(imm), x);
      mme_store_imm_addr(&b, data_addr + offset, res, true);
      offset += 4;
   }

   /* zero + imm */
   for (const uint32_t imm : imms) {
      mme_value res = mme_add(&b, mme_zero(), mme_imm(imm));
      mme_store_imm_addr(&b, data_addr + offset, res, true);
      offset += 4;
   }

   auto macro = mme_builder_finish_vec(&b);

   const uint32_t inputs[] = {
      0x0000ffff,
      0x00008000,
      0x0001ffff,
      0xffffffff,
   };

   for (const uint32_t input : inputs) {
      reset_push();

      const std::vector<uint32_t> params = {input};

      test_macro(&b, macro, params);
   }
}
157
/* Hand-assembles ADD_IMM instructions on the low and high halves of a 64-bit
 * parameter using four (low, high) immediate pairs. Each pair's two results
 * are stored in consecutive dwords of the data buffer and compared between
 * simulator and runner by test_macro().
 */
TEST_F(mme_fermi_sim_test, add_imm_no_carry)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x_lo = mme_load(&b);
   mme_value x_hi = mme_load(&b);

   struct imm_pair {
      int lo;
      int hi;
   };
   static const imm_pair imms[] = {
      { 0x0001, 0x0000 },
      { 0x0000, 0x0001 },
      { 0x0000, 0xffff },
      { 0x0000, 0x8000 },
   };

   for (uint32_t c = 0; c < ARRAY_SIZE(imms); c++) {
      /* Every pair writes through the registers allocated for it here and
       * never through handles that were already handed to an earlier store.
       * NOTE(review): the trailing `true` of mme_store_imm_addr() presumably
       * releases the register -- confirm against the builder.
       */
      mme_value v_lo = mme_alloc_reg(&b);
      mme_value v_hi = mme_alloc_reg(&b);

      mme_fermi_asm(&b, i) {
         i.op = MME_FERMI_OP_ADD_IMM;
         i.assign_op = MME_FERMI_ASSIGN_OP_MOVE;
         i.dst = mme_fermi_value_as_reg(v_lo);
         i.src[0] = mme_fermi_value_as_reg(x_lo);
         i.imm = imms[c].lo;
      }

      mme_fermi_asm(&b, i) {
         i.op = MME_FERMI_OP_ADD_IMM;
         i.assign_op = MME_FERMI_ASSIGN_OP_MOVE;
         i.dst = mme_fermi_value_as_reg(v_hi);
         i.src[0] = mme_fermi_value_as_reg(x_hi);
         i.imm = imms[c].hi;
      }

      mme_store_imm_addr(&b, data_addr + c * 8 + 0, v_lo, true);
      mme_store_imm_addr(&b, data_addr + c * 8 + 4, v_hi, true);
   }

   auto macro = mme_builder_finish_vec(&b);

   uint64_t vals[] = {
      0x0000ffffffffffffull,
      0x0000ffffffff8000ull,
      0x0000ffff00000000ull,
      0x0000800000000000ull,
      0x00008000ffffffffull,
      0x0001ffff00000000ull,
      0xffffffff00000000ull,
      0xffffffffffffffffull,
   };

   for (uint32_t i = 0; i < ARRAY_SIZE(vals); i++) {
      reset_push();

      /* Each 64-bit input is passed as its low dword, then its high dword */
      std::vector<uint32_t> params;
      params.push_back(low32(vals[i]));
      params.push_back(high32(vals[i]));

      test_macro(&b, macro, params);
   }
}
269
/* 64-bit addition through mme_add64(); both operands are loaded as two
 * dwords each and both halves of the result are stored.
 */
TEST_F(mme_fermi_sim_test, addc)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   struct mme_value64 lhs = { mme_load(&b), mme_load(&b) };
   struct mme_value64 rhs = { mme_load(&b), mme_load(&b) };

   struct mme_value64 total = mme_add64(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, total.lo, true);
   mme_store_imm_addr(&b, data_addr + 4, total.hi, true);

   auto macro = mme_builder_finish_vec(&b);

   /* Parameters in load order: first operand, then second operand */
   const std::vector<uint32_t> params = {
      0x80008650,
      0x596,
      0x8000a8f6,
      0x836,
   };

   test_macro(&b, macro, params);
}
293
/* Subtracts the second loaded parameter from the first with mme_sub(). */
TEST_F(mme_fermi_sim_test, sub)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);
   mme_value delta = mme_sub(&b, lhs, rhs);
   mme_store_imm_addr(&b, data_addr, delta, true);

   auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = {25, 138};

   test_macro(&b, macro, params);
}
312
/* 64-bit subtraction through mme_sub64(); both operands are loaded as two
 * dwords each and both halves of the result are stored.
 */
TEST_F(mme_fermi_sim_test, subb)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   struct mme_value64 lhs = { mme_load(&b), mme_load(&b) };
   struct mme_value64 rhs = { mme_load(&b), mme_load(&b) };

   struct mme_value64 diff = mme_sub64(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, diff.lo, true);
   mme_store_imm_addr(&b, data_addr + 4, diff.hi, true);

   auto macro = mme_builder_finish_vec(&b);

   /* Parameters in load order: first operand, then second operand */
   const std::vector<uint32_t> params = {
      0x80008650,
      0x596,
      0x8000a8f6,
      0x836,
   };

   test_macro(&b, macro, params);
}
336
337 #define SHIFT_TEST(op) \
338 TEST_F(mme_fermi_sim_test, op) \
339 { \
340 mme_builder b; \
341 mme_builder_init(&b, devinfo); \
342 \
343 mme_value val = mme_load(&b); \
344 mme_value shift1 = mme_load(&b); \
345 mme_value shift2 = mme_load(&b); \
346 mme_store_imm_addr(&b, data_addr + 0, mme_##op(&b, val, shift1), true); \
347 mme_store_imm_addr(&b, data_addr + 4, mme_##op(&b, val, shift2), true); \
348 \
349 auto macro = mme_builder_finish_vec(&b); \
350 \
351 std::vector<uint32_t> params; \
352 params.push_back(0x0c406fe0); \
353 params.push_back(5); \
354 params.push_back(51); \
355 \
356 test_macro(&b, macro, params); \
357 }
358
359 SHIFT_TEST(sll)
SHIFT_TEST(srl)360 SHIFT_TEST(srl)
361
362 #undef SHIFT_TEST
363
364 TEST_F(mme_fermi_sim_test, bfe)
365 {
366 const uint32_t canary = 0xc0ffee01;
367
368 mme_builder b;
369 mme_builder_init(&b, devinfo);
370
371 mme_value val = mme_load(&b);
372 mme_value pos = mme_load(&b);
373
374 mme_store_imm_addr(&b, data_addr + 0, mme_bfe(&b, val, pos, 1), true);
375 mme_store_imm_addr(&b, data_addr + 4, mme_bfe(&b, val, pos, 2), true);
376 mme_store_imm_addr(&b, data_addr + 8, mme_bfe(&b, val, pos, 5), true);
377
378 auto macro = mme_builder_finish_vec(&b);
379
380 for (unsigned i = 0; i < 31; i++) {
381 std::vector<uint32_t> params;
382 params.push_back(canary);
383 params.push_back(i);
384
385 test_macro(&b, macro, params);
386
387 ASSERT_EQ(data[0], (canary >> i) & 0x1);
388 ASSERT_EQ(data[1], (canary >> i) & 0x3);
389 ASSERT_EQ(data[2], (canary >> i) & 0x1f);
390 }
391 }
392
393 #define BITOP_TEST(op) \
394 TEST_F(mme_fermi_sim_test, op) \
395 { \
396 mme_builder b; \
397 mme_builder_init(&b, devinfo); \
398 \
399 mme_value x = mme_load(&b); \
400 mme_value y = mme_load(&b); \
401 mme_value v1 = mme_##op(&b, x, y); \
402 mme_value v2 = mme_##op(&b, x, mme_imm(0xffff8000)); \
403 mme_value v3 = mme_##op(&b, x, mme_imm(0xffffffff)); \
404 mme_store_imm_addr(&b, data_addr + 0, v1, true); \
405 mme_store_imm_addr(&b, data_addr + 4, v2, true); \
406 mme_store_imm_addr(&b, data_addr + 8, v3, true); \
407 \
408 auto macro = mme_builder_finish_vec(&b); \
409 \
410 std::vector<uint32_t> params; \
411 params.push_back(0x0c406fe0); \
412 params.push_back(0x00fff0c0); \
413 \
414 test_macro(&b, macro, params); \
415 }
416
417 BITOP_TEST(and)
418 //BITOP_TEST(and_not)
BITOP_TEST(nand)419 BITOP_TEST(nand)
420 BITOP_TEST(or)
421 BITOP_TEST(xor)
422
423 #undef BITOP_TEST
424
/* Host-side reference predicates used to compute expected results for the
 * if_<op> and while_<op> tests: signed 32-bit "not equal" and "equal".
 */
static bool c_ine(int32_t x, int32_t y) { return x != y; }
static bool c_ieq(int32_t x, int32_t y) { return x == y; }
427
428 #define IF_TEST(op) \
429 TEST_F(mme_fermi_sim_test, if_##op) \
430 { \
431 mme_builder b; \
432 mme_builder_init(&b, devinfo); \
433 \
434 mme_value x = mme_load(&b); \
435 mme_value y = mme_load(&b); \
436 mme_value i = mme_mov(&b, mme_zero()); \
437 \
438 mme_start_if_##op(&b, x, y); \
439 { \
440 mme_add_to(&b, i, i, mme_imm(1)); \
441 mme_add_to(&b, i, i, mme_imm(1)); \
442 } \
443 mme_end_if(&b); \
444 mme_add_to(&b, i, i, mme_imm(1)); \
445 mme_add_to(&b, i, i, mme_imm(1)); \
446 mme_add_to(&b, i, i, mme_imm(1)); \
447 \
448 mme_store_imm_addr(&b, data_addr + 0, i, true); \
449 \
450 auto macro = mme_builder_finish_vec(&b); \
451 \
452 uint32_t vals[] = {23, 56, (uint32_t)-5, (uint32_t)-10, 56, 14}; \
453 \
454 for (uint32_t i = 0; i < ARRAY_SIZE(vals) - 1; i++) { \
455 reset_push(); \
456 \
457 std::vector<uint32_t> params; \
458 params.push_back(vals[i + 0]); \
459 params.push_back(vals[i + 1]); \
460 \
461 test_macro(&b, macro, params); \
462 \
463 ASSERT_EQ(data[0], c_##op(params[0], params[1]) ? 5 : 3); \
464 } \
465 }
466
467 IF_TEST(ieq)
IF_TEST(ine)468 IF_TEST(ine)
469
470 #undef IF_TEST
471
472 static inline void
473 mme_fermi_inc_whole_inst(mme_builder *b, mme_value val)
474 {
475 mme_fermi_asm(b, i) {
476 i.op = MME_FERMI_OP_ADD_IMM;
477 i.assign_op = MME_FERMI_ASSIGN_OP_MOVE;
478 i.dst = mme_fermi_value_as_reg(val);
479 i.src[0] = mme_fermi_value_as_reg(val);
480 i.imm = 1;
481 }
482 }
483
484 #define WHILE_TEST(op, start, step, bound) \
485 TEST_F(mme_fermi_sim_test, while_##op) \
486 { \
487 mme_builder b; \
488 mme_builder_init(&b, devinfo); \
489 \
490 mme_value x = mme_mov(&b, mme_zero()); \
491 mme_value y = mme_mov(&b, mme_zero()); \
492 mme_value z = mme_mov(&b, mme_imm(start)); \
493 mme_value w = mme_mov(&b, mme_zero()); \
494 mme_value v = mme_mov(&b, mme_zero()); \
495 \
496 for (uint32_t j = 0; j < 5; j++) \
497 mme_fermi_inc_whole_inst(&b, x); \
498 mme_store_imm_addr(&b, data_addr + 0, x, true); \
499 \
500 mme_while(&b, op, z, mme_imm(bound)) { \
501 for (uint32_t j = 0; j < 5; j++) \
502 mme_fermi_inc_whole_inst(&b, y); \
503 \
504 mme_add_to(&b, z, z, mme_imm(step)); \
505 \
506 for (uint32_t j = 0; j < 5; j++) \
507 mme_fermi_inc_whole_inst(&b, w); \
508 } \
509 mme_store_imm_addr(&b, data_addr + 4, y, true); \
510 mme_store_imm_addr(&b, data_addr + 8, z, true); \
511 mme_store_imm_addr(&b, data_addr + 12, w, true); \
512 \
513 for (uint32_t j = 0; j < 5; j++) \
514 mme_fermi_inc_whole_inst(&b, v); \
515 \
516 mme_store_imm_addr(&b, data_addr + 16, v, true); \
517 \
518 auto macro = mme_builder_finish_vec(&b); \
519 \
520 uint32_t end = (uint32_t)(start), count = 0; \
521 while (c_##op(end, (bound))) { \
522 end += (uint32_t)(step); \
523 count++; \
524 } \
525 \
526 std::vector<uint32_t> params; \
527 test_macro(&b, macro, params); \
528 ASSERT_EQ(data[0], 5); \
529 ASSERT_EQ(data[1], 5 * count); \
530 ASSERT_EQ(data[2], end); \
531 ASSERT_EQ(data[3], 5 * count); \
532 ASSERT_EQ(data[4], 5); \
533 }
534
535 WHILE_TEST(ieq, 0, 5, 0)
536 WHILE_TEST(ine, 0, 1, 7)
537
538 #undef WHILE_TWST
539
540
/* Exercises mme_loop(): the body adds `count` to an accumulator once per
 * iteration, so the accumulator ends up at count * count. A second value is
 * bumped once after the loop and is expected to read back as 1.
 */
TEST_F(mme_fermi_sim_test, loop)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value count = mme_load(&b);

   mme_value total = mme_mov(&b, mme_zero());
   mme_value after = mme_mov(&b, mme_zero());

   mme_loop(&b, count) {
      mme_fermi_asm(&b, i) { } /* noop */
      mme_add_to(&b, total, total, count);
   }
   mme_add_to(&b, after, after, mme_imm(1));

   /* Three empty instructions following the post-loop add */
   for (unsigned n = 0; n < 3; n++) {
      mme_fermi_asm(&b, i) { } /* noop */
   }

   mme_store_imm_addr(&b, data_addr + 0, count, true);
   mme_store_imm_addr(&b, data_addr + 4, total, true);
   mme_store_imm_addr(&b, data_addr + 8, after, true);

   auto macro = mme_builder_finish_vec(&b);

   const uint32_t counts[] = {0, 1, 5, 9};

   for (uint32_t i = 0; i < ARRAY_SIZE(counts); i++) {
      reset_push();

      const std::vector<uint32_t> params = {counts[i]};

      test_macro(&b, macro, params);
      ASSERT_EQ(data[0], counts[i]);
      ASSERT_EQ(data[1], counts[i] * counts[i]);
      ASSERT_EQ(data[2], 1);
   }
}
580
/* Runs mme_merge() on two loaded parameters with five different triples of
 * trailing arguments and stores each result in its own dword.
 */
TEST_F(mme_fermi_sim_test, merge)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x = mme_load(&b);
   mme_value y = mme_load(&b);

   /* The three trailing mme_merge() arguments, in call order */
   struct merge_args {
      uint16_t a0;
      uint16_t a1;
      uint16_t a2;
   };
   static const merge_args cases[] = {
      { 12, 12, 20 },
      { 12,  8, 20 },
      {  8, 12, 20 },
      { 12, 16,  8 },
      { 24, 12,  8 },
   };

   for (uint32_t c = 0; c < ARRAY_SIZE(cases); c++) {
      mme_value merged =
         mme_merge(&b, x, y, cases[c].a0, cases[c].a1, cases[c].a2);
      mme_store_imm_addr(&b, data_addr + c * 4, merged, true);
   }

   auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = {0x0c406fe0, 0x76543210u};

   test_macro(&b, macro, params);
}
612
/* Places a hand-assembled BRANCH (on the zero register, imm 2, no_delay
 * cleared) in front of an add and its store, and expects the sum of the two
 * parameters to still reach the data buffer.
 */
TEST_F(mme_fermi_sim_test, branch_delay_slot)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);

   mme_fermi_asm(&b, i) {
      i.op = MME_FERMI_OP_BRANCH;
      i.src[0] = MME_FERMI_REG_ZERO;
      i.imm = 2;
      i.branch.no_delay = false;
      i.branch.not_zero = false;
   }

   mme_value total = mme_add(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, total, true);

   auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = {3, 1};

   test_macro(&b, macro, params);
   ASSERT_EQ(data[0], 4);
}
642
/* Emits two parameters to shadow scratch entries 5 and 8, reads both back
 * with mme_state() and stores them (entry 8 first, then entry 5).
 */
TEST_F(mme_fermi_sim_test, state)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value in5 = mme_load(&b);
   mme_value in8 = mme_load(&b);

   mme_mthd(&b, NV9097_SET_MME_SHADOW_SCRATCH(5));
   mme_emit(&b, in5);

   mme_mthd(&b, NV9097_SET_MME_SHADOW_SCRATCH(8));
   mme_emit(&b, in8);

   mme_value out8 = mme_state(&b, NV9097_SET_MME_SHADOW_SCRATCH(8));
   mme_value out5 = mme_state(&b, NV9097_SET_MME_SHADOW_SCRATCH(5));

   mme_store_imm_addr(&b, data_addr + 0, out8, true);
   mme_store_imm_addr(&b, data_addr + 4, out5, true);

   auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = {(uint32_t)-10, 5};

   test_macro(&b, macro, params);
}
671
/* Walks all MME_FERMI_SCRATCH_COUNT shadow scratch entries in chunks: the
 * macro writes each entry's own index into it, reads the entries back and
 * stores them to the data buffer, where they are checked directly (this
 * test does not go through test_macro()).
 */
TEST_F(mme_fermi_sim_test, scratch_limit)
{
   static const uint32_t chunk_size = 32;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value start = mme_load(&b);
   mme_value count = mme_load(&b);

   /* Pass 1: scratch[k] = k for k in [start, start + count) */
   mme_value wr_idx = mme_mov(&b, start);
   mme_loop(&b, count) {
      mme_mthd_arr(&b, NV9097_SET_MME_SHADOW_SCRATCH(0), wr_idx);
      mme_emit(&b, wr_idx);
      mme_add_to(&b, wr_idx, wr_idx, mme_imm(1));
   }
   mme_free_reg(&b, wr_idx);

   /* Pass 2: copy the same entries out to consecutive dwords at data_addr */
   mme_value rd_idx = mme_mov(&b, start);
   mme_free_reg(&b, start);
   struct mme_value64 dst = mme_mov64(&b, mme_imm64(data_addr));

   mme_loop(&b, count) {
      mme_value entry =
         mme_state_arr(&b, NV9097_SET_MME_SHADOW_SCRATCH(0), rd_idx);
      mme_store(&b, dst, entry, true);
      mme_add_to(&b, rd_idx, rd_idx, mme_imm(1));
      mme_add64_to(&b, dst, dst, mme_imm64(4));
   }
   mme_free_reg(&b, rd_idx);
   mme_free_reg(&b, count);

   auto macro = mme_builder_finish_vec(&b);

   for (uint32_t i = 0; i < MME_FERMI_SCRATCH_COUNT; i += chunk_size) {
      reset_push();

      push_macro(0, macro);

      /* Call macro 0 with (start = i, count = chunk_size) */
      P_1INC(p, NV9097, CALL_MME_MACRO(0));
      P_INLINE_DATA(p, i);
      P_INLINE_DATA(p, chunk_size);

      submit_push();

      for (uint32_t j = 0; j < chunk_size; j++)
         ASSERT_EQ(data[j], i + j);
   }
}
720
/* Stores a set of immediates of varying bit widths to consecutive dwords and
 * checks that each one reads back unchanged.
 */
TEST_F(mme_fermi_sim_test, load_imm_to_reg)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   const uint32_t vals[] = {
      0x0001ffff,
      0x1ffff000,
      0x0007ffff,
      0x00080000,
      0x7fffffff,
      0x80000000,
      0xffffffff,
   };

   for (uint32_t n = 0; n < ARRAY_SIZE(vals); n++) {
      const mme_value imm = mme_imm(vals[n]);
      mme_store_imm_addr(&b, data_addr + n * 4, imm, false);
   }

   auto macro = mme_builder_finish_vec(&b);

   /* This macro takes no parameters */
   const std::vector<uint32_t> no_params;

   test_macro(&b, macro, no_params);

   for (uint32_t i = 0; i < ARRAY_SIZE(vals); i++)
      ASSERT_EQ(data[i], vals[i]);
}
748