Lines Matching refs:dst
31 __device__ void test_wmma_buitins(int *src, int *dst, in test_wmma_buitins() argument
39 __hmma_m16n16k16_ld_a(dst, src, ldm, 1); in test_wmma_buitins()
42 __hmma_m16n16k16_ld_a(dst, src, ldm, 0); in test_wmma_buitins()
45 __hmma_m16n16k16_ld_b(dst, src, ldm, 1); in test_wmma_buitins()
48 __hmma_m16n16k16_ld_b(dst, src, ldm, 0); in test_wmma_buitins()
51 __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1); in test_wmma_buitins()
54 __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0); in test_wmma_buitins()
63 __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1); in test_wmma_buitins()
66 __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0); in test_wmma_buitins()
75 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0); in test_wmma_buitins()
78 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1); in test_wmma_buitins()
81 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0); in test_wmma_buitins()
84 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1); in test_wmma_buitins()
87 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0); in test_wmma_buitins()
90 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1); in test_wmma_buitins()
93 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0); in test_wmma_buitins()
96 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1); in test_wmma_buitins()
123 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0); in test_wmma_buitins()
126 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1); in test_wmma_buitins()
129 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0); in test_wmma_buitins()
132 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1); in test_wmma_buitins()
135 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0); in test_wmma_buitins()
138 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1); in test_wmma_buitins()
141 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0); in test_wmma_buitins()
144 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1); in test_wmma_buitins()
175 __hmma_m32n8k16_ld_a(dst, src, ldm, 1); in test_wmma_buitins()
178 __hmma_m32n8k16_ld_a(dst, src, ldm, 0); in test_wmma_buitins()
181 __hmma_m32n8k16_ld_b(dst, src, ldm, 1); in test_wmma_buitins()
184 __hmma_m32n8k16_ld_b(dst, src, ldm, 0); in test_wmma_buitins()
187 __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 1); in test_wmma_buitins()
190 __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 0); in test_wmma_buitins()
199 __hmma_m32n8k16_st_c_f16(dst, src, ldm, 1); in test_wmma_buitins()
202 __hmma_m32n8k16_st_c_f16(dst, src, ldm, 0); in test_wmma_buitins()
211 __hmma_m8n32k16_ld_a(dst, src, ldm, 1); in test_wmma_buitins()
214 __hmma_m8n32k16_ld_a(dst, src, ldm, 0); in test_wmma_buitins()
217 __hmma_m8n32k16_ld_b(dst, src, ldm, 1); in test_wmma_buitins()
220 __hmma_m8n32k16_ld_b(dst, src, ldm, 0); in test_wmma_buitins()
223 __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 1); in test_wmma_buitins()
226 __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 0); in test_wmma_buitins()
235 __hmma_m8n32k16_st_c_f16(dst, src, ldm, 1); in test_wmma_buitins()
238 __hmma_m8n32k16_st_c_f16(dst, src, ldm, 0); in test_wmma_buitins()
247 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 0); in test_wmma_buitins()
250 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 1); in test_wmma_buitins()
253 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 0); in test_wmma_buitins()
256 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 1); in test_wmma_buitins()
259 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 0); in test_wmma_buitins()
262 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 1); in test_wmma_buitins()
265 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 0); in test_wmma_buitins()
268 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 1); in test_wmma_buitins()
295 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 0); in test_wmma_buitins()
298 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 1); in test_wmma_buitins()
301 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 0); in test_wmma_buitins()
304 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 1); in test_wmma_buitins()
307 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 0); in test_wmma_buitins()
310 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 1); in test_wmma_buitins()
313 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 0); in test_wmma_buitins()
316 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 1); in test_wmma_buitins()
343 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 0); in test_wmma_buitins()
346 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 1); in test_wmma_buitins()
349 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 0); in test_wmma_buitins()
352 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 1); in test_wmma_buitins()
355 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 0); in test_wmma_buitins()
358 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 1); in test_wmma_buitins()
361 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 0); in test_wmma_buitins()
364 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 1); in test_wmma_buitins()
391 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 0); in test_wmma_buitins()
394 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 1); in test_wmma_buitins()
397 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 0); in test_wmma_buitins()
400 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 1); in test_wmma_buitins()
403 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 0); in test_wmma_buitins()
406 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 1); in test_wmma_buitins()
409 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 0); in test_wmma_buitins()
412 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 1); in test_wmma_buitins()
443 __imma_m16n16k16_ld_a_s8(dst, src, ldm, 1); in test_wmma_buitins()
446 __imma_m16n16k16_ld_a_s8(dst, src, ldm, 0); in test_wmma_buitins()
449 __imma_m16n16k16_ld_a_u8(dst, src, ldm, 1); in test_wmma_buitins()
452 __imma_m16n16k16_ld_a_u8(dst, src, ldm, 0); in test_wmma_buitins()
455 __imma_m16n16k16_ld_b_s8(dst, src, ldm, 1); in test_wmma_buitins()
458 __imma_m16n16k16_ld_b_s8(dst, src, ldm, 0); in test_wmma_buitins()
461 __imma_m16n16k16_ld_b_u8(dst, src, ldm, 1); in test_wmma_buitins()
464 __imma_m16n16k16_ld_b_u8(dst, src, ldm, 0); in test_wmma_buitins()
467 __imma_m16n16k16_ld_c(dst, src, ldm, 1); in test_wmma_buitins()
470 __imma_m16n16k16_ld_c(dst, src, ldm, 0); in test_wmma_buitins()
473 __imma_m16n16k16_st_c_i32(dst, src, ldm, 1); in test_wmma_buitins()
476 __imma_m16n16k16_st_c_i32(dst, src, ldm, 0); in test_wmma_buitins()
479 __imma_m32n8k16_ld_a_s8(dst, src, ldm, 1); in test_wmma_buitins()
482 __imma_m32n8k16_ld_a_s8(dst, src, ldm, 0); in test_wmma_buitins()
485 __imma_m32n8k16_ld_a_u8(dst, src, ldm, 1); in test_wmma_buitins()
488 __imma_m32n8k16_ld_a_u8(dst, src, ldm, 0); in test_wmma_buitins()
491 __imma_m32n8k16_ld_b_s8(dst, src, ldm, 1); in test_wmma_buitins()
494 __imma_m32n8k16_ld_b_s8(dst, src, ldm, 0); in test_wmma_buitins()
497 __imma_m32n8k16_ld_b_u8(dst, src, ldm, 1); in test_wmma_buitins()
500 __imma_m32n8k16_ld_b_u8(dst, src, ldm, 0); in test_wmma_buitins()
503 __imma_m32n8k16_ld_c(dst, src, ldm, 1); in test_wmma_buitins()
506 __imma_m32n8k16_ld_c(dst, src, ldm, 0); in test_wmma_buitins()
509 __imma_m32n8k16_st_c_i32(dst, src, ldm, 1); in test_wmma_buitins()
512 __imma_m32n8k16_st_c_i32(dst, src, ldm, 0); in test_wmma_buitins()
515 __imma_m8n32k16_ld_a_s8(dst, src, ldm, 1); in test_wmma_buitins()
518 __imma_m8n32k16_ld_a_s8(dst, src, ldm, 0); in test_wmma_buitins()
521 __imma_m8n32k16_ld_a_u8(dst, src, ldm, 1); in test_wmma_buitins()
524 __imma_m8n32k16_ld_a_u8(dst, src, ldm, 0); in test_wmma_buitins()
527 __imma_m8n32k16_ld_b_s8(dst, src, ldm, 1); in test_wmma_buitins()
530 __imma_m8n32k16_ld_b_s8(dst, src, ldm, 0); in test_wmma_buitins()
533 __imma_m8n32k16_ld_b_u8(dst, src, ldm, 1); in test_wmma_buitins()
536 __imma_m8n32k16_ld_b_u8(dst, src, ldm, 0); in test_wmma_buitins()
539 __imma_m8n32k16_ld_c(dst, src, ldm, 1); in test_wmma_buitins()
542 __imma_m8n32k16_ld_c(dst, src, ldm, 0); in test_wmma_buitins()
545 __imma_m8n32k16_st_c_i32(dst, src, ldm, 1); in test_wmma_buitins()
548 __imma_m8n32k16_st_c_i32(dst, src, ldm, 0); in test_wmma_buitins()
551 __imma_m16n16k16_mma_s8(dst, src, src, src, 3, 0); in test_wmma_buitins()
554 __imma_m16n16k16_mma_s8(dst, src, src, src, 3, 1); in test_wmma_buitins()
557 __imma_m16n16k16_mma_s8(dst, src, src, src, 2, 0); in test_wmma_buitins()
560 __imma_m16n16k16_mma_s8(dst, src, src, src, 2, 1); in test_wmma_buitins()
563 __imma_m16n16k16_mma_s8(dst, src, src, src, 1, 0); in test_wmma_buitins()
566 __imma_m16n16k16_mma_s8(dst, src, src, src, 1, 1); in test_wmma_buitins()
569 __imma_m16n16k16_mma_s8(dst, src, src, src, 0, 0); in test_wmma_buitins()
572 __imma_m16n16k16_mma_s8(dst, src, src, src, 0, 1); in test_wmma_buitins()
575 __imma_m16n16k16_mma_u8(dst, src, src, src, 3, 0); in test_wmma_buitins()
578 __imma_m16n16k16_mma_u8(dst, src, src, src, 3, 1); in test_wmma_buitins()
581 __imma_m16n16k16_mma_u8(dst, src, src, src, 2, 0); in test_wmma_buitins()
584 __imma_m16n16k16_mma_u8(dst, src, src, src, 2, 1); in test_wmma_buitins()
587 __imma_m16n16k16_mma_u8(dst, src, src, src, 1, 0); in test_wmma_buitins()
590 __imma_m16n16k16_mma_u8(dst, src, src, src, 1, 1); in test_wmma_buitins()
593 __imma_m16n16k16_mma_u8(dst, src, src, src, 0, 0); in test_wmma_buitins()
596 __imma_m16n16k16_mma_u8(dst, src, src, src, 0, 1); in test_wmma_buitins()
599 __imma_m32n8k16_mma_s8(dst, src, src, src, 3, 0); in test_wmma_buitins()
602 __imma_m32n8k16_mma_s8(dst, src, src, src, 3, 1); in test_wmma_buitins()
605 __imma_m32n8k16_mma_s8(dst, src, src, src, 2, 0); in test_wmma_buitins()
608 __imma_m32n8k16_mma_s8(dst, src, src, src, 2, 1); in test_wmma_buitins()
611 __imma_m32n8k16_mma_s8(dst, src, src, src, 1, 0); in test_wmma_buitins()
614 __imma_m32n8k16_mma_s8(dst, src, src, src, 1, 1); in test_wmma_buitins()
617 __imma_m32n8k16_mma_s8(dst, src, src, src, 0, 0); in test_wmma_buitins()
620 __imma_m32n8k16_mma_s8(dst, src, src, src, 0, 1); in test_wmma_buitins()
623 __imma_m32n8k16_mma_u8(dst, src, src, src, 3, 0); in test_wmma_buitins()
626 __imma_m32n8k16_mma_u8(dst, src, src, src, 3, 1); in test_wmma_buitins()
629 __imma_m32n8k16_mma_u8(dst, src, src, src, 2, 0); in test_wmma_buitins()
632 __imma_m32n8k16_mma_u8(dst, src, src, src, 2, 1); in test_wmma_buitins()
635 __imma_m32n8k16_mma_u8(dst, src, src, src, 1, 0); in test_wmma_buitins()
638 __imma_m32n8k16_mma_u8(dst, src, src, src, 1, 1); in test_wmma_buitins()
641 __imma_m32n8k16_mma_u8(dst, src, src, src, 0, 0); in test_wmma_buitins()
644 __imma_m32n8k16_mma_u8(dst, src, src, src, 0, 1); in test_wmma_buitins()
647 __imma_m8n32k16_mma_s8(dst, src, src, src, 3, 0); in test_wmma_buitins()
650 __imma_m8n32k16_mma_s8(dst, src, src, src, 3, 1); in test_wmma_buitins()
653 __imma_m8n32k16_mma_s8(dst, src, src, src, 2, 0); in test_wmma_buitins()
656 __imma_m8n32k16_mma_s8(dst, src, src, src, 2, 1); in test_wmma_buitins()
659 __imma_m8n32k16_mma_s8(dst, src, src, src, 1, 0); in test_wmma_buitins()
662 __imma_m8n32k16_mma_s8(dst, src, src, src, 1, 1); in test_wmma_buitins()
665 __imma_m8n32k16_mma_s8(dst, src, src, src, 0, 0); in test_wmma_buitins()
668 __imma_m8n32k16_mma_s8(dst, src, src, src, 0, 1); in test_wmma_buitins()
671 __imma_m8n32k16_mma_u8(dst, src, src, src, 3, 0); in test_wmma_buitins()
674 __imma_m8n32k16_mma_u8(dst, src, src, src, 3, 1); in test_wmma_buitins()
677 __imma_m8n32k16_mma_u8(dst, src, src, src, 2, 0); in test_wmma_buitins()
680 __imma_m8n32k16_mma_u8(dst, src, src, src, 2, 1); in test_wmma_buitins()
683 __imma_m8n32k16_mma_u8(dst, src, src, src, 1, 0); in test_wmma_buitins()
686 __imma_m8n32k16_mma_u8(dst, src, src, src, 1, 1); in test_wmma_buitins()
689 __imma_m8n32k16_mma_u8(dst, src, src, src, 0, 0); in test_wmma_buitins()
692 __imma_m8n32k16_mma_u8(dst, src, src, src, 0, 1); in test_wmma_buitins()
699 __bmma_m8n8k128_ld_a_b1(dst, src, ldm, 0); in test_wmma_buitins()
702 __bmma_m8n8k128_ld_b_b1(dst, src, ldm, 1); in test_wmma_buitins()
705 __bmma_m8n8k128_ld_c(dst, src, ldm, 1); in test_wmma_buitins()
708 __bmma_m8n8k128_ld_c(dst, src, ldm, 0); in test_wmma_buitins()
711 __bmma_m8n8k128_st_c_i32(dst, src, ldm, 1); in test_wmma_buitins()
714 __bmma_m8n8k128_st_c_i32(dst, src, ldm, 0); in test_wmma_buitins()
717 __imma_m8n8k32_ld_a_s4(dst, src, ldm, 0); in test_wmma_buitins()
720 __imma_m8n8k32_ld_a_u4(dst, src, ldm, 0); in test_wmma_buitins()
723 __imma_m8n8k32_ld_b_s4(dst, src, ldm, 1); in test_wmma_buitins()
726 __imma_m8n8k32_ld_b_u4(dst, src, ldm, 1); in test_wmma_buitins()
729 __imma_m8n8k32_ld_c(dst, src, ldm, 1); in test_wmma_buitins()
732 __imma_m8n8k32_ld_c(dst, src, ldm, 0); in test_wmma_buitins()
735 __imma_m8n8k32_st_c_i32(dst, src, ldm, 1); in test_wmma_buitins()
738 __imma_m8n8k32_st_c_i32(dst, src, ldm, 0); in test_wmma_buitins()
741 __bmma_m8n8k128_mma_xor_popc_b1(dst, src, src, src, 1); in test_wmma_buitins()
744 __imma_m8n8k32_mma_s4(dst, src, src, src, 1, 0); in test_wmma_buitins()
747 __imma_m8n8k32_mma_s4(dst, src, src, src, 1, 1); in test_wmma_buitins()
750 __imma_m8n8k32_mma_u4(dst, src, src, src, 1, 0); in test_wmma_buitins()
753 __imma_m8n8k32_mma_u4(dst, src, src, src, 1, 1); in test_wmma_buitins()