• Home
  • Raw
  • Download

Lines Matching refs:DL

5 …llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-DL %s
6 …c -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-DL %s
7 …c -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-DL %s
85 ; GFX9-DL-LABEL: udot2:
86 ; GFX9-DL: ; %bb.0: ; %entry
87 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
88 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
89 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
90 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
91 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
92 ; GFX9-DL-NEXT: s_load_dword s3, s[0:1], 0x0
93 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
94 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
95 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2
96 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
97 ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, s4, v1, v2
98 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
99 ; GFX9-DL-NEXT: s_endpgm
101 ; GFX10-DL-LABEL: udot2:
102 ; GFX10-DL: ; %bb.0: ; %entry
103 ; GFX10-DL-NEXT: s_clause 0x1
104 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
105 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
106 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
107 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
108 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
109 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
110 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
111 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
112 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
113 ; GFX10-DL-NEXT: v_dot2_u32_u16 v0, s1, s0, v0
114 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
115 ; GFX10-DL-NEXT: s_endpgm
217 ; GFX9-DL-LABEL: udot2_MulMul:
218 ; GFX9-DL: ; %bb.0: ; %entry
219 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
220 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
221 ; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff
222 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
223 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
224 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
225 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
226 ; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0
227 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
228 ; GFX9-DL-NEXT: s_and_b32 s6, s3, s2
229 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
230 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s6
231 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16
232 ; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16
233 ; GFX9-DL-NEXT: v_mul_u32_u24_e32 v1, s2, v1
234 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
235 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s4, v2, v1
236 ; GFX9-DL-NEXT: v_add_u32_e32 v1, s5, v1
237 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
238 ; GFX9-DL-NEXT: s_endpgm
240 ; GFX10-DL-LABEL: udot2_MulMul:
241 ; GFX10-DL: ; %bb.0: ; %entry
242 ; GFX10-DL-NEXT: s_clause 0x1
243 ; GFX10-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
244 ; GFX10-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
245 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
246 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
247 ; GFX10-DL-NEXT: s_load_dword s2, s[4:5], 0x0
248 ; GFX10-DL-NEXT: s_load_dword s3, s[6:7], 0x0
249 ; GFX10-DL-NEXT: s_load_dword s4, s[0:1], 0x0
250 ; GFX10-DL-NEXT: s_mov_b32 s5, 0xffff
251 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
252 ; GFX10-DL-NEXT: s_and_b32 s6, s2, s5
253 ; GFX10-DL-NEXT: s_and_b32 s5, s3, s5
254 ; GFX10-DL-NEXT: s_lshr_b32 s2, s2, 16
255 ; GFX10-DL-NEXT: v_mul_u32_u24_e64 v0, s5, s6
256 ; GFX10-DL-NEXT: s_lshr_b32 s3, s3, 16
257 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s2, v0
258 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v0, s4, v0
259 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[0:1]
260 ; GFX10-DL-NEXT: s_endpgm
354 ; GFX9-DL-LABEL: idot2:
355 ; GFX9-DL: ; %bb.0: ; %entry
356 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
357 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
358 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
359 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
360 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
361 ; GFX9-DL-NEXT: s_load_dword s3, s[0:1], 0x0
362 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
363 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
364 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2
365 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
366 ; GFX9-DL-NEXT: v_dot2_i32_i16 v1, s4, v1, v2
367 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
368 ; GFX9-DL-NEXT: s_endpgm
370 ; GFX10-DL-LABEL: idot2:
371 ; GFX10-DL: ; %bb.0: ; %entry
372 ; GFX10-DL-NEXT: s_clause 0x1
373 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
374 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
375 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
376 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
377 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
378 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
379 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
380 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
381 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
382 ; GFX10-DL-NEXT: v_dot2_i32_i16 v0, s1, s0, v0
383 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
384 ; GFX10-DL-NEXT: s_endpgm
479 ; GFX9-DL-LABEL: idot2_MixedTypedMul:
480 ; GFX9-DL: ; %bb.0: ; %entry
481 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
482 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
483 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
484 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
485 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
486 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
487 ; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0
488 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
489 ; GFX9-DL-NEXT: s_sext_i32_i16 s5, s2
490 ; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 16
491 ; GFX9-DL-NEXT: s_sext_i32_i16 s6, s3
492 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16
493 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s4
494 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2
495 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s3, v2, v1
496 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
497 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s6, v2, v1
498 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
499 ; GFX9-DL-NEXT: s_endpgm
501 ; GFX10-DL-LABEL: idot2_MixedTypedMul:
502 ; GFX10-DL: ; %bb.0: ; %entry
503 ; GFX10-DL-NEXT: s_clause 0x1
504 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
505 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
506 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
507 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
508 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
509 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
510 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
511 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
512 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
513 ; GFX10-DL-NEXT: s_lshr_b32 s2, s0, 16
514 ; GFX10-DL-NEXT: s_lshr_b32 s3, s1, 16
515 ; GFX10-DL-NEXT: s_sext_i32_i16 s0, s0
516 ; GFX10-DL-NEXT: s_sext_i32_i16 s1, s1
517 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s2, v0
518 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s1, s0, v0
519 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
520 ; GFX10-DL-NEXT: s_endpgm
619 ; GFX9-DL-LABEL: udot2_alt_AddOperands:
620 ; GFX9-DL: ; %bb.0: ; %entry
621 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
622 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
623 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
624 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
625 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
626 ; GFX9-DL-NEXT: s_load_dword s3, s[0:1], 0x0
627 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
628 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
629 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2
630 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
631 ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, s4, v1, v2
632 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
633 ; GFX9-DL-NEXT: s_endpgm
635 ; GFX10-DL-LABEL: udot2_alt_AddOperands:
636 ; GFX10-DL: ; %bb.0: ; %entry
637 ; GFX10-DL-NEXT: s_clause 0x1
638 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
639 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
640 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
641 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
642 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
643 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
644 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
645 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
646 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
647 ; GFX10-DL-NEXT: v_dot2_u32_u16 v0, s1, s0, v0
648 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
649 ; GFX10-DL-NEXT: s_endpgm
744 ; GFX9-DL-LABEL: idot2_MixedExt:
745 ; GFX9-DL: ; %bb.0: ; %entry
746 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
747 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
748 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
749 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
750 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
751 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
752 ; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0
753 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
754 ; GFX9-DL-NEXT: s_sext_i32_i16 s5, s2
755 ; GFX9-DL-NEXT: s_ashr_i32 s2, s2, 16
756 ; GFX9-DL-NEXT: s_and_b32 s6, s3, 0xffff
757 ; GFX9-DL-NEXT: s_ashr_i32 s3, s3, 16
758 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s4
759 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2
760 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s3, v2, v1
761 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
762 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s6, v2, v1
763 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
764 ; GFX9-DL-NEXT: s_endpgm
766 ; GFX10-DL-LABEL: idot2_MixedExt:
767 ; GFX10-DL: ; %bb.0: ; %entry
768 ; GFX10-DL-NEXT: s_clause 0x1
769 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
770 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
771 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
772 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
773 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
774 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
775 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
776 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
777 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
778 ; GFX10-DL-NEXT: s_ashr_i32 s2, s0, 16
779 ; GFX10-DL-NEXT: s_ashr_i32 s3, s1, 16
780 ; GFX10-DL-NEXT: s_sext_i32_i16 s0, s0
781 ; GFX10-DL-NEXT: s_and_b32 s1, s1, 0xffff
782 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s3, s2, v0
783 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s1, s0, v0
784 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
785 ; GFX10-DL-NEXT: s_endpgm
868 ; GFX9-DL-LABEL: notudot2_SameVec:
869 ; GFX9-DL: ; %bb.0: ; %entry
870 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
871 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
872 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
873 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
874 ; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0
875 ; GFX9-DL-NEXT: s_load_dword s3, s[0:1], 0x0
876 ; GFX9-DL-NEXT: s_load_dword s4, s[4:5], 0x0
877 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
878 ; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 16
879 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3
880 ; GFX9-DL-NEXT: s_and_b32 s4, s4, 0xffff
881 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s2, s2, v1
882 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s4, s4, v1
883 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
884 ; GFX9-DL-NEXT: s_endpgm
886 ; GFX10-DL-LABEL: notudot2_SameVec:
887 ; GFX10-DL: ; %bb.0: ; %entry
888 ; GFX10-DL-NEXT: s_clause 0x1
889 ; GFX10-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
890 ; GFX10-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
891 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
892 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
893 ; GFX10-DL-NEXT: s_load_dword s2, s[6:7], 0x0
894 ; GFX10-DL-NEXT: s_load_dword s3, s[0:1], 0x0
895 ; GFX10-DL-NEXT: s_load_dword s4, s[4:5], 0x0
896 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
897 ; GFX10-DL-NEXT: s_lshr_b32 s2, s2, 16
898 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s2, s2, s3
899 ; GFX10-DL-NEXT: s_and_b32 s2, s4, 0xffff
900 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s2, s2, v0
901 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[0:1]
902 ; GFX10-DL-NEXT: s_endpgm
1001 ; GFX9-DL-LABEL: udot2_v4i16:
1002 ; GFX9-DL: ; %bb.0: ; %entry
1003 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1004 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1005 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1006 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1007 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
1008 ; GFX9-DL-NEXT: s_load_dword s3, s[0:1], 0x0
1009 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
1010 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1011 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2
1012 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
1013 ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, s4, v1, v2
1014 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1015 ; GFX9-DL-NEXT: s_endpgm
1017 ; GFX10-DL-LABEL: udot2_v4i16:
1018 ; GFX10-DL: ; %bb.0: ; %entry
1019 ; GFX10-DL-NEXT: s_clause 0x1
1020 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1021 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1022 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
1023 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1024 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1025 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
1026 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
1027 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1028 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1029 ; GFX10-DL-NEXT: v_dot2_u32_u16 v0, s1, s0, v0
1030 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
1031 ; GFX10-DL-NEXT: s_endpgm
1130 ; GFX9-DL-LABEL: udot2_v4i16_Hi:
1131 ; GFX9-DL: ; %bb.0: ; %entry
1132 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1133 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1134 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1135 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1136 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x4
1137 ; GFX9-DL-NEXT: s_load_dword s3, s[0:1], 0x0
1138 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x4
1139 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1140 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2
1141 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
1142 ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, s4, v1, v2
1143 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1144 ; GFX9-DL-NEXT: s_endpgm
1146 ; GFX10-DL-LABEL: udot2_v4i16_Hi:
1147 ; GFX10-DL: ; %bb.0: ; %entry
1148 ; GFX10-DL-NEXT: s_clause 0x1
1149 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1150 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1151 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
1152 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1153 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1154 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x4
1155 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x4
1156 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1157 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1158 ; GFX10-DL-NEXT: v_dot2_u32_u16 v0, s1, s0, v0
1159 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
1160 ; GFX10-DL-NEXT: s_endpgm
1259 ; GFX9-DL-LABEL: notudot2_v4i16_Even:
1260 ; GFX9-DL: ; %bb.0: ; %entry
1261 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1262 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1263 ; GFX9-DL-NEXT: s_mov_b32 s8, 0xffff
1264 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1265 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1266 ; GFX9-DL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
1267 ; GFX9-DL-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
1268 ; GFX9-DL-NEXT: s_load_dword s6, s[0:1], 0x0
1269 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1270 ; GFX9-DL-NEXT: s_and_b32 s3, s3, s8
1271 ; GFX9-DL-NEXT: s_and_b32 s2, s2, s8
1272 ; GFX9-DL-NEXT: s_and_b32 s5, s5, s8
1273 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3
1274 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6
1275 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s5, v1, v2
1276 ; GFX9-DL-NEXT: s_and_b32 s4, s4, s8
1277 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2
1278 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s4, v2, v1
1279 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1280 ; GFX9-DL-NEXT: s_endpgm
1282 ; GFX10-DL-LABEL: notudot2_v4i16_Even:
1283 ; GFX10-DL: ; %bb.0: ; %entry
1284 ; GFX10-DL-NEXT: s_clause 0x1
1285 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1286 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1287 ; GFX10-DL-NEXT: s_mov_b32 s7, 0xffff
1288 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
1289 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1290 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1291 ; GFX10-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1292 ; GFX10-DL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
1293 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1294 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1295 ; GFX10-DL-NEXT: s_and_b32 s1, s1, s7
1296 ; GFX10-DL-NEXT: s_and_b32 s3, s3, s7
1297 ; GFX10-DL-NEXT: s_and_b32 s0, s0, s7
1298 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s1, v0
1299 ; GFX10-DL-NEXT: s_and_b32 s1, s2, s7
1300 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s1, s0, v0
1301 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
1302 ; GFX10-DL-NEXT: s_endpgm
1401 ; GFX9-DL-LABEL: notudot2_v4i16_Middle:
1402 ; GFX9-DL: ; %bb.0: ; %entry
1403 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1404 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1405 ; GFX9-DL-NEXT: s_mov_b32 s8, 0xffff
1406 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1407 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1408 ; GFX9-DL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
1409 ; GFX9-DL-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
1410 ; GFX9-DL-NEXT: s_load_dword s6, s[0:1], 0x0
1411 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1412 ; GFX9-DL-NEXT: s_and_b32 s3, s3, s8
1413 ; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 16
1414 ; GFX9-DL-NEXT: s_and_b32 s5, s5, s8
1415 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3
1416 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6
1417 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s5, v1, v2
1418 ; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16
1419 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2
1420 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s4, v2, v1
1421 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1422 ; GFX9-DL-NEXT: s_endpgm
1424 ; GFX10-DL-LABEL: notudot2_v4i16_Middle:
1425 ; GFX10-DL: ; %bb.0: ; %entry
1426 ; GFX10-DL-NEXT: s_clause 0x1
1427 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1428 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1429 ; GFX10-DL-NEXT: s_mov_b32 s7, 0xffff
1430 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
1431 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1432 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1433 ; GFX10-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
1434 ; GFX10-DL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
1435 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1436 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1437 ; GFX10-DL-NEXT: s_and_b32 s1, s1, s7
1438 ; GFX10-DL-NEXT: s_and_b32 s3, s3, s7
1439 ; GFX10-DL-NEXT: s_lshr_b32 s0, s0, 16
1440 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s1, v0
1441 ; GFX10-DL-NEXT: s_lshr_b32 s1, s2, 16
1442 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s1, s0, v0
1443 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
1444 ; GFX10-DL-NEXT: s_endpgm
1543 ; GFX9-DL-LABEL: notudot2_DiffIndex:
1544 ; GFX9-DL: ; %bb.0: ; %entry
1545 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1546 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1547 ; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff
1548 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1549 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1550 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
1551 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
1552 ; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0
1553 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1554 ; GFX9-DL-NEXT: s_and_b32 s6, s3, s2
1555 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16
1556 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
1557 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3
1558 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
1559 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s2, v1, v2
1560 ; GFX9-DL-NEXT: s_lshr_b32 s7, s4, 16
1561 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6
1562 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s7, v2, v1
1563 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1564 ; GFX9-DL-NEXT: s_endpgm
1566 ; GFX10-DL-LABEL: notudot2_DiffIndex:
1567 ; GFX10-DL: ; %bb.0: ; %entry
1568 ; GFX10-DL-NEXT: s_clause 0x1
1569 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1570 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1571 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
1572 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1573 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1574 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
1575 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
1576 ; GFX10-DL-NEXT: s_mov_b32 s2, 0xffff
1577 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1578 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1579 ; GFX10-DL-NEXT: s_lshr_b32 s3, s0, 16
1580 ; GFX10-DL-NEXT: s_and_b32 s6, s1, s2
1581 ; GFX10-DL-NEXT: s_and_b32 s0, s0, s2
1582 ; GFX10-DL-NEXT: s_lshr_b32 s1, s1, 16
1583 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s6, s3, v0
1584 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s1, s0, v0
1585 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
1586 ; GFX10-DL-NEXT: s_endpgm
1688 ; GFX9-DL-LABEL: udot2_MultipleUses_add1:
1689 ; GFX9-DL: ; %bb.0: ; %entry
1690 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1691 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1692 ; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff
1693 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1694 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1695 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
1696 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
1697 ; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0
1698 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1699 ; GFX9-DL-NEXT: s_and_b32 s6, s3, s2
1700 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16
1701 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
1702 ; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16
1703 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3
1704 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
1705 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s4, v1, v2
1706 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6
1707 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v2, v1
1708 ; GFX9-DL-NEXT: v_add_u32_e32 v1, v2, v1
1709 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1710 ; GFX9-DL-NEXT: s_endpgm
1712 ; GFX10-DL-LABEL: udot2_MultipleUses_add1:
1713 ; GFX10-DL: ; %bb.0: ; %entry
1714 ; GFX10-DL-NEXT: s_clause 0x1
1715 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1716 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1717 ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0
1718 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1719 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1720 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
1721 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
1722 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1723 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1724 ; GFX10-DL-NEXT: s_lshr_b32 s2, s0, 16
1725 ; GFX10-DL-NEXT: s_lshr_b32 s3, s1, 16
1726 ; GFX10-DL-NEXT: s_mov_b32 s6, 0xffff
1727 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s2, v0
1728 ; GFX10-DL-NEXT: s_and_b32 s0, s0, s6
1729 ; GFX10-DL-NEXT: s_and_b32 s1, s1, s6
1730 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s1, s0, v0
1731 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v0, v1, v0
1732 ; GFX10-DL-NEXT: global_store_dword v2, v0, s[4:5]
1733 ; GFX10-DL-NEXT: s_endpgm
1833 ; GFX9-DL-LABEL: idot2_MultipleUses_add1:
1834 ; GFX9-DL: ; %bb.0: ; %entry
1835 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1836 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1837 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1838 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1839 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
1840 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
1841 ; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0
1842 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1843 ; GFX9-DL-NEXT: s_sext_i32_i16 s5, s2
1844 ; GFX9-DL-NEXT: s_ashr_i32 s2, s2, 16
1845 ; GFX9-DL-NEXT: s_sext_i32_i16 s6, s3
1846 ; GFX9-DL-NEXT: s_ashr_i32 s3, s3, 16
1847 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s4
1848 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2
1849 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s3, v2, v1
1850 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
1851 ; GFX9-DL-NEXT: v_mad_i32_i24 v2, s6, v2, v1
1852 ; GFX9-DL-NEXT: v_add_u32_e32 v1, v2, v1
1853 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1854 ; GFX9-DL-NEXT: s_endpgm
1856 ; GFX10-DL-LABEL: idot2_MultipleUses_add1:
1857 ; GFX10-DL: ; %bb.0: ; %entry
1858 ; GFX10-DL-NEXT: s_clause 0x1
1859 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1860 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1861 ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0
1862 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1863 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1864 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
1865 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
1866 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1867 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1868 ; GFX10-DL-NEXT: s_ashr_i32 s2, s0, 16
1869 ; GFX10-DL-NEXT: s_ashr_i32 s3, s1, 16
1870 ; GFX10-DL-NEXT: s_sext_i32_i16 s0, s0
1871 ; GFX10-DL-NEXT: s_sext_i32_i16 s1, s1
1872 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s3, s2, v0
1873 ; GFX10-DL-NEXT: v_mad_i32_i24 v1, s1, s0, v0
1874 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v0, v1, v0
1875 ; GFX10-DL-NEXT: global_store_dword v2, v0, s[4:5]
1876 ; GFX10-DL-NEXT: s_endpgm
1980 ; GFX9-DL-LABEL: udot2_MultipleUses_mul1:
1981 ; GFX9-DL: ; %bb.0: ; %entry
1982 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1983 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1984 ; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff
1985 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1986 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1987 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
1988 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
1989 ; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0
1990 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1991 ; GFX9-DL-NEXT: s_and_b32 s6, s3, s2
1992 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
1993 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16
1994 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s6
1995 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
1996 ; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16
1997 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v1, v2
1998 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s3
1999 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s4, v3, v2
2000 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s2, v1, v2
2001 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
2002 ; GFX9-DL-NEXT: s_endpgm
2004 ; GFX10-DL-LABEL: udot2_MultipleUses_mul1:
2005 ; GFX10-DL: ; %bb.0: ; %entry
2006 ; GFX10-DL-NEXT: s_clause 0x1
2007 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
2008 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2009 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
2010 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2011 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
2012 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
2013 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
2014 ; GFX10-DL-NEXT: s_mov_b32 s2, 0xffff
2015 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2016 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
2017 ; GFX10-DL-NEXT: s_and_b32 s3, s0, s2
2018 ; GFX10-DL-NEXT: s_and_b32 s2, s1, s2
2019 ; GFX10-DL-NEXT: s_lshr_b32 s0, s0, 16
2020 ; GFX10-DL-NEXT: s_lshr_b32 s1, s1, 16
2021 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s2, s3, v0
2022 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s1, s0, v0
2023 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s2, s3, v0
2024 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
2025 ; GFX10-DL-NEXT: s_endpgm
2126 ; GFX9-DL-LABEL: idot2_MultipleUses_mul1:
2127 ; GFX9-DL: ; %bb.0: ; %entry
2128 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
2129 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
2130 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
2131 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2132 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
2133 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
2134 ; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0
2135 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2136 ; GFX9-DL-NEXT: s_sext_i32_i16 s5, s2
2137 ; GFX9-DL-NEXT: s_sext_i32_i16 s6, s3
2138 ; GFX9-DL-NEXT: s_ashr_i32 s2, s2, 16
2139 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s4
2140 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
2141 ; GFX9-DL-NEXT: s_ashr_i32 s3, s3, 16
2142 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s6, v2, v1
2143 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s2
2144 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s3, v3, v1
2145 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s6, v2, v1
2146 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
2147 ; GFX9-DL-NEXT: s_endpgm
2149 ; GFX10-DL-LABEL: idot2_MultipleUses_mul1:
2150 ; GFX10-DL: ; %bb.0: ; %entry
2151 ; GFX10-DL-NEXT: s_clause 0x1
2152 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
2153 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2154 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
2155 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2156 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
2157 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
2158 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
2159 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2160 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
2161 ; GFX10-DL-NEXT: s_sext_i32_i16 s2, s0
2162 ; GFX10-DL-NEXT: s_sext_i32_i16 s3, s1
2163 ; GFX10-DL-NEXT: s_ashr_i32 s0, s0, 16
2164 ; GFX10-DL-NEXT: s_ashr_i32 s1, s1, 16
2165 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s3, s2, v0
2166 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s1, s0, v0
2167 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s3, s2, v0
2168 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
2169 ; GFX10-DL-NEXT: s_endpgm
2274 ; GFX9-DL-LABEL: udot2_MultipleUses_mul2:
2275 ; GFX9-DL: ; %bb.0: ; %entry
2276 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
2277 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
2278 ; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff
2279 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
2280 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2281 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
2282 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
2283 ; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0
2284 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2285 ; GFX9-DL-NEXT: s_and_b32 s6, s3, s2
2286 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16
2287 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
2288 ; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16
2289 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3
2290 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
2291 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s4, v1, v2
2292 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s4, v1, v2
2293 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6
2294 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s2, v2, v1
2295 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
2296 ; GFX9-DL-NEXT: s_endpgm
2298 ; GFX10-DL-LABEL: udot2_MultipleUses_mul2:
2299 ; GFX10-DL: ; %bb.0: ; %entry
2300 ; GFX10-DL-NEXT: s_clause 0x1
2301 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
2302 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2303 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
2304 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2305 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
2306 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
2307 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
2308 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2309 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
2310 ; GFX10-DL-NEXT: s_lshr_b32 s2, s0, 16
2311 ; GFX10-DL-NEXT: s_lshr_b32 s3, s1, 16
2312 ; GFX10-DL-NEXT: s_mov_b32 s6, 0xffff
2313 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s2, v0
2314 ; GFX10-DL-NEXT: s_and_b32 s0, s0, s6
2315 ; GFX10-DL-NEXT: s_and_b32 s1, s1, s6
2316 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s2, v0
2317 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s1, s0, v0
2318 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
2319 ; GFX10-DL-NEXT: s_endpgm
2420 ; GFX9-DL-LABEL: idot2_MultipleUses_mul2:
2421 ; GFX9-DL: ; %bb.0: ; %entry
2422 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
2423 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
2424 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
2425 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2426 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
2427 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
2428 ; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0
2429 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2430 ; GFX9-DL-NEXT: s_sext_i32_i16 s5, s2
2431 ; GFX9-DL-NEXT: s_ashr_i32 s2, s2, 16
2432 ; GFX9-DL-NEXT: s_sext_i32_i16 s6, s3
2433 ; GFX9-DL-NEXT: s_ashr_i32 s3, s3, 16
2434 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s4
2435 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2
2436 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s3, v2, v1
2437 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s3, v2, v1
2438 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
2439 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s6, v2, v1
2440 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
2441 ; GFX9-DL-NEXT: s_endpgm
2443 ; GFX10-DL-LABEL: idot2_MultipleUses_mul2:
2444 ; GFX10-DL: ; %bb.0: ; %entry
2445 ; GFX10-DL-NEXT: s_clause 0x1
2446 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
2447 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2448 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
2449 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2450 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
2451 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
2452 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
2453 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2454 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
2455 ; GFX10-DL-NEXT: s_ashr_i32 s2, s0, 16
2456 ; GFX10-DL-NEXT: s_ashr_i32 s3, s1, 16
2457 ; GFX10-DL-NEXT: s_sext_i32_i16 s0, s0
2458 ; GFX10-DL-NEXT: s_sext_i32_i16 s1, s1
2459 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s3, s2, v0
2460 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s3, s2, v0
2461 ; GFX10-DL-NEXT: v_mad_i32_i24 v0, s1, s0, v0
2462 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
2463 ; GFX10-DL-NEXT: s_endpgm
2564 ; GFX9-DL-LABEL: udot2_acc16:
2565 ; GFX9-DL: ; %bb.0: ; %entry
2566 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
2567 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
2568 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
2569 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2570 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
2571 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
2572 ; GFX9-DL-NEXT: global_load_ushort v1, v0, s[0:1]
2573 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2574 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
2575 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
2576 ; GFX9-DL-NEXT: v_dot2_u32_u16 v1, s2, v2, v1
2577 ; GFX9-DL-NEXT: global_store_short v0, v1, s[0:1]
2578 ; GFX9-DL-NEXT: s_endpgm
2580 ; GFX10-DL-LABEL: udot2_acc16:
2581 ; GFX10-DL: ; %bb.0: ; %entry
2582 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
2583 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
2584 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2585 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2586 ; GFX10-DL-NEXT: global_load_ushort v1, v0, s[4:5]
2587 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
2588 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
2589 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2590 ; GFX10-DL-NEXT: v_dot2_u32_u16 v1, s0, s1, v1
2591 ; GFX10-DL-NEXT: global_store_short v0, v1, s[4:5]
2592 ; GFX10-DL-NEXT: s_endpgm
2696 ; GFX9-DL-LABEL: notsdot2_sext8:
2697 ; GFX9-DL: ; %bb.0: ; %entry
2698 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
2699 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
2700 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
2701 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2702 ; GFX9-DL-NEXT: global_load_ushort v1, v0, s[4:5]
2703 ; GFX9-DL-NEXT: global_load_ushort v2, v0, s[6:7]
2704 ; GFX9-DL-NEXT: s_load_dword s2, s[0:1], 0x0
2705 ; GFX9-DL-NEXT: s_waitcnt vmcnt(1)
2706 ; GFX9-DL-NEXT: v_bfe_i32 v3, v1, 0, 8
2707 ; GFX9-DL-NEXT: v_lshrrev_b16_e32 v1, 8, v1
2708 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
2709 ; GFX9-DL-NEXT: v_bfe_i32 v4, v2, 0, 8
2710 ; GFX9-DL-NEXT: v_lshrrev_b16_e32 v2, 8, v2
2711 ; GFX9-DL-NEXT: v_bfe_i32 v1, v1, 0, 8
2712 ; GFX9-DL-NEXT: v_bfe_i32 v2, v2, 0, 8
2713 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2714 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, v2, v1, s2
2715 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, v4, v3, v1
2716 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
2717 ; GFX9-DL-NEXT: s_endpgm
2719 ; GFX10-DL-LABEL: notsdot2_sext8:
2720 ; GFX10-DL: ; %bb.0: ; %entry
2721 ; GFX10-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
2722 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
2723 ; GFX10-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
2724 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2725 ; GFX10-DL-NEXT: s_clause 0x1
2726 ; GFX10-DL-NEXT: global_load_ushort v1, v0, s[4:5]
2727 ; GFX10-DL-NEXT: global_load_ushort v2, v0, s[6:7]
2728 ; GFX10-DL-NEXT: s_load_dword s2, s[0:1], 0x0
2729 ; GFX10-DL-NEXT: s_waitcnt vmcnt(1)
2730 ; GFX10-DL-NEXT: v_lshrrev_b16_e64 v3, 8, v1
2731 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
2732 ; GFX10-DL-NEXT: v_lshrrev_b16_e64 v4, 8, v2
2733 ; GFX10-DL-NEXT: v_bfe_i32 v1, v1, 0, 8
2734 ; GFX10-DL-NEXT: v_bfe_i32 v2, v2, 0, 8
2735 ; GFX10-DL-NEXT: v_bfe_i32 v3, v3, 0, 8
2736 ; GFX10-DL-NEXT: v_bfe_i32 v4, v4, 0, 8
2737 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2738 ; GFX10-DL-NEXT: v_mad_i32_i24 v3, v4, v3, s2
2739 ; GFX10-DL-NEXT: v_mad_i32_i24 v1, v2, v1, v3
2740 ; GFX10-DL-NEXT: global_store_dword v0, v1, s[0:1]
2741 ; GFX10-DL-NEXT: s_endpgm