• Home
  • Raw
  • Download

Lines Matching refs:DL

5 … llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-DL %s
6 …lc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL %s
7 …lc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-DL %s
105 ; GFX9-DL-LABEL: udot4_acc32:
106 ; GFX9-DL: ; %bb.0: ; %entry
107 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
108 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
109 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
110 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
111 ; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0
112 ; GFX9-DL-NEXT: s_load_dword s3, s[0:1], 0x0
113 ; GFX9-DL-NEXT: s_load_dword s4, s[4:5], 0x0
114 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
115 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2
116 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
117 ; GFX9-DL-NEXT: v_dot4_u32_u8 v1, s4, v1, v2
118 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
119 ; GFX9-DL-NEXT: s_endpgm
121 ; GFX10-DL-LABEL: udot4_acc32:
122 ; GFX10-DL: ; %bb.0: ; %entry
123 ; GFX10-DL-NEXT: s_clause 0x1
124 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
125 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
126 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
127 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
128 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
129 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
130 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
131 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
132 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
133 ; GFX10-DL-NEXT: v_dot4_u32_u8 v0, s0, s1, v0
134 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
135 ; GFX10-DL-NEXT: s_endpgm
272 ; GFX9-DL-LABEL: udot4_acc16:
273 ; GFX9-DL: ; %bb.0: ; %entry
274 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
275 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
276 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
277 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
278 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
279 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
280 ; GFX9-DL-NEXT: global_load_ushort v1, v0, s[0:1]
281 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
282 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
283 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
284 ; GFX9-DL-NEXT: v_dot4_u32_u8 v1, s2, v2, v1
285 ; GFX9-DL-NEXT: global_store_short v0, v1, s[0:1]
286 ; GFX9-DL-NEXT: s_endpgm
288 ; GFX10-DL-LABEL: udot4_acc16:
289 ; GFX10-DL: ; %bb.0: ; %entry
290 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
291 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
292 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
293 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
294 ; GFX10-DL-NEXT: global_load_ushort v1, v0, s[4:5]
295 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
296 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
297 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
298 ; GFX10-DL-NEXT: v_dot4_u32_u8 v1, s0, s1, v1
299 ; GFX10-DL-NEXT: global_store_short v0, v1, s[4:5]
300 ; GFX10-DL-NEXT: s_endpgm
437 ; GFX9-DL-LABEL: udot4_acc8:
438 ; GFX9-DL: ; %bb.0: ; %entry
439 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
440 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
441 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
442 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
443 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
444 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
445 ; GFX9-DL-NEXT: global_load_ubyte v1, v0, s[0:1]
446 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
447 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
448 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
449 ; GFX9-DL-NEXT: v_dot4_u32_u8 v1, s2, v2, v1
450 ; GFX9-DL-NEXT: global_store_byte v0, v1, s[0:1]
451 ; GFX9-DL-NEXT: s_endpgm
453 ; GFX10-DL-LABEL: udot4_acc8:
454 ; GFX10-DL: ; %bb.0: ; %entry
455 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
456 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
457 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
458 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
459 ; GFX10-DL-NEXT: global_load_ubyte v1, v0, s[4:5]
460 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
461 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
462 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
463 ; GFX10-DL-NEXT: v_dot4_u32_u8 v1, s0, s1, v1
464 ; GFX10-DL-NEXT: global_store_byte v0, v1, s[4:5]
465 ; GFX10-DL-NEXT: s_endpgm
571 ; GFX9-DL-LABEL: udot2_8:
572 ; GFX9-DL: ; %bb.0: ; %entry
573 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
574 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
575 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
576 ; GFX9-DL-NEXT: s_movk_i32 s2, 0xff
577 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
578 ; GFX9-DL-NEXT: global_load_ubyte v1, v0, s[0:1]
579 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
580 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
581 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
582 ; GFX9-DL-NEXT: s_and_b32 s5, s4, s2
583 ; GFX9-DL-NEXT: s_and_b32 s2, s3, s2
584 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s5
585 ; GFX9-DL-NEXT: s_bfe_u32 s4, s4, 0x80008
586 ; GFX9-DL-NEXT: s_bfe_u32 s3, s3, 0x80008
587 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
588 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s2, v2, v1
589 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s4
590 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s3, v2, v1
591 ; GFX9-DL-NEXT: global_store_byte v0, v1, s[0:1]
592 ; GFX9-DL-NEXT: s_endpgm
594 ; GFX10-DL-LABEL: udot2_8:
595 ; GFX10-DL: ; %bb.0: ; %entry
596 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
597 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
598 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
599 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
600 ; GFX10-DL-NEXT: global_load_ubyte v1, v0, s[4:5]
601 ; GFX10-DL-NEXT: s_load_dword s2, s[2:3], 0x0
602 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
603 ; GFX10-DL-NEXT: s_movk_i32 s1, 0xff
604 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
605 ; GFX10-DL-NEXT: s_and_b32 s3, s2, s1
606 ; GFX10-DL-NEXT: s_and_b32 s1, s0, s1
607 ; GFX10-DL-NEXT: s_bfe_u32 s0, s0, 0x80008
608 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
609 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s1, s3, v1
610 ; GFX10-DL-NEXT: s_bfe_u32 s1, s2, 0x80008
611 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s0, s1, v1
612 ; GFX10-DL-NEXT: global_store_byte v0, v1, s[4:5]
613 ; GFX10-DL-NEXT: s_endpgm
731 ; GFX9-DL-LABEL: udot4_CommutationInsideMAD:
732 ; GFX9-DL: ; %bb.0: ; %entry
733 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
734 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
735 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
736 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
737 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
738 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
739 ; GFX9-DL-NEXT: global_load_ubyte v1, v0, s[0:1]
740 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
741 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2
742 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
743 ; GFX9-DL-NEXT: v_dot4_u32_u8 v1, s3, v2, v1
744 ; GFX9-DL-NEXT: global_store_byte v0, v1, s[0:1]
745 ; GFX9-DL-NEXT: s_endpgm
747 ; GFX10-DL-LABEL: udot4_CommutationInsideMAD:
748 ; GFX10-DL: ; %bb.0: ; %entry
749 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
750 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
751 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
752 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
753 ; GFX10-DL-NEXT: global_load_ubyte v1, v0, s[4:5]
754 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
755 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
756 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
757 ; GFX10-DL-NEXT: v_dot4_u32_u8 v1, s1, s0, v1
758 ; GFX10-DL-NEXT: global_store_byte v0, v1, s[4:5]
759 ; GFX10-DL-NEXT: s_endpgm
889 ; GFX9-DL-LABEL: udot4_CommutationAccrossMADs:
890 ; GFX9-DL: ; %bb.0: ; %entry
891 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
892 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
893 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
894 ; GFX9-DL-NEXT: s_movk_i32 s2, 0xff
895 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
896 ; GFX9-DL-NEXT: global_load_ubyte v1, v0, s[0:1]
897 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
898 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
899 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
900 ; GFX9-DL-NEXT: s_bfe_u32 s6, s3, 0x80008
901 ; GFX9-DL-NEXT: s_and_b32 s5, s3, s2
902 ; GFX9-DL-NEXT: s_bfe_u32 s7, s4, 0x80008
903 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6
904 ; GFX9-DL-NEXT: s_bfe_u32 s8, s3, 0x80010
905 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
906 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s5
907 ; GFX9-DL-NEXT: s_bfe_u32 s9, s4, 0x80010
908 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24
909 ; GFX9-DL-NEXT: v_mov_b32_e32 v4, s8
910 ; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 24
911 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
912 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s7, v2, v1
913 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s2, v3, v1
914 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s9, v4, v1
915 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
916 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s4, v2, v1
917 ; GFX9-DL-NEXT: global_store_byte v0, v1, s[0:1]
918 ; GFX9-DL-NEXT: s_endpgm
920 ; GFX10-DL-LABEL: udot4_CommutationAccrossMADs:
921 ; GFX10-DL: ; %bb.0: ; %entry
922 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
923 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
924 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
925 ; GFX10-DL-NEXT: s_movk_i32 s6, 0xff
926 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
927 ; GFX10-DL-NEXT: global_load_ubyte v1, v0, s[4:5]
928 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
929 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
930 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
931 ; GFX10-DL-NEXT: s_bfe_u32 s2, s0, 0x80008
932 ; GFX10-DL-NEXT: s_bfe_u32 s3, s1, 0x80008
933 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
934 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s3, s2, v1
935 ; GFX10-DL-NEXT: s_and_b32 s2, s0, s6
936 ; GFX10-DL-NEXT: s_and_b32 s3, s1, s6
937 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s3, s2, v1
938 ; GFX10-DL-NEXT: s_bfe_u32 s2, s0, 0x80010
939 ; GFX10-DL-NEXT: s_bfe_u32 s3, s1, 0x80010
940 ; GFX10-DL-NEXT: s_lshr_b32 s0, s0, 24
941 ; GFX10-DL-NEXT: s_lshr_b32 s1, s1, 24
942 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s3, s2, v1
943 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s1, s0, v1
944 ; GFX10-DL-NEXT: global_store_byte v0, v1, s[4:5]
945 ; GFX10-DL-NEXT: s_endpgm
1077 ; GFX9-DL-LABEL: udot4_multiuse_mul1:
1078 ; GFX9-DL: ; %bb.0: ; %entry
1079 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1080 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1081 ; GFX9-DL-NEXT: s_movk_i32 s2, 0xff
1082 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1083 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1084 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
1085 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
1086 ; GFX9-DL-NEXT: s_load_dword s10, s[0:1], 0x0
1087 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1088 ; GFX9-DL-NEXT: s_and_b32 s5, s3, s2
1089 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
1090 ; GFX9-DL-NEXT: s_bfe_u32 s7, s4, 0x80008
1091 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2
1092 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s10
1093 ; GFX9-DL-NEXT: s_bfe_u32 s6, s3, 0x80008
1094 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s5, v1, v2
1095 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s7
1096 ; GFX9-DL-NEXT: s_bfe_u32 s9, s4, 0x80010
1097 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s6, v3, v2
1098 ; GFX9-DL-NEXT: s_bfe_u32 s8, s3, 0x80010
1099 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s5, v1, v2
1100 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s9
1101 ; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 24
1102 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s8, v2, v1
1103 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24
1104 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s4
1105 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s3, v2, v1
1106 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1107 ; GFX9-DL-NEXT: s_endpgm
1109 ; GFX10-DL-LABEL: udot4_multiuse_mul1:
1110 ; GFX10-DL: ; %bb.0: ; %entry
1111 ; GFX10-DL-NEXT: s_clause 0x1
1112 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1113 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1114 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
1115 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1116 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1117 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
1118 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
1119 ; GFX10-DL-NEXT: s_movk_i32 s2, 0xff
1120 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1121 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1122 ; GFX10-DL-NEXT: s_and_b32 s3, s0, s2
1123 ; GFX10-DL-NEXT: s_and_b32 s2, s1, s2
1124 ; GFX10-DL-NEXT: s_bfe_u32 s6, s0, 0x80008
1125 ; GFX10-DL-NEXT: s_bfe_u32 s7, s1, 0x80008
1126 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s2, v0
1127 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s6, s7, v0
1128 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s3, s2, v0
1129 ; GFX10-DL-NEXT: s_bfe_u32 s2, s0, 0x80010
1130 ; GFX10-DL-NEXT: s_bfe_u32 s3, s1, 0x80010
1131 ; GFX10-DL-NEXT: s_lshr_b32 s0, s0, 24
1132 ; GFX10-DL-NEXT: s_lshr_b32 s1, s1, 24
1133 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s2, s3, v0
1134 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s0, s1, v0
1135 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[4:5]
1136 ; GFX10-DL-NEXT: s_endpgm
1280 ; GFX9-DL-LABEL: udot4_multiuse_add1:
1281 ; GFX9-DL: ; %bb.0: ; %entry
1282 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1283 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1284 ; GFX9-DL-NEXT: s_movk_i32 s2, 0xff
1285 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1286 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1287 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
1288 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
1289 ; GFX9-DL-NEXT: s_load_dword s10, s[0:1], 0x0
1290 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1291 ; GFX9-DL-NEXT: s_and_b32 s5, s3, s2
1292 ; GFX9-DL-NEXT: s_bfe_u32 s7, s4, 0x80008
1293 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
1294 ; GFX9-DL-NEXT: s_bfe_u32 s6, s3, 0x80008
1295 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s7
1296 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s10
1297 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s6, v1, v2
1298 ; GFX9-DL-NEXT: s_bfe_u32 s9, s4, 0x80010
1299 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s2
1300 ; GFX9-DL-NEXT: s_bfe_u32 s8, s3, 0x80010
1301 ; GFX9-DL-NEXT: v_add_u32_e32 v2, s10, v1
1302 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s5, v3, v1
1303 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s9
1304 ; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 24
1305 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s8, v3, v1
1306 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24
1307 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4
1308 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s3, v3, v1
1309 ; GFX9-DL-NEXT: v_add_u32_e32 v1, v1, v2
1310 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1311 ; GFX9-DL-NEXT: s_endpgm
1313 ; GFX10-DL-LABEL: udot4_multiuse_add1:
1314 ; GFX10-DL: ; %bb.0: ; %entry
1315 ; GFX10-DL-NEXT: s_clause 0x1
1316 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1317 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1318 ; GFX10-DL-NEXT: s_movk_i32 s7, 0xff
1319 ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0
1320 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1321 ; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0
1322 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
1323 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
1324 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1325 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6
1326 ; GFX10-DL-NEXT: s_bfe_u32 s2, s0, 0x80008
1327 ; GFX10-DL-NEXT: s_bfe_u32 s3, s1, 0x80008
1328 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s2, s3, v0
1329 ; GFX10-DL-NEXT: s_and_b32 s2, s0, s7
1330 ; GFX10-DL-NEXT: s_and_b32 s3, s1, s7
1331 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s2, s3, v0
1332 ; GFX10-DL-NEXT: s_bfe_u32 s2, s0, 0x80010
1333 ; GFX10-DL-NEXT: s_bfe_u32 s3, s1, 0x80010
1334 ; GFX10-DL-NEXT: s_lshr_b32 s0, s0, 24
1335 ; GFX10-DL-NEXT: s_lshr_b32 s1, s1, 24
1336 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s2, s3, v1
1337 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v0, s6, v0
1338 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s0, s1, v1
1339 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v0, v1, v0
1340 ; GFX10-DL-NEXT: global_store_dword v2, v0, s[4:5]
1341 ; GFX10-DL-NEXT: s_endpgm
1479 ; GFX9-DL-LABEL: notdot4_mixedtypes:
1480 ; GFX9-DL: ; %bb.0: ; %entry
1481 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1482 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1483 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1484 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1485 ; GFX9-DL-NEXT: global_load_ushort v1, v0, s[0:1]
1486 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
1487 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
1488 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1489 ; GFX9-DL-NEXT: s_bfe_u32 s6, s2, 0x80008
1490 ; GFX9-DL-NEXT: s_bfe_u32 s7, s3, 0x80008
1491 ; GFX9-DL-NEXT: s_sext_i32_i8 s5, s3
1492 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s7
1493 ; GFX9-DL-NEXT: s_bfe_u32 s9, s3, 0x80010
1494 ; GFX9-DL-NEXT: s_sext_i32_i8 s4, s2
1495 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s5
1496 ; GFX9-DL-NEXT: s_bfe_u32 s8, s2, 0x80010
1497 ; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24
1498 ; GFX9-DL-NEXT: v_mov_b32_e32 v4, s9
1499 ; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 24
1500 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
1501 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s6, v2, v1
1502 ; GFX9-DL-NEXT: v_mad_i32_i24 v1, s4, v3, v1
1503 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s8, v4, v1
1504 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
1505 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s2, v2, v1
1506 ; GFX9-DL-NEXT: global_store_short v0, v1, s[0:1]
1507 ; GFX9-DL-NEXT: s_endpgm
1509 ; GFX10-DL-LABEL: notdot4_mixedtypes:
1510 ; GFX10-DL: ; %bb.0: ; %entry
1511 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1512 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
1513 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1514 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1515 ; GFX10-DL-NEXT: global_load_ushort v1, v0, s[4:5]
1516 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
1517 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
1518 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1519 ; GFX10-DL-NEXT: s_bfe_u32 s2, s0, 0x80008
1520 ; GFX10-DL-NEXT: s_bfe_u32 s3, s1, 0x80008
1521 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
1522 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s2, s3, v1
1523 ; GFX10-DL-NEXT: s_sext_i32_i8 s2, s0
1524 ; GFX10-DL-NEXT: s_sext_i32_i8 s3, s1
1525 ; GFX10-DL-NEXT: v_mad_i32_i24 v1, s2, s3, v1
1526 ; GFX10-DL-NEXT: s_bfe_u32 s2, s0, 0x80010
1527 ; GFX10-DL-NEXT: s_bfe_u32 s3, s1, 0x80010
1528 ; GFX10-DL-NEXT: s_lshr_b32 s0, s0, 24
1529 ; GFX10-DL-NEXT: s_lshr_b32 s1, s1, 24
1530 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s2, s3, v1
1531 ; GFX10-DL-NEXT: v_mad_u32_u24 v1, s0, s1, v1
1532 ; GFX10-DL-NEXT: global_store_short v0, v1, s[4:5]
1533 ; GFX10-DL-NEXT: s_endpgm
1672 ; GFX9-DL-LABEL: udot4_acc32_vecMul:
1673 ; GFX9-DL: ; %bb.0: ; %entry
1674 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1675 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1676 ; GFX9-DL-NEXT: s_movk_i32 s2, 0xff
1677 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1678 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1679 ; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0
1680 ; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0
1681 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1682 ; GFX9-DL-NEXT: s_lshr_b32 s5, s3, 24
1683 ; GFX9-DL-NEXT: s_lshr_b32 s6, s4, 24
1684 ; GFX9-DL-NEXT: s_bfe_u32 s7, s3, 0x80010
1685 ; GFX9-DL-NEXT: v_lshrrev_b16_e64 v1, 8, s3
1686 ; GFX9-DL-NEXT: s_and_b32 s3, s3, s2
1687 ; GFX9-DL-NEXT: s_and_b32 s2, s4, s2
1688 ; GFX9-DL-NEXT: s_bfe_u32 s8, s4, 0x80010
1689 ; GFX9-DL-NEXT: v_lshrrev_b16_e64 v2, 8, s4
1690 ; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0
1691 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s2
1692 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1693 ; GFX9-DL-NEXT: v_mov_b32_e32 v4, s4
1694 ; GFX9-DL-NEXT: v_mad_u32_u24 v3, s3, v3, v4
1695 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, v1, v2, v3
1696 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s8
1697 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s7, v2, v1
1698 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6
1699 ; GFX9-DL-NEXT: v_mad_u32_u24 v1, s5, v2, v1
1700 ; GFX9-DL-NEXT: global_store_dword v0, v1, s[0:1]
1701 ; GFX9-DL-NEXT: s_endpgm
1703 ; GFX10-DL-LABEL: udot4_acc32_vecMul:
1704 ; GFX10-DL: ; %bb.0: ; %entry
1705 ; GFX10-DL-NEXT: s_clause 0x1
1706 ; GFX10-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1707 ; GFX10-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1708 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1709 ; GFX10-DL-NEXT: s_load_dword s2, s[4:5], 0x0
1710 ; GFX10-DL-NEXT: s_load_dword s3, s[6:7], 0x0
1711 ; GFX10-DL-NEXT: s_load_dword s4, s[0:1], 0x0
1712 ; GFX10-DL-NEXT: s_movk_i32 s6, 0xff
1713 ; GFX10-DL-NEXT: s_mov_b32 s5, 0xffff
1714 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1715 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, s2
1716 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, s3
1717 ; GFX10-DL-NEXT: v_mov_b32_e32 v2, s4
1718 ; GFX10-DL-NEXT: s_and_b32 s4, s2, s6
1719 ; GFX10-DL-NEXT: s_and_b32 s6, s3, s6
1720 ; GFX10-DL-NEXT: v_and_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD sr…
1721 ; GFX10-DL-NEXT: v_and_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD sr…
1722 ; GFX10-DL-NEXT: v_mad_u32_u24 v2, s4, s6, v2
1723 ; GFX10-DL-NEXT: s_bfe_u32 s4, s2, 0x80010
1724 ; GFX10-DL-NEXT: s_bfe_u32 s5, s3, 0x80010
1725 ; GFX10-DL-NEXT: s_lshr_b32 s2, s2, 24
1726 ; GFX10-DL-NEXT: s_lshr_b32 s3, s3, 24
1727 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, v0, v1, v2
1728 ; GFX10-DL-NEXT: v_mov_b32_e32 v1, 0
1729 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s4, s5, v0
1730 ; GFX10-DL-NEXT: v_mad_u32_u24 v0, s2, s3, v0
1731 ; GFX10-DL-NEXT: global_store_dword v1, v0, s[0:1]
1732 ; GFX10-DL-NEXT: s_endpgm
1858 ; GFX9-DL-LABEL: udot4_acc16_vecMul:
1859 ; GFX9-DL: ; %bb.0: ; %entry
1860 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1861 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
1862 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, 0xffff
1863 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
1864 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1865 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
1866 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
1867 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
1868 ; GFX9-DL-NEXT: s_lshr_b32 s5, s2, 16
1869 ; GFX9-DL-NEXT: s_lshr_b32 s7, s3, 16
1870 ; GFX9-DL-NEXT: s_lshr_b32 s4, s2, 24
1871 ; GFX9-DL-NEXT: v_and_b32_sdwa v5, v3, s5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src…
1872 ; GFX9-DL-NEXT: s_lshr_b32 s6, s3, 24
1873 ; GFX9-DL-NEXT: v_and_b32_sdwa v4, v3, s7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src…
1874 ; GFX9-DL-NEXT: v_lshl_or_b32 v4, s6, 16, v4
1875 ; GFX9-DL-NEXT: v_lshl_or_b32 v5, s4, 16, v5
1876 ; GFX9-DL-NEXT: v_pk_mul_lo_u16 v4, v5, v4
1877 ; GFX9-DL-NEXT: v_and_b32_sdwa v5, v3, s3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src…
1878 ; GFX9-DL-NEXT: v_lshrrev_b16_e64 v2, 8, s3
1879 ; GFX9-DL-NEXT: v_lshrrev_b16_e64 v1, 8, s2
1880 ; GFX9-DL-NEXT: v_and_b32_sdwa v3, v3, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src…
1881 ; GFX9-DL-NEXT: v_lshl_or_b32 v2, v2, 16, v5
1882 ; GFX9-DL-NEXT: v_lshl_or_b32 v1, v1, 16, v3
1883 ; GFX9-DL-NEXT: v_pk_mul_lo_u16 v1, v1, v2
1884 ; GFX9-DL-NEXT: global_load_ushort v2, v0, s[0:1]
1885 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
1886 ; GFX9-DL-NEXT: v_add_u32_e32 v2, v1, v2
1887 ; GFX9-DL-NEXT: v_add_u32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src…
1888 ; GFX9-DL-NEXT: v_add_u32_e32 v1, v1, v4
1889 ; GFX9-DL-NEXT: v_add_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src…
1890 ; GFX9-DL-NEXT: global_store_short v0, v1, s[0:1]
1891 ; GFX9-DL-NEXT: s_endpgm
1893 ; GFX10-DL-LABEL: udot4_acc16_vecMul:
1894 ; GFX10-DL: ; %bb.0: ; %entry
1895 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
1896 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
1897 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1898 ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0xffff
1899 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1900 ; GFX10-DL-NEXT: global_load_ushort v1, v0, s[4:5]
1901 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
1902 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
1903 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
1904 ; GFX10-DL-NEXT: v_lshrrev_b16_e64 v3, 8, s0
1905 ; GFX10-DL-NEXT: v_and_b32_sdwa v6, v2, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD sr…
1906 ; GFX10-DL-NEXT: v_lshrrev_b16_e64 v4, 8, s1
1907 ; GFX10-DL-NEXT: v_and_b32_sdwa v5, v2, s1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD sr…
1908 ; GFX10-DL-NEXT: s_lshr_b32 s2, s1, 16
1909 ; GFX10-DL-NEXT: s_lshr_b32 s3, s0, 16
1910 ; GFX10-DL-NEXT: v_lshl_or_b32 v3, v3, 16, v6
1911 ; GFX10-DL-NEXT: s_lshr_b32 s1, s1, 24
1912 ; GFX10-DL-NEXT: v_lshl_or_b32 v4, v4, 16, v5
1913 ; GFX10-DL-NEXT: v_and_b32_sdwa v5, v2, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD sr…
1914 ; GFX10-DL-NEXT: v_and_b32_sdwa v2, v2, s3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD sr…
1915 ; GFX10-DL-NEXT: s_lshr_b32 s0, s0, 24
1916 ; GFX10-DL-NEXT: v_pk_mul_lo_u16 v3, v3, v4
1917 ; GFX10-DL-NEXT: v_lshl_or_b32 v4, s1, 16, v5
1918 ; GFX10-DL-NEXT: v_lshl_or_b32 v2, s0, 16, v2
1919 ; GFX10-DL-NEXT: v_pk_mul_lo_u16 v2, v2, v4
1920 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
1921 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v1, v3, v1
1922 ; GFX10-DL-NEXT: v_add_nc_u32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD…
1923 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v1, v1, v2
1924 ; GFX10-DL-NEXT: v_add_nc_u32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD…
1925 ; GFX10-DL-NEXT: global_store_short v0, v1, s[4:5]
1926 ; GFX10-DL-NEXT: s_endpgm
2074 ; GFX9-DL-LABEL: udot4_acc8_vecMul:
2075 ; GFX9-DL: ; %bb.0: ; %entry
2076 ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
2077 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
2078 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0
2079 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2080 ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0
2081 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0
2082 ; GFX9-DL-NEXT: global_load_ubyte v4, v0, s[0:1]
2083 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0)
2084 ; GFX9-DL-NEXT: s_lshr_b32 s4, s2, 16
2085 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3
2086 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3
2087 ; GFX9-DL-NEXT: s_lshr_b32 s6, s3, 16
2088 ; GFX9-DL-NEXT: s_lshr_b32 s7, s3, 24
2089 ; GFX9-DL-NEXT: v_mul_lo_u16_e32 v1, s2, v1
2090 ; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v2, s2, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_…
2091 ; GFX9-DL-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src…
2092 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s6
2093 ; GFX9-DL-NEXT: s_lshr_b32 s5, s2, 24
2094 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s7
2095 ; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v2, s5, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD…
2096 ; GFX9-DL-NEXT: v_mul_lo_u16_e32 v3, s4, v3
2097 ; GFX9-DL-NEXT: v_and_b32_e32 v1, 0xffff, v1
2098 ; GFX9-DL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 sr…
2099 ; GFX9-DL-NEXT: v_or_b32_e32 v2, v1, v2
2100 ; GFX9-DL-NEXT: v_lshrrev_b32_e32 v3, 8, v2
2101 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
2102 ; GFX9-DL-NEXT: v_add_u32_e32 v1, v1, v4
2103 ; GFX9-DL-NEXT: v_add_u32_e32 v1, v1, v3
2104 ; GFX9-DL-NEXT: v_add_u32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src…
2105 ; GFX9-DL-NEXT: v_add_u32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src…
2106 ; GFX9-DL-NEXT: global_store_byte v0, v1, s[0:1]
2107 ; GFX9-DL-NEXT: s_endpgm
2109 ; GFX10-DL-LABEL: udot4_acc8_vecMul:
2110 ; GFX10-DL: ; %bb.0: ; %entry
2111 ; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
2112 ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
2113 ; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2114 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2115 ; GFX10-DL-NEXT: global_load_ubyte v1, v0, s[4:5]
2116 ; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0
2117 ; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0
2118 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
2119 ; GFX10-DL-NEXT: v_lshrrev_b16_e64 v2, 8, s0
2120 ; GFX10-DL-NEXT: v_lshrrev_b16_e64 v3, 8, s1
2121 ; GFX10-DL-NEXT: s_lshr_b32 s2, s0, 24
2122 ; GFX10-DL-NEXT: s_lshr_b32 s3, s1, 24
2123 ; GFX10-DL-NEXT: v_mul_lo_u16_e64 v4, s2, s3
2124 ; GFX10-DL-NEXT: v_mul_lo_u16_e64 v2, v2, v3
2125 ; GFX10-DL-NEXT: v_mul_lo_u16_e64 v3, s0, s1
2126 ; GFX10-DL-NEXT: s_lshr_b32 s0, s0, 16
2127 ; GFX10-DL-NEXT: s_lshr_b32 s1, s1, 16
2128 ; GFX10-DL-NEXT: v_lshlrev_b16_e64 v2, 8, v2
2129 ; GFX10-DL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 sr…
2130 ; GFX10-DL-NEXT: v_lshlrev_b16_e64 v3, 8, v4
2131 ; GFX10-DL-NEXT: v_mul_lo_u16_e64 v4, s0, s1
2132 ; GFX10-DL-NEXT: v_and_b32_e32 v2, 0xffff, v2
2133 ; GFX10-DL-NEXT: v_or_b32_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 s…
2134 ; GFX10-DL-NEXT: v_or_b32_e32 v3, v2, v3
2135 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v4, 8, v3
2136 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
2137 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v1, v2, v1
2138 ; GFX10-DL-NEXT: v_add_nc_u32_e32 v1, v1, v4
2139 ; GFX10-DL-NEXT: v_add_nc_u32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD…
2140 ; GFX10-DL-NEXT: v_add_nc_u32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD…
2141 ; GFX10-DL-NEXT: global_store_byte v0, v1, s[4:5]
2142 ; GFX10-DL-NEXT: s_endpgm