# RUN: llc -march=amdgcn -mcpu=fiji -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s

# Tests for dst_unused:UNUSED_PRESERVE handling in the si-peephole-sdwa pass.
# Both invocations above (fiji and gfx900) share one set of expectations.

# Test: an f16 add and an f32 mul whose shifted results are OR-ed together.
# The expected output has no separate shift/or: the mul writes WORD_1 with
# UNUSED_PAD, and the add then writes BYTE_1 of the same register with
# UNUSED_PRESERVE so the mul result survives.

# SDWA-LABEL: {{^}}add_f16_u32_preserve

# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]

# SDWA: v_mul_f32_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_3
# SDWA: v_add_f16_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_1

# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[RES]]

---
name:            add_f16_u32_preserve
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: vgpr_32 }
  - { id: 5, class: vgpr_32 }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: vgpr_32 }
  - { id: 8, class: vgpr_32 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
body:             |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    %2 = COPY $sgpr30_sgpr31
    %1 = COPY $vgpr2_vgpr3
    %0 = COPY $vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)

    ; Operand extraction: low 16 bits of %3, %4 shifted right by 16,
    ; an 8/8 bitfield extract of %3, and %4 shifted right by 24.
    %5 = V_AND_B32_e32 65535, %3, implicit $exec
    %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
    %7 = V_BFE_U32 %3, 8, 8, implicit $exec
    %8 = V_LSHRREV_B32_e32 24, %4, implicit $exec

    ; Add result is shifted left by 8, mul result is shifted left by 16.
    %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $exec
    %10 = V_LSHLREV_B16_e64 8, %9, implicit $exec
    %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $exec
    %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec

    ; Combine both partial results into the single stored dword.
    %13 = V_OR_B32_e64 %10, %12, implicit $exec

    FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
    $sgpr30_sgpr31 = COPY %2
    S_SETPC_B64_return $sgpr30_sgpr31

...
---
# Test: the input already contains a mov with a tied "preserve" operand
# (%11), produced by an AND of the first load with mask 255. The expected
# output still contains the v_and with 0xff, and the mov writes into the
# AND result rather than into the loaded register.

# SDWA-LABEL: sdwa_preserve_keep
# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]

# SDWA: v_and_b32_e32 [[AND:v[0-9]+]], 0xff, [[FIRST]]
# SDWA: v_mov_b32_sdwa [[AND]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0

# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[AND]]

name:            sdwa_preserve_keep
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: vgpr_32 }
  - { id: 5, class: sreg_32_xm0_xexec }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: vgpr_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
body:             |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    %2 = COPY $sgpr30_sgpr31
    %1 = COPY $vgpr2_vgpr3
    %0 = COPY $vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)

    ; %9 is defined here but not read by any later instruction in this function.
    %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
    ; Mask 255 keeps only the low byte of %3.
    %10:sreg_32_xm0 = S_MOV_B32 255
    %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
    ; The immediates 5, 2, 4 line up with dst_sel:WORD_1, dst_unused:UNUSED_PRESERVE
    ; and src0_sel:WORD_0 in the expected output (operand order assumed - confirm
    ; against the instruction definition). %11 is tied to the destination.
    %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
    FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
    S_ENDPGM

...
---
# Test: same shape as sdwa_preserve_keep in this file, but the AND mask is
# 65535 instead of 255. The expected output has the mov write directly into
# the register of the first load and stores that register, i.e. the AND
# result is no longer the preserved operand.

# SDWA-LABEL: sdwa_preserve_remove
# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]

# SDWA: v_mov_b32_sdwa [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0

# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[FIRST]]

name:            sdwa_preserve_remove
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: vgpr_32 }
  - { id: 5, class: sreg_32_xm0_xexec }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: vgpr_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
body:             |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    %2 = COPY $sgpr30_sgpr31
    %1 = COPY $vgpr2_vgpr3
    %0 = COPY $vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)

    ; %9 is defined here but not read by any later instruction in this function.
    %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
    ; Mask 65535 keeps the whole low 16 bits of %3.
    %10:sreg_32_xm0 = S_MOV_B32 65535
    %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
    ; The immediates 5, 2, 4 line up with dst_sel:WORD_1, dst_unused:UNUSED_PRESERVE
    ; and src0_sel:WORD_0 in the expected output (operand order assumed - confirm
    ; against the instruction definition). %11 is tied to the destination.
    %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
    FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
    S_ENDPGM

...