# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s

# Every test in this file materializes the frame index %stack.0 with a
# V_MOV_B32_e32 and uses the result as the address operand of a BUFFER_*
# instruction. The check lines record whether si-fold-operands replaces that
# use with %stack.0 directly, for different combinations of the rsrc and
# soffset operands.

# Kernels can have no FP
#
# Entry function. The load's rsrc operand is a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15 (not the declared scratchRSrcReg) and its
# soffset operand is a materialized 0 (not stackPtrOffsetReg).
# Expected: the 0 becomes an immediate operand of the load, while %stack.0
# stays in the V_MOV_B32_e32 and the load keeps using the VGPR.
---
name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
    ; Test input: %0 is the rsrc, %1 the soffset and %2 the frame-index address.
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %1:sreg_32_xm0 = S_MOV_B32 0
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...

# Entry function. The load's soffset operand is $sgpr32, which matches
# stackPtrOffsetReg, but its rsrc operand is a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15 rather than the declared scratchRSrcReg.
# Expected: nothing is folded; the checked output has the same instructions
# as the input.
---
name: kernel_no_fold_fi_non_stack_rsrc
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
    ; Test input: %0 is the rsrc and %2 the frame-index address (%1 is unused).
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...

# Entry function. The store and load use $sgpr0_sgpr1_sgpr2_sgpr3 as rsrc,
# which matches scratchRSrcReg, but their soffset operand is a materialized 0
# (%2) rather than stackPtrOffsetReg ($sgpr32).
# Expected: the 0 becomes an immediate soffset on both instructions, while
# %stack.0 stays in the V_MOV_B32_e32 and both accesses keep using the VGPR.
---
name: kernel_no_fold_fi_non_stack_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_soffset
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    ; Test input: %0 is the frame-index address, %1 the stored value (7) and
    ; %2 the zero soffset shared by the store and the load.
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    %2:sreg_32_xm0 = S_MOV_B32 0

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, 0, 0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    S_ENDPGM 0, implicit $vgpr0

...

# Entry function. The store and load use $sgpr0_sgpr1_sgpr2_sgpr3 as rsrc
# (matches scratchRSrcReg) and $sgpr32 as soffset (matches stackPtrOffsetReg).
# Expected: %stack.0 is folded directly into the address operand of both
# instructions, and the V_MOV_B32_e32 of %stack.0 no longer appears in the
# checked output.
---
name: kernel_fold_fi_mubuf
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: kernel_fold_fi_mubuf
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    ; Test input: %0 is the frame-index address and %1 the stored value (7).
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...


# Functions have an unswizzled SP/FP relative to the wave offset
#
# The remaining tests set isEntryFunction to false and declare $sgpr32 as
# both frameOffsetReg and stackPtrOffsetReg.
#
# Non-entry function. The load's rsrc operand is a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15 (not the declared scratchRSrcReg) and its
# soffset operand is a materialized 0 (not $sgpr32).
# Expected: the 0 becomes an immediate operand of the load, while %stack.0
# stays in the V_MOV_B32_e32 and the load keeps using the VGPR.
---
name: function_no_fold_fi_non_stack_rsrc_and_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc_and_soffset
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
    ; Test input: %0 is the rsrc, %1 the soffset and %2 the frame-index address.
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %1:sreg_32_xm0 = S_MOV_B32 0
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...

# Non-entry function. The load's soffset operand is $sgpr32, which matches
# stackPtrOffsetReg and frameOffsetReg, but its rsrc operand is a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15 rather than the declared scratchRSrcReg.
# Expected: nothing is folded; the checked output has the same instructions
# as the input.
---
name: function_no_fold_fi_non_stack_rsrc
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
    ; Test input: %0 is the rsrc and %2 the frame-index address (%1 is unused).
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...

# Non-entry function. The store and load use $sgpr0_sgpr1_sgpr2_sgpr3 as rsrc
# (matches scratchRSrcReg) and an immediate 0 as soffset.
# Expected per the check lines: %stack.0 is folded into the address operand
# of both instructions and the soffset operand is rewritten from 0 to $sgpr32.
#
# NOTE(review): the name says "no_fold", yet the check lines expect the fold.
# Unlike kernel_no_fold_fi_non_stack_soffset, the input here uses a literal 0
# rather than an S_MOV_B32 result, which makes the body identical to
# function_fold_fi_mubuf_wave_relative. Confirm whether this test is meant to
# use a different soffset, or should be renamed.
---
name: function_no_fold_fi_non_stack_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_soffset
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    ; Test input: %0 is the frame-index address and %1 the stored value (7).
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...

# Non-entry function. The store and load use $sgpr0_sgpr1_sgpr2_sgpr3 as rsrc
# (matches scratchRSrcReg) and an immediate 0 as soffset.
# Expected: %stack.0 is folded into the address operand of both instructions
# and the soffset operand is rewritten from 0 to $sgpr32, the register
# declared as stackPtrOffsetReg and frameOffsetReg.
---
name: function_fold_fi_mubuf_wave_relative
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_fold_fi_mubuf_wave_relative
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    ; Test input: %0 is the frame-index address and %1 the stored value (7).
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...

# Non-entry function. The store and load use $sgpr0_sgpr1_sgpr2_sgpr3 as rsrc
# (matches scratchRSrcReg) and already use $sgpr32 as soffset (matches
# stackPtrOffsetReg and frameOffsetReg).
# Expected: %stack.0 is folded into the address operand of both instructions
# and the soffset operand is left as $sgpr32.
---
name: function_fold_fi_mubuf_stack_relative
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_fold_fi_mubuf_stack_relative
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    ; Test input: %0 is the frame-index address and %1 the stored value (7).
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...