# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s -check-prefixes=CHECK,SI
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10
---
# CHECK-LABEL: name: vccz_corrupt_workaround
# CHECK: $vcc = V_CMP_EQ_F32
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc

name: vccz_corrupt_workaround
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $mode, implicit $exec
    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc

  bb.2:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 9, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.3

  bb.1:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 100, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec

  bb.3:
    liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# CHECK-LABEL: name: vccz_corrupt_undef_vcc
# CHECK: BUFFER_STORE_DWORD_OFFSET
# SI-NEXT: S_WAITCNT 3855
# CHECK-NEXT: $vgpr0 = V_MOV_B32_e32

name: vccz_corrupt_undef_vcc
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc

  bb.2:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 9, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.3

  bb.1:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 100, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec

  bb.3:
    liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Test that after reloading vcc spilled to a vgpr, we insert any necessary
# instructions to fix vccz.

# CHECK-LABEL: name: reload_vcc_from_vgpr
# CHECK: $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc
# CHECK: $vcc_hi = V_READLANE_B32 $vgpr0, 9
# SI: $vcc = S_MOV_B64 $vcc
# GFX9: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: reload_vcc_from_vgpr
body: |
  bb.0:
    $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc
    $vcc_hi = V_READLANE_B32 $vgpr0, 9
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after reloading vcc spilled to memory, we insert any necessary
# instructions to fix vccz.

# CHECK-LABEL: name: reload_vcc_from_mem
# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
# CHECK: $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
# CHECK: $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
# SI: $vcc = S_MOV_B64 $vcc
# GFX9: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: reload_vcc_from_mem
body: |
  bb.0:
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
    $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
    $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after inline asm that defines vcc_lo, we insert any necessary
# instructions to fix vccz.

# CHECK-LABEL: name: inlineasm_def_vcc_lo
# CHECK: INLINEASM &"; def vcc_lo", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc_lo
# SI: $vcc = S_MOV_B64 $vcc
# GFX9: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: inlineasm_def_vcc_lo
body: |
  bb.0:
    INLINEASM &"; def vcc_lo", 1, 10, implicit-def $vcc_lo
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after inline asm that defines vcc, no unnecessary instructions are
# inserted to fix vccz.

# CHECK-LABEL: name: inlineasm_def_vcc
# CHECK: INLINEASM &"; def vcc", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: inlineasm_def_vcc
body: |
  bb.0:
    INLINEASM &"; def vcc", 1, 10, implicit-def $vcc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test vcc definition in a previous basic block.

# CHECK-LABEL: name: vcc_def_pred
# CHECK: bb.1:
# SI: $vcc = S_MOV_B64 $vcc
# GFX9: $vcc = S_MOV_B64 $vcc
# CHECK: S_CBRANCH_VCCZ %bb.2, implicit $vcc

name: vcc_def_pred
body: |
  bb.0:
    $vcc = S_MOV_B64 0
  bb.1:
    S_CBRANCH_VCCZ %bb.2, implicit $vcc
  bb.2:

...

# Test various ways that the live range of vccz can overlap with the live range
# of an outstanding smem load.

---
# CHECK-LABEL: name: load_wait_def_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_wait_def_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_WAITCNT 127
    $vcc = S_MOV_B64 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# CHECK-LABEL: name: load_wait_nop_def_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_wait_nop_def_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_WAITCNT 127
    S_NOP 0
    $vcc = S_MOV_B64 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# CHECK-LABEL: name: load_def_wait_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_def_wait_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    $vcc = S_MOV_B64 0
    S_WAITCNT 127
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# CHECK-LABEL: name: load_def_wait_nop_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_def_wait_nop_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    $vcc = S_MOV_B64 0
    S_WAITCNT 127
    S_NOP 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# CHECK-LABEL: name: load_def_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_def_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    $vcc = S_MOV_B64 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# CHECK-LABEL: name: def_load_wait_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: def_load_wait_use
body: |
  bb.0:
    $vcc = S_MOV_B64 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_WAITCNT 127
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# CHECK-LABEL: name: def_load_wait_nop_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: def_load_wait_nop_use
body: |
  bb.0:
    $vcc = S_MOV_B64 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_WAITCNT 127
    S_NOP 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# CHECK-LABEL: name: def_load_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: def_load_use
body: |
  bb.0:
    $vcc = S_MOV_B64 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...