# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o - %s | FileCheck %s

# The first YAML document is the LLVM IR module that backs the machine
# functions defined in the MIR documents that follow it.
--- |
  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

  ; Full if/end control flow built from llvm.amdgcn.if / llvm.amdgcn.end.cf.
  define amdgpu_kernel void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  ; Minimal main_body/if/end skeleton; it only supplies the block names
  ; referenced by the matching MIR body.
  define amdgpu_kernel void @optimize_if_and_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  ; Minimal skeleton, same shape as above.
  define amdgpu_kernel void @optimize_if_or_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }


  ; Same control flow as @optimize_if_and_saveexec_xor, plus a store of %id
  ; placed in main_body ahead of the branch.
  define amdgpu_kernel void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    store i32 %id, i32 addrspace(1)* undef
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  ; The remaining functions are all minimal main_body/if/end skeletons.
  define amdgpu_kernel void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_unknown_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  ; Function Attrs: nounwind readnone
  declare i32 @llvm.amdgcn.workitem.id.x() #1

  declare { i1, i64 } @llvm.amdgcn.if(i1)

  declare void @llvm.amdgcn.end.cf(i64)


  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }

...
---
# Input: exec is copied to $sgpr0_sgpr1, ANDed with the compare result into
# $sgpr2_sgpr3, XORed back into $sgpr0_sgpr1, and $sgpr2_sgpr3 is then moved
# into $exec by S_MOV_B64_term.
# Expected: one S_AND_SAVEEXEC_B64 defining $sgpr0_sgpr1, directly followed by
# the S_XOR_B64 now reading $exec, directly followed by SI_MASK_BRANCH.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: SI_MASK_BRANCH

name:            optimize_if_and_saveexec_xor
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: copy of exec, S_AND_B64 with the compare result, then S_MOV_B64_term
# of that result into $exec; there is no S_XOR_B64 in this variant.
# Expected: one S_AND_SAVEEXEC_B64 directly followed by SI_MASK_BRANCH.
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name:            optimize_if_and_saveexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: same shape as optimize_if_and_saveexec, but the saved exec copy is
# combined with the compare result by S_OR_B64.
# Expected: one S_OR_SAVEEXEC_B64 directly followed by SI_MASK_BRANCH.
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name:            optimize_if_or_saveexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: a BUFFER_STORE_DWORD_OFFSET sits between the S_AND_B64 and the
# S_XOR_B64.
# Expected: the S_AND_B64 / store / S_XOR_B64 sequence is left as written and
# S_MOV_B64_term only becomes a plain COPY into $exec.
# NOTE(review): presumably the store reading exec in the middle is what blocks
# the fold - confirm against the pass.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle{{$}}
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name:            optimize_if_and_saveexec_xor_valu_middle
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: the S_AND_B64 writes its result over the saved exec copy in
# $sgpr0_sgpr1, and the S_XOR_B64 reads undef $sgpr2_sgpr3.
# Expected: both instructions are left as written and S_MOV_B64_term only
# becomes a plain COPY into $exec.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
name:            optimize_if_and_saveexec_xor_wrong_reg
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr6 = S_MOV_B32 -1
    $sgpr7 = S_MOV_B32 61440
    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term $sgpr0_sgpr1
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: an extra S_OR_B64 rewrites $sgpr2_sgpr3 between the S_AND_B64 and the
# move of $sgpr2_sgpr3 into $exec.
# Expected: S_AND_B64, S_OR_B64 and S_XOR_B64 are left as written and
# S_MOV_B64_term only becomes a plain COPY into $exec.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}}
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec

name:            optimize_if_and_saveexec_xor_modify_copy_to_exec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr0 = S_MOV_B32 0
    $sgpr1 = S_MOV_B32 1
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: $sgpr2_sgpr3 is not killed by the move into $exec; it is live into
# bb.1.if, where S_SLEEP reads it.
# Expected: S_AND_B64 and S_XOR_B64 are left as written and S_MOV_B64_term
# only becomes a plain COPY into $exec.
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}}
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name:            optimize_if_and_saveexec_xor_live_out_setexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    S_SLEEP 0, implicit $sgpr2_sgpr3
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: the saved exec copy is combined with $vcc_lo by S_LSHR_B64 rather
# than by an AND/OR-style instruction.
# Expected: the COPY of $exec and the S_LSHR_B64 both remain, and
# S_MOV_B64_term only becomes a plain COPY into $exec.
# NOTE(review): presumably there is no saveexec counterpart for S_LSHR_B64 -
# confirm against the pass.
# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = COPY $exec
# CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec

name:            optimize_if_unknown_saveexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: S_ANDN2_B64 with the saved exec copy as first source operand and the
# compare result as second.
# Expected: one S_ANDN2_SAVEEXEC_B64 directly followed by SI_MASK_BRANCH.
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name:            optimize_if_andn2_saveexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# Input: S_ANDN2_B64 with the operands the other way round: the compare result
# first, the saved exec copy second.
# Expected: the S_ANDN2_B64 is left as written and S_MOV_B64_term only becomes
# a plain COPY into $exec.
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
# CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
name:            optimize_if_andn2_saveexec_no_commute
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...