# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -run-pass=simple-register-coalescing,rename-independent-subregs %s -o - | FileCheck -check-prefix=GCN %s

# This test is for a bug where the following happens:
#
# Inside the loop, %29.sub2 is used in a V_LSHLREV whose result is then used
# in an LDS read. %29 is a 128-bit value that is linked by copies to
# %45 (from phi elimination), %28 (the value in the loop pre-header),
# %31 (defined and subreg-modified in the loop, and used after the loop)
# and %30:
#
#     %45:vreg_128 = COPY killed %28
#   bb.1:
#     %29:vreg_128 = COPY killed %45
#     %39:vgpr_32 = V_LSHLREV_B32_e32 2, %29.sub2, implicit $exec
#     %31:vreg_128 = COPY killed %29
#     %31.sub1:vreg_128 = COPY %34
#     %30:vreg_128 = COPY %31
#     %45:vreg_128 = COPY killed %30
#     S_CBRANCH_EXECNZ %bb.1, implicit $exec
#     S_BRANCH %bb.2
#   bb.2:
#     undef %32.sub0:vreg_128 = COPY killed %31.sub0
#
# So this coalesces together into a single 128-bit value whose sub1 is modified
# in the loop, but the sub2 used in the V_LSHLREV is not modified in the loop.
#
# The bug is that the coalesced value has a L00000004 subrange (for sub2) that
# says that it is not live up to the end of the loop block. The symptom is that
# Rename Independent Subregs separates sub2 into its own register, and it is
# not live around the loop, so that pass adds an IMPLICIT_DEF for it just before
# the loop backedge.
# The checks below locate bb.1 in the output, capture the register whose sub2
# lane is the shift operand of the V_LSHLREV, and then require that no
# IMPLICIT_DEF of that same register (as a vreg_128) appears anywhere later in
# the output.
# GCN: bb.1:
# GCN: V_LSHLREV_B32_e32 2, [[val:%[0-9][0-9]*]].sub2
# GCN-NOT: [[val]]:vreg_128 = IMPLICIT_DEF

---
name: _amdgpu_cs_main
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1

    ; Loop pre-header: every lane (sub0..sub3) of %28 is written here, and %28
    ; is then copied into %45, which bb.1 reads with its first COPY.
    %3:sgpr_32 = S_MOV_B32 0
    undef %19.sub1:vreg_128 = COPY undef %3
    %4:sgpr_32 = S_MOV_B32 1
    %5:sgpr_32 = S_MOV_B32 2
    %11:sreg_32_xm0 = S_MOV_B32 255
    undef %28.sub0:vreg_128 = COPY killed %3
    %28.sub1:vreg_128 = COPY killed %4
    %28.sub2:vreg_128 = COPY killed %11
    %28.sub3:vreg_128 = COPY killed %5
    %2:sreg_64 = S_MOV_B64 0
    %34:sreg_32 = S_MOV_B32 7
    %37:vreg_128 = COPY undef %42:vreg_128
    %43:sreg_64 = COPY killed %2
    %44:vreg_128 = COPY killed %37
    %45:vreg_128 = COPY killed %28

  bb.1:
    successors: %bb.1, %bb.2

    ; Loop body: this block lists itself as a successor, so %43, %44 and %45
    ; read here come either from bb.0 or from the copies at the bottom of this
    ; block.
    %29:vreg_128 = COPY killed %45
    %36:vreg_128 = COPY killed %44
    %0:sreg_64 = COPY killed %43
    ; The read of %29.sub2 that the checks at the top of the file capture.
    %39:vgpr_32 = V_LSHLREV_B32_e32 2, %29.sub2, implicit $exec
    %41:vgpr_32 = V_ADD_CO_U32_e32 1152, %39, implicit-def dead $vcc, implicit $exec
    $m0 = S_MOV_B32 -1
    %12:vreg_64 = DS_READ2_B32 killed %41, 0, 1, 0, implicit $m0, implicit $exec
    %13:vreg_64 = DS_READ2_B32 %39, -112, -111, 0, implicit $m0, implicit $exec
    %14:vreg_64 = DS_READ2_B32 %39, 0, 1, 0, implicit $m0, implicit $exec
    %40:vgpr_32 = V_ADD_CO_U32_e32 1160, %39, implicit-def dead $vcc, implicit $exec
    %15:vreg_64 = DS_READ2_B32 killed %40, 0, 1, 0, implicit $m0, implicit $exec
    %16:vreg_64 = DS_READ2_B32 %39, -110, -109, 0, implicit $m0, implicit $exec
    %17:vreg_64 = DS_READ2_B32 %39, 2, 3, 0, implicit $m0, implicit $exec
    undef %35.sub1:vreg_128 = COPY undef %34
    ; %31 takes over the whole of %29; the only subregister def applied to it
    ; in this block is the sub1 write on the next line.
    %31:vreg_128 = COPY killed %29
    %31.sub1:vreg_128 = COPY %34
    %38:vgpr_32 = V_ADD_CO_U32_e32 1, %36.sub0, implicit-def dead $vcc, implicit $exec
    %18:sreg_64 = V_CMP_LT_I32_e64 5, %38, implicit $exec
    %1:sreg_64 = S_OR_B64 killed %18, killed %0, implicit-def $scc
    ; Values for the next iteration: %31 reaches %45 through %30, while %31
    ; itself stays live for its use in bb.2.
    %30:vreg_128 = COPY %31
    %43:sreg_64 = COPY %1
    %44:vreg_128 = COPY %35
    %45:vreg_128 = COPY killed %30
    $exec = S_ANDN2_B64_term $exec, %1, implicit-def $scc
    S_CBRANCH_EXECNZ %bb.1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    ; Loop exit: of the 128-bit value built in the loop, only %31.sub0 is read
    ; here.
    $exec = S_OR_B64 $exec, killed %1, implicit-def $scc
    %33:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    undef %32.sub0:vreg_128 = COPY killed %31.sub0
    %32.sub2:vreg_128 = COPY %33
    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %32:vreg_128
    S_ENDPGM 0

...