1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s 3# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=-flat-for-global -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s 4# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7-FLAT %s 5# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s 6# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s 7# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s 8 9--- 10name: amdgpu_atomic_cmpxchg_s32_global 11legalized: true 12regBankSelected: true 13tracksRegLiveness: true 14body: | 15 bb.0: 16 liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 17 18 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global 19 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 20 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 21 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 22 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 23 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 24 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 25 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 26 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 27 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 28 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 29 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 30 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 31 ; GFX6: $vgpr0 = COPY [[COPY3]] 32 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global 33 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 34 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 35 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 36 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 37 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 38 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 39 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 40 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 41 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 42 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 43 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 44 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 45 ; GFX7: $vgpr0 = COPY [[COPY3]] 46 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global 47 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 48 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 49 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 50 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 51 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 52 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 53 ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 54 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global 55 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 56 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 57 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 58 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 59 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 60 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 61 ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 62 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global 63 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 64 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 65 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 66 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 67 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 68 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 69 ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] 70 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global 71 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 72 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 73 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 74 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 75 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 76 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 77 ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] 78 %0:vgpr(p1) = COPY $vgpr0_vgpr1 79 %1:vgpr(s32) = COPY $vgpr2 80 %2:vgpr(s32) = COPY $vgpr3 81 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 82 %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 1) 83 $vgpr0 = COPY %4 84 85... 86 87--- 88name: amdgpu_atomic_cmpxchg_s32_global_gep4 89legalized: true 90regBankSelected: true 91tracksRegLiveness: true 92body: | 93 bb.0: 94 liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 95 96 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 97 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 98 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 99 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 100 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 101 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 102 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 103 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 104 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 105 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 106 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 107 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 108 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 109 ; GFX6: $vgpr0 = COPY [[COPY3]] 110 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 111 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 112 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 113 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 114 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 115 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 116 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 117 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 118 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 119 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 120 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 121 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 122 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 123 ; GFX7: $vgpr0 = COPY [[COPY3]] 124 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 125 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 126 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 127 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 128 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 129 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 130 ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec 131 ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec 132 ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 133 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 134 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 135 ; GFX7-FLAT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 136 ; GFX7-FLAT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 137 ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec 138 ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 139 ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 140 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 141 ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 142 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 143 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 144 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 145 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 146 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 147 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 148 ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec 149 ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec 150 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 151 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 152 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 153 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 154 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 155 ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec 156 ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 157 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 158 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 159 ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 160 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 161 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 162 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 163 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 164 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 165 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 166 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 167 ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] 168 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 169 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 170 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 171 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 172 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 173 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 174 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 175 ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] 176 %0:vgpr(p1) = COPY $vgpr0_vgpr1 177 %1:vgpr(s32) = COPY $vgpr2 178 %2:vgpr(s32) = COPY $vgpr3 179 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 180 %4:vgpr(s64) = G_CONSTANT i64 4 181 %5:vgpr(p1) = G_PTR_ADD %0, %4 182 %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 1) 183 $vgpr0 = COPY %6 184 185... 186 187--- 188name: amdgpu_atomic_cmpxchg_s64_global 189legalized: true 190regBankSelected: true 191tracksRegLiveness: true 192body: | 193 bb.0: 194 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 195 196 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global 197 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 198 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 199 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 200 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 201 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 202 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 203 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 204 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 205 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 206 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 207 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 208 ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 209 ; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]] 210 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global 211 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 212 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 213 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 214 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 215 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 216 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 217 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 218 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 219 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 220 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 221 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 222 ; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 223 ; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]] 224 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global 225 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 226 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 227 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 228 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 229 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 230 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) 231 ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] 232 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global 233 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 234 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 235 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 236 ; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 237 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 238 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) 239 ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] 240 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global 241 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 242 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 243 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 244 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 245 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 246 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 247 ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] 248 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global 249 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 250 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 251 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 252 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 253 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 254 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 255 ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] 256 %0:vgpr(p1) = COPY $vgpr0_vgpr1 257 %1:vgpr(s64) = COPY $vgpr2_vgpr3 258 %2:vgpr(s64) = COPY $vgpr4_vgpr5 259 %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 260 %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 1) 261 $vgpr0_vgpr1 = COPY %4 262 263... 264 265--- 266name: amdgpu_atomic_cmpxchg_s64_global_gep4 267legalized: true 268regBankSelected: true 269tracksRegLiveness: true 270body: | 271 bb.0: 272 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 273 274 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 275 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 276 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 277 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 278 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 279 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 280 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 281 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 282 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 283 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 284 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 285 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 286 ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 287 ; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]] 288 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 289 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 290 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 291 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 292 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 293 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 294 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 295 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 296 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 297 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 298 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 299 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 300 ; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 301 ; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]] 302 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 303 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 304 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 305 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 306 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 307 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 308 ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec 309 ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec 310 ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 311 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 312 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 313 ; GFX7-FLAT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 314 ; GFX7-FLAT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 315 ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec 316 ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 317 ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 318 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) 319 ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] 320 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 321 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 322 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 323 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 324 ; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 325 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 326 ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec 327 ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec 328 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 329 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 330 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 331 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 332 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 333 ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec 334 ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 335 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 336 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) 337 ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] 338 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 339 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 340 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 341 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 342 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 343 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 344 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 345 ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] 346 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 347 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 348 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 349 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 350 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 351 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 352 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 353 ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] 354 %0:vgpr(p1) = COPY $vgpr0_vgpr1 355 %1:vgpr(s64) = COPY $vgpr2_vgpr3 356 %2:vgpr(s64) = COPY $vgpr4_vgpr5 357 %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 358 %4:vgpr(s64) = G_CONSTANT i64 4 359 %5:vgpr(p1) = G_PTR_ADD %0, %4 360 %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 8, addrspace 1) 361 $vgpr0_vgpr1 = COPY %6 362 363... 364 365--- 366name: amdgpu_atomic_cmpxchg_s32_global_gepm4 367legalized: true 368regBankSelected: true 369tracksRegLiveness: true 370body: | 371 bb.0: 372 liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 373 374 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 375 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 376 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 377 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 378 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 379 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 380 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec 381 ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec 382 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 383 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 384 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 385 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 386 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 387 ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec 388 ; GFX6: %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 389 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %18, %subreg.sub1 390 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 391 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 392 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 393 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 394 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 395 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 396 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 397 ; GFX6: $vgpr0 = COPY [[COPY7]] 398 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 399 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 400 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 401 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 402 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 403 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 404 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec 405 ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec 406 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 407 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 408 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 409 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 410 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 411 ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec 412 ; GFX7: %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 413 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %18, %subreg.sub1 414 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 415 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 416 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 417 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 418 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 419 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 420 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 421 ; GFX7: $vgpr0 = COPY [[COPY7]] 422 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 423 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 424 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 425 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 426 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 427 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 428 ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec 429 ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec 430 ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 431 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 432 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 433 ; GFX7-FLAT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 434 ; GFX7-FLAT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 435 ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec 436 ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 437 ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 438 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 439 ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 440 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 441 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 442 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 443 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 444 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 445 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 446 ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec 447 ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec 448 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 449 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 450 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 451 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 452 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 453 ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec 454 ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec 455 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 456 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 457 ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 458 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 459 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 460 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 461 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 462 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 463 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 464 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 465 ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] 466 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 467 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 468 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 469 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 470 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 471 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 472 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 473 ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] 474 %0:vgpr(p1) = COPY $vgpr0_vgpr1 475 %1:vgpr(s32) = COPY $vgpr2 476 %2:vgpr(s32) = COPY $vgpr3 477 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 478 %4:vgpr(s64) = G_CONSTANT i64 -4 479 %5:vgpr(p1) = G_PTR_ADD %0, %4 480 %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 1) 481 $vgpr0 = COPY %6 482 483... 484 485--- 486name: amdgpu_atomic_cmpxchg_s32_global_nortn 487legalized: true 488regBankSelected: true 489tracksRegLiveness: true 490body: | 491 bb.0: 492 liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 493 494 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn 495 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 496 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 497 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 498 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 499 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 500 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 501 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 502 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 503 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 504 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 505 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 506 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 507 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn 508 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 509 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 510 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 511 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 512 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 513 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 514 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 515 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 516 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 517 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 518 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 519 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 520 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn 521 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 522 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 523 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 524 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 525 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 526 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 527 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn 528 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 529 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 530 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 531 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 532 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 533 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 534 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn 535 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 536 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 537 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 538 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 539 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 540 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 541 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn 542 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 543 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 544 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 545 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 546 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 547 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 548 %0:vgpr(p1) = COPY $vgpr0_vgpr1 549 %1:vgpr(s32) = COPY $vgpr2 550 %2:vgpr(s32) = COPY $vgpr3 551 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 552 %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 1) 553 554... 555 556--- 557name: amdgpu_atomic_cmpxchg_s64_global_nortn 558legalized: true 559regBankSelected: true 560tracksRegLiveness: true 561body: | 562 bb.0: 563 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 564 565 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn 566 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 567 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 568 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 569 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 570 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 571 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 572 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 573 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 574 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 575 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 576 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 577 ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 578 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn 579 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 580 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 581 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 582 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 583 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 584 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 585 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 586 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 587 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 588 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 589 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 590 ; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 591 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn 592 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 593 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 594 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 595 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 596 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 597 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) 598 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn 599 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 600 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 601 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 602 ; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 603 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 604 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) 605 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn 606 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 607 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 608 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 609 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 610 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 611 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 612 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn 613 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 614 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 615 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 616 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 617 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 618 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) 619 %0:vgpr(p1) = COPY $vgpr0_vgpr1 620 %1:vgpr(s64) = COPY $vgpr2_vgpr3 621 %2:vgpr(s64) = COPY $vgpr4_vgpr5 622 %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 623 %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 1) 624 625... 626 627--- 628name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr 629legalized: true 630regBankSelected: true 631tracksRegLiveness: true 632body: | 633 bb.0: 634 liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 635 636 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr 637 ; GFX6: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 638 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 639 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 640 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 641 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 642 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 643 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 644 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 645 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 646 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 647 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 648 ; GFX6: $vgpr0 = COPY [[COPY3]] 649 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr 650 ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 651 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 652 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 653 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 654 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 655 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 656 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 657 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 658 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 659 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 660 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 661 ; GFX7: $vgpr0 = COPY [[COPY3]] 662 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr 663 ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 664 ; GFX7-FLAT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 665 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 666 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 667 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 668 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] 669 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 670 ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 671 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr 672 ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 673 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 674 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 675 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 676 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 677 ; GFX8: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] 678 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 679 ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 680 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr 681 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 682 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 683 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 684 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 685 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 686 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec 687 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 688 ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] 689 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr 690 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 691 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 692 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 693 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 694 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 695 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec 696 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 697 ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] 698 %0:sgpr(p1) = COPY $sgpr0_sgpr1 699 %1:vgpr(s32) = COPY $vgpr2 700 %2:vgpr(s32) = COPY $vgpr3 701 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 702 %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 1) 703 $vgpr0 = COPY %4 704 705... 706 707--- 708name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 709legalized: true 710regBankSelected: true 711tracksRegLiveness: true 712body: | 713 bb.0: 714 liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 715 716 ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 717 ; GFX6: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 718 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 719 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 720 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 721 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 722 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 723 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 724 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 725 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 726 ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 727 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 728 ; GFX6: $vgpr0 = COPY [[COPY3]] 729 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 730 ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 731 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 732 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 733 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 734 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 735 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 736 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 737 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 738 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 739 ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 740 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 741 ; GFX7: $vgpr0 = COPY [[COPY3]] 742 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 743 ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 744 ; GFX7-FLAT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 745 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 746 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 747 ; GFX7-FLAT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 748 ; GFX7-FLAT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 749 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 750 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 751 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 752 ; GFX7-FLAT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 753 ; GFX7-FLAT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 754 ; GFX7-FLAT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc 755 ; GFX7-FLAT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def $scc, implicit $scc 756 ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 757 ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 758 ; GFX7-FLAT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] 759 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 760 ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 761 ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 762 ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 763 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 764 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 765 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 766 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 767 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 768 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 769 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 770 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 771 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 772 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 773 ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc 774 ; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def $scc, implicit $scc 775 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 776 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 777 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] 778 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) 779 ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] 780 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 781 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 782 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 783 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 784 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 785 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 786 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec 787 ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 788 ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] 789 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 790 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 791 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 792 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 793 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 794 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 795 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec 796 ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) 797 ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] 798 %0:sgpr(p1) = COPY $sgpr0_sgpr1 799 %1:vgpr(s32) = COPY $vgpr2 800 %2:vgpr(s32) = COPY $vgpr3 801 %3:sgpr(s64) = G_CONSTANT i64 4095 802 %4:sgpr(p1) = G_PTR_ADD %0, %3 803 %5:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 804 %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %4, %5 :: (load store seq_cst 4, addrspace 1) 805 $vgpr0 = COPY %6 806 807... 808