; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Both check prefixes are enabled on the command line above. Without the FUNC
; prefix the per-function label checks in this file are silently skipped, and
; the instruction checks are no longer tied to the function they sit next to.

; Atomic exchange of the constant 4 on an addrspace(3) pointer, with the old
; value stored through an addrspace(1) pointer. The checks expect the pointer
; to be copied from an SGPR into a VGPR, the constant to be materialized in a
; VGPR, and the instruction offset field to be 0x0.
; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Same exchange, but the address is %ptr advanced by four i32 elements. The
; check expects that 16-byte displacement to appear as the 0x10 offset operand
; of the exchange instruction.
; FUNC-LABEL: @lds_atomic_xchg_ret_i32_offset:
; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; XXX - Is it really necessary to load 4 into VGPR?
; Atomic add of the constant 4 on an addrspace(3) pointer; the old value is
; stored through an addrspace(1) pointer. The checks expect the constant in a
; VGPR, the pointer copied from an SGPR to a VGPR, and a 0x0 offset operand.
; FUNC-LABEL: @lds_atomic_add_ret_i32:
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Every *_offset test below addresses %ptr advanced by four i32 elements and
; expects that 16-byte displacement as the 0x10 offset operand.
; FUNC-LABEL: @lds_atomic_add_ret_i32_offset:
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Atomic add of the constant 1. The checks expect an increment instruction
; whose third operand is a VGPR holding -1 (copied from an SGPR).
; FUNC-LABEL: @lds_atomic_inc_ret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_inc_ret_i32_offset:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
; SI: S_ENDPGM
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Atomic subtract of the constant 4; only the opcode is checked here.
; FUNC-LABEL: @lds_atomic_sub_ret_i32:
; SI: DS_SUB_RTN_U32
; SI: S_ENDPGM
define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_sub_ret_i32_offset:
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Atomic subtract of the constant 1. The checks expect a decrement instruction
; whose third operand is a VGPR holding -1 (copied from an SGPR).
; FUNC-LABEL: @lds_atomic_dec_ret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
; SI: S_ENDPGM
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_dec_ret_i32_offset:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
; SI: S_ENDPGM
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Bitwise atomics (and / or / xor) with the constant 4. The plain variants only
; check the opcode; the *_offset variants also check the 0x10 offset operand.
; FUNC-LABEL: @lds_atomic_and_ret_i32:
; SI: DS_AND_RTN_B32
; SI: S_ENDPGM
define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_and_ret_i32_offset:
; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_or_ret_i32:
; SI: DS_OR_RTN_B32
; SI: S_ENDPGM
define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_or_ret_i32_offset:
; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_xor_ret_i32:
; SI: DS_XOR_RTN_B32
; SI: S_ENDPGM
define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_xor_ret_i32_offset:
; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; The nand test is disabled: both its label (note the X in the prefix) and its
; body are commented out.
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: @lds_atomic_nand_ret_i32:
; define void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
;   store i32 %result, i32 addrspace(1)* %out, align 4
;   ret void
; }

; Signed min / max with the constant 4; the checks expect the I32 opcodes.
; FUNC-LABEL: @lds_atomic_min_ret_i32:
; SI: DS_MIN_RTN_I32
; SI: S_ENDPGM
define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_min_ret_i32_offset:
; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_max_ret_i32:
; SI: DS_MAX_RTN_I32
; SI: S_ENDPGM
define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_max_ret_i32_offset:
; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned min / max with the constant 4; the checks expect the U32 opcodes.
; FUNC-LABEL: @lds_atomic_umin_ret_i32:
; SI: DS_MIN_RTN_U32
; SI: S_ENDPGM
define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_umin_ret_i32_offset:
; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_umax_ret_i32:
; SI: DS_MAX_RTN_U32
; SI: S_ENDPGM
define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @lds_atomic_umax_ret_i32_offset:
; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}