; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Code generation checks for 32-bit `atomicrmw` on pointers that carry no
; explicit address space; every pattern below expects a flat_atomic_*
; instruction.
;
; Prefix usage, as set up by the three llc invocations above:
;   * GCN  - patterns shared by all three targets.
;   * CIVI - bonaire and tonga; the patterns expect the atomic to carry no
;            immediate offset even when the address is a constant GEP.
;   * GFX9 - gfx900; the patterns expect the constant GEP offset to appear
;            as an `offset` operand on the instruction.
;
; Each operation is exercised in no-return form (result unused) and in
; returning form (result stored to %out2, pattern expects `glc`), with and
; without a variable index and with and without a constant offset of 4 x i32.

; ---- atomicrmw add (expects flat_atomic_add) ----

; GCN-LABEL: {{^}}atomic_add_i32_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; 1023 x i32 = 4092 bytes: the gfx900 pattern still expects a folded offset.
; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 1023
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; 1024 x i32 = 4096 bytes: all targets are expected to emit no offset operand.
; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 1024
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- atomicrmw and (expects flat_atomic_and) ----

; GCN-LABEL: {{^}}atomic_and_i32_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- atomicrmw sub (expects flat_atomic_sub) ----

; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- atomicrmw max (expects flat_atomic_smax) ----

; GCN-LABEL: {{^}}atomic_max_i32_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- atomicrmw umax (expects flat_atomic_umax) ----

; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- atomicrmw min (expects flat_atomic_smin) ----

; GCN-LABEL: {{^}}atomic_min_i32_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- atomicrmw umin (expects flat_atomic_umin) ----

; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- atomicrmw or (expects flat_atomic_or) ----

; GCN-LABEL: {{^}}atomic_or_i32_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- atomicrmw xchg, i32 and float operands (expects flat_atomic_swap) ----

; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_f32_offset(float* %out, float %in) {
entry:
  %gep = getelementptr float, float* %out, i32 4
  %val = atomicrmw volatile xchg float* %gep, float %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32:
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  ret void
}

760; GCN-LABEL: 
{{^}}atomic_xchg_i32_ret: 761; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} 762; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 763define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) { 764entry: 765 %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst 766 store i32 %val, i32* %out2 767 ret void 768} 769 770; GCN-LABEL: {{^}}atomic_xchg_i32_addr64: 771; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 772define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) { 773entry: 774 %ptr = getelementptr i32, i32* %out, i64 %index 775 %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst 776 ret void 777} 778 779; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64: 780; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 781; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 782define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { 783entry: 784 %ptr = getelementptr i32, i32* %out, i64 %index 785 %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst 786 store i32 %val, i32* %out2 787 ret void 788} 789 790; CMP_SWAP 791 792; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset: 793; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} 794; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 795define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) { 796entry: 797 %gep = getelementptr i32, i32* %out, i32 4 798 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst 799 ret void 800} 801 802; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset: 803; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 804; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} 805; GCN: flat_store_dword 
v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] 806define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) { 807entry: 808 %gep = getelementptr i32, i32* %out, i32 4 809 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst 810 %flag = extractvalue { i32, i1 } %val, 0 811 store i32 %flag, i32* %out2 812 ret void 813} 814 815; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: 816; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} 817; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 818define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) { 819entry: 820 %ptr = getelementptr i32, i32* %out, i64 %index 821 %gep = getelementptr i32, i32* %ptr, i32 4 822 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst 823 ret void 824} 825 826; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset: 827; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 828; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 829; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] 830define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) { 831entry: 832 %ptr = getelementptr i32, i32* %out, i64 %index 833 %gep = getelementptr i32, i32* %ptr, i32 4 834 %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst 835 %flag = extractvalue { i32, i1 } %val, 0 836 store i32 %flag, i32* %out2 837 ret void 838} 839 840; GCN-LABEL: {{^}}atomic_cmpxchg_i32: 841; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} 842define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) { 843entry: 844 %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst 845 ret void 846} 847 848; GCN-LABEL: 
{{^}}atomic_cmpxchg_i32_ret: 849; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc 850; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] 851define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) { 852entry: 853 %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst 854 %flag = extractvalue { i32, i1 } %val, 0 855 store i32 %flag, i32* %out2 856 ret void 857} 858 859; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64: 860; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} 861define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) { 862entry: 863 %ptr = getelementptr i32, i32* %out, i64 %index 864 %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst 865 ret void 866} 867 868; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64: 869; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 870; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] 871define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) { 872entry: 873 %ptr = getelementptr i32, i32* %out, i64 %index 874 %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst 875 %flag = extractvalue { i32, i1 } %val, 0 876 store i32 %flag, i32* %out2 877 ret void 878} 879 880; GCN-LABEL: {{^}}atomic_xor_i32_offset: 881; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} 882; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} 883define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) { 884entry: 885 %gep = getelementptr i32, i32* %out, i32 4 886 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst 887 ret void 888} 889 890; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset: 891; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} 892; GFX9: flat_atomic_xor 
[[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} 893; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 894define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { 895entry: 896 %gep = getelementptr i32, i32* %out, i32 4 897 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst 898 store i32 %val, i32* %out2 899 ret void 900} 901 902; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset: 903; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 904; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} 905define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { 906entry: 907 %ptr = getelementptr i32, i32* %out, i64 %index 908 %gep = getelementptr i32, i32* %ptr, i32 4 909 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst 910 ret void 911} 912 913; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: 914; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 915; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} 916; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 917define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { 918entry: 919 %ptr = getelementptr i32, i32* %out, i64 %index 920 %gep = getelementptr i32, i32* %ptr, i32 4 921 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst 922 store i32 %val, i32* %out2 923 ret void 924} 925 926; GCN-LABEL: {{^}}atomic_xor_i32: 927; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} 928define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) { 929entry: 930 %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst 931 ret void 932} 933 934; GCN-LABEL: {{^}}atomic_xor_i32_ret: 935; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} 936; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 937define amdgpu_kernel void 
@atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) { 938entry: 939 %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst 940 store i32 %val, i32* %out2 941 ret void 942} 943 944; GCN-LABEL: {{^}}atomic_xor_i32_addr64: 945; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 946define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) { 947entry: 948 %ptr = getelementptr i32, i32* %out, i64 %index 949 %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst 950 ret void 951} 952 953; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64: 954; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 955; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 956define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { 957entry: 958 %ptr = getelementptr i32, i32* %out, i64 %index 959 %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst 960 store i32 %val, i32* %out2 961 ret void 962} 963 964; GCN-LABEL: {{^}}atomic_load_i32_offset: 965; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 966; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} 967; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 968define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) { 969entry: 970 %gep = getelementptr i32, i32* %in, i32 4 971 %val = load atomic i32, i32* %gep seq_cst, align 4 972 store i32 %val, i32* %out 973 ret void 974} 975 976; GCN-LABEL: {{^}}atomic_load_i32: 977; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc 978; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 979define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) { 980entry: 981 %val = load atomic i32, i32* %in seq_cst, align 4 982 store i32 %val, i32* %out 983 ret void 984} 985 986; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset: 987; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 988; GFX9: 
flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 989; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 990define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) { 991entry: 992 %ptr = getelementptr i32, i32* %in, i64 %index 993 %gep = getelementptr i32, i32* %ptr, i32 4 994 %val = load atomic i32, i32* %gep seq_cst, align 4 995 store i32 %val, i32* %out 996 ret void 997} 998 999; GCN-LABEL: {{^}}atomic_load_i32_addr64: 1000; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1001; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 1002define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) { 1003entry: 1004 %ptr = getelementptr i32, i32* %in, i64 %index 1005 %val = load atomic i32, i32* %ptr seq_cst, align 4 1006 store i32 %val, i32* %out 1007 ret void 1008} 1009 1010; GCN-LABEL: {{^}}atomic_store_i32_offset: 1011; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1012; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}} 1013define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) { 1014entry: 1015 %gep = getelementptr i32, i32* %out, i32 4 1016 store atomic i32 %in, i32* %gep seq_cst, align 4 1017 ret void 1018} 1019 1020; GCN-LABEL: {{^}}atomic_store_i32: 1021; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1022define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) { 1023entry: 1024 store atomic i32 %in, i32* %out seq_cst, align 4 1025 ret void 1026} 1027 1028; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset: 1029; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1030; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}} 1031define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) { 1032entry: 1033 %ptr = getelementptr i32, i32* %out, i64 %index 1034 %gep = getelementptr i32, i32* %ptr, i32 4 1035 store 
atomic i32 %in, i32* %gep seq_cst, align 4 1036 ret void 1037} 1038 1039; GCN-LABEL: {{^}}atomic_store_i32_addr64: 1040; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1041define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) { 1042entry: 1043 %ptr = getelementptr i32, i32* %out, i64 %index 1044 store atomic i32 %in, i32* %ptr seq_cst, align 4 1045 ret void 1046} 1047 1048; GCN-LABEL: {{^}}atomic_load_f32_offset: 1049; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 1050; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} 1051; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 1052define amdgpu_kernel void @atomic_load_f32_offset(float* %in, float* %out) { 1053entry: 1054 %gep = getelementptr float, float* %in, i32 4 1055 %val = load atomic float, float* %gep seq_cst, align 4 1056 store float %val, float* %out 1057 ret void 1058} 1059 1060; GCN-LABEL: {{^}}atomic_load_f32: 1061; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc 1062; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 1063define amdgpu_kernel void @atomic_load_f32(float* %in, float* %out) { 1064entry: 1065 %val = load atomic float, float* %in seq_cst, align 4 1066 store float %val, float* %out 1067 ret void 1068} 1069 1070; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset: 1071; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1072; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 1073; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 1074define amdgpu_kernel void @atomic_load_f32_addr64_offset(float* %in, float* %out, i64 %index) { 1075entry: 1076 %ptr = getelementptr float, float* %in, i64 %index 1077 %gep = getelementptr float, float* %ptr, i32 4 1078 %val = load atomic float, float* %gep seq_cst, align 4 1079 store float %val, float* %out 1080 ret void 1081} 1082 1083; GCN-LABEL: {{^}}atomic_load_f32_addr64: 1084; GCN: 
flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1085; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 1086define amdgpu_kernel void @atomic_load_f32_addr64(float* %in, float* %out, i64 %index) { 1087entry: 1088 %ptr = getelementptr float, float* %in, i64 %index 1089 %val = load atomic float, float* %ptr seq_cst, align 4 1090 store float %val, float* %out 1091 ret void 1092} 1093 1094; GCN-LABEL: {{^}}atomic_store_f32_offset: 1095; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1096; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}} 1097define amdgpu_kernel void @atomic_store_f32_offset(float %in, float* %out) { 1098entry: 1099 %gep = getelementptr float, float* %out, i32 4 1100 store atomic float %in, float* %gep seq_cst, align 4 1101 ret void 1102} 1103 1104; GCN-LABEL: {{^}}atomic_store_f32: 1105; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1106define amdgpu_kernel void @atomic_store_f32(float %in, float* %out) { 1107entry: 1108 store atomic float %in, float* %out seq_cst, align 4 1109 ret void 1110} 1111 1112; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset: 1113; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1114; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}} 1115define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float* %out, i64 %index) { 1116entry: 1117 %ptr = getelementptr float, float* %out, i64 %index 1118 %gep = getelementptr float, float* %ptr, i32 4 1119 store atomic float %in, float* %gep seq_cst, align 4 1120 ret void 1121} 1122 1123; GCN-LABEL: {{^}}atomic_store_f32_addr64: 1124; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1125define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float* %out, i64 %index) { 1126entry: 1127 %ptr = getelementptr float, float* %out, i64 %index 1128 store atomic float %in, float* %ptr seq_cst, align 4 1129 ret void 1130} 1131