; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Monotonic atomic loads through addrspace(3) pointers.
;
; Every function below is expected to compile to one ds_read of the matching
; width, then a wait on lgkmcnt(0), then the return. The kaveri checks also
; require an s_mov_b32 to m0 on the line right before the read, while the
; gfx900 checks require that no such write shows up.

; 32-bit integer load straight from the incoming pointer; the trailing
; end-of-line anchor on the ds_read check rules out any offset operand.
; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) {
  %val = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4
  ret i32 %val
}

; 32-bit integer load from element 16; the checks expect the resulting
; 64-byte displacement to appear as the instruction's offset operand.
; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) {
  %addr = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
  %val = load atomic i32, i32 addrspace(3)* %addr monotonic, align 4
  ret i32 %val
}

; 64-bit integer load straight from the incoming pointer; the result is
; expected in the v[0:1] register pair with no offset operand.
; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
  %val = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8
  ret i64 %val
}

; 64-bit integer load from element 16; the checks expect a 128-byte
; displacement as the instruction's offset operand.
; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
  %addr = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16
  %val = load atomic i64, i64 addrspace(3)* %addr monotonic, align 8
  ret i64 %val
}

; float load from element 16; the expected output is the same ds_read_b32
; with offset 64 that the i32 variant checks for.
; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) {
  %addr = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16
  %val = load atomic float, float addrspace(3)* %addr monotonic, align 4
  ret float %val
}

; double load from element 16; the expected output is the same ds_read_b64
; with offset 128 that the i64 variant checks for.
; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) {
  %addr = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16
  %val = load atomic double, double addrspace(3)* %addr monotonic, align 8
  ret double %val
}

; Load of a default-address-space pointer (i8*) from element 16; the checks
; expect a 64-bit read with offset 128.
; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) {
  %addr = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16
  %val = load atomic i8*, i8* addrspace(3)* %addr monotonic, align 8
  ret i8* %val
}

; Load of an addrspace(3) pointer from element 16; the checks expect a
; 32-bit read with offset 64.
; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) {
  %addr = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16
  %val = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %addr monotonic, align 4
  ret i8 addrspace(3)* %val
}