; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s

; Trivial optimization of generic addressing

; CHECK-LABEL: @load_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_global_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
  %tmp1 = load float, float addrspace(1)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_constant_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(4)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(4)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_constant_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(4)*
  %tmp1 = load float, float addrspace(4)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(3)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_group_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
  %tmp1 = load float, float addrspace(3)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(5)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_private_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
  %tmp1 = load float, float addrspace(5)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @store_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0
define amdgpu_kernel void @store_global_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
  store float 0.0, float addrspace(1)* %tmp0
  ret void
}

; CHECK-LABEL: @store_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0
define amdgpu_kernel void @store_group_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
  store float 0.0, float addrspace(3)* %tmp0
  ret void
}

; CHECK-LABEL: @store_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(5)* %tmp0
define amdgpu_kernel void @store_private_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
  store float 0.0, float addrspace(5)* %tmp0
  ret void
}

; Optimized to global load/store.
; CHECK-LABEL: @load_store_global(
; CHECK-NEXT: %val = load i32, i32 addrspace(1)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; Optimized to group load/store.
; CHECK-LABEL: @load_store_group(
; CHECK-NEXT: %val = load i32, i32 addrspace(3)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; Optimized to private load/store.
; CHECK-LABEL: @load_store_private(
; CHECK-NEXT: %val = load i32, i32 addrspace(5)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(5)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; No optimization. Flat load/store.
; CHECK-LABEL: @load_store_flat(
; CHECK-NEXT: %val = load i32, i32* %input, align 4
; CHECK-NEXT: store i32 %val, i32* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_flat(i32* nocapture %input, i32* nocapture %output) #0 {
  %val = load i32, i32* %input, align 4
  store i32 %val, i32* %output, align 4
  ret void
}

; The cast is only a stored value, not a pointer operand, so it is not replaced.
; CHECK-LABEL: @store_addrspacecast_ptr_value(
; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32*
; CHECK-NEXT: store i32* %cast, i32* addrspace(1)* %output, align 4
define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32* addrspace(1)* nocapture %output) #0 {
  %cast = addrspacecast i32 addrspace(1)* %input to i32*
  store i32* %cast, i32* addrspace(1)* %output, align 4
  ret void
}

; CHECK-LABEL: @atomicrmw_add_global_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = atomicrmw add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @atomicrmw_add_group_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = atomicrmw add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @cmpxchg_global_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @cmpxchg_group_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; Not pointer operand
; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
; CHECK: %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
define { i32*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32* %val) #0 {
  %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
  %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
  ret { i32*, i1 } %ret
}

; Null pointer in local addr space
; CHECK-LABEL: @local_nullptr
; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
; CHECK-NOT: i8 addrspace(3)* null
define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) {
entry:
  %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
  %conv = zext i1 %tobool to i32
  store i32 %conv, i32 addrspace(1)* %results, align 4
  ret void
}

attributes #0 = { nounwind }