; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s

; Load-store vectorizer tests for accesses whose element type is a pointer,
; either on its own or next to an integer element. Per the datalayout below,
; addrspace(1) pointers are 64 bits wide and addrspace(3) pointers are 32 bits
; wide.

target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

declare i32 @llvm.amdgcn.workitem.id.x() #1

; Two adjacent 64-bit pointer loads and two adjacent null-pointer stores.
; The checks expect a single <2 x i64> load whose lanes are turned back into
; pointers with inttoptr, and a single <2 x i64> zeroinitializer store.
; CHECK-LABEL: @merge_v2p1i8(
; CHECK: load <2 x i64>
; CHECK: inttoptr i64 %{{[0-9]+}} to i8 addrspace(1)*
; CHECK: inttoptr i64 %{{[0-9]+}} to i8 addrspace(1)*
; CHECK: store <2 x i64> zeroinitializer
define void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 {
entry:
  %a.elt1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %b.elt1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, i64 1

  %ld.elt0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, align 4
  %ld.elt1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b.elt1, align 4

  store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a, align 4
  store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a.elt1, align 4

  ret void
}

; Same shape as @merge_v2p1i8, but with 32-bit addrspace(3) pointers, so the
; checks expect <2 x i32> vector accesses instead.
; CHECK-LABEL: @merge_v2p3i8(
; CHECK: load <2 x i32>
; CHECK: inttoptr i32 %{{[0-9]+}} to i8 addrspace(3)*
; CHECK: inttoptr i32 %{{[0-9]+}} to i8 addrspace(3)*
; CHECK: store <2 x i32> zeroinitializer
define void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 {
entry:
  %a.elt1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i64 1
  %b.elt1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, i64 1

  %ld.elt0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, align 4
  %ld.elt1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b.elt1, align 4

  store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a, align 4
  store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a.elt1, align 4

  ret void
}

; i64 load at element 0 followed by a 64-bit pointer load at element 1.
; The checks expect one <2 x i64> load with lane 1 converted via inttoptr.
; CHECK-LABEL: @merge_load_i64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
define void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  %a.elt1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
  %a.elt1.recast = bitcast i64 addrspace(1)* %a.elt1 to i8 addrspace(1)* addrspace(1)*

  %ld.elt0 = load i64, i64 addrspace(1)* %a
  %ld.elt1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.elt1.recast

  ret void
}

; 64-bit pointer load at element 0 followed by an i64 load at element 1.
; The checks expect one <2 x i64> load with lane 0 converted via inttoptr.
; CHECK-LABEL: @merge_load_ptr64_i64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
; CHECK: inttoptr i64 [[ELT0]] to i8 addrspace(1)*
define void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  %a.recast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.elt1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  %ld.elt0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.recast
  %ld.elt1 = load i64, i64 addrspace(1)* %a.elt1

  ret void
}

; 64-bit pointer store at element 0 followed by an i64 store at element 1.
; The checks expect the pointer to go through ptrtoint into lane 0 of a single
; <2 x i64> store.
; CHECK-LABEL: @merge_store_ptr64_i64(
; CHECK: [[ELT0:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0]], i32 0
; CHECK: store <2 x i64>
define void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 {
entry:
  %a.recast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.elt1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.recast
  store i64 %val1, i64 addrspace(1)* %a.elt1

  ret void
}

; i64 store at element 0 followed by a 64-bit pointer store at element 1.
; The checks expect the pointer to go through ptrtoint into lane 1 of a single
; <2 x i64> store.
; CHECK-LABEL: @merge_store_i64_ptr64(
; CHECK: [[ELT1:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1]], i32 1
; CHECK: store <2 x i64>
define void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 {
entry:
  %a.elt1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %a.recast = bitcast i8 addrspace(1)* addrspace(1)* %a to i64 addrspace(1)*

  store i64 %val0, i64 addrspace(1)* %a.recast
  store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.elt1

  ret void
}

; i32 load at element 0 followed by a 32-bit pointer load at element 1.
; The checks expect one <2 x i32> load with lane 1 converted via inttoptr.
; CHECK-LABEL: @merge_load_i32_ptr32(
; CHECK: load <2 x i32>
; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i32> %{{[0-9]+}}, i32 1
; CHECK: inttoptr i32 [[ELT1]] to i8 addrspace(3)*
define void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 {
entry:
  %a.elt1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
  %a.elt1.recast = bitcast i32 addrspace(3)* %a.elt1 to i8 addrspace(3)* addrspace(3)*

  %ld.elt0 = load i32, i32 addrspace(3)* %a
  %ld.elt1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.elt1.recast

  ret void
}

; 32-bit pointer load at element 0 followed by an i32 load at element 1.
; The checks expect one <2 x i32> load with lane 0 converted via inttoptr.
; CHECK-LABEL: @merge_load_ptr32_i32(
; CHECK: load <2 x i32>
; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i32> %{{[0-9]+}}, i32 0
; CHECK: inttoptr i32 [[ELT0]] to i8 addrspace(3)*
define void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 {
entry:
  %a.recast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
  %a.elt1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1

  %ld.elt0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.recast
  %ld.elt1 = load i32, i32 addrspace(3)* %a.elt1

  ret void
}

; 32-bit pointer store at element 0 followed by an i32 store at element 1.
; The checks expect the pointer to go through ptrtoint into lane 0 of a single
; <2 x i32> store.
; CHECK-LABEL: @merge_store_ptr32_i32(
; CHECK: [[ELT0:%[0-9]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32
; CHECK: insertelement <2 x i32> undef, i32 [[ELT0]], i32 0
; CHECK: store <2 x i32>
define void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 {
entry:
  %a.recast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
  %a.elt1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1

  store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(3)* %a.recast
  store i32 %val1, i32 addrspace(3)* %a.elt1

  ret void
}

; i32 store at element 0 followed by a 32-bit pointer store at element 1.
; The checks expect the pointer to go through ptrtoint into lane 1 of a single
; <2 x i32> store.
; CHECK-LABEL: @merge_store_i32_ptr32(
; CHECK: [[ELT1:%[0-9]+]] = ptrtoint i8 addrspace(3)* %ptr1 to i32
; CHECK: insertelement <2 x i32> %{{[0-9]+}}, i32 [[ELT1]], i32 1
; CHECK: store <2 x i32>
define void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 {
entry:
  %a.elt1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i32 1
  %a.recast = bitcast i8 addrspace(3)* addrspace(3)* %a to i32 addrspace(3)*

  store i32 %val0, i32 addrspace(3)* %a.recast
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(3)* %a.elt1

  ret void
}

; Negative test: a 32-bit addrspace(3) pointer value is stored through the
; base address and an i64 is stored one i64 slot later. The element sizes
; differ, and the checks expect both scalar stores to remain.
; CHECK-LABEL: @no_merge_store_ptr32_i64(
; CHECK: store i8 addrspace(3)*
; CHECK: store i64
define void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 {
entry:
  %a.recast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
  %a.elt1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(1)* %a.recast
  store i64 %val1, i64 addrspace(1)* %a.elt1

  ret void
}

; Negative test: an i64 store through the base address followed by a store of
; a 32-bit addrspace(3) pointer value one pointer slot later. The checks expect
; both scalar stores to remain.
; CHECK-LABEL: @no_merge_store_i64_ptr32(
; CHECK: store i64
; CHECK: store i8 addrspace(3)*
define void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 {
entry:
  %a.elt1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a, i64 1
  %a.recast = bitcast i8 addrspace(3)* addrspace(1)* %a to i64 addrspace(1)*

  store i64 %val0, i64 addrspace(1)* %a.recast
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(1)* %a.elt1

  ret void
}

; Negative test: an i64 load followed by a load of a 32-bit addrspace(3)
; pointer value from the next i64 slot. The checks expect both scalar loads to
; remain.
; CHECK-LABEL: @no_merge_load_i64_ptr32(
; CHECK: load i64,
; CHECK: load i8 addrspace(3)*,
define void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 {
entry:
  %a.elt1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
  %a.elt1.recast = bitcast i64 addrspace(1)* %a.elt1 to i8 addrspace(3)* addrspace(1)*

  %ld.elt0 = load i64, i64 addrspace(1)* %a
  %ld.elt1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.elt1.recast

  ret void
}

; Negative test: a load of a 32-bit addrspace(3) pointer value from the base
; address followed by an i64 load from the next i64 slot. The checks expect
; both scalar loads to remain.
; CHECK-LABEL: @no_merge_load_ptr32_i64(
; CHECK: load i8 addrspace(3)*,
; CHECK: load i64,
define void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  %a.recast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
  %a.elt1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  %ld.elt0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.recast
  %ld.elt1 = load i64, i64 addrspace(1)* %a.elt1

  ret void
}

; XXX - Adjacent <2 x pointer> accesses are not merged into wider vectors; the
; reason has not been tracked down. The checks pin the current output: two
; separate vector loads and two separate vector stores.
; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
; CHECK: load <2 x i8 addrspace(1)*>
; CHECK: load <2 x i8 addrspace(1)*>
; CHECK: store <2 x i8 addrspace(1)*>
; CHECK: store <2 x i8 addrspace(1)*>
define void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 {
entry:
  %a.elt1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %a, i64 1
  %b.elt1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, i64 1

  %ld.elt0 = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, align 4
  %ld.elt1 = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b.elt1, align 4

  store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a, align 4
  store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a.elt1, align 4

  ret void
}
; 64-bit pointer load at element 0 followed by a double load at element 1.
; The checks expect one <2 x i64> load, with lane 0 converted via inttoptr and
; lane 1 converted via bitcast to double.
; CHECK-LABEL: @merge_load_ptr64_f64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
; CHECK: [[ELT0_INT:%[0-9]+]] = inttoptr i64 [[ELT0]] to i8 addrspace(1)*
; CHECK: [[ELT1_INT:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
; CHECK: bitcast i64 [[ELT1_INT]] to double
define void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 {
entry:
  %a.recast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.elt1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1

  %ld.elt0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.recast
  %ld.elt1 = load double, double addrspace(1)* %a.elt1

  ret void
}

; double load at element 0 followed by a 64-bit pointer load at element 1.
; The checks expect one <2 x i64> load, with lane 0 converted via bitcast to
; double and lane 1 converted via inttoptr.
; CHECK-LABEL: @merge_load_f64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
; CHECK: bitcast i64 [[ELT0]] to double
; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
define void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 {
entry:
  %a.elt1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
  %a.elt1.recast = bitcast double addrspace(1)* %a.elt1 to i8 addrspace(1)* addrspace(1)*

  %ld.elt0 = load double, double addrspace(1)* %a
  %ld.elt1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.elt1.recast

  ret void
}

; 64-bit pointer store at element 0 followed by a double store at element 1.
; The checks expect ptrtoint for lane 0 and a double-to-i64 bitcast for lane 1,
; feeding a single <2 x i64> store.
; CHECK-LABEL: @merge_store_ptr64_f64(
; CHECK: [[ELT0_INT:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[0-9]+]] = bitcast double %val1 to i64
; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 {
entry:
  %a.recast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.elt1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1

  store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.recast
  store double %val1, double addrspace(1)* %a.elt1

  ret void
}

; double store at element 0 followed by a 64-bit pointer store at element 1.
; The checks expect a double-to-i64 bitcast for lane 0 and ptrtoint for lane 1,
; feeding a single <2 x i64> store.
; CHECK-LABEL: @merge_store_f64_ptr64(
; CHECK: [[ELT0_INT:%[0-9]+]] = bitcast double %val0 to i64
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 {
entry:
  %a.elt1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %a.recast = bitcast i8 addrspace(1)* addrspace(1)* %a to double addrspace(1)*

  store double %val0, double addrspace(1)* %a.recast
  store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.elt1

  ret void
}

; Attribute groups referenced by the definitions (#0) and by the intrinsic
; declaration (#1) in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }