; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck --check-prefixes=CHECK %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux"

%"struct.std::complex" = type { { float, float } }

; An i64 store to %agg.result is followed by two float stores to fields 0 and 1
; of the same struct, which together rewrite all 8 bytes. The expected output
; contains neither the i64 store nor the bitcast that fed it.
define void @_Z4testSt7complexIfE(%"struct.std::complex"* noalias nocapture sret(%"struct.std::complex") %agg.result, i64 %c.coerce) {
; CHECK-LABEL: @_Z4testSt7complexIfE(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[REF_TMP:%.*]] = alloca i64, align 8
; CHECK-NEXT:    [[TMPCAST:%.*]] = bitcast i64* [[REF_TMP]] to %"struct.std::complex"*
; CHECK-NEXT:    [[C_SROA_0_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[C_COERCE:%.*]], 32
; CHECK-NEXT:    [[C_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[C_SROA_0_0_EXTRACT_SHIFT]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[C_SROA_0_0_EXTRACT_TRUNC]] to float
; CHECK-NEXT:    [[C_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[C_COERCE]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[C_SROA_2_0_EXTRACT_TRUNC]] to float
; CHECK-NEXT:    call void @_Z3barSt7complexIfE(%"struct.std::complex"* nonnull sret(%"struct.std::complex") [[TMPCAST]], i64 [[C_COERCE]])
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[REF_TMP]], align 8
; CHECK-NEXT:    [[_M_VALUE_REALP_I_I:%.*]] = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* [[AGG_RESULT:%.*]], i64 0, i32 0, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP2]], 32
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT:    [[_M_VALUE_IMAGP_I_I:%.*]] = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* [[AGG_RESULT]], i64 0, i32 0, i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float
; CHECK-NEXT:    [[MUL_AD_I_I:%.*]] = fmul fast float [[TMP5]], [[TMP1]]
; CHECK-NEXT:    [[MUL_BC_I_I:%.*]] = fmul fast float [[TMP7]], [[TMP0]]
; CHECK-NEXT:    [[MUL_I_I_I:%.*]] = fadd fast float [[MUL_AD_I_I]], [[MUL_BC_I_I]]
; CHECK-NEXT:    [[MUL_AC_I_I:%.*]] = fmul fast float [[TMP5]], [[TMP0]]
; CHECK-NEXT:    [[MUL_BD_I_I:%.*]] = fmul fast float [[TMP7]], [[TMP1]]
; CHECK-NEXT:    [[MUL_R_I_I:%.*]] = fsub fast float [[MUL_AC_I_I]], [[MUL_BD_I_I]]
; CHECK-NEXT:    store float [[MUL_R_I_I]], float* [[_M_VALUE_REALP_I_I]], align 4
; CHECK-NEXT:    store float [[MUL_I_I_I]], float* [[_M_VALUE_IMAGP_I_I]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %ref.tmp = alloca i64, align 8
  %tmpcast = bitcast i64* %ref.tmp to %"struct.std::complex"*
  %c.sroa.0.0.extract.shift = lshr i64 %c.coerce, 32
  %c.sroa.0.0.extract.trunc = trunc i64 %c.sroa.0.0.extract.shift to i32
  %0 = bitcast i32 %c.sroa.0.0.extract.trunc to float
  %c.sroa.2.0.extract.trunc = trunc i64 %c.coerce to i32
  %1 = bitcast i32 %c.sroa.2.0.extract.trunc to float
  call void @_Z3barSt7complexIfE(%"struct.std::complex"* nonnull sret(%"struct.std::complex") %tmpcast, i64 %c.coerce)
  %2 = bitcast %"struct.std::complex"* %agg.result to i64*
  %3 = load i64, i64* %ref.tmp, align 8
  store i64 %3, i64* %2, align 4

  %_M_value.realp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 0
  %4 = lshr i64 %3, 32
  %5 = trunc i64 %4 to i32
  %6 = bitcast i32 %5 to float
  %_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 1
  %7 = trunc i64 %3 to i32
  %8 = bitcast i32 %7 to float
  %mul_ad.i.i = fmul fast float %6, %1
  %mul_bc.i.i = fmul fast float %8, %0
  %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
  %mul_ac.i.i = fmul fast float %6, %0
  %mul_bd.i.i = fmul fast float %8, %1
  %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
  store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
  store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
  ret void
}

declare void @_Z3barSt7complexIfE(%"struct.std::complex"* sret(%"struct.std::complex"), i64)

; Mixed-width stores into one i32 slot: i32 at bytes 0-3, i8 at byte 0, i16 at
; bytes 0-1, i8 at byte 2, i8 at byte 3 and an i16 at bytes 1-2.
; Expected output keeps only the i16 at byte 0, the i8 at byte 3 and the i16 at
; byte 1; the i32 store and the i8 stores at bytes 0 and 2 are gone.
define void @test1(i32 *%ptr) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i8*
; CHECK-NEXT:    [[WPTR:%.*]] = bitcast i32* [[PTR]] to i16*
; CHECK-NEXT:    store i16 -30062, i16* [[WPTR]], align 2
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
; CHECK-NEXT:    store i8 47, i8* [[BPTR3]], align 1
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1
; CHECK-NEXT:    [[WPTRP:%.*]] = bitcast i8* [[BPTR1]] to i16*
; CHECK-NEXT:    store i16 2020, i16* [[WPTRP]], align 1
; CHECK-NEXT:    ret void
;
entry:
  store i32 5, i32* %ptr
  %bptr = bitcast i32* %ptr to i8*
  store i8 7, i8* %bptr
  %wptr = bitcast i32* %ptr to i16*
  store i16 -30062, i16* %wptr
  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
  store i8 25, i8* %bptr2
  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
  store i8 47, i8* %bptr3
  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
  %wptrp = bitcast i8* %bptr1 to i16*
  store i16 2020, i16* %wptrp, align 1
  ret void
}

; An i32 store followed by five unaligned i16 stores at byte offsets -1, 0, 1,
; 2 and 3 relative to the same pointer.
; Expected output drops the i32 store and keeps all five i16 stores.
define void @test2(i32 *%ptr) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i8*
; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 2
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
; CHECK-NEXT:    [[WPTR:%.*]] = bitcast i8* [[BPTR]] to i16*
; CHECK-NEXT:    [[WPTRM1:%.*]] = bitcast i8* [[BPTRM1]] to i16*
; CHECK-NEXT:    [[WPTR1:%.*]] = bitcast i8* [[BPTR1]] to i16*
; CHECK-NEXT:    [[WPTR2:%.*]] = bitcast i8* [[BPTR2]] to i16*
; CHECK-NEXT:    [[WPTR3:%.*]] = bitcast i8* [[BPTR3]] to i16*
; CHECK-NEXT:    store i16 1456, i16* [[WPTRM1]], align 1
; CHECK-NEXT:    store i16 1346, i16* [[WPTR]], align 1
; CHECK-NEXT:    store i16 1756, i16* [[WPTR1]], align 1
; CHECK-NEXT:    store i16 1126, i16* [[WPTR2]], align 1
; CHECK-NEXT:    store i16 5656, i16* [[WPTR3]], align 1
; CHECK-NEXT:    ret void
;
entry:
  store i32 5, i32* %ptr

  %bptr = bitcast i32* %ptr to i8*
  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3

  %wptr = bitcast i8* %bptr to i16*
  %wptrm1 = bitcast i8* %bptrm1 to i16*
  %wptr1 = bitcast i8* %bptr1 to i16*
  %wptr2 = bitcast i8* %bptr2 to i16*
  %wptr3 = bitcast i8* %bptr3 to i16*

  store i16 1456, i16* %wptrm1, align 1
  store i16 1346, i16* %wptr, align 1
  store i16 1756, i16* %wptr1, align 1
  store i16 1126, i16* %wptr2, align 1
  store i16 5656, i16* %wptr3, align 1

  ret void
}

; Same store pattern as @test2, but byte 0 is loaded between the i32 store and
; the i16 stores and the loaded value is returned.
; Expected output keeps the i32 store because the load reads it.
define signext i8 @test3(i32 *%ptr) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 5, i32* [[PTR:%.*]], align 4
; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR]] to i8*
; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 2
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
; CHECK-NEXT:    [[WPTR:%.*]] = bitcast i8* [[BPTR]] to i16*
; CHECK-NEXT:    [[WPTRM1:%.*]] = bitcast i8* [[BPTRM1]] to i16*
; CHECK-NEXT:    [[WPTR1:%.*]] = bitcast i8* [[BPTR1]] to i16*
; CHECK-NEXT:    [[WPTR2:%.*]] = bitcast i8* [[BPTR2]] to i16*
; CHECK-NEXT:    [[WPTR3:%.*]] = bitcast i8* [[BPTR3]] to i16*
; CHECK-NEXT:    [[V:%.*]] = load i8, i8* [[BPTR]], align 1
; CHECK-NEXT:    store i16 1456, i16* [[WPTRM1]], align 1
; CHECK-NEXT:    store i16 1346, i16* [[WPTR]], align 1
; CHECK-NEXT:    store i16 1756, i16* [[WPTR1]], align 1
; CHECK-NEXT:    store i16 1126, i16* [[WPTR2]], align 1
; CHECK-NEXT:    store i16 5656, i16* [[WPTR3]], align 1
; CHECK-NEXT:    ret i8 [[V]]
;
entry:
  store i32 5, i32* %ptr

  %bptr = bitcast i32* %ptr to i8*
  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3

  %wptr = bitcast i8* %bptr to i16*
  %wptrm1 = bitcast i8* %bptrm1 to i16*
  %wptr1 = bitcast i8* %bptr1 to i16*
  %wptr2 = bitcast i8* %bptr2 to i16*
  %wptr3 = bitcast i8* %bptr3 to i16*

  %v = load i8, i8* %bptr, align 1
  store i16 1456, i16* %wptrm1, align 1
  store i16 1346, i16* %wptr, align 1
  store i16 1756, i16* %wptr1, align 1
  store i16 1126, i16* %wptr2, align 1
  store i16 5656, i16* %wptr3, align 1

  ret i8 %v
}

%struct.foostruct = type {
i32 (i8*, i8**, i32, i8, i8*)*,
i32 (i8*, i8**, i32, i8, i8*)*,
i32 (i8*, i8**, i32, i8, i8*)*,
i32 (i8*, i8**, i32, i8, i8*)*,
void (i8*, i32, i32)*
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
declare void @goFunc(%struct.foostruct*)
declare i32 @fa(i8*, i8**, i32, i8, i8*)

; A 40-byte memset of a stack %struct.foostruct is followed by one store to
; each of its five pointer fields before the struct escapes to @goFunc.
; Expected output contains neither the memset nor the bitcast feeding it, i.e.
; the field stores are treated as overwriting the whole memset.
; NOTE(review): an earlier comment here said this case was missed because of
; the partial-overlap analysis limit; that no longer matches the assertions.
define void @test4() {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BANG:%.*]] = alloca [[STRUCT_FOOSTRUCT:%.*]], align 8
; CHECK-NEXT:    [[V2:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 0
; CHECK-NEXT:    store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** [[V2]], align 8
; CHECK-NEXT:    [[V3:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 1
; CHECK-NEXT:    store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** [[V3]], align 8
; CHECK-NEXT:    [[V4:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 2
; CHECK-NEXT:    store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** [[V4]], align 8
; CHECK-NEXT:    [[V5:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 3
; CHECK-NEXT:    store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** [[V5]], align 8
; CHECK-NEXT:    [[V6:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 4
; CHECK-NEXT:    store void (i8*, i32, i32)* null, void (i8*, i32, i32)** [[V6]], align 8
; CHECK-NEXT:    call void @goFunc(%struct.foostruct* [[BANG]])
; CHECK-NEXT:    ret void
;
entry:
  %bang = alloca %struct.foostruct, align 8
  %v1 = bitcast %struct.foostruct* %bang to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %v1, i8 0, i64 40, i1 false)
  %v2 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 0
  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v2, align 8
  %v3 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 1
  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v3, align 8
  %v4 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 2
  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v4, align 8
  %v5 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 3
  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v5, align 8
  %v6 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 4
  store void (i8*, i32, i32)* null, void (i8*, i32, i32)** %v6, align 8
  call void @goFunc(%struct.foostruct* %bang)
  ret void
}

; An i32 store of 0 followed by three unaligned, overlapping i16 stores at byte
; offsets 2, 1 and 0 (issued in that order).
; Expected output drops the i32 store; the i16 constant 65535 is printed as -1.
define signext i8 @test5(i32 *%ptr) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i8*
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 2
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
; CHECK-NEXT:    [[WPTR:%.*]] = bitcast i8* [[BPTR]] to i16*
; CHECK-NEXT:    [[WPTR1:%.*]] = bitcast i8* [[BPTR1]] to i16*
; CHECK-NEXT:    [[WPTR2:%.*]] = bitcast i8* [[BPTR2]] to i16*
; CHECK-NEXT:    store i16 -1, i16* [[WPTR2]], align 1
; CHECK-NEXT:    store i16 1456, i16* [[WPTR1]], align 1
; CHECK-NEXT:    store i16 1346, i16* [[WPTR]], align 1
; CHECK-NEXT:    ret i8 0
;
entry:
  store i32 0, i32* %ptr

  %bptr = bitcast i32* %ptr to i8*
  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3

  %wptr = bitcast i8* %bptr to i16*
  %wptr1 = bitcast i8* %bptr1 to i16*
  %wptr2 = bitcast i8* %bptr2 to i16*

  store i16 65535, i16* %wptr2, align 1
  store i16 1456, i16* %wptr1, align 1
  store i16 1346, i16* %wptr, align 1

  ret i8 0
}

; An i32 store of 0 followed by two i16 stores to i16 elements 1 and 0, which
; together rewrite all four bytes.
; Expected output drops the i32 store.
define signext i8 @test6(i32 *%ptr) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i16*
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 0
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 1
; CHECK-NEXT:    store i16 1456, i16* [[BPTR2]], align 1
; CHECK-NEXT:    store i16 -1, i16* [[BPTR1]], align 1
; CHECK-NEXT:    ret i8 0
;
entry:
  store i32 0, i32* %ptr

  %bptr = bitcast i32* %ptr to i16*
  %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0
  %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1

  store i16 1456, i16* %bptr2, align 1
  store i16 65535, i16* %bptr1, align 1

  ret i8 0
}

; An i64 store of 0 followed by four i16 stores to i16 elements 0, 2, 1 and 3
; (in that order), which together rewrite all eight bytes.
; Expected output drops the i64 store.
define signext i8 @test7(i64 *%ptr) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i16*
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 0
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 1
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 2
; CHECK-NEXT:    [[BPTR4:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 3
; CHECK-NEXT:    store i16 1346, i16* [[BPTR1]], align 1
; CHECK-NEXT:    store i16 1756, i16* [[BPTR3]], align 1
; CHECK-NEXT:    store i16 1456, i16* [[BPTR2]], align 1
; CHECK-NEXT:    store i16 5656, i16* [[BPTR4]], align 1
; CHECK-NEXT:    ret i8 0
;
entry:
  store i64 0, i64* %ptr

  %bptr = bitcast i64* %ptr to i16*
  %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0
  %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1
  %bptr3 = getelementptr inbounds i16, i16* %bptr, i64 2
  %bptr4 = getelementptr inbounds i16, i16* %bptr, i64 3

  store i16 1346, i16* %bptr1, align 1
  store i16 1756, i16* %bptr3, align 1
  store i16 1456, i16* %bptr2, align 1
  store i16 5656, i16* %bptr4, align 1

  ret i8 0
}