; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Both invocations above run the scalarizer with load/store scalarization
; enabled and then DCE; the expectations below are shared by the two runs.

declare <4 x float> @ext(<4 x float>)
@g = global <4 x float> zeroinitializer

; Floating-point loop: the load, the bitcast/shufflevector pairs, the fadd,
; fcmp, select and store are all expected to be split per element.  The call
; to @ext stays a vector call, so its operand is rebuilt with insertelement
; and its result is taken apart again with extractelement.
define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK: entry:
; CHECK: %init.i0 = extractelement <4 x float> %init, i32 0
; CHECK: %init.i1 = extractelement <4 x float> %init, i32 1
; CHECK: %init.i2 = extractelement <4 x float> %init, i32 2
; CHECK: %init.i3 = extractelement <4 x float> %init, i32 3
; CHECK: br label %loop
; CHECK: loop:
; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK: %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK: %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK: %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK: %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK: %nexti = sub i32 %i, 1
; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
; CHECK: %add.i0 = fadd float %val.i0, %val.i2
; CHECK: %add.i1 = fadd float %val.i1, %val.i3
; CHECK: %add.i2 = fadd float %acc.i0, %acc.i2
; CHECK: %add.i3 = fadd float %acc.i1, %acc.i3
; CHECK: %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
; CHECK: %call = call <4 x float> @ext(<4 x float> %add)
; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.0
; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.0
; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.0
; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.0
; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
; CHECK: store float %sel.i0, float* %ptr.i0, align 16
; CHECK: store float %sel.i1, float* %ptr.i1, align 4
; CHECK: store float %sel.i2, float* %ptr.i2, align 8
; CHECK: store float %sel.i3, float* %ptr.i3, align 4
; CHECK: %test = icmp eq i32 %nexti, 0
; CHECK: br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK: ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  %val = load <4 x float> , <4 x float> *%ptr
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 0, i32 2>
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  %add = fadd <4 x float> %f1, %f2
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Integer loop: <4 x i8> load, sext, add, icmp, select against a splat of %i,
; trunc and store.  The splat (insertelement + shufflevector) is expected to
; fold away so that each scalar select uses %i directly.
define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
; CHECK: entry:
; CHECK: %init.i0 = extractelement <4 x i32> %init, i32 0
; CHECK: %init.i1 = extractelement <4 x i32> %init, i32 1
; CHECK: %init.i2 = extractelement <4 x i32> %init, i32 2
; CHECK: %init.i3 = extractelement <4 x i32> %init, i32 3
; CHECK: br label %loop
; CHECK: loop:
; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK: %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK: %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK: %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK: %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK: %nexti = sub i32 %i, 1
; CHECK: %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
; CHECK: %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
; CHECK: %val.i0 = load i8, i8* %ptr.i0, align 4
; CHECK: %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
; CHECK: %val.i1 = load i8, i8* %ptr.i1, align 1
; CHECK: %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
; CHECK: %val.i2 = load i8, i8* %ptr.i2, align 2
; CHECK: %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
; CHECK: %val.i3 = load i8, i8* %ptr.i3, align 1
; CHECK: %ext.i0 = sext i8 %val.i0 to i32
; CHECK: %ext.i1 = sext i8 %val.i1 to i32
; CHECK: %ext.i2 = sext i8 %val.i2 to i32
; CHECK: %ext.i3 = sext i8 %val.i3 to i32
; CHECK: %add.i0 = add i32 %ext.i0, %acc.i0
; CHECK: %add.i1 = add i32 %ext.i1, %acc.i1
; CHECK: %add.i2 = add i32 %ext.i2, %acc.i2
; CHECK: %add.i3 = add i32 %ext.i3, %acc.i3
; CHECK: %cmp.i0 = icmp slt i32 %add.i0, -10
; CHECK: %cmp.i1 = icmp slt i32 %add.i1, -11
; CHECK: %cmp.i2 = icmp slt i32 %add.i2, -12
; CHECK: %cmp.i3 = icmp slt i32 %add.i3, -13
; CHECK: %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
; CHECK: %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
; CHECK: %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
; CHECK: %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
; CHECK: %trunc.i0 = trunc i32 %sel.i0 to i8
; CHECK: %trunc.i1 = trunc i32 %sel.i1 to i8
; CHECK: %trunc.i2 = trunc i32 %sel.i2 to i8
; CHECK: %trunc.i3 = trunc i32 %sel.i3 to i8
; CHECK: store i8 %trunc.i0, i8* %ptr.i0, align 4
; CHECK: store i8 %trunc.i1, i8* %ptr.i1, align 1
; CHECK: store i8 %trunc.i2, i8* %ptr.i2, align 2
; CHECK: store i8 %trunc.i3, i8* %ptr.i3, align 1
; CHECK: %test = icmp eq i32 %nexti, 0
; CHECK: br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK: ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
  %val = load <4 x i8> , <4 x i8> *%ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, <4 x i8> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; The load is tagged !1 and the store !2, so the two groups of expectations
; capture their tag in separate FileCheck variables (TAG and TAG2).
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f3(
; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]]
; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]]
; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG2:[0-9]*]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG2]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG2]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG2]]
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f4(
; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
  ret void
}

; Check that llvm.access.group information is preserved.
; The vector load and store in the loop both carry access group !13, and the
; scalarized loads and stores are all expected to carry one common tag.
define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f5(
; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.access.group ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.access.group ![[TAG]]
; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.access.group ![[TAG]]
; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.access.group ![[TAG]]
; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.access.group ![[TAG]]
; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.access.group ![[TAG]]
; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.access.group ![[TAG]]
; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.access.group ![[TAG]]
; CHECK: ret void
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
  %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
  %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.access.group !13
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.access.group !13
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; Every scalar fadd is expected to carry the same !fpmath tag; the result is
; reassembled with insertelement because the function returns the vector.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
; CHECK: ret <4 x float> %res
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
    !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
; The unknown !foo kind on the vector load and store must not appear anywhere
; in the function's output.
define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f7(
; CHECK-NOT: !foo
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
  ret void
}

; Test GEP with vectors.
; Lane 1 of the pointer vector is overwritten with %other, and lanes 0 and 2
; of the index vector are overwritten with the constant 100.  The scalar GEPs
; are expected to use those values directly, so no extractelement is expected
; for %ptr0 lane 1 or for %i0 lanes 0 and 2.
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
                float *%other) {
; CHECK-LABEL: @f8(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
  %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test the handling of unaligned loads.
; The vector load is only 4-byte aligned and the vector store 8-byte aligned.
; Expected element alignments: 4 for every load, and 8/4/8/4 for the stores.
define void @f9(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f9(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float, float* %src.i0, align 4
; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
; CHECK: %val.i1 = load float, float* %src.i1, align 4
; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
; CHECK: %val.i2 = load float, float* %src.i2, align 4
; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
; CHECK: %val.i3 = load float, float* %src.i3, align 4
; CHECK: store float %val.i0, float* %dest.i0, align 8
; CHECK: store float %val.i1, float* %dest.i1, align 4
; CHECK: store float %val.i2, float* %dest.i2, align 8
; CHECK: store float %val.i3, float* %dest.i3, align 4
; CHECK: ret void
  %val = load <4 x float> , <4 x float> *%src, align 4
  store <4 x float> %val, <4 x float> *%dest, align 8
  ret void
}

; ...and again with subelement alignment.
; The original alignments (1 for the load, 2 for the store) are smaller than
; the element size, so every scalar access is expected to keep that alignment.
define void @f10(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f10(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float, float* %src.i0, align 1
; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
; CHECK: %val.i1 = load float, float* %src.i1, align 1
; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
; CHECK: %val.i2 = load float, float* %src.i2, align 1
; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
; CHECK: %val.i3 = load float, float* %src.i3, align 1
; CHECK: store float %val.i0, float* %dest.i0, align 2
; CHECK: store float %val.i1, float* %dest.i1, align 2
; CHECK: store float %val.i2, float* %dest.i2, align 2
; CHECK: store float %val.i3, float* %dest.i3, align 2
; CHECK: ret void
  %val = load <4 x float> , <4 x float> *%src, align 1
  store <4 x float> %val, <4 x float> *%dest, align 2
  ret void
}

; Test that sub-byte loads aren't scalarized.
; The <32 x i1> loads and the store are expected to remain vector operations.
define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
; CHECK-LABEL: @f11(
; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
; CHECK: store <32 x i1> %and, <32 x i1>* %dest
; CHECK: ret void
  %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
  %val0 = load <32 x i1> , <32 x i1> *%src0
  %val1 = load <32 x i1> , <32 x i1> *%src1
  %and = and <32 x i1> %val0, %val1
  store <32 x i1> %and, <32 x i1> *%dest
  ret void
}

; Test vector GEPs with more than one index.
; The first index is the constant vector <0, 1, 2, 3> and the second is %i.
; Each scalar GEP is expected to keep "inbounds", take its lane's constant as
; the first index and the extracted lane of %i as the second.
define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
                 float *%other) {
; CHECK-LABEL: @f13(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0
; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1
; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2
; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
                                <4 x i32> %i
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test combinations of vector and non-vector PHIs.
; The vector phi is expected to become four scalar phis placed ahead of the
; untouched i32 phi; the vector value is then rebuilt with insertelement
; because it is passed to the vector call @ext.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-LABEL: @f14(
; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
; CHECK: ret <4 x float> %next_acc
entry:
  br label %loop

loop:
  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  %foo = call <4 x float> @ext(<4 x float> %this_acc)
  %next_acc = fadd <4 x float> %this_acc, %foo
  %next_count = sub i32 %this_count, 1
  %cmp = icmp eq i32 %next_count, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret <4 x float> %next_acc
}

; Test unary operator scalarization.
; Same loop shape as @f1, but the loaded vector goes through a single fneg,
; which is expected to be split into four scalar fneg instructions before the
; vector is rebuilt for the call to @ext.
define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f15(
; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
; CHECK: %neg.i0 = fneg float %val.i0
; CHECK: %neg.i1 = fneg float %val.i1
; CHECK: %neg.i2 = fneg float %val.i2
; CHECK: %neg.i3 = fneg float %val.i3
; CHECK: %neg.upto0 = insertelement <4 x float> undef, float %neg.i0, i32 0
; CHECK: %neg.upto1 = insertelement <4 x float> %neg.upto0, float %neg.i1, i32 1
; CHECK: %neg.upto2 = insertelement <4 x float> %neg.upto1, float %neg.i2, i32 2
; CHECK: %neg = insertelement <4 x float> %neg.upto2, float %neg.i3, i32 3
; CHECK: %call = call <4 x float> @ext(<4 x float> %neg)
; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.000000e+00
; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.000000e+00
; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.000000e+00
; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.000000e+00
; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.000000e+00
; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.000000e+00
; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.000000e+00
; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.000000e+00
; CHECK: store float %sel.i0, float* %ptr.i0, align 16
; CHECK: store float %sel.i1, float* %ptr.i1, align 4
; CHECK: store float %sel.i2, float* %ptr.i2, align 8
; CHECK: store float %sel.i3, float* %ptr.i3, align 4
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  %val = load <4 x float> , <4 x float> *%ptr
  %neg = fneg <4 x float> %val
  %call = call <4 x float> @ext(<4 x float> %neg)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that IR flags are preserved.
; Integer wrap flags (nuw/nsw) on add.
define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
; CHECK-LABEL: @f16(
; CHECK: %res.i0 = add nuw nsw i32
; CHECK: %res.i1 = add nuw nsw i32
  %res = add nuw nsw <2 x i32> %i, %j
  ret <2 x i32> %res
}

; The exact flag on sdiv.
define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
; CHECK-LABEL: @f17(
; CHECK: %res.i0 = sdiv exact i32
; CHECK: %res.i1 = sdiv exact i32
  %res = sdiv exact <2 x i32> %i, %j
  ret <2 x i32> %res
}

; Fast-math flags on a binary floating-point operator.
define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @f18(
; CHECK: %res.i0 = fadd fast float
; CHECK: %res.i1 = fadd fast float
  %res = fadd fast <2 x float> %x, %y
  ret <2 x float> %res
}

; Fast-math flags on a unary floating-point operator.
define <2 x float> @f19(<2 x float> %x) {
; CHECK-LABEL: @f19(
; CHECK: %res.i0 = fneg fast float
; CHECK: %res.i1 = fneg fast float
  %res = fneg fast <2 x float> %x
  ret <2 x float> %res
}

; Fast-math flags on a floating-point comparison.
define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @f20(
; CHECK: %res.i0 = fcmp fast ogt float
; CHECK: %res.i1 = fcmp fast ogt float
  %res = fcmp fast ogt <2 x float> %x, %y
  ret <2 x i1> %res
}

; Fast-math flags on a one-operand intrinsic call.
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
define <2 x float> @f21(<2 x float> %x) {
; CHECK-LABEL: @f21(
; CHECK: %res.i0 = call fast float @llvm.sqrt.f32
; CHECK: %res.i1 = call fast float @llvm.sqrt.f32
  %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %res
}

; Fast-math flags on a three-operand intrinsic call.
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
; CHECK-LABEL: @f22(
; CHECK: %res.i0 = call fast float @llvm.fma.f32
; CHECK: %res.i1 = call fast float @llvm.fma.f32
  %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
  ret <2 x float> %res
}

; See https://reviews.llvm.org/D83101#2133062
; An extractelement feeding a chain of insertelements; the expected output
; still builds the returned vector from %v0 and %v1.
define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
; CHECK-LABEL: @f23_crash(
; CHECK: %v0 = extractelement <2 x i32> %srcvec, i32 0
; CHECK: %t1.upto0 = insertelement <2 x i32> undef, i32 %v0, i32 0
; CHECK: %t1 = insertelement <2 x i32> %t1.upto0, i32 %v1, i32 1
; CHECK: ret <2 x i32> %t1
  %v0 = extractelement <2 x i32> %srcvec, i32 0
  %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1
  ret <2 x i32> %t1
}

; Metadata referenced by the functions in this file:
;   !0, !1, !2  - a root node and the two !tbaa tags used by @f3
;   !3          - loop metadata on the branch in @f5, naming access group !13
;   !4          - the !fpmath operand used by @f6
;   !5          - attached as !tbaa.struct in @f4 and as !foo in @f7
;   !13         - the access group attached to the load and store in @f5
!0 = !{ !"root" }
!1 = !{ !"set1", !0 }
!2 = !{ !"set2", !0 }
!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
!4 = !{ float 4.0 }
!5 = !{ i64 0, i64 8, null }
!13 = distinct !{}