target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s

; Regression test for the basic-block vectorizer on intrinsic calls.
; The run line above pipes this file through opt with -bb-vectorize (required
; chain depth 3), then -instcombine and -gvn, and matches the textual output
; against the FileCheck directives below.
;
; Each function builds two parallel scalar chains of the shape
;   fsub -> intrinsic call -> fadd
; and multiplies the two chain results together. The expected value names in
; the directives (for example %X1.v.i1.1 or %Z1.v.r1) are derived from the
; scalar names used here, so the instruction names and their order must not be
; changed.

; Scalar intrinsics exercised by the tests.
declare double @llvm.fma.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)

; Basic depth-3 chain with fma
; Expected output: the B, A and C operands are packed into <2 x double> values
; with insertelement, the chain runs once as vector fsub / llvm.fma.v2f64 /
; fadd (the fadd reuses the packed B vector), and both lanes are extracted
; again for the final scalar fmul.
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
  %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK: @test1
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with cos
; Same shape as @test1, but the middle link is the one-operand cos intrinsic,
; so only the A and B operands need to be packed; the expected call is
; llvm.cos.v2f64 on the vector fsub result.
define double @test2(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.cos.f64(double %X1)
  %Y2 = call double @llvm.cos.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK: @test2
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with powi
; Both powi calls use the same i32 exponent %P. The expected vector call keeps
; that exponent as a single scalar i32 argument of llvm.powi.v2f64.
define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {

  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
  %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK: @test3
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with powi (different powers: should not vectorize)
; The second powi call uses %P2 = %P + 1 instead of %P, so the two calls do not
; share an exponent. The negative directive below requires that no
; <2 x double> text appears between the function name and its return.
define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {

  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %P2 = add i32 %P, 1
  %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
  %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK: @test4
; CHECK-NOT: <2 x double>
; CHECK: ret double %R
}

; The output is also expected to declare the <2 x double> variants of the
; three intrinsics, in this order and with the attributes spelled out below.
; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
