; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; We used to crash on this example because we were building a constant
; expression during vectorization and the vectorizer expects instructions
; as elements of the vectorized tree.
; PR19621

define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb279:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> undef, float undef, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float undef, i32 1
; CHECK-NEXT: br label [[BB283:%.*]]
; CHECK: bb283:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP13:%.*]], [[EXIT:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP1]], [[EXIT]] ]
; CHECK-NEXT: br label [[BB284:%.*]]
; CHECK: bb284:
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], undef
; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP5]], undef
; CHECK-NEXT: br label [[BB21_I:%.*]]
; CHECK: bb21.i:
; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
; CHECK: bb22.i:
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> undef, [[TMP6]]
; CHECK-NEXT: br label [[BB32_I:%.*]]
; CHECK: bb32.i:
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ [[TMP7]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]]
; CHECK: exit:
; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double>
; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], <double undef, double 0.000000e+00>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], undef
; CHECK-NEXT: [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float>
; CHECK-NEXT: br label [[BB283]]
;
bb279:
  br label %bb283

bb283:
  ; Four scalar float phis fed from %exit; the assertions above expect them to
  ; be combined into two <2 x float> phis.
  %Av.sroa.8.0 = phi float [ undef, %bb279 ], [ %tmp315, %exit ]
  %Av.sroa.5.0 = phi float [ undef, %bb279 ], [ %tmp319, %exit ]
  %Av.sroa.3.0 = phi float [ undef, %bb279 ], [ %tmp307, %exit ]
  %Av.sroa.0.0 = phi float [ undef, %bb279 ], [ %tmp317, %exit ]
  br label %bb284

bb284:
  %tmp7.i = fpext float %Av.sroa.3.0 to double
  %tmp8.i = fsub double %tmp7.i, undef
  %tmp9.i = fsub double %tmp8.i, undef
  %tmp17.i = fpext float %Av.sroa.8.0 to double
  %tmp19.i = fsub double %tmp17.i, undef
  %tmp20.i = fsub double %tmp19.i, undef
  br label %bb21.i

bb21.i:
  br i1 undef, label %bb22.i, label %exit

bb22.i:
  %tmp24.i = fadd double undef, %tmp9.i
  %tmp26.i = fadd double undef, %tmp20.i
  br label %bb32.i

bb32.i:
  %xs.0.i = phi double [ %tmp24.i, %bb22.i ], [ 0.000000e+00, %bb32.i ]
  %ys.0.i = phi double [ %tmp26.i, %bb22.i ], [ 0.000000e+00, %bb32.i ]
  br i1 undef, label %bb32.i, label %bb21.i

exit:
  %tmp303 = fpext float %Av.sroa.0.0 to double
  %tmp304 = fmul double %tmp303, undef
  %tmp305 = fadd double undef, %tmp304
  %tmp306 = fadd double %tmp305, undef
  %tmp307 = fptrunc double %tmp306 to float
  %tmp311 = fpext float %Av.sroa.5.0 to double
  %tmp312 = fmul double %tmp311, 0.000000e+00
  %tmp313 = fadd double undef, %tmp312
  %tmp314 = fadd double %tmp313, undef
  %tmp315 = fptrunc double %tmp314 to float
  ; %tmp317 and %tmp319 have only a constant (undef) operand.
  ; NOTE(review): presumably these are the values that used to be turned into
  ; constant expressions in PR19621 - confirm against the bug report.
  %tmp317 = fptrunc double undef to float
  %tmp319 = fptrunc double undef to float
  br label %bb283
}

; Make sure that we properly handle constant folded vectorized trees. The
; vectorizer starts at the tuple (%t2, %t3) and will constant fold the tree.
; The code that handles insertelement instructions must handle this.
define <4 x double> @constant_folding() {
; CHECK-LABEL: @constant_folding(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
; CHECK-NEXT: ret <4 x double> [[I2]]
;
entry:
  ; Every scalar operation below has only constant inputs (directly or through
  ; %t0/%t1). The expected output above contains no fadd/fmul at all: only the
  ; two insertelement instructions remain, fed by the constants 1.0 and 2.0.
  %t0 = fadd double 1.000000e+00 , 0.000000e+00
  %t1 = fadd double 1.000000e+00 , 1.000000e+00
  %t2 = fmul double %t0, 1.000000e+00
  ; %t2 is inserted at lane 1 and %t3 at lane 0 of the <4 x double> result.
  %i1 = insertelement <4 x double> undef, double %t2, i32 1
  %t3 = fmul double %t1, 1.000000e+00
  %i2 = insertelement <4 x double> %i1, double %t3, i32 0
  ret <4 x double> %i2
}