; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; Every function below builds a vector whose lanes all hold the same scalar and
; checks whether llc emits (or avoids) an AVX vbroadcast with a memory operand.
; Each function is anchored with a label directive on its symbol definition so
; that a pattern can only match inside the function it belongs to. The Darwin
; triple prefixes every symbol with '_', hence @A is matched as "_A:".

;;;; 256-bit versions

; Splat of a loaded i64 into <4 x i64>.
; CHECK-LABEL: _A:
; CHECK: vbroadcastsd (%
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i64* %ptr, align 8
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Splat of a loaded i32 into <8 x i32>; only lanes 0-3 are written, lanes 4-7
; stay undef.
; CHECK-LABEL: _B:
; CHECK: vbroadcastss (%
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i32* %ptr, align 4
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  ret <8 x i32> %vecinit6.i
}

; Splat of a loaded double into <4 x double>.
; CHECK-LABEL: _C:
; CHECK: vbroadcastsd (%
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load double* %ptr, align 8
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Splat of a loaded float into <8 x float>; only lanes 0-3 are written, lanes
; 4-7 stay undef.
; CHECK-LABEL: _D:
; CHECK: vbroadcastss (%
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load float* %ptr, align 4
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  ret <8 x float> %vecinit6.i
}

;;;; 128-bit versions

; Splat of a loaded float into <4 x float>.
; CHECK-LABEL: _e:
; CHECK: vbroadcastss (%
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ret <4 x float> %vecinit6.i
}


; Splat of a float constant (no load from %ptr): no vbroadcastss is expected.
; CHECK-LABEL: __e2:
; CHECK-NOT: vbroadcastss
; CHECK: ret
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}


; Splat of a loaded i32 into <4 x i32>.
; CHECK-LABEL: _F:
; CHECK: vbroadcastss (%
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i32* %ptr, align 4
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
  ret <4 x i32> %vecinit6.i
}

; Unsupported vbroadcasts

; Splat of a loaded i64 into <2 x i64>: no memory-operand broadcast expected.
; CHECK-LABEL: _G:
; CHECK-NOT: broadcast (%
; CHECK: ret
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i64* %ptr, align 8
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}

; Shuffle of a vector argument where only lane 0 of the result is defined:
; no broadcast of any form is expected.
; CHECK-LABEL: _H:
; CHECK-NOT: broadcast
; CHECK: ret
define <4 x i32> @H(<4 x i32> %a) {
  %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i32> %x
}

; Splat of a loaded double into <2 x double>: no memory-operand broadcast
; expected.
; CHECK-LABEL: _I:
; CHECK-NOT: broadcast (%
; CHECK: ret
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

; Float splat with an unrelated load/store pair in the same function; the
; broadcast from memory is still expected.
; CHECK-LABEL: __RR:
; CHECK: vbroadcastss (%
; CHECK: ret
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
entry:
  %q = load float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; force a chain
  %j = load i32* %k, align 4
  store i32 %j, i32* undef
  ret <4 x float> %vecinit6.i
}


; Float splat written as one insertelement followed by a zero-mask
; shufflevector instead of four insertelements.
; CHECK-LABEL: __RR2:
; CHECK: vbroadcastss (%
; CHECK: ret
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
entry:
  %q = load float* %ptr, align 4
  %v = insertelement <4 x float> undef, float %q, i32 0
  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %t
}


; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).
148 149; CHECK-LABEL: splat_concat1 150; CHECK-NOT: vinsertf128 151; CHECK: vbroadcastss (% 152; CHECK-NEXT: ret 153define <8 x float> @splat_concat1(float* %p) { 154 %1 = load float* %p, align 4 155 %2 = insertelement <4 x float> undef, float %1, i32 0 156 %3 = insertelement <4 x float> %2, float %1, i32 1 157 %4 = insertelement <4 x float> %3, float %1, i32 2 158 %5 = insertelement <4 x float> %4, float %1, i32 3 159 %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 160 ret <8 x float> %6 161} 162 163; CHECK-LABEL: splat_concat2 164; CHECK-NOT: vinsertf128 165; CHECK: vbroadcastss (% 166; CHECK-NEXT: ret 167define <8 x float> @splat_concat2(float* %p) { 168 %1 = load float* %p, align 4 169 %2 = insertelement <4 x float> undef, float %1, i32 0 170 %3 = insertelement <4 x float> %2, float %1, i32 1 171 %4 = insertelement <4 x float> %3, float %1, i32 2 172 %5 = insertelement <4 x float> %4, float %1, i32 3 173 %6 = insertelement <4 x float> undef, float %1, i32 0 174 %7 = insertelement <4 x float> %6, float %1, i32 1 175 %8 = insertelement <4 x float> %7, float %1, i32 2 176 %9 = insertelement <4 x float> %8, float %1, i32 3 177 %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 178 ret <8 x float> %10 179} 180 181; CHECK-LABEL: splat_concat3 182; CHECK-NOT: vinsertf128 183; CHECK: vbroadcastsd (% 184; CHECK-NEXT: ret 185define <4 x double> @splat_concat3(double* %p) { 186 %1 = load double* %p, align 8 187 %2 = insertelement <2 x double> undef, double %1, i32 0 188 %3 = insertelement <2 x double> %2, double %1, i32 1 189 %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 190 ret <4 x double> %4 191} 192 193; CHECK-LABEL: splat_concat4 194; CHECK-NOT: vinsertf128 195; CHECK: vbroadcastsd (% 196; CHECK-NEXT: ret 197define <4 x double> @splat_concat4(double* %p) { 198 %1 = load double* 
%p, align 8 199 %2 = insertelement <2 x double> undef, double %1, i32 0 200 %3 = insertelement <2 x double> %2, double %1, i32 1 201 %4 = insertelement <2 x double> undef, double %1, i32 0 202 %5 = insertelement <2 x double> %2, double %1, i32 1 203 %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 204 ret <4 x double> %6 205} 206 207