; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s

; Splat (broadcast) lowering tests for x86-64 with only +avx enabled (see the
; llc invocation above). Each function builds a vector whose lanes all hold the
; same element; the FileCheck directives inside the function pin the exact
; instruction sequence llc is expected to emit for it.

; Splat byte 5 of a <32 x i8>. Expected: splat within the low 128 bits using
; vpshufb, then copy that xmm into the upper half with vinsertf128.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; Splat 16-bit element 5 of a <16 x i16>. Element 5 occupies bytes 10 and 11,
; which is why the expected vpshufb mask repeats the byte pair 10,11.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; Build a <4 x i64> by inserting the same scalar %q into all four lanes.
; Expected: move the GPR into xmm0, duplicate it with vmovddup, then widen to
; 256 bits with vinsertf128.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovq %rdi, %xmm0
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Same four-lane build as funcC but with a double argument. The expected
; sequence starts directly with vmovddup on xmm0 (no GPR-to-xmm move).
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; The value is loaded as an i32 from element [1][1] of a stack-allocated
; [18 x [18 x float]] array and inserted into lanes 6 and 7 of an otherwise
; undef <8 x i32>. Expected: a single vbroadcastss straight from the stack
; slot, inside the realigned (andq $-32) frame set up for the 32-byte-aligned
; alloca.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK:       ## BB#0: ## %for_exit499
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    ## implicit-def: %YMM0
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    jne LBB4_2
; CHECK-NEXT:  ## BB#1: ## %load.i1247
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
; CHECK-NEXT:    retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; Same lane-6/lane-7 insert pattern as funcE, but the scalar comes from an
; integer argument instead of memory. Expected: vmovd into xmm0, an in-lane
; vpermilps, then vinsertf128 into the upper half (no memory broadcast).
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; Splat element 0 of a <8 x float> held in a register. Expected: vpermilps
; splat in the low 128 bits followed by vinsertf128.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Splat element 5 of a <8 x float>. Element 5 lives in the upper 128-bit half,
; so the expected code first extracts that half (vextractf128 $1) and then
; splats its element 1 before re-inserting.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}

; Splat element 1 of a loaded <2 x double>. Expected here: a full vector load
; followed by vmovhlps, rather than a folded broadcast-from-memory.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}

; Splat element 2 of a loaded <4 x double>. Expected: the load is folded into
; vbroadcastsd at byte offset 16 (element 2 * 8 bytes).
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}

; Splat element 0 of a loaded <4 x float>. Expected: the load is folded into a
; 128-bit vbroadcastss from offset 0.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}

; Splat element 7 of a loaded <8 x float>. Expected: the load is folded into a
; 256-bit vbroadcastss at byte offset 28 (element 7 * 4 bytes).
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}