; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
;
; Splat (broadcast) lowering tests for x86-64 with only +avx requested on the
; llc command line. Each function builds a vector whose lanes all hold the same
; element, either via shufflevector or via a chain of insertelement, and the
; assertions pin the exact instruction sequence llc is expected to emit.

; Splat byte element 5 of a <32 x i8> vector. Expected lowering: a 128-bit
; vpshufb whose result is copied into the upper half with vinsertf128.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; Splat 16-bit element 5 of a <16 x i16> vector. Expected lowering: vpshufhw
; then vpshufd on the low 128 bits, then vinsertf128 into the upper half.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; Build a <4 x i64> by inserting the scalar argument %q into all four lanes.
; Expected lowering: vmovq from the GPR, vpshufd to duplicate the 64-bit value,
; then vinsertf128 into the upper half.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovq %rdi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Same construction as funcC but with a double argument. Expected lowering:
; vmovddup on xmm0 followed by vinsertf128 into the upper half.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test this turns into a broadcast:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; The scalar is loaded from element [1][1] of a 32-byte-aligned stack array and
; inserted only into lanes 6 and 7 of an otherwise-undef <8 x i32>; the
; expected code is a single vbroadcastss from a stack slot. Both conditional
; branches test undef, so the control flow only serves to shape the input.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK: ## BB#0: ## %for_exit499
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB4_2
; CHECK-NEXT: ## BB#1: ## %load.i1247
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
; CHECK-NEXT: retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader: ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499: ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247: ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; Register-sourced counterpart of funcE: the i32 argument is inserted into
; lanes 6 and 7 of an otherwise-undef <8 x i32> and bitcast to <8 x float>.
; Expected lowering: vmovd from the GPR, vpshufd, then vinsertf128.
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; Splat element 0 of an <8 x float> vector. Expected lowering: vpermilps on the
; low 128 bits followed by vinsertf128 into the upper half.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Splat element 5 of an <8 x float> vector, i.e. an element that sits in the
; upper 128-bit half. Expected lowering: a 256-bit vpermilps followed by
; vperm2f128.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}

; Splat element 1 of a <2 x double> loaded from memory. Expected lowering: a
; single vmovddup that takes its source directly from memory.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}

; Splat element 2 of a <4 x double> loaded from memory. Expected lowering: one
; vbroadcastsd reading from byte offset 16 (element 2 of 8-byte doubles).
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}

; Splat element 0 of a <4 x float> loaded from memory. Expected lowering: one
; 128-bit vbroadcastss reading directly from the pointer argument.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
; CHECK-NEXT: retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}

; Splat element 7 of an <8 x float> loaded from memory. Expected lowering: one
; 256-bit vbroadcastss reading from byte offset 28 (element 7 of 4-byte floats).
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT: retq
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}