; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s

; funcA: splat byte element 5 of a <32 x i8> vector across all 32 lanes.
; The expected code replicates byte 5 within the low 128 bits using a single
; vpshufb, then copies that 128-bit result into the upper half of ymm0 with
; vinsertf128.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; funcB: splat i16 element 5 of a <16 x i16> vector across all 16 lanes.
; The expected vpshufb mask repeats byte indices 10 and 11, i.e. the two bytes
; of 16-bit element 5; vinsertf128 then copies the low 128 bits into the upper
; half of ymm0.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; funcC: build a <4 x i64> whose four lanes all hold the scalar argument %q,
; using a chain of insertelement instructions starting from undef.
; The expected code moves the scalar from rdi into xmm0, duplicates it within
; the low 128 bits with vmovddup, and copies that into the upper half of ymm0
; with vinsertf128.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovq %rdi, %xmm0
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; funcD: build a <4 x double> whose four lanes all hold the scalar argument %q,
; using a chain of insertelement instructions starting from undef.
; The expected code operates on xmm0 directly (no move from a general-purpose
; register): vmovddup duplicates the value within the low 128 bits and
; vinsertf128 copies that into the upper half of ymm0.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test that this pattern is turned into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; funcE loads one 32-bit value from element [1][1] of a stack-allocated
; [18 x [18 x float]] array and inserts it into lanes 6 and 7 of an otherwise
; undef <8 x i32>, which is then bitcast to <8 x float>. All remaining lanes
; are undef, and both conditional branches use undef conditions. The expected
; code performs the load and the lane fill with a single vbroadcastss from the
; stack slot.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK:       ## BB#0: ## %for_exit499
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    ## implicit-def: %YMM0
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    jne LBB4_2
; CHECK-NEXT:  ## BB#1: ## %load.i1247
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
; CHECK-NEXT:    retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; funcF: insert the i32 argument %val into lanes 6 and 7 of an otherwise undef
; <8 x i32> and return it bitcast to <8 x float>.
; Here the scalar arrives in a register (edi) rather than from memory. The
; expected code moves it into xmm0, places it in elements 2 and 3 of the
; 128-bit register with vpermilps, and copies that into the upper half of ymm0
; with vinsertf128, so the value lands in lanes 6 and 7 of the result.
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; funcG: splat float element 0 of a <8 x float> vector across all 8 lanes.
; The source element is in the low 128 bits, so the expected code splats it
; there with vpermilps and copies the result into the upper half of ymm0 with
; vinsertf128.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; funcH: splat float element 5 of a <8 x float> vector across all 8 lanes.
; Element 5 is element 1 of the upper 128-bit half, so the expected code first
; extracts the upper half with vextractf128, splats element 1 within it using
; vpermilps, and then copies the result into the upper half of ymm0 with
; vinsertf128.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}

; splat_load_2f64_11: load a <2 x double> from %ptr and splat element 1 into
; both lanes.
; The expected code loads the full 128-bit vector with vmovaps and then
; duplicates the high element with vmovhlps; no broadcast instruction is
; expected for this case.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}

; splat_load_4f64_2222: load a <4 x double> from %ptr and splat element 2
; across all four lanes.
; The expected code folds the load and the splat into one vbroadcastsd that
; reads directly from byte offset 16 (element 2 * 8 bytes) of the pointer.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}

; splat_load_4f32_0000: load a <4 x float> from %ptr and splat element 0
; across all four lanes.
; The expected code folds the load and the splat into one 128-bit vbroadcastss
; that reads directly from the pointer (offset 0).
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}

; splat_load_8f32_77777777: load a <8 x float> from %ptr and splat element 7
; across all eight lanes.
; The expected code folds the load and the splat into one 256-bit vbroadcastss
; that reads directly from byte offset 28 (element 7 * 4 bytes) of the pointer.
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}
