• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s
3
; BB16: the i8 loaded from %ptr is inserted into every lane of a <16 x i8>
; (insertelement chain, indices 0-15). The assertions expect the whole chain to
; collapse into one vpbroadcastb that reads (%rdi) straight into %xmm0.
4define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
5; CHECK-LABEL: BB16:
6; CHECK:       ## BB#0: ## %entry
7; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0
8; CHECK-NEXT:    retq
9entry:
10  %q = load i8, i8* %ptr, align 4
11  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
12  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
13  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
14  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
15  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
16  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
17  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
18  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
19  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
20  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
21  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
22  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
23  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
24  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
25  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
26  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
27  ret <16 x i8> %qf
28}
29
; BB32: 256-bit variant of BB16. The loaded i8 is inserted into all 32 lanes of
; a <32 x i8>; the assertions expect a single vpbroadcastb from (%rdi) into %ymm0.
30define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
31; CHECK-LABEL: BB32:
32; CHECK:       ## BB#0: ## %entry
33; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0
34; CHECK-NEXT:    retq
35entry:
36  %q = load i8, i8* %ptr, align 4
37  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
38  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
39  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
40  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
41  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
42  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
43  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
44  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
45  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
46  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
47  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
48  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
49  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
50  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
51  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
52  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
53
54  %q20 = insertelement <32 x i8> %qf, i8 %q,  i32 16
55  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
56  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
57  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
58  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
59  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
60  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
61  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
62  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
63  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
64  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
65  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
66  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
67  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
68  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
69  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
70  ret <32 x i8> %q2f
71}
72
; W16: the i16 loaded from %ptr is inserted into all 8 lanes of an <8 x i16>.
; The assertions expect a single vpbroadcastw from (%rdi) into %xmm0.
73define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
74; CHECK-LABEL: W16:
75; CHECK:       ## BB#0: ## %entry
76; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0
77; CHECK-NEXT:    retq
78entry:
79  %q = load i16, i16* %ptr, align 4
80  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
81  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
82  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
83  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
84  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
85  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
86  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
87  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
88  ret <8 x i16> %q7
89}
90
; WW16: 256-bit variant of W16. The loaded i16 fills all 16 lanes of a
; <16 x i16>; the assertions expect one vpbroadcastw from (%rdi) into %ymm0.
91define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
92; CHECK-LABEL: WW16:
93; CHECK:       ## BB#0: ## %entry
94; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0
95; CHECK-NEXT:    retq
96entry:
97  %q = load i16, i16* %ptr, align 4
98  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
99  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
100  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
101  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
102  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
103  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
104  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
105  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
106  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
107  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
108  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
109  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
110  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
111  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
112  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
113  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
114  ret <16 x i16> %qf
115}
116
; D32: the i32 loaded from %ptr fills all 4 lanes of a <4 x i32>. Note that the
; assertions expect vbroadcastss (not an integer-named broadcast) from (%rdi)
; into %xmm0.
117define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
118; CHECK-LABEL: D32:
119; CHECK:       ## BB#0: ## %entry
120; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
121; CHECK-NEXT:    retq
122entry:
123  %q = load i32, i32* %ptr, align 4
124  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
125  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
126  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
127  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
128  ret <4 x i32> %q3
129}
130
; DD32: 256-bit variant of D32. The loaded i32 fills all 8 lanes of an
; <8 x i32>; the assertions expect one vbroadcastss from (%rdi) into %ymm0.
131define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
132; CHECK-LABEL: DD32:
133; CHECK:       ## BB#0: ## %entry
134; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
135; CHECK-NEXT:    retq
136entry:
137  %q = load i32, i32* %ptr, align 4
138  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
139  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
140  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
141  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
142  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
143  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
144  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
145  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
146  ret <8 x i32> %q7
147}
148
; Q64: the i64 loaded from %ptr fills both lanes of a <2 x i64>. The assertions
; expect one vpbroadcastq from (%rdi) into %xmm0.
149define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
150; CHECK-LABEL: Q64:
151; CHECK:       ## BB#0: ## %entry
152; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0
153; CHECK-NEXT:    retq
154entry:
155  %q = load i64, i64* %ptr, align 4
156  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
157  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
158  ret <2 x i64> %q1
159}
160
; QQ64: 256-bit variant of Q64. The loaded i64 fills all 4 lanes of a
; <4 x i64>; the assertions expect vbroadcastsd from (%rdi) into %ymm0.
161define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
162; CHECK-LABEL: QQ64:
163; CHECK:       ## BB#0: ## %entry
164; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
165; CHECK-NEXT:    retq
166entry:
167  %q = load i64, i64* %ptr, align 4
168  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
169  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
170  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
171  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
172  ret <4 x i64> %q3
173}
174
175; FIXME: Pointer adjusted broadcasts
176
; Loads a whole <16 x i8> and splats its element 1 across 16 lanes. The
; assertions expect the vector load to be narrowed to a byte broadcast from
; offset 1 (element index 1 * 1 byte) off %rdi into %xmm0.
177define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
178; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
179; CHECK:       ## BB#0: ## %entry
180; CHECK-NEXT:    vpbroadcastb 1(%rdi), %xmm0
181; CHECK-NEXT:    retq
182entry:
183  %ld = load <16 x i8>, <16 x i8>* %ptr
184  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
185  ret <16 x i8> %ret
186}
187
; Loads a <16 x i8> and splats its element 1 into a wider 32-lane result (the
; shuffle mask has 32 entries, all 1). The assertions expect a byte broadcast
; from offset 1 off %rdi into %ymm0.
188define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
189; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
190; CHECK:       ## BB#0: ## %entry
191; CHECK-NEXT:    vpbroadcastb 1(%rdi), %ymm0
192; CHECK-NEXT:    retq
193entry:
194  %ld = load <16 x i8>, <16 x i8>* %ptr
195  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
196  ret <32 x i8> %ret
197}
198
; Loads a whole <32 x i8> and splats its element 1 across 32 lanes. The
; assertions expect a byte broadcast from offset 1 off %rdi into %ymm0.
199define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
200; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
201; CHECK:       ## BB#0: ## %entry
202; CHECK-NEXT:    vpbroadcastb 1(%rdi), %ymm0
203; CHECK-NEXT:    retq
204entry:
205  %ld = load <32 x i8>, <32 x i8>* %ptr
206  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
207  ret <32 x i8> %ret
208}
209
; Loads a whole <8 x i16> and splats its element 1 across 8 lanes. The
; assertions expect a word broadcast from offset 2 (element 1 * 2 bytes) off
; %rdi into %xmm0.
210define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
211; CHECK-LABEL: load_splat_8i16_8i16_11111111:
212; CHECK:       ## BB#0: ## %entry
213; CHECK-NEXT:    vpbroadcastw 2(%rdi), %xmm0
214; CHECK-NEXT:    retq
215entry:
216  %ld = load <8 x i16>, <8 x i16>* %ptr
217  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
218  ret <8 x i16> %ret
219}
220
; Loads an <8 x i16> and splats its element 1 into a wider 16-lane result. The
; assertions expect a word broadcast from offset 2 off %rdi into %ymm0.
221define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
222; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
223; CHECK:       ## BB#0: ## %entry
224; CHECK-NEXT:    vpbroadcastw 2(%rdi), %ymm0
225; CHECK-NEXT:    retq
226entry:
227  %ld = load <8 x i16>, <8 x i16>* %ptr
228  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
229  ret <16 x i16> %ret
230}
231
; Loads a whole <16 x i16> and splats its element 1 across 16 lanes. The
; assertions expect a word broadcast from offset 2 off %rdi into %ymm0.
232define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
233; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
234; CHECK:       ## BB#0: ## %entry
235; CHECK-NEXT:    vpbroadcastw 2(%rdi), %ymm0
236; CHECK-NEXT:    retq
237entry:
238  %ld = load <16 x i16>, <16 x i16>* %ptr
239  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
240  ret <16 x i16> %ret
241}
242
; Loads a whole <4 x i32> and splats its element 1 across 4 lanes. The
; assertions expect vbroadcastss from offset 4 (element 1 * 4 bytes) off %rdi
; into %xmm0.
243define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
244; CHECK-LABEL: load_splat_4i32_4i32_1111:
245; CHECK:       ## BB#0: ## %entry
246; CHECK-NEXT:    vbroadcastss 4(%rdi), %xmm0
247; CHECK-NEXT:    retq
248entry:
249  %ld = load <4 x i32>, <4 x i32>* %ptr
250  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
251  ret <4 x i32> %ret
252}
253
; Loads a <4 x i32> and splats its element 3 into a wider 8-lane result. The
; assertions expect vbroadcastss from offset 12 (element 3 * 4 bytes) off %rdi
; into %ymm0.
254define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
255; CHECK-LABEL: load_splat_8i32_4i32_33333333:
256; CHECK:       ## BB#0: ## %entry
257; CHECK-NEXT:    vbroadcastss 12(%rdi), %ymm0
258; CHECK-NEXT:    retq
259entry:
260  %ld = load <4 x i32>, <4 x i32>* %ptr
261  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
262  ret <8 x i32> %ret
263}
264
; Loads a whole <8 x i32> and splats its element 5 across 8 lanes. The
; assertions expect vbroadcastss from offset 20 (element 5 * 4 bytes) off %rdi
; into %ymm0.
265define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
266; CHECK-LABEL: load_splat_8i32_8i32_55555555:
267; CHECK:       ## BB#0: ## %entry
268; CHECK-NEXT:    vbroadcastss 20(%rdi), %ymm0
269; CHECK-NEXT:    retq
270entry:
271  %ld = load <8 x i32>, <8 x i32>* %ptr
272  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
273  ret <8 x i32> %ret
274}
275
; Float counterpart of the 4 x i32 case: loads a <4 x float> and splats its
; element 1. The assertions expect vbroadcastss from offset 4 off %rdi into %xmm0.
276define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
277; CHECK-LABEL: load_splat_4f32_4f32_1111:
278; CHECK:       ## BB#0: ## %entry
279; CHECK-NEXT:    vbroadcastss 4(%rdi), %xmm0
280; CHECK-NEXT:    retq
281entry:
282  %ld = load <4 x float>, <4 x float>* %ptr
283  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
284  ret <4 x float> %ret
285}
286
; Loads a <4 x float> and splats its element 3 into a wider 8-lane result. The
; assertions expect vbroadcastss from offset 12 off %rdi into %ymm0.
287define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
288; CHECK-LABEL: load_splat_8f32_4f32_33333333:
289; CHECK:       ## BB#0: ## %entry
290; CHECK-NEXT:    vbroadcastss 12(%rdi), %ymm0
291; CHECK-NEXT:    retq
292entry:
293  %ld = load <4 x float>, <4 x float>* %ptr
294  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
295  ret <8 x float> %ret
296}
297
; Loads a whole <8 x float> and splats its element 5 across 8 lanes. The
; assertions expect vbroadcastss from offset 20 off %rdi into %ymm0.
298define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
299; CHECK-LABEL: load_splat_8f32_8f32_55555555:
300; CHECK:       ## BB#0: ## %entry
301; CHECK-NEXT:    vbroadcastss 20(%rdi), %ymm0
302; CHECK-NEXT:    retq
303entry:
304  %ld = load <8 x float>, <8 x float>* %ptr
305  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
306  ret <8 x float> %ret
307}
308
; Loads a <2 x i64> and splats its element 1 across both lanes (the mask has
; two entries even though the function name ends in 1111). The assertions
; expect vpbroadcastq from offset 8 (element 1 * 8 bytes) off %rdi into %xmm0.
309define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
310; CHECK-LABEL: load_splat_2i64_2i64_1111:
311; CHECK:       ## BB#0: ## %entry
312; CHECK-NEXT:    vpbroadcastq 8(%rdi), %xmm0
313; CHECK-NEXT:    retq
314entry:
315  %ld = load <2 x i64>, <2 x i64>* %ptr
316  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
317  ret <2 x i64> %ret
318}
319
; Loads a <2 x i64> and splats its element 1 into a wider 4-lane result. The
; assertions expect vbroadcastsd from offset 8 off %rdi into %ymm0.
320define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
321; CHECK-LABEL: load_splat_4i64_2i64_1111:
322; CHECK:       ## BB#0: ## %entry
323; CHECK-NEXT:    vbroadcastsd 8(%rdi), %ymm0
324; CHECK-NEXT:    retq
325entry:
326  %ld = load <2 x i64>, <2 x i64>* %ptr
327  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
328  ret <4 x i64> %ret
329}
330
; Loads a whole <4 x i64> and splats its element 2 across 4 lanes. The
; assertions expect vbroadcastsd from offset 16 (element 2 * 8 bytes) off %rdi
; into %ymm0.
331define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
332; CHECK-LABEL: load_splat_4i64_4i64_2222:
333; CHECK:       ## BB#0: ## %entry
334; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
335; CHECK-NEXT:    retq
336entry:
337  %ld = load <4 x i64>, <4 x i64>* %ptr
338  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
339  ret <4 x i64> %ret
340}
341
; Loads a <2 x double> and splats its element 1 across both lanes. Unlike the
; other load_splat cases, the assertions here record a full vmovaps load
; followed by a register vmovhlps shuffle rather than a broadcast with a
; memory operand.
342define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
343; CHECK-LABEL: load_splat_2f64_2f64_1111:
344; CHECK:       ## BB#0: ## %entry
345; CHECK-NEXT:    vmovaps (%rdi), %xmm0
346; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
347; CHECK-NEXT:    retq
348entry:
349  %ld = load <2 x double>, <2 x double>* %ptr
350  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
351  ret <2 x double> %ret
352}
353
; Loads a <2 x double> and splats its element 1 into a wider 4-lane result. The
; assertions expect vbroadcastsd from offset 8 off %rdi into %ymm0.
354define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
355; CHECK-LABEL: load_splat_4f64_2f64_1111:
356; CHECK:       ## BB#0: ## %entry
357; CHECK-NEXT:    vbroadcastsd 8(%rdi), %ymm0
358; CHECK-NEXT:    retq
359entry:
360  %ld = load <2 x double>, <2 x double>* %ptr
361  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
362  ret <4 x double> %ret
363}
364
; Loads a whole <4 x double> and splats its element 2 across 4 lanes. The
; assertions expect vbroadcastsd from offset 16 off %rdi into %ymm0.
365define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
366; CHECK-LABEL: load_splat_4f64_4f64_2222:
367; CHECK:       ## BB#0: ## %entry
368; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
369; CHECK-NEXT:    retq
370entry:
371  %ld = load <4 x double>, <4 x double>* %ptr
372  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
373  ret <4 x double> %ret
374}
375
376; Make sure that we still don't support broadcasting a double into a 128-bit
377; vector; this used to crash.
; The loaded double is inserted into both lanes of a <2 x double>; the
; assertions expect vmovddup with a memory source instead of a broadcast
; instruction.
378define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
379; CHECK-LABEL: I:
380; CHECK:       ## BB#0: ## %entry
381; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
382; CHECK-NEXT:    retq
383entry:
384  %q = load double, double* %ptr, align 4
385  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
386  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
387  ret <2 x double> %vecinit2.i
388}
389
; V111: adds the constant splat <1 x 8> to an <8 x i32> argument. The
; assertions expect the constant to be materialised with a RIP-relative
; vpbroadcastd into %ymm1, followed by vpaddd.
390define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
391; CHECK-LABEL: V111:
392; CHECK:       ## BB#0: ## %entry
393; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
394; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
395; CHECK-NEXT:    retq
396entry:
397  %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
398  ret <8 x i32> %g
399}
400
; V113: floating-point counterpart of V111. Adds a constant float splat (all
; eight lanes hold the same hex literal) to an <8 x float> argument; the
; assertions expect a RIP-relative vbroadcastss into %ymm1, followed by vaddps.
401define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
402; CHECK-LABEL: V113:
403; CHECK:       ## BB#0: ## %entry
404; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
405; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
406; CHECK-NEXT:    retq
407entry:
408  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
409  ret <8 x float> %g
410}
411
; _e2: builds a <4 x float> whose four lanes are the same float constant via an
; insertelement chain (%ptr is not used by the body). The assertions expect the
; constant splat to be produced by a RIP-relative vbroadcastss into %xmm0.
412define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
413; CHECK-LABEL: _e2:
414; CHECK:       ## BB#0:
415; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %xmm0
416; CHECK-NEXT:    retq
417  %vecinit.i = insertelement <4 x float> undef, float        0xbf80000000000000, i32 0
418  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
419  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
420  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
421  ret <4 x float> %vecinit6.i
422}
423
; _e4: builds an <8 x i8> whose eight lanes are the constant 52 via an
; insertelement chain (%ptr is not used by the body). The assertions expect a
; plain vmovaps of a constant vector into %xmm0, i.e. no broadcast instruction.
424define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
425; CHECK-LABEL: _e4:
426; CHECK:       ## BB#0:
427; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
428; CHECK-NEXT:    retq
429  %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
430  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
431  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
432  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
433  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
434  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
435  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
436  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
437  ret <8 x i8> %vecinit7.i
438}
439
440
; crash: control-flow-heavy input whose vector work (fmul / fptosi / and / a
; zero-mask splat of a scalar / mul) feeds only %2 and %3, which have no users.
; Both conditional branches test undef. The assertions expect nothing but
; control flow to be emitted: a zeroed test, an early return, and a
; single-block infinite loop.
; NOTE(review): the LBB31_ prefix in the expected labels appears to encode this
; function's position in the file; adding or removing functions above it would
; presumably require regenerating these assertions - confirm before reordering.
441define void @crash() nounwind alwaysinline {
442; CHECK-LABEL: crash:
443; CHECK:       ## BB#0: ## %WGLoopsEntry
444; CHECK-NEXT:    xorl %eax, %eax
445; CHECK-NEXT:    testb %al, %al
446; CHECK-NEXT:    je LBB31_1
447; CHECK-NEXT:  ## BB#2: ## %ret
448; CHECK-NEXT:    retq
449; CHECK-NEXT:    .align 4, 0x90
450; CHECK-NEXT:  LBB31_1: ## %footer349VF
451; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
452; CHECK-NEXT:    jmp LBB31_1
453WGLoopsEntry:
454  br i1 undef, label %ret, label %footer329VF
455
456footer329VF:
457  %A.0.inVF = fmul float undef, 6.553600e+04
458  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
459  %A.0VF = fptosi float %A.0.inVF to i32
460  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
461  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
462  %1 = and i32 %A.0VF, 65535
463  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
464  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
465  br i1 undef, label %preload1201VF, label %footer349VF
466
467preload1201VF:
468  br label %footer349VF
469
470footer349VF:
471  %2 = mul nsw <8 x i32> undef, %0
472  %3 = mul nsw <8 x i32> undef, %vector1099VF
473  br label %footer329VF
474
475ret:
476  ret void
477}
478
; _inreg0: splats a scalar i32 argument across an <8 x i32> (insertelement into
; lane 0, then a zero-mask shufflevector). The assertions expect the scalar to
; be moved from %edi into %xmm0 and then broadcast register-to-register with
; vbroadcastss into %ymm0.
479define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
480; CHECK-LABEL: _inreg0:
481; CHECK:       ## BB#0:
482; CHECK-NEXT:    vmovd %edi, %xmm0
483; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
484; CHECK-NEXT:    retq
485  %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
486  %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
487  ret <8 x i32> %wide
488}
489
; _inreg1: splats a scalar float argument across an <8 x float>. The assertions
; expect a single register-to-register vbroadcastss from %xmm0 into %ymm0.
490define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
491; CHECK-LABEL: _inreg1:
492; CHECK:       ## BB#0:
493; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
494; CHECK-NEXT:    retq
495  %in = insertelement <8 x float> undef, float %scalar, i32 0
496  %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
497  ret <8 x float> %wide
498}
499
; _inreg2: splats a scalar float argument across a <4 x float>. The assertions
; expect a single register-to-register vbroadcastss from %xmm0 into %xmm0.
500define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
501; CHECK-LABEL: _inreg2:
502; CHECK:       ## BB#0:
503; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
504; CHECK-NEXT:    retq
505  %in = insertelement <4 x float> undef, float %scalar, i32 0
506  %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
507  ret <4 x float> %wide
508}
509
; _inreg3: splats a scalar double argument across a <4 x double>. The
; assertions expect a single register-to-register vbroadcastsd from %xmm0 into
; %ymm0.
510define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
511; CHECK-LABEL: _inreg3:
512; CHECK:       ## BB#0:
513; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
514; CHECK-NEXT:    retq
515  %in = insertelement <4 x double> undef, double %scalar, i32 0
516  %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
517  ret <4 x double> %wide
518}
519
; Splats lane 0 of an <8 x float> argument (zero-mask shufflevector). The
; assertions expect vbroadcastss from %xmm0 into %ymm0.
520define   <8 x float> @_inreg8xfloat(<8 x float> %a) {
521; CHECK-LABEL: _inreg8xfloat:
522; CHECK:       ## BB#0:
523; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
524; CHECK-NEXT:    retq
525  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
526  ret <8 x float> %b
527}
528
; Splats lane 0 of a <4 x float> argument (zero-mask shufflevector). The
; assertions expect vbroadcastss from %xmm0 into %xmm0.
529define   <4 x float> @_inreg4xfloat(<4 x float> %a) {
530; CHECK-LABEL: _inreg4xfloat:
531; CHECK:       ## BB#0:
532; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
533; CHECK-NEXT:    retq
534  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
535  ret <4 x float> %b
536}
537
; Splats lane 0 of a <16 x i16> argument (zero-mask shufflevector). The
; assertions expect vpbroadcastw from %xmm0 into %ymm0.
538define   <16 x i16> @_inreg16xi16(<16 x i16> %a) {
539; CHECK-LABEL: _inreg16xi16:
540; CHECK:       ## BB#0:
541; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
542; CHECK-NEXT:    retq
543  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
544  ret <16 x i16> %b
545}
546
; Splats lane 0 of an <8 x i16> argument (zero-mask shufflevector). The
; assertions expect vpbroadcastw from %xmm0 into %xmm0.
547define   <8 x i16> @_inreg8xi16(<8 x i16> %a) {
548; CHECK-LABEL: _inreg8xi16:
549; CHECK:       ## BB#0:
550; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
551; CHECK-NEXT:    retq
552  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
553  ret <8 x i16> %b
554}
555
; Splats lane 0 of a <4 x i64> argument (zero-mask shufflevector). The
; assertions expect vbroadcastsd from %xmm0 into %ymm0.
556define   <4 x i64> @_inreg4xi64(<4 x i64> %a) {
557; CHECK-LABEL: _inreg4xi64:
558; CHECK:       ## BB#0:
559; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
560; CHECK-NEXT:    retq
561  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
562  ret <4 x i64> %b
563}
564
; Splats lane 0 of a <2 x i64> argument (zero-mask shufflevector). The
; assertions expect vpbroadcastq from %xmm0 into %xmm0.
565define   <2 x i64> @_inreg2xi64(<2 x i64> %a) {
566; CHECK-LABEL: _inreg2xi64:
567; CHECK:       ## BB#0:
568; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
569; CHECK-NEXT:    retq
570  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
571  ret <2 x i64> %b
572}
573
; Splats lane 0 of a <4 x double> argument (zero-mask shufflevector). The
; assertions expect vbroadcastsd from %xmm0 into %ymm0.
574define   <4 x double> @_inreg4xdouble(<4 x double> %a) {
575; CHECK-LABEL: _inreg4xdouble:
576; CHECK:       ## BB#0:
577; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
578; CHECK-NEXT:    retq
579  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
580  ret <4 x double> %b
581}
582
; Splats lane 0 of a <2 x double> argument (zero-mask shufflevector). The
; assertions expect vmovddup on %xmm0 rather than a broadcast instruction.
583define   <2 x double> @_inreg2xdouble(<2 x double> %a) {
584; CHECK-LABEL: _inreg2xdouble:
585; CHECK:       ## BB#0:
586; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
587; CHECK-NEXT:    retq
588  %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
589  ret <2 x double> %b
590}
591
; Splats lane 0 of an <8 x i32> argument (zero-mask shufflevector). The
; assertions expect vbroadcastss from %xmm0 into %ymm0.
592define   <8 x i32> @_inreg8xi32(<8 x i32> %a) {
593; CHECK-LABEL: _inreg8xi32:
594; CHECK:       ## BB#0:
595; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
596; CHECK-NEXT:    retq
597  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
598  ret <8 x i32> %b
599}
600
; Splats lane 0 of a <4 x i32> argument (zero-mask shufflevector). The
; assertions expect vbroadcastss from %xmm0 into %xmm0.
601define   <4 x i32> @_inreg4xi32(<4 x i32> %a) {
602; CHECK-LABEL: _inreg4xi32:
603; CHECK:       ## BB#0:
604; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
605; CHECK-NEXT:    retq
606  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
607  ret <4 x i32> %b
608}
609
; Splats lane 0 of a <32 x i8> argument (zero-mask shufflevector). The
; assertions expect vpbroadcastb from %xmm0 into %ymm0.
610define   <32 x i8> @_inreg32xi8(<32 x i8> %a) {
611; CHECK-LABEL: _inreg32xi8:
612; CHECK:       ## BB#0:
613; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
614; CHECK-NEXT:    retq
615  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
616  ret <32 x i8> %b
617}
618
; Splats lane 0 of a <16 x i8> argument (zero-mask shufflevector). The
; assertions expect vpbroadcastb from %xmm0 into %xmm0.
619define   <16 x i8> @_inreg16xi8(<16 x i8> %a) {
620; CHECK-LABEL: _inreg16xi8:
621; CHECK:       ## BB#0:
622; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
623; CHECK-NEXT:    retq
624  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
625  ret <16 x i8> %b
626}
627
628; These tests check that a vbroadcast instruction is used when we have a splat
629; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
630; (via the insertelements).
631
; splat_concat1: fills a <4 x float> with %f, then widens it to <8 x float> with
; a shuffle mask that repeats lanes 0-3 twice. The assertions expect the whole
; sequence to become one vbroadcastss from %xmm0 into %ymm0.
632define <8 x float> @splat_concat1(float %f) {
633; CHECK-LABEL: splat_concat1:
634; CHECK:       ## BB#0:
635; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
636; CHECK-NEXT:    retq
637  %1 = insertelement <4 x float> undef, float %f, i32 0
638  %2 = insertelement <4 x float> %1, float %f, i32 1
639  %3 = insertelement <4 x float> %2, float %f, i32 2
640  %4 = insertelement <4 x float> %3, float %f, i32 3
641  %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
642  ret <8 x float> %5
643}
644
; splat_concat2: builds two separate <4 x float> splats of %f (%4 and %8) and
; concatenates them with an identity 0-7 shuffle mask. The assertions expect
; one vbroadcastss from %xmm0 into %ymm0.
645define <8 x float> @splat_concat2(float %f) {
646; CHECK-LABEL: splat_concat2:
647; CHECK:       ## BB#0:
648; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
649; CHECK-NEXT:    retq
650  %1 = insertelement <4 x float> undef, float %f, i32 0
651  %2 = insertelement <4 x float> %1, float %f, i32 1
652  %3 = insertelement <4 x float> %2, float %f, i32 2
653  %4 = insertelement <4 x float> %3, float %f, i32 3
654  %5 = insertelement <4 x float> undef, float %f, i32 0
655  %6 = insertelement <4 x float> %5, float %f, i32 1
656  %7 = insertelement <4 x float> %6, float %f, i32 2
657  %8 = insertelement <4 x float> %7, float %f, i32 3
658  %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
659  ret <8 x float> %9
660}
661
; splat_concat3: fills a <2 x double> with %d, then widens it to <4 x double>
; with a shuffle mask that repeats lanes 0-1 twice. The assertions expect one
; vbroadcastsd from %xmm0 into %ymm0.
662define <4 x double> @splat_concat3(double %d) {
663; CHECK-LABEL: splat_concat3:
664; CHECK:       ## BB#0:
665; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
666; CHECK-NEXT:    retq
667  %1 = insertelement <2 x double> undef, double %d, i32 0
668  %2 = insertelement <2 x double> %1, double %d, i32 1
669  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
670  ret <4 x double> %3
671}
672
; splat_concat4: builds two separate <2 x double> splats of %d (%2 and %4) and
; concatenates them with an identity 0-3 shuffle mask. The assertions expect
; one vbroadcastsd from %xmm0 into %ymm0.
673define <4 x double> @splat_concat4(double %d) {
674; CHECK-LABEL: splat_concat4:
675; CHECK:       ## BB#0:
676; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
677; CHECK-NEXT:    retq
678  %1 = insertelement <2 x double> undef, double %d, i32 0
679  %2 = insertelement <2 x double> %1, double %d, i32 1
680  %3 = insertelement <2 x double> undef, double %d, i32 0
681  %4 = insertelement <2 x double> %3, double %d, i32 1
682  %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
683  ret <4 x double> %5
684}
685
686; Test cases for <rdar://problem/16074331>.
687; Instruction selection for the broadcast instruction fails if
688; the load cannot be folded into the broadcast.
689; This happens if the load initially has one use but other uses are
690; created later, or if the selection DAG cannot prove that folding the
691; load will not create a cycle in the DAG.
692; These test cases exercise the latter.
693
; isel_crash_16b: splats a loaded i8 across <16 x i8> (insertelement into lane 0
; plus a zero-mask shufflevector), bitcasts the result to <2 x i64> and stores
; it to a stack slot next to an unrelated <2 x i64> store/load/store sequence.
; The assertions are deliberately loose: some vpbroadcastb with an xmm
; destination must be selected and the function must reach its return.
694; CHECK-LABEL: isel_crash_16b:
695; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
696; CHECK: ret
697define void @isel_crash_16b(i8* %cV_R.addr) {
698entry:
699  %__a.addr.i = alloca <2 x i64>, align 16
700  %__b.addr.i = alloca <2 x i64>, align 16
701  %vCr = alloca <2 x i64>, align 16
702  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
703  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
704  %tmp2 = load i8, i8* %cV_R.addr, align 4
705  %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
706  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
707  %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
708  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
709  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
710  ret void
711}
712
; isel_crash_32b: 256-bit variant of the i8 case. Splats a loaded i8 across
; <32 x i8>, bitcasts the result to <4 x i64> and stores it to a stack slot next
; to an unrelated <4 x i64> store/load/store sequence. The assertions are
; deliberately loose: some vpbroadcastb with a ymm destination must be selected
; and the function must reach its return.
713; CHECK-LABEL: isel_crash_32b:
714; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
715; CHECK: ret
716define void @isel_crash_32b(i8* %cV_R.addr) {
717entry:
718  %__a.addr.i = alloca <4 x i64>, align 16
719  %__b.addr.i = alloca <4 x i64>, align 16
720  %vCr = alloca <4 x i64>, align 16
721  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
722  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
723  %tmp2 = load i8, i8* %cV_R.addr, align 4
724  %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
725  %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
726  %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
727  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
728  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
729  ret void
730}
731
; isel_crash_8w: splats a loaded i16 across <8 x i16> (insertelement into lane 0
; plus a zero-mask shufflevector), bitcasts the result to <2 x i64> and stores
; it to a stack slot next to an unrelated <2 x i64> store/load/store sequence.
; The assertions are deliberately loose: some vpbroadcastw with an xmm
; destination must be selected and the function must reach its return.
732; CHECK-LABEL: isel_crash_8w:
733; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
734; CHECK: ret
735define void @isel_crash_8w(i16* %cV_R.addr) {
736entry:
737  %__a.addr.i = alloca <2 x i64>, align 16
738  %__b.addr.i = alloca <2 x i64>, align 16
739  %vCr = alloca <2 x i64>, align 16
740  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
741  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
742  %tmp2 = load i16, i16* %cV_R.addr, align 4
743  %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
744  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
745  %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
746  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
747  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
748  ret void
749}
750
; isel_crash_16w: 256-bit variant of the i16 case. Splats a loaded i16 across
; <16 x i16>, bitcasts the result to <4 x i64> and stores it to a stack slot
; next to an unrelated <4 x i64> store/load/store sequence. The assertions are
; deliberately loose: some vpbroadcastw with a ymm destination must be selected
; and the function must reach its return.
751; CHECK-LABEL: isel_crash_16w:
752; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
753; CHECK: ret
754define void @isel_crash_16w(i16* %cV_R.addr) {
755entry:
756  %__a.addr.i = alloca <4 x i64>, align 16
757  %__b.addr.i = alloca <4 x i64>, align 16
758  %vCr = alloca <4 x i64>, align 16
759  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
760  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
761  %tmp2 = load i16, i16* %cV_R.addr, align 4
762  %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
763  %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
764  %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
765  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
766  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
767  ret void
768}
769
; isel_crash_4d: splats a loaded i32 across <4 x i32> (insertelement into lane 0
; plus a zero-mask shufflevector), bitcasts the result to <2 x i64> and stores
; it to a stack slot next to an unrelated <2 x i64> store/load/store sequence.
; The assertions are deliberately loose: some vbroadcastss with an xmm
; destination must be selected and the function must reach its return.
770; CHECK-LABEL: isel_crash_4d:
771; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
772; CHECK: ret
773define void @isel_crash_4d(i32* %cV_R.addr) {
774entry:
775  %__a.addr.i = alloca <2 x i64>, align 16
776  %__b.addr.i = alloca <2 x i64>, align 16
777  %vCr = alloca <2 x i64>, align 16
778  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
779  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
780  %tmp2 = load i32, i32* %cV_R.addr, align 4
781  %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
782  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
783  %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
784  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
785  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
786  ret void
787}
788
; isel_crash_8d: 256-bit variant of the i32 case. Splats a loaded i32 across
; <8 x i32>, bitcasts the result to <4 x i64> and stores it to a stack slot next
; to an unrelated <4 x i64> store/load/store sequence. The assertions are
; deliberately loose: some vbroadcastss with a ymm destination must be selected
; and the function must reach its return.
789; CHECK-LABEL: isel_crash_8d:
790; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
791; CHECK: ret
792define void @isel_crash_8d(i32* %cV_R.addr) {
793entry:
794  %__a.addr.i = alloca <4 x i64>, align 16
795  %__b.addr.i = alloca <4 x i64>, align 16
796  %vCr = alloca <4 x i64>, align 16
797  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
798  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
799  %tmp2 = load i32, i32* %cV_R.addr, align 4
800  %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
801  %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
802  %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
803  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
804  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
805  ret void
806}
807
; isel_crash_2q: splats a loaded i64 across <2 x i64> (insertelement into lane 0
; plus a zero-mask shufflevector) and stores it to a stack slot next to an
; unrelated <2 x i64> store/load/store sequence; no bitcast is needed because
; the splat already has the stored type. The assertions are deliberately loose:
; some vpbroadcastq with an xmm destination must be selected and the function
; must reach its return.
808; CHECK-LABEL: isel_crash_2q:
809; CHECK: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
810; CHECK: ret
811define void @isel_crash_2q(i64* %cV_R.addr) {
812entry:
813  %__a.addr.i = alloca <2 x i64>, align 16
814  %__b.addr.i = alloca <2 x i64>, align 16
815  %vCr = alloca <2 x i64>, align 16
816  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
817  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
818  %tmp2 = load i64, i64* %cV_R.addr, align 4
819  %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
820  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
821  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
822  store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
823  ret void
824}
825
; isel_crash_4q: 256-bit variant of the i64 case. Splats a loaded i64 across
; <4 x i64> and stores it to a stack slot next to an unrelated <4 x i64>
; store/load/store sequence; no bitcast is needed because the splat already has
; the stored type. The assertions are deliberately loose: some vbroadcastsd
; with a ymm destination must be selected and the function must reach its
; return.
826; CHECK-LABEL: isel_crash_4q:
827; CHECK: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
828; CHECK: ret
829define void @isel_crash_4q(i64* %cV_R.addr) {
830entry:
831  %__a.addr.i = alloca <4 x i64>, align 16
832  %__b.addr.i = alloca <4 x i64>, align 16
833  %vCr = alloca <4 x i64>, align 16
834  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
835  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
836  %tmp2 = load i64, i64* %cV_R.addr, align 4
837  %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
838  %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
839  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
840  store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
841  ret void
842}
843