; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2

; Check constant loads of every 128-bit and 256-bit vector type
; for size optimization using splat ops available with AVX and AVX2.
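; Functions are flagged for size optimization in one of two ways: via the
; optsize (#0) and minsize (#1) attributes defined at the bottom of the file,
; or, in the _pgso variants, via profile-guided size optimization: !prof !14
; gives those functions an entry count of 0, which the module's ProfileSummary
; should classify as cold, so they get size optimization without an attribute.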

; There is no AVX broadcast from double to 128-bit vector because movddup has been around since SSE3 (grrr).
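; (vmovddup with a 64-bit memory operand already duplicates the low double into
; both halves of the xmm register, so a dedicated 128-bit f64 broadcast would
; be redundant.)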
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
; CHECK-NEXT:    # xmm1 = mem[0,0]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
}

define <2 x double> @splat_v2f64_pgso(<2 x double> %x) !prof !14 {
; CHECK-LABEL: splat_v2f64_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
; CHECK-NEXT:    # xmm1 = mem[0,0]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
}

define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
; CHECK-LABEL: splat_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
}

define <4 x double> @splat_v4f64_pgso(<4 x double> %x) !prof !14 {
; CHECK-LABEL: splat_v4f64_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
}

define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
}

define <4 x float> @splat_v4f32_pgso(<4 x float> %x) !prof !14 {
; CHECK-LABEL: splat_v4f32_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
}

define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
; CHECK-LABEL: splat_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
}

define <8 x float> @splat_v8f32_pgso(<8 x float> %x) !prof !14 {
; CHECK-LABEL: splat_v8f32_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
}

; AVX can't do integer splats, so fake it: use vmovddup to splat the 64-bit value.
; With AVX2, vpbroadcastq is selected instead (see the checks below), even
; though vmovddup's encoding is one byte smaller.
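; (The byte difference comes from the encodings: vmovddup's opcode is in the
; 0F map and can use the 2-byte VEX prefix, while vpbroadcastq is in the 0F38
; map and requires the 3-byte VEX prefix.)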
define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
; AVX-LABEL: splat_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [2,2]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <2 x i64> %x, <i64 2, i64 2>
  ret <2 x i64> %add
}

define <2 x i64> @splat_v2i64_pgso(<2 x i64> %x) !prof !14 {
; AVX-LABEL: splat_v2i64_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [2,2]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v2i64_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <2 x i64> %x, <i64 2, i64 2>
  ret <2 x i64> %add
}

; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
; and then we fake it: use vmovddup to splat 64-bit value.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
; AVX-LABEL: splat_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = [2,2]
; AVX-NEXT:    # xmm2 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %add
}

define <4 x i64> @splat_v4i64_pgso(<4 x i64> %x) !prof !14 {
; AVX-LABEL: splat_v4i64_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = [2,2]
; AVX-NEXT:    # xmm2 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i64_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
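; (vbroadcastss is a float-domain instruction applied to integer data; only the
; bit pattern matters for the splat, and any domain-crossing penalty is an
; acceptable trade when optimizing for size.)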
define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
; AVX-LABEL: splat_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %add
}

define <4 x i32> @splat_v4i32_pgso(<4 x i32> %x) !prof !14 {
; AVX-LABEL: splat_v4i32_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i32_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
; AVX-LABEL: splat_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %add
}

define <8 x i32> @splat_v8i32_pgso(<8 x i32> %x) !prof !14 {
; AVX-LABEL: splat_v8i32_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i32_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
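; A hypothetical alternative for plain AVX (not what the compiler emits): a
; splat of i16 2 is also a splat of the i32 pattern 0x00020002, so a 4-byte
; constant could be broadcast with vbroadcastss instead of loading a 16-byte
; constant (label name is illustrative):
;   vbroadcastss .LCPI0_0(%rip), %xmm1  # .LCPI0_0: .long 0x00020002
;   vpaddw %xmm1, %xmm0, %xmm0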
define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
; AVX-LABEL: splat_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <8 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %add
}

define <8 x i16> @splat_v8i16_pgso(<8 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v8i16_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i16_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <8 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
; AVX-LABEL: splat_v16i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <16 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %add
}

define <16 x i16> @splat_v16i16_pgso(<16 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v16i16_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i16_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <16 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
; AVX-LABEL: splat_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <16 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %add
}

define <16 x i8> @splat_v16i8_pgso(<16 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v16i8_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i8_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <16 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; AVX-LABEL: splat_v32i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <32 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %add
}

define <32 x i8> @splat_v32i8_pgso(<32 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v32i8_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v32i8_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <32 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %add
}

; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
; loadi64 with multiple uses.

@A = common global <3 x i64> zeroinitializer, align 32

define <8 x i64> @pr23259() #1 {
; AVX-LABEL: pr23259:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq $1
; AVX-NEXT:    .cfi_adjust_cfa_offset 8
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_adjust_cfa_offset -8
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
; AVX-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; AVX-NEXT:    retq
;
; AVX2-LABEL: pr23259:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovdqa {{.*}}(%rip), %ymm0
; AVX2-NEXT:    pushq $1
; AVX2-NEXT:    .cfi_adjust_cfa_offset 8
; AVX2-NEXT:    popq %rax
; AVX2-NEXT:    .cfi_adjust_cfa_offset -8
; AVX2-NEXT:    vmovq %rax, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,1,1]
; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
  %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
  %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

attributes #0 = { optsize }
attributes #1 = { minsize }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}