• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3
; The mask <0, 1, 2, 3> selects the lanes of %v1 in order, so the shuffle is an
; identity; the expected output returns the argument directly.
4define <4 x float> @test1(<4 x float> %v1) {
5; CHECK-LABEL: @test1(
6; CHECK-NEXT:    ret <4 x float> [[V1:%.*]]
7;
8  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9  ret <4 x float> %v2
10}
11
; Both shuffle operands are %v1, so mask indices 5 and 7 name lanes 1 and 3 of
; the same vector. The mask is therefore an identity and the expected output
; returns the argument directly.
12define <4 x float> @test2(<4 x float> %v1) {
13; CHECK-LABEL: @test2(
14; CHECK-NEXT:    ret <4 x float> [[V1:%.*]]
15;
16  %v2 = shufflevector <4 x float> %v1, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
17  ret <4 x float> %v2
18}
19
; Lane 1 of %D is selected by mask index 0, i.e. lane 0 of %C, which holds the
; inserted scalar %f. The expected output returns %f with no vector ops left.
20define float @test3(<4 x float> %A, <4 x float> %B, float %f) {
21; CHECK-LABEL: @test3(
22; CHECK-NEXT:    ret float [[F:%.*]]
23;
24  %C = insertelement <4 x float> %A, float %f, i32 0
25  %D = shufflevector <4 x float> %C, <4 x float> %B, <4 x i32> <i32 5, i32 0, i32 2, i32 7>
26  %E = extractelement <4 x float> %D, i32 1
27  ret float %E
28}
29
; Extracts lane 0 from a splat of lane 0 of %X; the expected output extracts
; lane 0 of %X directly and the shuffle disappears.
30define i32 @test4(<4 x i32> %X) {
31; CHECK-LABEL: @test4(
32; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
33; CHECK-NEXT:    ret i32 [[R]]
34;
35  %t = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer
36  %r = extractelement <4 x i32> %t, i32 0
37  ret i32 %r
38}
39
; Lane 0 of %t is selected by mask index 3, so the expected output extracts
; lane 3 of %X. The undef mask lanes are never read.
40define i32 @test5(<4 x i32> %X) {
41; CHECK-LABEL: @test5(
42; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
43; CHECK-NEXT:    ret i32 [[R]]
44;
45  %t = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 undef>
46  %r = extractelement <4 x i32> %t, i32 0
47  ret i32 %r
48}
49
; Like test4, but the splat is performed on a <4 x i32> bitcast of the float
; vector and the result is bitcast back. The expected output looks through both
; bitcasts and extracts lane 0 of %X as a float.
50define float @test6(<4 x float> %X) {
51; CHECK-LABEL: @test6(
52; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
53; CHECK-NEXT:    ret float [[R]]
54;
55  %X1 = bitcast <4 x float> %X to <4 x i32>
56  %t = shufflevector <4 x i32> %X1, <4 x i32> undef, <4 x i32> zeroinitializer
57  %t2 = bitcast <4 x i32> %t to <4 x float>
58  %r = extractelement <4 x float> %t2, i32 0
59  ret float %r
60}
61
; Mask indices 6 and 7 select lanes of the undef second operand, so the
; expected output rewrites them as undef mask elements.
62define <4 x float> @test7(<4 x float> %x) {
63; CHECK-LABEL: @test7(
64; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
65; CHECK-NEXT:    ret <4 x float> [[R]]
66;
67  %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >
68  ret <4 x float> %r
69}
70
71; This should turn into a single shuffle.
; The insertelement chain builds <%x[1], undef, %x[3], %y[0]>, which is the
; two-source mask <1, undef, 3, 4> over (%x, %y) in the expected output.
72define <4 x float> @test8(<4 x float> %x, <4 x float> %y) {
73; CHECK-LABEL: @test8(
74; CHECK-NEXT:    [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 1, i32 undef, i32 3, i32 4>
75; CHECK-NEXT:    ret <4 x float> [[T134]]
76;
77  %t4 = extractelement <4 x float> %x, i32 1
78  %t2 = extractelement <4 x float> %x, i32 3
79  %t1 = extractelement <4 x float> %y, i32 0
80  %t128 = insertelement <4 x float> undef, float %t4, i32 0
81  %t130 = insertelement <4 x float> %t128, float undef, i32 1
82  %t132 = insertelement <4 x float> %t130, float %t2, i32 2
83  %t134 = insertelement <4 x float> %t132, float %t1, i32 3
84  ret <4 x float> %t134
85}
86
87; Test fold of two shuffles where the first shuffle's vector inputs are a
88; different length than the second's.
; Composing the masks: lane i of %t9 reads lane <3, 1, 2, 0>[i] of %t7, which
; gives source indices <13, 9, 4, 13> into %t6 for the single merged shuffle.
89define <4 x i8> @test9(<16 x i8> %t6) {
90; CHECK-LABEL: @test9(
91; CHECK-NEXT:    [[T9:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <4 x i32> <i32 13, i32 9, i32 4, i32 13>
92; CHECK-NEXT:    ret <4 x i8> [[T9]]
93;
94  %t7 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> < i32 13, i32 9, i32 4, i32 13 >
95  %t9 = shufflevector <4 x i8> %t7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 >
96  ret <4 x i8> %t9
97}
98
99; Same as test9, but make sure that "undef" mask values are not confused with
100; mask values of 2*N, where N is the mask length.  These shuffles should not
101; be folded (because [8,9,4,8] may not be a mask supported by the target).
102
; The two shuffles stay separate in the expected output. The only change is in
; the mask of %t9: its last element becomes undef because it reads lane 0 of
; %t7, which the first mask leaves undef.
103define <4 x i8> @test9a(<16 x i8> %t6) {
104; CHECK-LABEL: @test9a(
105; CHECK-NEXT:    [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <4 x i32> <i32 undef, i32 9, i32 4, i32 8>
106; CHECK-NEXT:    [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 undef>
107; CHECK-NEXT:    ret <4 x i8> [[T9]]
108;
109  %t7 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 >
110  %t9 = shufflevector <4 x i8> %t7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 >
111  ret <4 x i8> %t9
112}
113
114; Test fold of two shuffles where the first shuffle's vector inputs are a
115; different length than the second's.
; %t9 reads lanes 0, 1, 4 and 5 of the 8-wide %t1, whose mask entries at those
; positions are 0, 1, 4 and 5. The expected output is one shuffle of %t6 and
; %t7 with mask <0, 1, 4, 5>.
116define <4 x i8> @test9b(<4 x i8> %t6, <4 x i8> %t7) {
117; CHECK-LABEL: @test9b(
118; CHECK-NEXT:    [[T9:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[T7:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
119; CHECK-NEXT:    ret <4 x i8> [[T9]]
120;
121  %t1 = shufflevector <4 x i8> %t6, <4 x i8> %t7, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>
122  %t9 = shufflevector <8 x i8> %t1, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
123  ret <4 x i8> %t9
124}
125
126; Redundant vector splats should be removed.  Radar 8597790.
; %t7 splats lane 0 of %t6, and that lane is lane 1 of %t5. The expected output
; is a single splat of lane 1 of %t5.
127define <4 x i32> @test10(<4 x i32> %t5) {
128; CHECK-LABEL: @test10(
129; CHECK-NEXT:    [[T7:%.*]] = shufflevector <4 x i32> [[T5:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
130; CHECK-NEXT:    ret <4 x i32> [[T7]]
131;
132  %t6 = shufflevector <4 x i32> %t5, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
133  %t7 = shufflevector <4 x i32> %t6, <4 x i32> undef, <4 x i32> zeroinitializer
134  ret <4 x i32> %t7
135}
136
137; Test fold of two shuffles where the two shufflevector inputs' op1 are the same.
138
; %t1 holds lanes 0-3 and %t2 holds lanes 4-7 of the same source %t6, so
; concatenating them equals one shuffle of %t6 with mask <0, 1, ..., 7>.
139define <8 x i8> @test11(<16 x i8> %t6) {
140; CHECK-LABEL: @test11(
141; CHECK-NEXT:    [[T3:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
142; CHECK-NEXT:    ret <8 x i8> [[T3]]
143;
144  %t1 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
145  %t2 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
146  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
147  ret <8 x i8> %t3
148}
149
150; Test fold of two shuffles where the first shufflevector's inputs are the same as the second.
151
; %t3 only reads lanes 0-3 of %t1, and those are lanes 0-3 of %t6 unchanged.
; The expected output substitutes %t6 for %t1 and keeps the second mask as is.
152define <8 x i8> @test12(<8 x i8> %t6, <8 x i8> %t2) {
153; CHECK-LABEL: @test12(
154; CHECK-NEXT:    [[T3:%.*]] = shufflevector <8 x i8> [[T6:%.*]], <8 x i8> [[T2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
155; CHECK-NEXT:    ret <8 x i8> [[T3]]
156;
157  %t1 = shufflevector <8 x i8> %t6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>
158  %t3 = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
159  ret <8 x i8> %t3
160}
161
162; Test fold of two shuffles where the first shufflevector's inputs are the same as the second.
163
; Mirror of test12 with %t1 as the second shuffle operand: mask indices 8-11
; read lanes 0-3 of %t1, i.e. lanes 0-3 of %t6, so %t6 replaces %t1 and the
; mask is kept.
164define <8 x i8> @test12a(<8 x i8> %t6, <8 x i8> %t2) {
165; CHECK-LABEL: @test12a(
166; CHECK-NEXT:    [[T3:%.*]] = shufflevector <8 x i8> [[T2:%.*]], <8 x i8> [[T6:%.*]], <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
167; CHECK-NEXT:    ret <8 x i8> [[T3]]
168;
169  %t1 = shufflevector <8 x i8> %t6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>
170  %t3 = shufflevector <8 x i8> %t2, <8 x i8> %t1, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
171  ret <8 x i8> %t3
172}
173
174; The mask length of the 1st shuffle can be reduced to eliminate the 2nd shuffle.
175
; %extract_subv keeps lanes 0 and 1 of the 3-wide %shuf. The expected output
; shortens the first mask to <0, 2> and drops the second shuffle.
176define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) {
177; CHECK-LABEL: @extract_subvector_of_shuffle(
178; CHECK-NEXT:    [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 2>
179; CHECK-NEXT:    ret <2 x i8> [[EXTRACT_SUBV]]
180;
181  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <3 x i32> <i32 0, i32 2, i32 0>
182  %extract_subv = shufflevector <3 x i8> %shuf, <3 x i8> undef, <2 x i32> <i32 0, i32 1>
183  ret <2 x i8> %extract_subv
184}
185
186; Undef elements in either mask are ok. Undefs from the 2nd shuffle mask should propagate to the new shuffle.
187; The type of the inputs does not have to match the output type.
188
; Lane 0 is undef because of the first mask and lane 3 is undef because of the
; second mask, so the merged mask in the expected output is
; <undef, 2, 0, undef>. Inputs are <2 x i8>, the result is <4 x i8>.
189define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8> %y) {
190; CHECK-LABEL: @extract_subvector_of_shuffle_undefs_types(
191; CHECK-NEXT:    [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 2, i32 0, i32 undef>
192; CHECK-NEXT:    ret <4 x i8> [[EXTRACT_SUBV]]
193;
194  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
195  %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
196  ret <4 x i8> %extract_subv
197}
198
199; Extra uses are not ok - we only do the transform when we can eliminate an instruction.
200
; External function taking a <5 x i8>; calling it gives a value an extra use.
201declare void @use_v5i8(<5 x i8>)
202
; %shuf is also passed to @use_v5i8, so it has to stay. The expected output
; leaves both shuffles exactly as written.
203define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y) {
204; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use(
205; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
206; CHECK-NEXT:    call void @use_v5i8(<5 x i8> [[SHUF]])
207; CHECK-NEXT:    [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
208; CHECK-NEXT:    ret <4 x i8> [[EXTRACT_SUBV]]
209;
210  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
211  call void @use_v5i8(<5 x i8> %shuf)
212  %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
213  ret <4 x i8> %extract_subv
214}
215
; The lane-reversing shuffle is absorbed in the expected output: the inserts
; place %x1 in lane 1 and %x2 in lane 0, and the add constant is reversed from
; <5, 7> to <7, 5>.
216define <2 x i8> @test13a(i8 %x1, i8 %x2) {
217; CHECK-LABEL: @test13a(
218; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8> undef, i8 [[X1:%.*]], i32 1
219; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[X2:%.*]], i32 0
220; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i8> [[TMP2]], <i8 7, i8 5>
221; CHECK-NEXT:    ret <2 x i8> [[TMP3]]
222;
223  %A = insertelement <2 x i8> undef, i8 %x1, i32 0
224  %B = insertelement <2 x i8> %A, i8 %x2, i32 1
225  %C = add <2 x i8> %B, <i8 5, i8 7>
226  %D = shufflevector <2 x i8> %C, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
227  ret <2 x i8> %D
228}
229
230; Increasing length of vector ops is not a good canonicalization.
231
; Negative test: the expected output matches the input instruction for
; instruction, so the 2-wide add is not widened to 3 lanes.
232define <3 x i32> @add_wider(i32 %y, i32 %z) {
233; CHECK-LABEL: @add_wider(
234; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
235; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
236; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[I1]], <i32 255, i32 255>
237; CHECK-NEXT:    [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
238; CHECK-NEXT:    ret <3 x i32> [[EXT]]
239;
240  %i0 = insertelement <2 x i32> undef, i32 %y, i32 0
241  %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
242  %a = add <2 x i32> %i1, <i32 255, i32 255>
243  %ext = shufflevector <2 x i32> %a, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
244  ret <3 x i32> %ext
245}
246
247; Increasing length of vector ops must be safe from illegal undef propagation.
248
; Negative test: the expected output matches the input instruction for
; instruction, so the 2-wide sdiv stays ahead of the widening shuffle.
249define <3 x i32> @div_wider(i32 %y, i32 %z) {
250; CHECK-LABEL: @div_wider(
251; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
252; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
253; CHECK-NEXT:    [[A:%.*]] = sdiv <2 x i32> [[I1]], <i32 255, i32 255>
254; CHECK-NEXT:    [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
255; CHECK-NEXT:    ret <3 x i32> [[EXT]]
256;
257  %i0 = insertelement <2 x i32> undef, i32 %y, i32 0
258  %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
259  %a = sdiv <2 x i32> %i1, <i32 255, i32 255>
260  %ext = shufflevector <2 x i32> %a, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
261  ret <3 x i32> %ext
262}
263
264; Increasing length of insertelements (no math ops) is a good canonicalization.
265
; The expected output builds the <3 x i8> result directly with two
; insertelements; lane 2 is left undef, matching the undef mask element.
266define <3 x i8> @fold_inselts_with_widening_shuffle(i8 %x, i8 %y) {
267; CHECK-LABEL: @fold_inselts_with_widening_shuffle(
268; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 0
269; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <3 x i8> [[TMP1]], i8 [[Y:%.*]], i32 1
270; CHECK-NEXT:    ret <3 x i8> [[TMP2]]
271;
272  %ins0 = insertelement <2 x i8> undef, i8 %x, i32 0
273  %ins1 = insertelement <2 x i8> %ins0, i8 %y, i32 1
274  %widen = shufflevector <2 x i8> %ins1, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 undef>
275  ret <3 x i8> %widen
276}
277
; The shuffle moves %x from lane 0 to lane 1 and leaves lane 0 undef; the
; expected output is a single insertelement at index 1.
278define <2 x i8> @test13b(i8 %x) {
279; CHECK-LABEL: @test13b(
280; CHECK-NEXT:    [[B:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
281; CHECK-NEXT:    ret <2 x i8> [[B]]
282;
283  %A = insertelement <2 x i8> undef, i8 %x, i32 0
284  %B = shufflevector <2 x i8> %A, <2 x i8> undef, <2 x i32> <i32 undef, i32 0>
285  ret <2 x i8> %B
286}
287
; The shuffle picks lanes 0 and 2 of the 4-wide vector, which hold %x1 and %x2.
; The expected output inserts them straight into a <2 x i8> at lanes 0 and 1.
288define <2 x i8> @test13c(i8 %x1, i8 %x2) {
289; CHECK-LABEL: @test13c(
290; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8> undef, i8 [[X1:%.*]], i32 0
291; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[X2:%.*]], i32 1
292; CHECK-NEXT:    ret <2 x i8> [[TMP2]]
293;
294  %A = insertelement <4 x i8> undef, i8 %x1, i32 0
295  %B = insertelement <4 x i8> %A, i8 %x2, i32 2
296  %C = shufflevector <4 x i8> %B, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
297  ret <2 x i8> %C
298}
299
; The expected output keeps only the first store, with the inserted constant
; folded into the stored vector. The load/udiv/store/load/shuffle/icmp/sext
; chain that follows ends in %sext, which has no uses, and none of it appears
; in the expected output.
300define void @test14(i16 %conv10) {
301; CHECK-LABEL: @test14(
302; CHECK-NEXT:    store <4 x i16> <i16 undef, i16 undef, i16 undef, i16 23>, <4 x i16>* undef, align 8
303; CHECK-NEXT:    ret void
304;
305  %t = alloca <4 x i16>, align 8
306  %vecinit6 = insertelement <4 x i16> undef, i16 23, i32 3
307  store <4 x i16> %vecinit6, <4 x i16>* undef
308  %t1 = load <4 x i16>, <4 x i16>* undef
309  %vecinit11 = insertelement <4 x i16> undef, i16 %conv10, i32 3
310  %div = udiv <4 x i16> %t1, %vecinit11
311  store <4 x i16> %div, <4 x i16>* %t
312  %t4 = load <4 x i16>, <4 x i16>* %t
313  %t5 = shufflevector <4 x i16> %t4, <4 x i16> undef, <2 x i32> <i32 2, i32 0>
314  %cmp = icmp ule <2 x i16> %t5, undef
315  %sext = sext <2 x i1> %cmp to <2 x i16>
316  ret void
317}
318
319; Check that sequences of insert/extract element are
320; collapsed into valid shuffle instruction with correct shuffle indexes.
321
; Result lanes are <RHS[0], LHS[0], RHS[2], RHS[2]>, which is the mask
; <4, 0, 6, 6> over (LHS, RHS) in the expected output.
322define <4 x float> @test15a(<4 x float> %LHS, <4 x float> %RHS) {
323; CHECK-LABEL: @test15a(
324; CHECK-NEXT:    [[T4:%.*]] = shufflevector <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]], <4 x i32> <i32 4, i32 0, i32 6, i32 6>
325; CHECK-NEXT:    ret <4 x float> [[T4]]
326;
327  %t1 = extractelement <4 x float> %LHS, i32 0
328  %t2 = insertelement <4 x float> %RHS, float %t1, i32 1
329  %t3 = extractelement <4 x float> %RHS, i32 2
330  %t4 = insertelement <4 x float> %t2, float %t3, i32 3
331  ret <4 x float> %t4
332}
333
; %t2 re-extracts the LHS[3] value that was just inserted into %t1. Result
; lanes are <RHS[0], LHS[3], RHS[2], RHS[2]>, which is the mask <4, 3, 6, 6>
; over (LHS, RHS) in the expected output.
334define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
335; CHECK-LABEL: @test15b(
336; CHECK-NEXT:    [[T5:%.*]] = shufflevector <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]], <4 x i32> <i32 4, i32 3, i32 6, i32 6>
337; CHECK-NEXT:    ret <4 x float> [[T5]]
338;
339  %t0 = extractelement <4 x float> %LHS, i32 3
340  %t1 = insertelement <4 x float> %RHS, float %t0, i32 0
341  %t2 = extractelement <4 x float> %t1, i32 0
342  %t3 = insertelement <4 x float> %RHS, float %t2, i32 1
343  %t4 = extractelement <4 x float> %RHS, i32 2
344  %t5 = insertelement <4 x float> %t3, float %t4, i32 3
345  ret <4 x float> %t5
346}
347
; The shuffle keeps only lane 0, which is the constant 1 shifted left by 1.
; Lane 1 (holding %ele) is never selected, so the expected output is the
; constant vector <2>.
348define <1 x i32> @test16a(i32 %ele) {
349; CHECK-LABEL: @test16a(
350; CHECK-NEXT:    ret <1 x i32> <i32 2>
351;
352  %t0 = insertelement <2 x i32> <i32 1, i32 undef>, i32 %ele, i32 1
353  %t1 = shl <2 x i32> %t0, <i32 1, i32 1>
354  %t2 = shufflevector <2 x i32> %t1, <2 x i32> undef, <1 x i32> <i32 0>
355  ret <1 x i32> %t2
356}
357
; The shuffle keeps lanes 1-4, each the constant 1 shifted left by 1. Lane 6
; (holding %ele) is never selected, so the expected output is a constant
; splat of 2.
358define <4 x i8> @test16b(i8 %ele) {
359; CHECK-LABEL: @test16b(
360; CHECK-NEXT:    ret <4 x i8> <i8 2, i8 2, i8 2, i8 2>
361;
362  %t0 = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef, i8 1>, i8 %ele, i32 6
363  %t1 = shl <8 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
364  %t2 = shufflevector <8 x i8> %t1, <8 x i8> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
365  ret <4 x i8> %t2
366}
367
368; If composition of two shuffles is identity, shuffles can be removed.
; Mask <1, 2, 3, 0> followed by mask <3, 0, 1, 2> maps every lane back to
; itself, so the expected output returns %v.
369define <4 x i32> @shuffle_17ident(<4 x i32> %v) {
370; CHECK-LABEL: @shuffle_17ident(
371; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
372;
373  %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
374  %shuffle2 = shufflevector <4 x i32> %shuffle, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
375  ret <4 x i32> %shuffle2
376}
377
378; swizzle can be put after operation
; Both operands use the same mask <1, 2, 3, 0>. The expected output performs
; the 'and' on the unshuffled inputs and applies the mask once to the result.
379define <4 x i32> @shuffle_17and(<4 x i32> %v1, <4 x i32> %v2) {
380; CHECK-LABEL: @shuffle_17and(
381; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[V1:%.*]], [[V2:%.*]]
382; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
383; CHECK-NEXT:    ret <4 x i32> [[R]]
384;
385  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
386  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
387  %r = and <4 x i32> %t1, %t2
388  ret <4 x i32> %r
389}
390
; External function taking a <2 x float>; calling it gives a value an extra use.
391declare void @use(<2 x float>)
392
393; One extra use is ok to transform.
394
; %t1 has an extra use (the call) while %t2 does not. The expected output keeps
; %t1 for the call but computes the fadd on the unshuffled inputs, followed by
; a single shuffle of the sum.
395define <2 x float> @shuffle_fadd_multiuse(<2 x float> %v1, <2 x float> %v2) {
396; CHECK-LABEL: @shuffle_fadd_multiuse(
397; CHECK-NEXT:    [[T1:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
398; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[V1]], [[V2:%.*]]
399; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
400; CHECK-NEXT:    call void @use(<2 x float> [[T1]])
401; CHECK-NEXT:    ret <2 x float> [[R]]
402;
403  %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
404  %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
405  %r = fadd <2 x float> %t1, %t2
406  call void @use(<2 x float> %t1)
407  ret <2 x float> %r
408}
409
; Like shuffle_fadd_multiuse, but the extra use is on the second operand %t2
; and the binop is fdiv. The expected output keeps %t2 for the call and
; shuffles the quotient of the unshuffled inputs once.
410define <2 x float> @shuffle_fdiv_multiuse(<2 x float> %v1, <2 x float> %v2) {
411; CHECK-LABEL: @shuffle_fdiv_multiuse(
412; CHECK-NEXT:    [[T2:%.*]] = shufflevector <2 x float> [[V2:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
413; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <2 x float> [[V1:%.*]], [[V2]]
414; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
415; CHECK-NEXT:    call void @use(<2 x float> [[T2]])
416; CHECK-NEXT:    ret <2 x float> [[R]]
417;
418  %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
419  %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
420  %r = fdiv <2 x float> %t1, %t2
421  call void @use(<2 x float> %t2)
422  ret <2 x float> %r
423}
424
425; But 2 extra uses would require an extra instruction.
426
; Negative test: both %t1 and %t2 are passed to @use, and the expected output
; is unchanged from the input.
427define <2 x float> @shuffle_fsub_multiuse(<2 x float> %v1, <2 x float> %v2) {
428; CHECK-LABEL: @shuffle_fsub_multiuse(
429; CHECK-NEXT:    [[T1:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
430; CHECK-NEXT:    [[T2:%.*]] = shufflevector <2 x float> [[V2:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
431; CHECK-NEXT:    [[R:%.*]] = fsub <2 x float> [[T1]], [[T2]]
432; CHECK-NEXT:    call void @use(<2 x float> [[T1]])
433; CHECK-NEXT:    call void @use(<2 x float> [[T2]])
434; CHECK-NEXT:    ret <2 x float> [[R]]
435;
436  %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
437  %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
438  %r = fsub <2 x float> %t1, %t2
439  call void @use(<2 x float> %t1)
440  call void @use(<2 x float> %t2)
441  ret <2 x float> %r
442}
443
; Integer add whose operands are shuffled with the same mask <1, 2, 3, 0>. The
; expected output adds the unshuffled inputs and shuffles the sum once.
444define <4 x i32> @shuffle_17add(<4 x i32> %v1, <4 x i32> %v2) {
445; CHECK-LABEL: @shuffle_17add(
446; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[V1:%.*]], [[V2:%.*]]
447; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
448; CHECK-NEXT:    ret <4 x i32> [[R]]
449;
450  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
451  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
452  %r = add <4 x i32> %t1, %t2
453  ret <4 x i32> %r
454}
455
; Same shape as shuffle_17add with an 'nsw' add; the expected output keeps the
; nsw flag on the add that now precedes the shuffle.
456define <4 x i32> @shuffle_17addnsw(<4 x i32> %v1, <4 x i32> %v2) {
457; CHECK-LABEL: @shuffle_17addnsw(
458; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[V1:%.*]], [[V2:%.*]]
459; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
460; CHECK-NEXT:    ret <4 x i32> [[R]]
461;
462  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
463  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
464  %r = add nsw <4 x i32> %t1, %t2
465  ret <4 x i32> %r
466}
467
; Same shape as shuffle_17add with an 'nuw' add; the expected output keeps the
; nuw flag on the add that now precedes the shuffle.
468define <4 x i32> @shuffle_17addnuw(<4 x i32> %v1, <4 x i32> %v2) {
469; CHECK-LABEL: @shuffle_17addnuw(
470; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <4 x i32> [[V1:%.*]], [[V2:%.*]]
471; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
472; CHECK-NEXT:    ret <4 x i32> [[R]]
473;
474  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
475  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
476  %r = add nuw <4 x i32> %t1, %t2
477  ret <4 x i32> %r
478}
479
; Floating-point variant: the expected output keeps the 'fast' flag on the
; fsub that now precedes the single shuffle.
480define <4 x float> @shuffle_17fsub_fast(<4 x float> %v1, <4 x float> %v2) {
481; CHECK-LABEL: @shuffle_17fsub_fast(
482; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <4 x float> [[V1:%.*]], [[V2:%.*]]
483; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
484; CHECK-NEXT:    ret <4 x float> [[R]]
485;
486  %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
487  %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
488  %r = fsub fast <4 x float> %t1, %t2
489  ret <4 x float> %r
490}
491
; The constant is permuted so each value lines up with the pre-shuffle lane it
; is added to. Result lane i is %v[mask[i]] + C[i], so the new constant C'
; satisfies C'[mask[i]] = C[i], giving <44, 41, 42, 43>.
492define <4 x i32> @add_const(<4 x i32> %v) {
493; CHECK-LABEL: @add_const(
494; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[V:%.*]], <i32 44, i32 41, i32 42, i32 43>
495; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
496; CHECK-NEXT:    ret <4 x i32> [[R]]
497;
498  %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
499  %r = add <4 x i32> %t1, <i32 41, i32 42, i32 43, i32 44>
500  ret <4 x i32> %r
501}
502
; The constant is operand 0 of the sub. With the lane-reversing mask the
; pre-shuffle constant is the reversed vector <44, 43, 42, 41>, and it stays
; as operand 0 in the expected output.
503define <4 x i32> @sub_const(<4 x i32> %v) {
504; CHECK-LABEL: @sub_const(
505; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> <i32 44, i32 43, i32 42, i32 41>, [[V:%.*]]
506; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
507; CHECK-NEXT:    ret <4 x i32> [[R]]
508;
509  %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
510  %r = sub <4 x i32> <i32 41, i32 42, i32 43, i32 44>, %t1
511  ret <4 x i32> %r
512}
513
514; Math before shuffle requires an extra shuffle.
515
; Negative test: %t1 is also passed to @use, and the expected output is
; unchanged from the input.
516define <2 x float> @fadd_const_multiuse(<2 x float> %v) {
517; CHECK-LABEL: @fadd_const_multiuse(
518; CHECK-NEXT:    [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
519; CHECK-NEXT:    [[R:%.*]] = fadd <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
520; CHECK-NEXT:    call void @use(<2 x float> [[T1]])
521; CHECK-NEXT:    ret <2 x float> [[R]]
522;
523  %t1 = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 0>
524  %r = fadd <2 x float> %t1, <float 41.0, float 42.0>
525  call void @use(<2 x float> %t1)
526  ret <2 x float> %r
527}
528
529; Math before splat allows replacing constant elements with undef lanes.
530
; Only lane 1 of %v is read by the splat, so only lane 1 of the pre-shuffle
; constant is defined (42) and the rest are undef. The expected output also
; places %v as operand 0 of the mul.
531define <4 x i32> @mul_const_splat(<4 x i32> %v) {
532; CHECK-LABEL: @mul_const_splat(
533; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 42, i32 undef, i32 undef>
534; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
535; CHECK-NEXT:    ret <4 x i32> [[R]]
536;
537  %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
538  %r = mul <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %t1
539  ret <4 x i32> %r
540}
541
542; Take 2 elements of a vector and shift each of those by a different amount
543
; Lanes 1 and 2 of %v are each read twice, and both reads of a lane pair with
; the same constant (8, 8 and 9, 9). The pre-shuffle constant is therefore
; <undef, 8, 9, undef>, kept as operand 0 of the lshr.
544define <4 x i32> @lshr_const_half_splat(<4 x i32> %v) {
545; CHECK-LABEL: @lshr_const_half_splat(
546; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> <i32 undef, i32 8, i32 9, i32 undef>, [[V:%.*]]
547; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
548; CHECK-NEXT:    ret <4 x i32> [[R]]
549;
550  %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
551  %r = lshr <4 x i32> <i32 8, i32 8, i32 9, i32 9>, %t1
552  ret <4 x i32> %r
553}
554
555; We can't change this because there's no pre-shuffle version of the fmul constant.
556
; Lane 0 of %v is multiplied by both 41.0 and 42.0, so no single pre-shuffle
; constant lane can serve both. The expected output keeps the shuffle first;
; only the printing of the splat mask differs (zeroinitializer).
557define <2 x float> @fmul_const_invalid_constant(<2 x float> %v) {
558; CHECK-LABEL: @fmul_const_invalid_constant(
559; CHECK-NEXT:    [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> undef, <2 x i32> zeroinitializer
560; CHECK-NEXT:    [[R:%.*]] = fmul <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
561; CHECK-NEXT:    ret <2 x float> [[R]]
562;
563  %t1 = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 0, i32 0>
564  %r = fmul <2 x float> %t1, <float 41.0, float 42.0>
565  ret <2 x float> %r
566}
567
568; Reduce the width of the binop by moving it ahead of a shuffle.
569
; Only constant lanes 0 and 1 (42, 43) pair with defined mask lanes. The
; expected output adds <42, 43> at 2 lanes and then widens to 4 lanes.
570define <4 x i8> @widening_shuffle_add_1(<2 x i8> %x) {
571; CHECK-LABEL: @widening_shuffle_add_1(
572; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 42, i8 43>
573; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
574; CHECK-NEXT:    ret <4 x i8> [[R]]
575;
576  %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
577  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
578  ret <4 x i8> %r
579}
580
581; Reduce the width of the binop by moving it ahead of a shuffle.
582
; Like widening_shuffle_add_1, but the mask swaps lanes 0 and 1, so the narrow
; constant in the expected output is swapped to <43, 42>.
583define <4 x i8> @widening_shuffle_add_2(<2 x i8> %x) {
584; CHECK-LABEL: @widening_shuffle_add_2(
585; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 43, i8 42>
586; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
587; CHECK-NEXT:    ret <4 x i8> [[R]]
588;
589  %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
590  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
591  ret <4 x i8> %r
592}
593
594; Negative test - widening shuffles have the same mask/constant constraint as non-size-changing shuffles.
595
; Lane 1 of %x is read twice and paired with two different constants (42 and
; 43); the expected output is unchanged from the input.
596define <4 x i8> @widening_shuffle_add_invalid_constant(<2 x i8> %x) {
597; CHECK-LABEL: @widening_shuffle_add_invalid_constant(
598; CHECK-NEXT:    [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
599; CHECK-NEXT:    [[R:%.*]] = add <4 x i8> [[WIDEX]], <i8 42, i8 43, i8 44, i8 45>
600; CHECK-NEXT:    ret <4 x i8> [[R]]
601;
602  %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
603  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
604  ret <4 x i8> %r
605}
606
607; Negative test - widening shuffles have an additional constraint: they must not extend with anything but undefs.
608
; Mask lane 3 is 0 rather than undef, so the widened lanes are not purely
; undef; the expected output is unchanged from the input.
609define <4 x i8> @widening_shuffle_add_invalid_mask(<2 x i8> %x) {
610; CHECK-LABEL: @widening_shuffle_add_invalid_mask(
611; CHECK-NEXT:    [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 0>
612; CHECK-NEXT:    [[R:%.*]] = add <4 x i8> [[WIDEX]], <i8 42, i8 43, i8 44, i8 45>
613; CHECK-NEXT:    ret <4 x i8> [[R]]
614;
615  %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 0>
616  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
617  ret <4 x i8> %r
618}
619
620; A binop that produces undef in the high lanes can be moved before the shuffle.
621; This is ok because 'shl C, undef --> undef'.
622
; The constant is the value being shifted (operand 0). The expected output
; narrows it to its low two lanes <42, -42> and shifts before widening.
623define <4 x i16> @widening_shuffle_shl_constant_op0(<2 x i16> %v) {
624; CHECK-LABEL: @widening_shuffle_shl_constant_op0(
625; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i16> <i16 42, i16 -42>, [[V:%.*]]
626; CHECK-NEXT:    [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
627; CHECK-NEXT:    ret <4 x i16> [[BO]]
628;
629  %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
630  %bo = shl <4 x i16> <i16 42, i16 -42, i16 -1, i16 -1>, %shuf
631  ret <4 x i16> %bo
632}
633
634; A binop that produces undef in the high lanes can be moved before the shuffle.
635; This is ok because 'shl undef, 0 --> undef'.
636
; The constant is the shift amount (operand 1) and its two high lanes are 0.
; The expected output narrows it to <2, 4> and shifts before widening.
637define <4 x i16> @widening_shuffle_shl_constant_op1(<2 x i16> %v) {
638; CHECK-LABEL: @widening_shuffle_shl_constant_op1(
639; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i16> [[V:%.*]], <i16 2, i16 4>
640; CHECK-NEXT:    [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
641; CHECK-NEXT:    ret <4 x i16> [[BO]]
642;
643  %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
644  %bo = shl <4 x i16> %shuf, <i16 2, i16 4, i16 0, i16 0>
645  ret <4 x i16> %bo
646}
647
648; A binop that does not produce undef in the high lanes can not be moved before the shuffle.
649; This is not ok because 'shl undef, 1 (or 2) --> 0' but moving the shuffle results in undef instead.
650
; The shift amounts in the two high lanes are 1 and 2 (non-zero); the expected
; output is unchanged from the input.
651define <4 x i16> @widening_shuffle_shl_constant_op1_non0(<2 x i16> %v) {
652; CHECK-LABEL: @widening_shuffle_shl_constant_op1_non0(
653; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
654; CHECK-NEXT:    [[BO:%.*]] = shl <4 x i16> [[SHUF]], <i16 2, i16 4, i16 1, i16 2>
655; CHECK-NEXT:    ret <4 x i16> [[BO]]
656;
657  %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
658  %bo = shl <4 x i16> %shuf, <i16 2, i16 4, i16 1, i16 2>
659  ret <4 x i16> %bo
660}
661
662; A binop that does not produce undef in the high lanes can not be moved before the shuffle.
663; This is not ok because 'or -1, undef --> -1' but moving the shuffle results in undef instead.
664
; The two high lanes of the 'or' constant are -1; the expected output is
; unchanged from the input.
665define <4 x i16> @widening_shuffle_or(<2 x i16> %v) {
666; CHECK-LABEL: @widening_shuffle_or(
667; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
668; CHECK-NEXT:    [[BO:%.*]] = or <4 x i16> [[SHUF]], <i16 42, i16 -42, i16 -1, i16 -1>
669; CHECK-NEXT:    ret <4 x i16> [[BO]]
670;
671  %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
672  %bo = or <4 x i16> %shuf, <i16 42, i16 -42, i16 -1, i16 -1>
673  ret <4 x i16> %bo
674}
675
; Input: reverse the lanes of %v, add the reversed vector to itself, then
; reverse again. The CHECK lines expect both shuffles to disappear, leaving a
; single 'shl %v, 1' (x + x expressed as a shift).
676define <4 x i32> @shuffle_17add2(<4 x i32> %v) {
677; CHECK-LABEL: @shuffle_17add2(
678; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 1, i32 1>
679; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
680;
681  %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
682  %t2 = add <4 x i32> %t1, %t1
683  %r = shufflevector <4 x i32> %t2, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
684  ret <4 x i32> %r
685}
686
; Input: splat lane 0 of %v, square the splat, then splat lane 1 of the product.
; The CHECK lines expect a single 'mul %v, %v' followed by one lane-0 splat.
687define <4 x i32> @shuffle_17mulsplat(<4 x i32> %v) {
688; CHECK-LABEL: @shuffle_17mulsplat(
689; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], [[V]]
690; CHECK-NEXT:    [[M1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
691; CHECK-NEXT:    ret <4 x i32> [[M1]]
692;
693  %s1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
694  %m1 = mul <4 x i32> %s1, %s1
695  %s2 = shufflevector <4 x i32> %m1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
696  ret <4 x i32> %s2
697}
698
699; Do not reorder the shuffles and the binop if the source operands of the shuffles have different sizes.
; The two splats read from a <4 x i32> and a <2 x i32> source respectively.
; The CHECK lines expect both shuffles to stay ahead of the mul; only the
; redundant second shuffle operand is replaced by undef.
700define <2 x i32> @pr19717(<4 x i32> %in0, <2 x i32> %in1) {
701; CHECK-LABEL: @pr19717(
702; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[IN0:%.*]], <4 x i32> undef, <2 x i32> zeroinitializer
703; CHECK-NEXT:    [[SHUFFLE4:%.*]] = shufflevector <2 x i32> [[IN1:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
704; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[SHUFFLE]], [[SHUFFLE4]]
705; CHECK-NEXT:    ret <2 x i32> [[MUL]]
706;
707  %shuffle = shufflevector <4 x i32> %in0, <4 x i32> %in0, <2 x i32> zeroinitializer
708  %shuffle4 = shufflevector <2 x i32> %in1, <2 x i32> %in1, <2 x i32> zeroinitializer
709  %mul = mul <2 x i32> %shuffle, %shuffle4
710  ret <2 x i32> %mul
711}
712
; Both operands are narrowing lane-5 splats of same-sized <8 x i16> sources with
; identical masks. The CHECK lines expect the mul to be done on the <8 x i16>
; inputs, followed by a single shuffle with that mask.
713define <4 x i16> @pr19717a(<8 x i16> %in0, <8 x i16> %in1) {
714; CHECK-LABEL: @pr19717a(
715; CHECK-NEXT:    [[TMP1:%.*]] = mul <8 x i16> [[IN0:%.*]], [[IN1:%.*]]
716; CHECK-NEXT:    [[MUL:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
717; CHECK-NEXT:    ret <4 x i16> [[MUL]]
718;
719  %shuffle = shufflevector <8 x i16> %in0, <8 x i16> %in0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
720  %shuffle1 = shufflevector <8 x i16> %in1, <8 x i16> %in1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
721  %mul = mul <4 x i16> %shuffle, %shuffle1
722  ret <4 x i16> %mul
723}
724
; A narrowing (16 -> 8 lanes) reversing shuffle followed by a same-width
; reversing shuffle. The CHECK lines expect both shuffles to be kept unchanged.
725define <8 x i8> @pr19730(<16 x i8> %in0) {
726; CHECK-LABEL: @pr19730(
727; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[IN0:%.*]], <16 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
728; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x i8> [[SHUFFLE]], <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
729; CHECK-NEXT:    ret <8 x i8> [[SHUFFLE1]]
730;
731  %shuffle = shufflevector <16 x i8> %in0, <16 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
732  %shuffle1 = shufflevector <8 x i8> %shuffle, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
733  ret <8 x i8> %shuffle1
734}
735
; Only lane 0 of the result is extracted. Lane 0 of the shuffle comes from the
; zero vector, so the xor yields -1 there and the 'and' passes %in0's lane 0
; through. The CHECK lines expect just 'extractelement %in0, 0'.
736define i32 @pr19737(<4 x i32> %in0) {
737; CHECK-LABEL: @pr19737(
738; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i32 0
739; CHECK-NEXT:    ret i32 [[TMP1]]
740;
741  %shuffle.i = shufflevector <4 x i32> zeroinitializer, <4 x i32> %in0, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
742  %neg.i = xor <4 x i32> %shuffle.i, <i32 -1, i32 -1, i32 -1, i32 -1>
743  %and.i = and <4 x i32> %in0, %neg.i
744  %rv = extractelement <4 x i32> %and.i, i32 0
745  ret i32 %rv
746}
747
748; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
749; for an srem operation. This is not a valid optimization because it may cause a trap
750; on div-by-zero.
751
; Negative test: srem of two lane-0 splats. The CHECK lines expect both splats
; to stay ahead of the srem, i.e. the srem is not hoisted onto the unshuffled inputs.
752define <4 x i32> @pr20059(<4 x i32> %p1, <4 x i32> %p2) {
753; CHECK-LABEL: @pr20059(
754; CHECK-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[P1:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
755; CHECK-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[P2:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
756; CHECK-NEXT:    [[RETVAL:%.*]] = srem <4 x i32> [[SPLAT1]], [[SPLAT2]]
757; CHECK-NEXT:    ret <4 x i32> [[RETVAL]]
758;
759  %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
760  %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
761  %retval = srem <4 x i32> %splat1, %splat2
762  ret <4 x i32> %retval
763}
764
; The 'and' has a constant-expression operand (a bitcast of a ptrtoint vector).
; The CHECK lines expect the shuffle to stay in place; the only change is that
; the constant expression is commuted to operand 1 of the 'and'.
765define <4 x i32> @pr20114(<4 x i32> %__mask) {
766; CHECK-LABEL: @pr20114(
767; CHECK-NEXT:    [[MASK01_I:%.*]] = shufflevector <4 x i32> [[__MASK:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
768; CHECK-NEXT:    [[MASKED_NEW_I_I_I:%.*]] = and <4 x i32> [[MASK01_I]], bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>)
769; CHECK-NEXT:    ret <4 x i32> [[MASKED_NEW_I_I_I]]
770;
771  %mask01.i = shufflevector <4 x i32> %__mask, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
772  %masked_new.i.i.i = and <4 x i32> bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>), %mask01.i
773  ret <4 x i32> %masked_new.i.i.i
774}
775
; Narrowing shuffle (lanes 0 and 1) of a vector of pointers. The CHECK lines
; expect the shuffle to be emitted unchanged.
776define <2 x i32*> @pr23113(<4 x i32*> %A) {
777; CHECK-LABEL: @pr23113(
778; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32*> [[A:%.*]], <4 x i32*> undef, <2 x i32> <i32 0, i32 1>
779; CHECK-NEXT:    ret <2 x i32*> [[TMP1]]
780;
781  %1 = shufflevector <4 x i32*> %A, <4 x i32*> undef, <2 x i32> <i32 0, i32 1>
782  ret <2 x i32*> %1
783}
784
785; Unused lanes in the new binop should not kill the entire op (although it may simplify anyway as shown here).
786
; urem of a lane-0 splat by the splat constant 1. The CHECK lines expect the
; whole function to fold to 'ret zeroinitializer'.
787define <2 x i32> @PR37648(<2 x i32> %x) {
788; CHECK-LABEL: @PR37648(
789; CHECK-NEXT:    ret <2 x i32> zeroinitializer
790;
791  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
792  %r = urem <2 x i32> %splat, <i32 1, i32 1>
793  ret <2 x i32> %r
794}
795
796; Test shuffle followed by binop with splat constant for all 18 binop opcodes.
797; Test with constant as operand 0 and operand 1 for non-commutative opcodes.
798
; add (splat x), 42: the CHECK lines expect the add to be done on %x first, with
; the constant for the unused lane 1 turned into undef, then the lane-0 splat.
799define <2 x i32> @add_splat_constant(<2 x i32> %x) {
800; CHECK-LABEL: @add_splat_constant(
801; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 42, i32 undef>
802; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
803; CHECK-NEXT:    ret <2 x i32> [[R]]
804;
805  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
806  %r = add <2 x i32> %splat, <i32 42, i32 42>
807  ret <2 x i32> %r
808}
809
; sub 42, (splat x) with the constant as operand 0: the CHECK lines expect
; 'sub <42, undef>, %x' followed by the lane-0 splat.
810define <2 x i32> @sub_splat_constant0(<2 x i32> %x) {
811; CHECK-LABEL: @sub_splat_constant0(
812; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> <i32 42, i32 undef>, [[X:%.*]]
813; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
814; CHECK-NEXT:    ret <2 x i32> [[R]]
815;
816  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
817  %r = sub <2 x i32> <i32 42, i32 42>, %splat
818  ret <2 x i32> %r
819}
820
; sub (splat x), 42 with the constant as operand 1: the CHECK lines expect the
; sub to become 'add %x, <-42, undef>' ahead of the lane-0 splat.
821define <2 x i32> @sub_splat_constant1(<2 x i32> %x) {
822; CHECK-LABEL: @sub_splat_constant1(
823; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -42, i32 undef>
824; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
825; CHECK-NEXT:    ret <2 x i32> [[R]]
826;
827  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
828  %r = sub <2 x i32> %splat, <i32 42, i32 42>
829  ret <2 x i32> %r
830}
831
; mul (splat x), 42: the CHECK lines expect 'mul %x, <42, undef>' followed by
; the lane-0 splat.
832define <2 x i32> @mul_splat_constant(<2 x i32> %x) {
833; CHECK-LABEL: @mul_splat_constant(
834; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], <i32 42, i32 undef>
835; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
836; CHECK-NEXT:    ret <2 x i32> [[R]]
837;
838  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
839  %r = mul <2 x i32> %splat, <i32 42, i32 42>
840  ret <2 x i32> %r
841}
842
; shl 5, (splat x) with the constant as the shifted value: the CHECK lines
; expect 'shl <5, undef>, %x' followed by the lane-0 splat.
843define <2 x i32> @shl_splat_constant0(<2 x i32> %x) {
844; CHECK-LABEL: @shl_splat_constant0(
845; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
846; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
847; CHECK-NEXT:    ret <2 x i32> [[R]]
848;
849  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
850  %r = shl <2 x i32> <i32 5, i32 5>, %splat
851  ret <2 x i32> %r
852}
853
; shl (splat x), 5 with the constant as the shift amount: the CHECK lines expect
; 'shl %x, <5, 0>' then the lane-0 splat. Note the unused lane's shift amount
; is 0 here rather than undef.
854define <2 x i32> @shl_splat_constant1(<2 x i32> %x) {
855; CHECK-LABEL: @shl_splat_constant1(
856; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 0>
857; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
858; CHECK-NEXT:    ret <2 x i32> [[R]]
859;
860  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
861  %r = shl <2 x i32> %splat, <i32 5, i32 5>
862  ret <2 x i32> %r
863}
864
; ashr 5, (splat x) with the constant as the shifted value: the CHECK lines
; expect 'ashr <5, undef>, %x' followed by the lane-0 splat.
865define <2 x i32> @ashr_splat_constant0(<2 x i32> %x) {
866; CHECK-LABEL: @ashr_splat_constant0(
867; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
868; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
869; CHECK-NEXT:    ret <2 x i32> [[R]]
870;
871  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
872  %r = ashr <2 x i32> <i32 5, i32 5>, %splat
873  ret <2 x i32> %r
874}
875
; ashr (splat x), 5 with the constant as the shift amount: the CHECK lines
; expect 'ashr %x, <5, 0>' then the lane-0 splat; the unused lane's shift
; amount is 0 rather than undef.
876define <2 x i32> @ashr_splat_constant1(<2 x i32> %x) {
877; CHECK-LABEL: @ashr_splat_constant1(
878; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 0>
879; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
880; CHECK-NEXT:    ret <2 x i32> [[R]]
881;
882  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
883  %r = ashr <2 x i32> %splat, <i32 5, i32 5>
884  ret <2 x i32> %r
885}
886
; lshr 5, (splat x) with the constant as the shifted value: the CHECK lines
; expect 'lshr <5, undef>, %x' followed by the lane-0 splat.
887define <2 x i32> @lshr_splat_constant0(<2 x i32> %x) {
888; CHECK-LABEL: @lshr_splat_constant0(
889; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
890; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
891; CHECK-NEXT:    ret <2 x i32> [[R]]
892;
893  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
894  %r = lshr <2 x i32> <i32 5, i32 5>, %splat
895  ret <2 x i32> %r
896}
897
; lshr (splat x), 5 with the constant as the shift amount: the CHECK lines
; expect 'lshr %x, <5, 0>' then the lane-0 splat; the unused lane's shift
; amount is 0 rather than undef.
898define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) {
899; CHECK-LABEL: @lshr_splat_constant1(
900; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 0>
901; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
902; CHECK-NEXT:    ret <2 x i32> [[R]]
903;
904  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
905  %r = lshr <2 x i32> %splat, <i32 5, i32 5>
906  ret <2 x i32> %r
907}
908
; urem 42, (splat x) where the variable is the divisor: the CHECK lines expect
; the splat and the urem to remain in their original order (no reordering).
909define <2 x i32> @urem_splat_constant0(<2 x i32> %x) {
910; CHECK-LABEL: @urem_splat_constant0(
911; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
912; CHECK-NEXT:    [[R:%.*]] = urem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
913; CHECK-NEXT:    ret <2 x i32> [[R]]
914;
915  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
916  %r = urem <2 x i32> <i32 42, i32 42>, %splat
917  ret <2 x i32> %r
918}
919
; urem (splat x), 42 with a constant divisor: the CHECK lines expect
; 'urem %x, <42, 1>' then the lane-0 splat; the unused lane's divisor is 1
; rather than undef.
920define <2 x i32> @urem_splat_constant1(<2 x i32> %x) {
921; CHECK-LABEL: @urem_splat_constant1(
922; CHECK-NEXT:    [[TMP1:%.*]] = urem <2 x i32> [[X:%.*]], <i32 42, i32 1>
923; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
924; CHECK-NEXT:    ret <2 x i32> [[R]]
925;
926  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
927  %r = urem <2 x i32> %splat, <i32 42, i32 42>
928  ret <2 x i32> %r
929}
930
; srem 42, (splat x) where the variable is the divisor: the CHECK lines expect
; the splat and the srem to remain in their original order (no reordering).
931define <2 x i32> @srem_splat_constant0(<2 x i32> %x) {
932; CHECK-LABEL: @srem_splat_constant0(
933; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
934; CHECK-NEXT:    [[R:%.*]] = srem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
935; CHECK-NEXT:    ret <2 x i32> [[R]]
936;
937  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
938  %r = srem <2 x i32> <i32 42, i32 42>, %splat
939  ret <2 x i32> %r
940}
941
; srem (splat x), 42 with a constant divisor: the CHECK lines expect
; 'srem %x, <42, 1>' then the lane-0 splat; the unused lane's divisor is 1
; rather than undef.
942define <2 x i32> @srem_splat_constant1(<2 x i32> %x) {
943; CHECK-LABEL: @srem_splat_constant1(
944; CHECK-NEXT:    [[TMP1:%.*]] = srem <2 x i32> [[X:%.*]], <i32 42, i32 1>
945; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
946; CHECK-NEXT:    ret <2 x i32> [[R]]
947;
948  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
949  %r = srem <2 x i32> %splat, <i32 42, i32 42>
950  ret <2 x i32> %r
951}
952
; udiv 42, (splat x) where the variable is the divisor: the CHECK lines expect
; the splat and the udiv to remain in their original order (no reordering).
953define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) {
954; CHECK-LABEL: @udiv_splat_constant0(
955; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
956; CHECK-NEXT:    [[R:%.*]] = udiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
957; CHECK-NEXT:    ret <2 x i32> [[R]]
958;
959  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
960  %r = udiv <2 x i32> <i32 42, i32 42>, %splat
961  ret <2 x i32> %r
962}
963
; udiv (splat x), 42 with a constant divisor: the CHECK lines expect
; 'udiv %x, <42, 1>' then the lane-0 splat; the unused lane's divisor is 1
; rather than undef.
964define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) {
965; CHECK-LABEL: @udiv_splat_constant1(
966; CHECK-NEXT:    [[TMP1:%.*]] = udiv <2 x i32> [[X:%.*]], <i32 42, i32 1>
967; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
968; CHECK-NEXT:    ret <2 x i32> [[R]]
969;
970  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
971  %r = udiv <2 x i32> %splat, <i32 42, i32 42>
972  ret <2 x i32> %r
973}
974
; sdiv 42, (splat x) where the variable is the divisor: the CHECK lines expect
; the splat and the sdiv to remain in their original order (no reordering).
975define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) {
976; CHECK-LABEL: @sdiv_splat_constant0(
977; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
978; CHECK-NEXT:    [[R:%.*]] = sdiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
979; CHECK-NEXT:    ret <2 x i32> [[R]]
980;
981  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
982  %r = sdiv <2 x i32> <i32 42, i32 42>, %splat
983  ret <2 x i32> %r
984}
985
; sdiv (splat x), 42 with a constant divisor: the CHECK lines expect
; 'sdiv %x, <42, 1>' then the lane-0 splat; the unused lane's divisor is 1
; rather than undef.
986define <2 x i32> @sdiv_splat_constant1(<2 x i32> %x) {
987; CHECK-LABEL: @sdiv_splat_constant1(
988; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[X:%.*]], <i32 42, i32 1>
989; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
990; CHECK-NEXT:    ret <2 x i32> [[R]]
991;
992  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
993  %r = sdiv <2 x i32> %splat, <i32 42, i32 42>
994  ret <2 x i32> %r
995}
996
; and (splat x), 42: the CHECK lines expect 'and %x, <42, undef>' followed by
; the lane-0 splat.
997define <2 x i32> @and_splat_constant(<2 x i32> %x) {
998; CHECK-LABEL: @and_splat_constant(
999; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 42, i32 undef>
1000; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
1001; CHECK-NEXT:    ret <2 x i32> [[R]]
1002;
1003  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
1004  %r = and <2 x i32> %splat, <i32 42, i32 42>
1005  ret <2 x i32> %r
1006}
1007
1008; AND does not fold to undef for undef operands, so we cannot move it
1009; across a shuffle with undef mask elements.
; Mask lanes 0 and 1 are undef and their 'and' constants are 0. The CHECK lines
; expect the shuffle and the 'and' to be left in their original order.
1010define <4 x i16> @and_constant_mask_undef(<4 x i16> %add) {
1011; CHECK-LABEL: @and_constant_mask_undef(
1012; CHECK-NEXT:  entry:
1013; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1014; CHECK-NEXT:    [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 0, i16 0, i16 -1, i16 -1>
1015; CHECK-NEXT:    ret <4 x i16> [[AND]]
1016;
1017entry:
1018  %shuffle = shufflevector <4 x i16> %add, <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1019  %and = and <4 x i16> %shuffle, <i16 0, i16 0, i16 -1, i16 -1>
1020  ret <4 x i16> %and
1021}
1022
1023; AND does not fold to undef for undef operands, so we cannot move it
1024; across a shuffle with undef mask elements.
; Mask lane 3 is undef and its 'and' constant is 0 (not the identity -1). The
; CHECK lines expect the shuffle and the 'and' to be left in their original order.
1025define <4 x i16> @and_constant_mask_undef_2(<4 x i16> %add) {
1026; CHECK-LABEL: @and_constant_mask_undef_2(
1027; CHECK-NEXT:  entry:
1028; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 undef>
1029; CHECK-NEXT:    [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 -1, i16 -1, i16 -1, i16 0>
1030; CHECK-NEXT:    ret <4 x i16> [[AND]]
1031;
1032entry:
1033  %shuffle = shufflevector <4 x i16> %add, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 undef>
1034  %and = and <4 x i16> %shuffle, <i16 -1, i16 -1, i16 -1, i16 0>
1035  ret <4 x i16> %and
1036}
1037
1038; We can move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes.
; The defined mask lanes are and'ed with 0 and the undef mask lane with -1.
; The CHECK lines expect the whole function to fold to the constant
; <0, 0, 0, undef>.
1039define <4 x i16> @and_constant_mask_undef_3(<4 x i16> %add) {
1040; CHECK-LABEL: @and_constant_mask_undef_3(
1041; CHECK-NEXT:  entry:
1042; CHECK-NEXT:    ret <4 x i16> <i16 0, i16 0, i16 0, i16 undef>
1043;
1044entry:
1045  %shuffle = shufflevector <4 x i16> %add, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
1046  %and = and <4 x i16> %shuffle, <i16 0, i16 0, i16 0, i16 -1>
1047  ret <4 x i16> %and
1048}
1049
1050; We can move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes.
; The undef mask lane (lane 3) is and'ed with -1. The CHECK lines expect
; 'and %add, <9, 20, undef, undef>' (constants remapped to source lanes 0 and 1)
; followed by the original shuffle mask.
1051define <4 x i16> @and_constant_mask_undef_4(<4 x i16> %add) {
1052; CHECK-LABEL: @and_constant_mask_undef_4(
1053; CHECK-NEXT:  entry:
1054; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], <i16 9, i16 20, i16 undef, i16 undef>
1055; CHECK-NEXT:    [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
1056; CHECK-NEXT:    ret <4 x i16> [[AND]]
1057;
1058entry:
1059  %shuffle = shufflevector <4 x i16> %add, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
1060  %and = and <4 x i16> %shuffle, <i16 9, i16 20, i16 20, i16 -1>
1061  ret <4 x i16> %and
1062}
1063
; The shuffle mask <2, 3, 1, 1> has no undef lanes. The CHECK lines expect the
; 'and' to run first with its constant permuted to source-lane order
; (<undef, -1, 0, 0>; source lane 0 is unused), followed by the same shuffle.
1064define <4 x i16> @and_constant_mask_not_undef(<4 x i16> %add) {
1065; CHECK-LABEL: @and_constant_mask_not_undef(
1066; CHECK-NEXT:  entry:
1067; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], <i16 undef, i16 -1, i16 0, i16 0>
1068; CHECK-NEXT:    [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
1069; CHECK-NEXT:    ret <4 x i16> [[AND]]
1070;
1071entry:
1072  %shuffle = shufflevector <4 x i16> %add, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
1073  %and = and <4 x i16> %shuffle, <i16 0, i16 0, i16 -1, i16 -1>
1074  ret <4 x i16> %and
1075}
1076
1077; OR does not fold to undef for undef operands, so we cannot move it
1078; across a shuffle with undef mask elements.
; Mask lanes 0 and 1 are undef and their 'or' constants are -1. The CHECK lines
; expect the shuffle and the 'or' to be left in their original order.
1079define <4 x i16> @or_constant_mask_undef(<4 x i16> %in) {
1080; CHECK-LABEL: @or_constant_mask_undef(
1081; CHECK-NEXT:  entry:
1082; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1083; CHECK-NEXT:    [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 -1, i16 -1, i16 0, i16 0>
1084; CHECK-NEXT:    ret <4 x i16> [[OR]]
1085;
1086entry:
1087  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1088  %or = or <4 x i16> %shuffle, <i16 -1, i16 -1, i16 0, i16 0>
1089  ret <4 x i16> %or
1090}
1091
1092; OR does not fold to undef for undef operands, so we cannot move it
1093; across a shuffle with undef mask elements.
; Mask lanes 0 and 3 are undef and their 'or' constants are -1. The CHECK lines
; expect the shuffle and the 'or' to be left in their original order.
1094define <4 x i16> @or_constant_mask_undef_2(<4 x i16> %in) {
1095; CHECK-LABEL: @or_constant_mask_undef_2(
1096; CHECK-NEXT:  entry:
1097; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1098; CHECK-NEXT:    [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 -1, i16 0, i16 0, i16 -1>
1099; CHECK-NEXT:    ret <4 x i16> [[OR]]
1100;
1101entry:
1102  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1103  %or = or <4 x i16> %shuffle, <i16 -1, i16 0, i16 0, i16 -1>
1104  ret <4 x i16> %or
1105}
1106
1107; We can move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes.
; The defined mask lanes are or'ed with -1 and the undef mask lanes with 0.
; The CHECK lines expect the whole function to fold to the constant
; <undef, -1, -1, undef>.
1108define <4 x i16> @or_constant_mask_undef_3(<4 x i16> %in) {
1109; CHECK-LABEL: @or_constant_mask_undef_3(
1110; CHECK-NEXT:  entry:
1111; CHECK-NEXT:    ret <4 x i16> <i16 undef, i16 -1, i16 -1, i16 undef>
1112;
1113entry:
1114  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1115  %or = or <4 x i16> %shuffle, <i16 0, i16 -1, i16 -1, i16 0>
1116  ret <4 x i16> %or
1117}
1118
1119; We can move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes.
; The undef mask lanes (0 and 3) are or'ed with 0. The CHECK lines expect
; 'or %in, <undef, 99, undef, undef>' (only source lane 1 is used) followed by
; the original shuffle mask.
1120define <4 x i16> @or_constant_mask_undef_4(<4 x i16> %in) {
1121; CHECK-LABEL: @or_constant_mask_undef_4(
1122; CHECK-NEXT:  entry:
1123; CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], <i16 undef, i16 99, i16 undef, i16 undef>
1124; CHECK-NEXT:    [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1125; CHECK-NEXT:    ret <4 x i16> [[OR]]
1126;
1127entry:
1128  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1129  %or = or <4 x i16> %shuffle, <i16 0, i16 99, i16 99, i16 0>
1130  ret <4 x i16> %or
1131}
1132
; The shuffle mask <2, 3, 1, 1> has no undef lanes. The CHECK lines expect the
; 'or' to run first with its constant permuted to source-lane order
; (<undef, -1, 0, 0>; source lane 0 is unused), followed by the same shuffle.
1133define <4 x i16> @or_constant_mask_not_undef(<4 x i16> %in) {
1134; CHECK-LABEL: @or_constant_mask_not_undef(
1135; CHECK-NEXT:  entry:
1136; CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], <i16 undef, i16 -1, i16 0, i16 0>
1137; CHECK-NEXT:    [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
1138; CHECK-NEXT:    ret <4 x i16> [[OR]]
1139;
1140entry:
1141  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
1142  %or = or <4 x i16> %shuffle, <i16 0, i16 0, i16 -1, i16 -1>
1143  ret <4 x i16> %or
1144}
1145
; Mask lane 1 is undef and its shift amount is 3 (non-zero). The CHECK lines
; expect the shuffle and the shl to be left in their original order.
1146define <4 x i16> @shl_constant_mask_undef(<4 x i16> %in) {
1147; CHECK-LABEL: @shl_constant_mask_undef(
1148; CHECK-NEXT:  entry:
1149; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 1>
1150; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i16> [[SHUFFLE]], <i16 10, i16 3, i16 0, i16 0>
1151; CHECK-NEXT:    ret <4 x i16> [[SHL]]
1152;
1153entry:
1154  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 1>
1155  %shl = shl <4 x i16> %shuffle, <i16 10, i16 3, i16 0, i16 0>
1156  ret <4 x i16> %shl
1157}
1158
; The non-zero addends sit only on undef mask lanes (0 and 1); the defined
; lanes add 0. The CHECK lines expect the add to vanish, leaving just the shuffle.
1159define <4 x i16> @add_constant_mask_undef(<4 x i16> %in) {
1160; CHECK-LABEL: @add_constant_mask_undef(
1161; CHECK-NEXT:  entry:
1162; CHECK-NEXT:    [[ADD:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1163; CHECK-NEXT:    ret <4 x i16> [[ADD]]
1164;
1165entry:
1166  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1167  %add = add <4 x i16> %shuffle, <i16 10, i16 3, i16 0, i16 0>
1168  ret <4 x i16> %add
1169}
1170
; Mask <undef, 2, 1, 1> with addends <10, 3, 0, 0>. The CHECK lines expect
; 'add %in, <undef, 0, 3, undef>' (addends remapped to source lanes 1 and 2)
; followed by the original shuffle mask.
1171define <4 x i16> @add_constant_mask_undef_2(<4 x i16> %in) {
1172; CHECK-LABEL: @add_constant_mask_undef_2(
1173; CHECK-NEXT:  entry:
1174; CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], <i16 undef, i16 0, i16 3, i16 undef>
1175; CHECK-NEXT:    [[ADD:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 1>
1176; CHECK-NEXT:    ret <4 x i16> [[ADD]]
1177;
1178entry:
1179  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 1>
1180  %add = add <4 x i16> %shuffle, <i16 10, i16 3, i16 0, i16 0>
1181  ret <4 x i16> %add
1182}
1183
; The non-zero subtrahends sit only on undef mask lanes (0 and 1); the defined
; lanes subtract 0. The CHECK lines expect the sub to vanish, leaving just the shuffle.
1184define <4 x i16> @sub_constant_mask_undef(<4 x i16> %in) {
1185; CHECK-LABEL: @sub_constant_mask_undef(
1186; CHECK-NEXT:  entry:
1187; CHECK-NEXT:    [[SUB:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1188; CHECK-NEXT:    ret <4 x i16> [[SUB]]
1189;
1190entry:
1191  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1192  %sub = sub <4 x i16> %shuffle, <i16 10, i16 3, i16 0, i16 0>
1193  ret <4 x i16> %sub
1194}
1195
; Mask <1, 1, 2, undef> with subtrahends <0, 0, 10, 99>. The CHECK lines expect
; the sub to become 'add %in, <undef, 0, -10, undef>' (constants negated and
; remapped to source lanes 1 and 2) followed by the original shuffle mask.
1196define <4 x i16> @sub_constant_mask_undef_2(<4 x i16> %in) {
1197; CHECK-LABEL: @sub_constant_mask_undef_2(
1198; CHECK-NEXT:  entry:
1199; CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], <i16 undef, i16 0, i16 -10, i16 undef>
1200; CHECK-NEXT:    [[SUB:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 undef>
1201; CHECK-NEXT:    ret <4 x i16> [[SUB]]
1202;
1203entry:
1204  %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 undef>
1205  %sub = sub <4 x i16> %shuffle, <i16 0, i16 0, i16 10, i16 99>
1206  ret <4 x i16> %sub
1207}
1208
; or (splat x), 42: the CHECK lines expect 'or %x, <42, undef>' followed by
; the lane-0 splat.
1209define <2 x i32> @or_splat_constant(<2 x i32> %x) {
1210; CHECK-LABEL: @or_splat_constant(
1211; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 42, i32 undef>
1212; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
1213; CHECK-NEXT:    ret <2 x i32> [[R]]
1214;
1215  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
1216  %r = or <2 x i32> %splat, <i32 42, i32 42>
1217  ret <2 x i32> %r
1218}
1219
; xor (splat x), 42: the CHECK lines expect 'xor %x, <42, undef>' followed by
; the lane-0 splat.
1220define <2 x i32> @xor_splat_constant(<2 x i32> %x) {
1221; CHECK-LABEL: @xor_splat_constant(
1222; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], <i32 42, i32 undef>
1223; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
1224; CHECK-NEXT:    ret <2 x i32> [[R]]
1225;
1226  %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
1227  %r = xor <2 x i32> %splat, <i32 42, i32 42>
1228  ret <2 x i32> %r
1229}
1230
; fadd (splat x), 42.0: the CHECK lines expect 'fadd %x, <42.0, undef>'
; followed by the lane-0 splat.
1231define <2 x float> @fadd_splat_constant(<2 x float> %x) {
1232; CHECK-LABEL: @fadd_splat_constant(
1233; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
1234; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1235; CHECK-NEXT:    ret <2 x float> [[R]]
1236;
1237  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1238  %r = fadd <2 x float> %splat, <float 42.0, float 42.0>
1239  ret <2 x float> %r
1240}
1241
; fsub 42.0, (splat x) with the constant as operand 0: the CHECK lines expect
; 'fsub <42.0, undef>, %x' followed by the lane-0 splat.
1242define <2 x float> @fsub_splat_constant0(<2 x float> %x) {
1243; CHECK-LABEL: @fsub_splat_constant0(
1244; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
1245; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1246; CHECK-NEXT:    ret <2 x float> [[R]]
1247;
1248  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1249  %r = fsub <2 x float> <float 42.0, float 42.0>, %splat
1250  ret <2 x float> %r
1251}
1252
; fsub (splat x), 42.0 with the constant as operand 1: the CHECK lines expect
; the fsub to become 'fadd %x, <-42.0, undef>' ahead of the lane-0 splat.
1253define <2 x float> @fsub_splat_constant1(<2 x float> %x) {
1254; CHECK-LABEL: @fsub_splat_constant1(
1255; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float -4.200000e+01, float undef>
1256; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1257; CHECK-NEXT:    ret <2 x float> [[R]]
1258;
1259  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1260  %r = fsub <2 x float> %splat, <float 42.0, float 42.0>
1261  ret <2 x float> %r
1262}
1263
; Negation written as 'fsub -0.0, (splat x)': the CHECK lines expect a unary
; 'fneg %x' followed by the lane-0 splat.
1264define <2 x float> @fneg(<2 x float> %x) {
1265; CHECK-LABEL: @fneg(
1266; CHECK-NEXT:    [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
1267; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1268; CHECK-NEXT:    ret <2 x float> [[R]]
1269;
1270  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1271  %r = fsub <2 x float> <float -0.0, float -0.0>, %splat
1272  ret <2 x float> %r
1273}
1274
; fmul (splat x), 42.0: the CHECK lines expect 'fmul %x, <42.0, undef>'
; followed by the lane-0 splat.
1275define <2 x float> @fmul_splat_constant(<2 x float> %x) {
1276; CHECK-LABEL: @fmul_splat_constant(
1277; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
1278; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1279; CHECK-NEXT:    ret <2 x float> [[R]]
1280;
1281  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1282  %r = fmul <2 x float> %splat, <float 42.0, float 42.0>
1283  ret <2 x float> %r
1284}
1285
; fdiv 42.0, (splat x) with the constant as the dividend: the CHECK lines expect
; 'fdiv <42.0, undef>, %x' followed by the lane-0 splat.
1286define <2 x float> @fdiv_splat_constant0(<2 x float> %x) {
1287; CHECK-LABEL: @fdiv_splat_constant0(
1288; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
1289; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1290; CHECK-NEXT:    ret <2 x float> [[R]]
1291;
1292  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1293  %r = fdiv <2 x float> <float 42.0, float 42.0>, %splat
1294  ret <2 x float> %r
1295}
1296
; fdiv (splat x), 42.0 with the constant as the divisor: the CHECK lines expect
; 'fdiv %x, <42.0, undef>' followed by the lane-0 splat.
1297define <2 x float> @fdiv_splat_constant1(<2 x float> %x) {
1298; CHECK-LABEL: @fdiv_splat_constant1(
1299; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
1300; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1301; CHECK-NEXT:    ret <2 x float> [[R]]
1302;
1303  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1304  %r = fdiv <2 x float> %splat, <float 42.0, float 42.0>
1305  ret <2 x float> %r
1306}
1307
; frem 42.0, (splat x) with the constant as operand 0: the CHECK lines expect
; 'frem <42.0, undef>, %x' followed by the lane-0 splat.
1308define <2 x float> @frem_splat_constant0(<2 x float> %x) {
1309; CHECK-LABEL: @frem_splat_constant0(
1310; CHECK-NEXT:    [[TMP1:%.*]] = frem <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
1311; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1312; CHECK-NEXT:    ret <2 x float> [[R]]
1313;
1314  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1315  %r = frem <2 x float> <float 42.0, float 42.0>, %splat
1316  ret <2 x float> %r
1317}
1318
; frem (splat x), 42.0 with the constant as operand 1: the CHECK lines expect
; 'frem %x, <42.0, undef>' followed by the lane-0 splat.
1319define <2 x float> @frem_splat_constant1(<2 x float> %x) {
1320; CHECK-LABEL: @frem_splat_constant1(
1321; CHECK-NEXT:    [[TMP1:%.*]] = frem <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
1322; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
1323; CHECK-NEXT:    ret <2 x float> [[R]]
1324;
1325  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1326  %r = frem <2 x float> %splat, <float 42.0, float 42.0>
1327  ret <2 x float> %r
1328}
1329
1330; Equivalent shuffle masks, but only one is a narrowing op.
1331
; Input: %widen builds <2 x i1> from a zeroinitializer <1 x i1> and %x; %narrow takes
; the low two lanes of the <4 x i1> %y; both use the mask <0, 1> and feed an 'and'.
; Expected: all three instructions are left unchanged.
1332define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) {
1333; CHECK-LABEL: @PR40734(
1334; CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <1 x i1> zeroinitializer, <1 x i1> [[X:%.*]], <2 x i32> <i32 0, i32 1>
1335; CHECK-NEXT:    [[NARROW:%.*]] = shufflevector <4 x i1> [[Y:%.*]], <4 x i1> undef, <2 x i32> <i32 0, i32 1>
1336; CHECK-NEXT:    [[R:%.*]] = and <2 x i1> [[WIDEN]], [[NARROW]]
1337; CHECK-NEXT:    ret <2 x i1> [[R]]
1338;
1339  %widen = shufflevector <1 x i1> zeroinitializer, <1 x i1> %x, <2 x i32> <i32 0, i32 1>
1340  %narrow = shufflevector <4 x i1> %y, <4 x i1> undef, <2 x i32> <i32 0, i32 1>
1341  %r = and <2 x i1> %widen, %narrow
1342  ret <2 x i1> %r
1343}
1344
1345; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
1346
; Input: two <3 x i8> values are each widened to <7 x i8> and then blended by %s3.
; Expected: all three shuffles are left unchanged.
1347define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) {
1348; CHECK-LABEL: @insert_subvector_shuffles(
1349; CHECK-NEXT:    [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1350; CHECK-NEXT:    [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
1351; CHECK-NEXT:    [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9>
1352; CHECK-NEXT:    ret <7 x i8> [[S3]]
1353;
1354  %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1355  %s2 = shufflevector <3 x i8> %y, <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
1356  %s3 = shufflevector <7 x i8> %s1, <7 x i8> %s2, <7 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9>
1357  ret <7 x i8> %s3
1358}
1359
; Input: two <2 x i8> values are each widened to <8 x i8> and then blended by %s3.
; Expected: the three shuffles fold into a single shuffle of %x and %y whose mask
; indexes the narrow sources directly.
1360define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) {
1361; CHECK-LABEL: @insert_subvector_shuffles_pow2elts(
1362; CHECK-NEXT:    [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> <i32 0, i32 2, i32 1, i32 undef, i32 2, i32 1, i32 3, i32 0>
1363; CHECK-NEXT:    ret <8 x i8> [[S3]]
1364;
1365  %s1 = shufflevector <2 x i8> %x, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1366  %s2 = shufflevector <2 x i8> %y, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1367  %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <8 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9, i32 0>
1368  ret <8 x i8> %s3
1369}
1370
1371; The last shuffle may change the vector type.
1372; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
1373
; Input: two <3 x i8> values are widened to <7 x i8>; the final shuffle narrows to <2 x i8>.
; Expected: the shuffles are not merged; only the mask elements of %s1 and %s2 that
; %s3 does not read are turned into undef.
1374define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) {
1375; CHECK-LABEL: @insert_subvector_shuffles_narrowing(
1376; CHECK-NEXT:    [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1377; CHECK-NEXT:    [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1378; CHECK-NEXT:    [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <2 x i32> <i32 0, i32 8>
1379; CHECK-NEXT:    ret <2 x i8> [[S3]]
1380;
1381  %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1382  %s2 = shufflevector <3 x i8> %y, <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
1383  %s3 = shufflevector <7 x i8> %s1, <7 x i8> %s2, <2 x i32> <i32 0, i32 8>
1384  ret <2 x i8> %s3
1385}
1386
; Input: two <4 x i8> values are widened to <8 x i8>; the final shuffle narrows to <2 x i8>.
; Expected: a single shuffle that picks lane 0 of %x and lane 0 of %y (mask <0, 4>).
1387define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x i8> %y) {
1388; CHECK-LABEL: @insert_subvector_shuffles_narrowing_pow2elts(
1389; CHECK-NEXT:    [[S3:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 4>
1390; CHECK-NEXT:    ret <2 x i8> [[S3]]
1391;
1392  %s1 = shufflevector <4 x i8> %x, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1393  %s2 = shufflevector <4 x i8> %y, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1394  %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <2 x i32> <i32 0, i32 8>
1395  ret <2 x i8> %s3
1396}
1397
1398; Similar to above, but this reduces to a widen with undefs of 'x'.
1399
; Input: %s1 and %s2 each widen a different single lane of the same %x; %s3 recombines them.
; Expected: one widening shuffle of %x with mask <0, 1, undef, undef>.
1400define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) {
1401; CHECK-LABEL: @insert_subvector_shuffles_identity(
1402; CHECK-NEXT:    [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1403; CHECK-NEXT:    ret <4 x double> [[S3]]
1404;
1405  %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
1406  %s2 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
1407  %s3 = shufflevector <4 x double> %s2, <4 x double> %s1, <4 x i32> <i32 0, i32 5, i32 7, i32 undef>
1408  ret <4 x double> %s3
1409}
1410
1411; Negative test - not identity with padding (although this could be folded with better analysis).
1412
; Input: %s1 places lane 1 of %x in two result lanes (1 and 3), and %s3 reads both.
; Expected: all three shuffles are left unchanged.
1413define <4 x double> @not_insert_subvector_shuffle(<2 x double> %x) {
1414; CHECK-LABEL: @not_insert_subvector_shuffle(
1415; CHECK-NEXT:    [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 1>
1416; CHECK-NEXT:    [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
1417; CHECK-NEXT:    [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 7, i32 undef>
1418; CHECK-NEXT:    ret <4 x double> [[S3]]
1419;
1420  %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 1>
1421  %s2 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
1422  %s3 = shufflevector <4 x double> %s2, <4 x double> %s1, <4 x i32> <i32 0, i32 5, i32 7, i32 undef>
1423  ret <4 x double> %s3
1424}
1425
1426; Negative test - operands are not the same size (although this could be partly folded with better analysis).
1427
; Input: %s1 widens the <2 x double> %x while %s2 widens the <3 x double> %y; %s3 blends them.
; Expected: all three shuffles are left unchanged.
1428define <4 x double> @not_insert_subvector_shuffles_with_same_size(<2 x double> %x, <3 x double> %y) {
1429; CHECK-LABEL: @not_insert_subvector_shuffles_with_same_size(
1430; CHECK-NEXT:    [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
1431; CHECK-NEXT:    [[S2:%.*]] = shufflevector <3 x double> [[Y:%.*]], <3 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
1432; CHECK-NEXT:    [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
1433; CHECK-NEXT:    ret <4 x double> [[S3]]
1434;
1435  %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
1436  %s2 = shufflevector <3 x double> %y, <3 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
1437  %s3 = shufflevector <4 x double> %s2, <4 x double> %s1, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
1438  ret <4 x double> %s3
1439}
1440
1441; Demanded vector elements may not be able to simplify a shuffle mask
1442; before we try to narrow it. This used to crash.
1443
; Input: lane 0 of %x is re-inserted into lane 0 of the widened vector (i16 index),
; the result is stored to %p, and the original %widen is returned.
; Expected: the extract/insert pair becomes a second widening shuffle of %x that
; feeds the store (printed with 'align 16'); %widen is still returned.
1444define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, <4 x float>* %p) {
1445; CHECK-LABEL: @insert_subvector_crash_invalid_mask_elt(
1446; CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1447; CHECK-NEXT:    [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1448; CHECK-NEXT:    store <4 x float> [[I]], <4 x float>* [[P:%.*]], align 16
1449; CHECK-NEXT:    ret <4 x float> [[WIDEN]]
1450;
1451  %widen = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1452  %ext2 = extractelement <2 x float> %x, i32 0
1453  %I = insertelement <4 x float> %widen, float %ext2, i16 0
1454  store <4 x float> %I, <4 x float>* %p
1455  ret <4 x float> %widen
1456}
1457
; Input: (lane-0 splat of %x) + (%y + splat constant 317426).
; Expected: the constant is added to the unshuffled %x first (undef in lanes 1-3),
; that sum is splatted from lane 0, and the splat is then added to %y.
1458define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) {
1459; CHECK-LABEL: @splat_assoc_add(
1460; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 317426, i32 undef, i32 undef, i32 undef>
1461; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
1462; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
1463; CHECK-NEXT:    ret <4 x i32> [[R]]
1464;
1465  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
1466  %a = add <4 x i32> %y, <i32 317426, i32 317426, i32 317426, i32 317426>
1467  %r = add <4 x i32> %splatx, %a
1468  ret <4 x i32> %r
1469}
1470
1471; Undefs in splat mask are replaced with defined splat index
1472
; Input: the splat mask is <0, 0, undef, undef>; the constant operand is a full splat of 42.
; Expected: the adds are reassociated and the new splat uses a fully defined
; zeroinitializer mask.
1473define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) {
1474; CHECK-LABEL: @splat_assoc_add_undef_mask_elts(
1475; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 undef, i32 undef, i32 undef>
1476; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
1477; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
1478; CHECK-NEXT:    ret <4 x i32> [[R]]
1479;
1480  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
1481  %a = add <4 x i32> %y, <i32 42, i32 42, i32 42, i32 42>
1482  %r = add <4 x i32> %splatx, %a
1483  ret <4 x i32> %r
1484}
1485
1486; Undefs in splat mask are replaced with defined splat index
1487
; Input: the splat mask is <undef, 0, 0, 0> (the undef sits in mask position 0);
; the constant operand is a full splat of 42.
; Expected: the adds are reassociated and the new splat uses a fully defined
; zeroinitializer mask.
1488define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) {
1489; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index(
1490; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 undef, i32 undef, i32 undef>
1491; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
1492; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
1493; CHECK-NEXT:    ret <4 x i32> [[R]]
1494;
1495  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
1496  %a = add <4 x i32> %y, <i32 42, i32 42, i32 42, i32 42>
1497  %r = add <4 x i32> %splatx, %a
1498  ret <4 x i32> %r
1499}
1500
; Input: full lane-0 splat of %x; the constant operand <42, undef, undef, 42> has undef lanes.
; Expected: no reassociation; all three instructions are left unchanged.
1501define <4 x i32> @splat_assoc_add_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) {
1502; CHECK-LABEL: @splat_assoc_add_undef_constant_elts(
1503; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
1504; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 undef, i32 undef, i32 42>
1505; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
1506; CHECK-NEXT:    ret <4 x i32> [[R]]
1507;
1508  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
1509  %a = add <4 x i32> %y, <i32 42, i32 undef, i32 undef, i32 42>
1510  %r = add <4 x i32> %splatx, %a
1511  ret <4 x i32> %r
1512}
1513
; Input: full lane-0 splat of %x; the constant operand <undef, 42, undef, 42> is undef in lane 0.
; Expected: no reassociation; all three instructions are left unchanged.
1514define <4 x i32> @splat_assoc_add_undef_constant_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) {
1515; CHECK-LABEL: @splat_assoc_add_undef_constant_elt_at_splat_index(
1516; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
1517; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 undef, i32 42, i32 undef, i32 42>
1518; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
1519; CHECK-NEXT:    ret <4 x i32> [[R]]
1520;
1521  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
1522  %a = add <4 x i32> %y, <i32 undef, i32 42, i32 undef, i32 42>
1523  %r = add <4 x i32> %splatx, %a
1524  ret <4 x i32> %r
1525}
1526
; Input: splat mask <0, undef, 0, undef> combined with constant <42, undef, undef, 42>.
; Expected: no reassociation; all three instructions are left unchanged.
1527define <4 x i32> @splat_assoc_add_undef_mask_elts_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) {
1528; CHECK-LABEL: @splat_assoc_add_undef_mask_elts_undef_constant_elts(
1529; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>
1530; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 undef, i32 undef, i32 42>
1531; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
1532; CHECK-NEXT:    ret <4 x i32> [[R]]
1533;
1534  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>
1535  %a = add <4 x i32> %y, <i32 42, i32 undef, i32 undef, i32 42>
1536  %r = add <4 x i32> %splatx, %a
1537  ret <4 x i32> %r
1538}
1539
; Input: splat mask <undef, 0, 0, 0> combined with constant <42, undef, undef, 42>.
; Expected: no reassociation; all three instructions are left unchanged.
1540define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) {
1541; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elts(
1542; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
1543; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 undef, i32 undef, i32 42>
1544; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
1545; CHECK-NEXT:    ret <4 x i32> [[R]]
1546;
1547  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
1548  %a = add <4 x i32> %y, <i32 42, i32 undef, i32 undef, i32 42>
1549  %r = add <4 x i32> %splatx, %a
1550  ret <4 x i32> %r
1551}
1552
; Input: splat mask <undef, 0, 0, 0> combined with constant <undef, 42, undef, 42>.
; Expected: no reassociation; all three instructions are left unchanged.
1553define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) {
1554; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elt_at_splat_index(
1555; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
1556; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 undef, i32 42, i32 undef, i32 42>
1557; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
1558; CHECK-NEXT:    ret <4 x i32> [[R]]
1559;
1560  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
1561  %a = add <4 x i32> %y, <i32 undef, i32 42, i32 undef, i32 42>
1562  %r = add <4 x i32> %splatx, %a
1563  ret <4 x i32> %r
1564}
1565
1566; Non-zero splat index; commute operands; FMF intersect
1567
; Input: lane-1 splat of %x is the second operand of the outer fmul; the inner fmul has
; 'reassoc nsz' and the outer one has 'reassoc nsz nnan'.
; Expected: the constant is multiplied into the unshuffled %x (undef in lane 0), the
; product is splatted from lane 1, and both new fmuls carry only 'reassoc nsz'.
1568define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) {
1569; CHECK-LABEL: @splat_assoc_fmul(
1570; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float undef, float 3.000000e+00>
1571; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
1572; CHECK-NEXT:    [[R:%.*]] = fmul reassoc nsz <2 x float> [[TMP2]], [[Y:%.*]]
1573; CHECK-NEXT:    ret <2 x float> [[R]]
1574;
1575  %splatx = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> <i32 1, i32 1>
1576  %a = fmul reassoc nsz <2 x float> %y, <float 3.0, float 3.0>
1577  %r = fmul reassoc nsz nnan <2 x float> %a, %splatx
1578  ret <2 x float> %r
1579}
1580
1581; Two splat shuffles; drop poison-generating flags
1582
; Input: (%y * lane-2 splat of %z) * lane-2 splat of %x, with 'nsw' on the inner mul.
; Expected: %z * %x is computed on the unshuffled vectors, splatted from lane 2, then
; multiplied by %y; none of the resulting muls carries 'nsw'.
1583define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
1584; CHECK-LABEL: @splat_assoc_mul(
1585; CHECK-NEXT:    [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
1586; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1587; CHECK-NEXT:    [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
1588; CHECK-NEXT:    ret <3 x i8> [[R]]
1589;
1590  %splatx = shufflevector <3 x i8> %x, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1591  %splatz = shufflevector <3 x i8> %z, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1592  %a = mul nsw <3 x i8> %y, %splatz
1593  %r = mul <3 x i8> %a, %splatx
1594  ret <3 x i8> %r
1595}
1596
; Input: %splatx has mask <undef, 2, 2>; %splatz is a full lane-2 splat; both muls have
; wrap flags (nsw / nsw nuw).
; Expected: the muls are reassociated, the new splat mask is fully defined <2, 2, 2>,
; and the wrap flags are dropped.
1597define <3 x i8> @splat_assoc_mul_undef_elt1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
1598; CHECK-LABEL: @splat_assoc_mul_undef_elt1(
1599; CHECK-NEXT:    [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
1600; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1601; CHECK-NEXT:    [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
1602; CHECK-NEXT:    ret <3 x i8> [[R]]
1603;
1604  %splatx = shufflevector <3 x i8> %x, <3 x i8> undef, <3 x i32> <i32 undef, i32 2, i32 2>
1605  %splatz = shufflevector <3 x i8> %z, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1606  %a = mul nsw <3 x i8> %y, %splatz
1607  %r = mul nsw nuw <3 x i8> %a, %splatx
1608  ret <3 x i8> %r
1609}
1610
; Input: %splatz (operand of the inner mul) has mask <undef, 2, 2>; %splatx is a full
; lane-2 splat.
; Expected: no reassociation; only the operands of %a are commuted and the flags are kept.
1611define <3 x i8> @splat_assoc_mul_undef_elt2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
1612; CHECK-LABEL: @splat_assoc_mul_undef_elt2(
1613; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1614; CHECK-NEXT:    [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> undef, <3 x i32> <i32 undef, i32 2, i32 2>
1615; CHECK-NEXT:    [[A:%.*]] = mul nsw <3 x i8> [[SPLATZ]], [[Y:%.*]]
1616; CHECK-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i8> [[A]], [[SPLATX]]
1617; CHECK-NEXT:    ret <3 x i8> [[R]]
1618;
1619  %splatx = shufflevector <3 x i8> %x, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1620  %splatz = shufflevector <3 x i8> %z, <3 x i8> undef, <3 x i32> <i32 undef, i32 2, i32 2>
1621  %a = mul nsw <3 x i8> %y, %splatz
1622  %r = mul nsw nuw <3 x i8> %a, %splatx
1623  ret <3 x i8> %r
1624}
1625
; Input: %splatx has mask <2, 2, undef> (undef in mask position 2); %splatz is a full
; lane-2 splat.
; Expected: the muls are reassociated, the new splat mask is fully defined <2, 2, 2>,
; and the wrap flags are dropped.
1626define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
1627; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index1(
1628; CHECK-NEXT:    [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
1629; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1630; CHECK-NEXT:    [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
1631; CHECK-NEXT:    ret <3 x i8> [[R]]
1632;
1633  %splatx = shufflevector <3 x i8> %x, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 undef>
1634  %splatz = shufflevector <3 x i8> %z, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1635  %a = mul nsw <3 x i8> %y, %splatz
1636  %r = mul nsw nuw <3 x i8> %a, %splatx
1637  ret <3 x i8> %r
1638}
1639
; Input: %splatz (operand of the inner mul) has mask <2, 2, undef>; %splatx is a full
; lane-2 splat.
; Expected: no reassociation; only the operands of %a are commuted and the flags are kept.
1640define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
1641; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index2(
1642; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1643; CHECK-NEXT:    [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 undef>
1644; CHECK-NEXT:    [[A:%.*]] = mul nsw <3 x i8> [[SPLATZ]], [[Y:%.*]]
1645; CHECK-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i8> [[A]], [[SPLATX]]
1646; CHECK-NEXT:    ret <3 x i8> [[R]]
1647;
1648  %splatx = shufflevector <3 x i8> %x, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1649  %splatz = shufflevector <3 x i8> %z, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 undef>
1650  %a = mul nsw <3 x i8> %y, %splatz
1651  %r = mul nsw nuw <3 x i8> %a, %splatx
1652  ret <3 x i8> %r
1653}
1654
1655; Negative test - mismatched splat elements
1656
; Input: %splatx splats lane 1 of %x while %splatz splats lane 2 of %z.
; Expected: no reassociation; only the operands of %a are commuted.
1657define <3 x i8> @splat_assoc_or(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
1658; CHECK-LABEL: @splat_assoc_or(
1659; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <3 x i32> <i32 1, i32 1, i32 1>
1660; CHECK-NEXT:    [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1661; CHECK-NEXT:    [[A:%.*]] = or <3 x i8> [[SPLATZ]], [[Y:%.*]]
1662; CHECK-NEXT:    [[R:%.*]] = or <3 x i8> [[A]], [[SPLATX]]
1663; CHECK-NEXT:    ret <3 x i8> [[R]]
1664;
1665  %splatx = shufflevector <3 x i8> %x, <3 x i8> undef, <3 x i32> <i32 1, i32 1, i32 1>
1666  %splatz = shufflevector <3 x i8> %z, <3 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
1667  %a = or <3 x i8> %y, %splatz
1668  %r = or <3 x i8> %a, %splatx
1669  ret <3 x i8> %r
1670}
1671
1672; Negative test - not associative
1673
; Input: (%y / splat constant 3.0) / lane-0 splat of %x, both fdivs with 'reassoc nsz'.
; Expected: all three instructions are left unchanged.
1674define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) {
1675; CHECK-LABEL: @splat_assoc_fdiv(
1676; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> undef, <2 x i32> zeroinitializer
1677; CHECK-NEXT:    [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], <float 3.000000e+00, float 3.000000e+00>
1678; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc nsz <2 x float> [[A]], [[SPLATX]]
1679; CHECK-NEXT:    ret <2 x float> [[R]]
1680;
1681  %splatx = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
1682  %a = fdiv reassoc nsz <2 x float> %y, <float 3.0, float 3.0>
1683  %r = fdiv reassoc nsz <2 x float> %a, %splatx
1684  ret <2 x float> %r
1685}
1686
1687; Negative test - extra use
1688
; Input: the inner fadd result %a is also passed to @use before feeding the outer fadd.
; (@use is not declared in this part of the file; presumably it is declared elsewhere.)
; Expected: all instructions are left unchanged.
1689define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) {
1690; CHECK-LABEL: @splat_assoc_fadd(
1691; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
1692; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], <float 3.000000e+00, float 3.000000e+00>
1693; CHECK-NEXT:    call void @use(<2 x float> [[A]])
1694; CHECK-NEXT:    [[R:%.*]] = fadd fast <2 x float> [[A]], [[SPLATX]]
1695; CHECK-NEXT:    ret <2 x float> [[R]]
1696;
1697  %splatx = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> <i32 1, i32 1>
1698  %a = fadd fast <2 x float> %y, <float 3.0, float 3.0>
1699  call void @use(<2 x float> %a)
1700  %r = fadd fast <2 x float> %a, %splatx
1701  ret <2 x float> %r
1702}
1703
1704; Negative test - narrowing splat
1705
; Input: the splat shuffle narrows <4 x i32> %x to <3 x i32> before the 'and' chain.
; Expected: all three instructions are left unchanged.
1706define <3 x i32> @splat_assoc_and(<4 x i32> %x, <3 x i32> %y) {
1707; CHECK-LABEL: @splat_assoc_and(
1708; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <3 x i32> zeroinitializer
1709; CHECK-NEXT:    [[A:%.*]] = and <3 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42>
1710; CHECK-NEXT:    [[R:%.*]] = and <3 x i32> [[SPLATX]], [[A]]
1711; CHECK-NEXT:    ret <3 x i32> [[R]]
1712;
1713  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <3 x i32> zeroinitializer
1714  %a = and <3 x i32> %y, <i32 42, i32 42, i32 42>
1715  %r = and <3 x i32> %splatx, %a
1716  ret <3 x i32> %r
1717}
1718
1719; Negative test - widening splat
1720
; Input: the splat shuffle widens <4 x i32> %x to <5 x i32> before the 'xor' chain.
; Expected: all three instructions are left unchanged.
1721define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) {
1722; CHECK-LABEL: @splat_assoc_xor(
1723; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <5 x i32> zeroinitializer
1724; CHECK-NEXT:    [[A:%.*]] = xor <5 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42, i32 42, i32 42>
1725; CHECK-NEXT:    [[R:%.*]] = xor <5 x i32> [[SPLATX]], [[A]]
1726; CHECK-NEXT:    ret <5 x i32> [[R]]
1727;
1728  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <5 x i32> zeroinitializer
1729  %a = xor <5 x i32> %y, <i32 42, i32 42, i32 42, i32 42, i32 42>
1730  %r = xor <5 x i32> %splatx, %a
1731  ret <5 x i32> %r
1732}
1733
1734; Negative test - opcode mismatch
1735
; Input: the inner operation is an 'add' with a splat constant, the outer one is a 'mul'
; with the lane-0 splat of %x.
; Expected: all three instructions are left unchanged.
1736define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) {
1737; CHECK-LABEL: @splat_assoc_add_mul(
1738; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
1739; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42, i32 42>
1740; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[SPLATX]], [[A]]
1741; CHECK-NEXT:    ret <4 x i32> [[R]]
1742;
1743  %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
1744  %a = add <4 x i32> %y, <i32 42, i32 42, i32 42, i32 42>
1745  %r = mul <4 x i32> %splatx, %a
1746  ret <4 x i32> %r
1747}
1748
1749
1750; Do not crash on constant expressions.
1751
; Input: a shuffle of %x is and'ed with a constant expression (a bitcast of a <2 x i64>
; vector built from ptrtoint of this function's own address).
; Expected: both instructions are left unchanged.
1752define <4 x i32> @PR46872(<4 x i32> %x) {
1753; CHECK-LABEL: @PR46872(
1754; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 1>
1755; CHECK-NEXT:    [[A:%.*]] = and <4 x i32> [[S]], bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @PR46872 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @PR46872 to i64)> to <4 x i32>)
1756; CHECK-NEXT:    ret <4 x i32> [[A]]
1757;
1758  %s = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 1>
1759  %a = and <4 x i32> %s, bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @PR46872 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @PR46872 to i64)> to <4 x i32>)
1760  ret <4 x i32> %a
1761}
1762
1763