• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4
5;
6; ASHR - Immediate
7;
8
9define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
10; CHECK-LABEL: @sse2_psrai_w_0(
11; CHECK-NEXT:    ret <8 x i16> %v
12;
13  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
14  ret <8 x i16> %1
15}
16
17define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
18; CHECK-LABEL: @sse2_psrai_w_15(
19; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
20; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
21;
22  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
23  ret <8 x i16> %1
24}
25
26define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
27; CHECK-LABEL: @sse2_psrai_w_64(
28; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
29; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
30;
31  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
32  ret <8 x i16> %1
33}
34
35define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
36; CHECK-LABEL: @sse2_psrai_d_0(
37; CHECK-NEXT:    ret <4 x i32> %v
38;
39  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
40  ret <4 x i32> %1
41}
42
43define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
44; CHECK-LABEL: @sse2_psrai_d_15(
45; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
46; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
47;
48  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
49  ret <4 x i32> %1
50}
51
52define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
53; CHECK-LABEL: @sse2_psrai_d_64(
54; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
55; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
56;
57  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
58  ret <4 x i32> %1
59}
60
61define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
62; CHECK-LABEL: @avx2_psrai_w_0(
63; CHECK-NEXT:    ret <16 x i16> %v
64;
65  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
66  ret <16 x i16> %1
67}
68
69define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
70; CHECK-LABEL: @avx2_psrai_w_15(
71; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
72; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
73;
74  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
75  ret <16 x i16> %1
76}
77
78define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
79; CHECK-LABEL: @avx2_psrai_w_64(
80; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
81; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
82;
83  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
84  ret <16 x i16> %1
85}
86
87define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
88; CHECK-LABEL: @avx2_psrai_d_0(
89; CHECK-NEXT:    ret <8 x i32> %v
90;
91  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
92  ret <8 x i32> %1
93}
94
95define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
96; CHECK-LABEL: @avx2_psrai_d_15(
97; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
98; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
99;
100  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
101  ret <8 x i32> %1
102}
103
104define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
105; CHECK-LABEL: @avx2_psrai_d_64(
106; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
107; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
108;
109  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
110  ret <8 x i32> %1
111}
112
113;
114; LSHR - Immediate
115;
116
117define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
118; CHECK-LABEL: @sse2_psrli_w_0(
119; CHECK-NEXT:    ret <8 x i16> %v
120;
121  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
122  ret <8 x i16> %1
123}
124
125define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
126; CHECK-LABEL: @sse2_psrli_w_15(
127; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
128; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
129;
130  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
131  ret <8 x i16> %1
132}
133
134define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
135; CHECK-LABEL: @sse2_psrli_w_64(
136; CHECK-NEXT:    ret <8 x i16> zeroinitializer
137;
138  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
139  ret <8 x i16> %1
140}
141
142define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
143; CHECK-LABEL: @sse2_psrli_d_0(
144; CHECK-NEXT:    ret <4 x i32> %v
145;
146  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
147  ret <4 x i32> %1
148}
149
150define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
151; CHECK-LABEL: @sse2_psrli_d_15(
152; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
153; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
154;
155  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
156  ret <4 x i32> %1
157}
158
159define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
160; CHECK-LABEL: @sse2_psrli_d_64(
161; CHECK-NEXT:    ret <4 x i32> zeroinitializer
162;
163  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
164  ret <4 x i32> %1
165}
166
167define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
168; CHECK-LABEL: @sse2_psrli_q_0(
169; CHECK-NEXT:    ret <2 x i64> %v
170;
171  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
172  ret <2 x i64> %1
173}
174
175define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
176; CHECK-LABEL: @sse2_psrli_q_15(
177; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
178; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
179;
180  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
181  ret <2 x i64> %1
182}
183
184define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
185; CHECK-LABEL: @sse2_psrli_q_64(
186; CHECK-NEXT:    ret <2 x i64> zeroinitializer
187;
188  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
189  ret <2 x i64> %1
190}
191
192define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
193; CHECK-LABEL: @avx2_psrli_w_0(
194; CHECK-NEXT:    ret <16 x i16> %v
195;
196  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
197  ret <16 x i16> %1
198}
199
200define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
201; CHECK-LABEL: @avx2_psrli_w_15(
202; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
203; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
204;
205  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
206  ret <16 x i16> %1
207}
208
209define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
210; CHECK-LABEL: @avx2_psrli_w_64(
211; CHECK-NEXT:    ret <16 x i16> zeroinitializer
212;
213  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
214  ret <16 x i16> %1
215}
216
217define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
218; CHECK-LABEL: @avx2_psrli_d_0(
219; CHECK-NEXT:    ret <8 x i32> %v
220;
221  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
222  ret <8 x i32> %1
223}
224
225define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
226; CHECK-LABEL: @avx2_psrli_d_15(
227; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
228; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
229;
230  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
231  ret <8 x i32> %1
232}
233
234define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
235; CHECK-LABEL: @avx2_psrli_d_64(
236; CHECK-NEXT:    ret <8 x i32> zeroinitializer
237;
238  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
239  ret <8 x i32> %1
240}
241
242define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
243; CHECK-LABEL: @avx2_psrli_q_0(
244; CHECK-NEXT:    ret <4 x i64> %v
245;
246  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
247  ret <4 x i64> %1
248}
249
250define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
251; CHECK-LABEL: @avx2_psrli_q_15(
252; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
253; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
254;
255  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
256  ret <4 x i64> %1
257}
258
259define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
260; CHECK-LABEL: @avx2_psrli_q_64(
261; CHECK-NEXT:    ret <4 x i64> zeroinitializer
262;
263  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
264  ret <4 x i64> %1
265}
266
267;
268; SHL - Immediate
269;
270
271define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
272; CHECK-LABEL: @sse2_pslli_w_0(
273; CHECK-NEXT:    ret <8 x i16> %v
274;
275  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
276  ret <8 x i16> %1
277}
278
279define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
280; CHECK-LABEL: @sse2_pslli_w_15(
281; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
282; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
283;
284  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
285  ret <8 x i16> %1
286}
287
288define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
289; CHECK-LABEL: @sse2_pslli_w_64(
290; CHECK-NEXT:    ret <8 x i16> zeroinitializer
291;
292  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
293  ret <8 x i16> %1
294}
295
296define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
297; CHECK-LABEL: @sse2_pslli_d_0(
298; CHECK-NEXT:    ret <4 x i32> %v
299;
300  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
301  ret <4 x i32> %1
302}
303
304define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
305; CHECK-LABEL: @sse2_pslli_d_15(
306; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
307; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
308;
309  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
310  ret <4 x i32> %1
311}
312
313define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
314; CHECK-LABEL: @sse2_pslli_d_64(
315; CHECK-NEXT:    ret <4 x i32> zeroinitializer
316;
317  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
318  ret <4 x i32> %1
319}
320
321define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
322; CHECK-LABEL: @sse2_pslli_q_0(
323; CHECK-NEXT:    ret <2 x i64> %v
324;
325  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
326  ret <2 x i64> %1
327}
328
329define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
330; CHECK-LABEL: @sse2_pslli_q_15(
331; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
332; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
333;
334  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
335  ret <2 x i64> %1
336}
337
338define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
339; CHECK-LABEL: @sse2_pslli_q_64(
340; CHECK-NEXT:    ret <2 x i64> zeroinitializer
341;
342  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
343  ret <2 x i64> %1
344}
345
346define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
347; CHECK-LABEL: @avx2_pslli_w_0(
348; CHECK-NEXT:    ret <16 x i16> %v
349;
350  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
351  ret <16 x i16> %1
352}
353
354define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
355; CHECK-LABEL: @avx2_pslli_w_15(
356; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
357; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
358;
359  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
360  ret <16 x i16> %1
361}
362
363define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
364; CHECK-LABEL: @avx2_pslli_w_64(
365; CHECK-NEXT:    ret <16 x i16> zeroinitializer
366;
367  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
368  ret <16 x i16> %1
369}
370
371define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
372; CHECK-LABEL: @avx2_pslli_d_0(
373; CHECK-NEXT:    ret <8 x i32> %v
374;
375  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
376  ret <8 x i32> %1
377}
378
379define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
380; CHECK-LABEL: @avx2_pslli_d_15(
381; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
382; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
383;
384  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
385  ret <8 x i32> %1
386}
387
388define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
389; CHECK-LABEL: @avx2_pslli_d_64(
390; CHECK-NEXT:    ret <8 x i32> zeroinitializer
391;
392  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
393  ret <8 x i32> %1
394}
395
396define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
397; CHECK-LABEL: @avx2_pslli_q_0(
398; CHECK-NEXT:    ret <4 x i64> %v
399;
400  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
401  ret <4 x i64> %1
402}
403
404define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
405; CHECK-LABEL: @avx2_pslli_q_15(
406; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
407; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
408;
409  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
410  ret <4 x i64> %1
411}
412
413define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
414; CHECK-LABEL: @avx2_pslli_q_64(
415; CHECK-NEXT:    ret <4 x i64> zeroinitializer
416;
417  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
418  ret <4 x i64> %1
419}
420
421;
422; ASHR - Constant Vector
423;
424
425define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
426; CHECK-LABEL: @sse2_psra_w_0(
427; CHECK-NEXT:    ret <8 x i16> %v
428;
429  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
430  ret <8 x i16> %1
431}
432
433define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
434; CHECK-LABEL: @sse2_psra_w_15(
435; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
436; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
437;
438  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
439  ret <8 x i16> %1
440}
441
442define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
443; CHECK-LABEL: @sse2_psra_w_15_splat(
444; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
445; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
446;
447  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
448  ret <8 x i16> %1
449}
450
451define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
452; CHECK-LABEL: @sse2_psra_w_64(
453; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
454; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
455;
456  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
457  ret <8 x i16> %1
458}
459
460define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
461; CHECK-LABEL: @sse2_psra_d_0(
462; CHECK-NEXT:    ret <4 x i32> %v
463;
464  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
465  ret <4 x i32> %1
466}
467
468define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
469; CHECK-LABEL: @sse2_psra_d_15(
470; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
471; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
472;
473  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
474  ret <4 x i32> %1
475}
476
477define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
478; CHECK-LABEL: @sse2_psra_d_15_splat(
479; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
480; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
481;
482  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
483  ret <4 x i32> %1
484}
485
486define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
487; CHECK-LABEL: @sse2_psra_d_64(
488; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
489; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
490;
491  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
492  ret <4 x i32> %1
493}
494
495define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
496; CHECK-LABEL: @avx2_psra_w_0(
497; CHECK-NEXT:    ret <16 x i16> %v
498;
499  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
500  ret <16 x i16> %1
501}
502
503define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
504; CHECK-LABEL: @avx2_psra_w_15(
505; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
506; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
507;
508  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
509  ret <16 x i16> %1
510}
511
512define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
513; CHECK-LABEL: @avx2_psra_w_15_splat(
514; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
515; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
516;
517  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
518  ret <16 x i16> %1
519}
520
521define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
522; CHECK-LABEL: @avx2_psra_w_64(
523; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
524; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
525;
526  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
527  ret <16 x i16> %1
528}
529
530define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
531; CHECK-LABEL: @avx2_psra_d_0(
532; CHECK-NEXT:    ret <8 x i32> %v
533;
534  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
535  ret <8 x i32> %1
536}
537
538define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
539; CHECK-LABEL: @avx2_psra_d_15(
540; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
541; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
542;
543  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
544  ret <8 x i32> %1
545}
546
547define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
548; CHECK-LABEL: @avx2_psra_d_15_splat(
549; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
550; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
551;
552  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
553  ret <8 x i32> %1
554}
555
556define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
557; CHECK-LABEL: @avx2_psra_d_64(
558; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
559; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
560;
561  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
562  ret <8 x i32> %1
563}
564
565;
566; LSHR - Constant Vector
567;
568
569define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
570; CHECK-LABEL: @sse2_psrl_w_0(
571; CHECK-NEXT:    ret <8 x i16> %v
572;
573  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
574  ret <8 x i16> %1
575}
576
577define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
578; CHECK-LABEL: @sse2_psrl_w_15(
579; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
580; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
581;
582  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
583  ret <8 x i16> %1
584}
585
586define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
587; CHECK-LABEL: @sse2_psrl_w_15_splat(
588; CHECK-NEXT:    ret <8 x i16> zeroinitializer
589;
590  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
591  ret <8 x i16> %1
592}
593
594define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
595; CHECK-LABEL: @sse2_psrl_w_64(
596; CHECK-NEXT:    ret <8 x i16> zeroinitializer
597;
598  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
599  ret <8 x i16> %1
600}
601
602define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
603; CHECK-LABEL: @sse2_psrl_d_0(
604; CHECK-NEXT:    ret <4 x i32> %v
605;
606  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
607  ret <4 x i32> %1
608}
609
610define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
611; CHECK-LABEL: @sse2_psrl_d_15(
612; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
613; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
614;
615  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
616  ret <4 x i32> %1
617}
618
619define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
620; CHECK-LABEL: @sse2_psrl_d_15_splat(
621; CHECK-NEXT:    ret <4 x i32> zeroinitializer
622;
623  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
624  ret <4 x i32> %1
625}
626
627define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
628; CHECK-LABEL: @sse2_psrl_d_64(
629; CHECK-NEXT:    ret <4 x i32> zeroinitializer
630;
631  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
632  ret <4 x i32> %1
633}
634
635define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
636; CHECK-LABEL: @sse2_psrl_q_0(
637; CHECK-NEXT:    ret <2 x i64> %v
638;
639  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
640  ret <2 x i64> %1
641}
642
643define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
644; CHECK-LABEL: @sse2_psrl_q_15(
645; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
646; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
647;
648  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
649  ret <2 x i64> %1
650}
651
652define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
653; CHECK-LABEL: @sse2_psrl_q_64(
654; CHECK-NEXT:    ret <2 x i64> zeroinitializer
655;
656  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
657  ret <2 x i64> %1
658}
659
660define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
661; CHECK-LABEL: @avx2_psrl_w_0(
662; CHECK-NEXT:    ret <16 x i16> %v
663;
664  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
665  ret <16 x i16> %1
666}
667
668define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
669; CHECK-LABEL: @avx2_psrl_w_15(
670; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
671; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
672;
673  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
674  ret <16 x i16> %1
675}
676
677define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
678; CHECK-LABEL: @avx2_psrl_w_15_splat(
679; CHECK-NEXT:    ret <16 x i16> zeroinitializer
680;
681  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
682  ret <16 x i16> %1
683}
684
685define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
686; CHECK-LABEL: @avx2_psrl_w_64(
687; CHECK-NEXT:    ret <16 x i16> zeroinitializer
688;
689  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
690  ret <16 x i16> %1
691}
692
693define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
694; CHECK-LABEL: @avx2_psrl_d_0(
695; CHECK-NEXT:    ret <8 x i32> %v
696;
697  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
698  ret <8 x i32> %1
699}
700
701define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
702; CHECK-LABEL: @avx2_psrl_d_15(
703; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
704; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
705;
706  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
707  ret <8 x i32> %1
708}
709
710define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
711; CHECK-LABEL: @avx2_psrl_d_15_splat(
712; CHECK-NEXT:    ret <8 x i32> zeroinitializer
713;
714  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
715  ret <8 x i32> %1
716}
717
718define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
719; CHECK-LABEL: @avx2_psrl_d_64(
720; CHECK-NEXT:    ret <8 x i32> zeroinitializer
721;
722  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
723  ret <8 x i32> %1
724}
725
726define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
727; CHECK-LABEL: @avx2_psrl_q_0(
728; CHECK-NEXT:    ret <4 x i64> %v
729;
730  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
731  ret <4 x i64> %1
732}
733
734define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
735; CHECK-LABEL: @avx2_psrl_q_15(
736; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
737; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
738;
739  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
740  ret <4 x i64> %1
741}
742
743define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
744; CHECK-LABEL: @avx2_psrl_q_64(
745; CHECK-NEXT:    ret <4 x i64> zeroinitializer
746;
747  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
748  ret <4 x i64> %1
749}
750
751;
752; SHL - Constant Vector
753;
754
755define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
756; CHECK-LABEL: @sse2_psll_w_0(
757; CHECK-NEXT:    ret <8 x i16> %v
758;
759  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
760  ret <8 x i16> %1
761}
762
763define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
764; CHECK-LABEL: @sse2_psll_w_15(
765; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
766; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
767;
768  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
769  ret <8 x i16> %1
770}
771
772define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
773; CHECK-LABEL: @sse2_psll_w_15_splat(
774; CHECK-NEXT:    ret <8 x i16> zeroinitializer
775;
776  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
777  ret <8 x i16> %1
778}
779
780define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
781; CHECK-LABEL: @sse2_psll_w_64(
782; CHECK-NEXT:    ret <8 x i16> zeroinitializer
783;
784  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
785  ret <8 x i16> %1
786}
787
788define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
789; CHECK-LABEL: @sse2_psll_d_0(
790; CHECK-NEXT:    ret <4 x i32> %v
791;
792  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
793  ret <4 x i32> %1
794}
795
796define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
797; CHECK-LABEL: @sse2_psll_d_15(
798; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
799; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
800;
801  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
802  ret <4 x i32> %1
803}
804
805define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
806; CHECK-LABEL: @sse2_psll_d_15_splat(
807; CHECK-NEXT:    ret <4 x i32> zeroinitializer
808;
809  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
810  ret <4 x i32> %1
811}
812
813define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
814; CHECK-LABEL: @sse2_psll_d_64(
815; CHECK-NEXT:    ret <4 x i32> zeroinitializer
816;
817  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
818  ret <4 x i32> %1
819}
820
821define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
822; CHECK-LABEL: @sse2_psll_q_0(
823; CHECK-NEXT:    ret <2 x i64> %v
824;
825  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
826  ret <2 x i64> %1
827}
828
829define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
830; CHECK-LABEL: @sse2_psll_q_15(
831; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
832; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
833;
834  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
835  ret <2 x i64> %1
836}
837
838define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
839; CHECK-LABEL: @sse2_psll_q_64(
840; CHECK-NEXT:    ret <2 x i64> zeroinitializer
841;
842  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
843  ret <2 x i64> %1
844}
845
846define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
847; CHECK-LABEL: @avx2_psll_w_0(
848; CHECK-NEXT:    ret <16 x i16> %v
849;
850  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
851  ret <16 x i16> %1
852}
853
854define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
855; CHECK-LABEL: @avx2_psll_w_15(
856; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
857; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
858;
859  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
860  ret <16 x i16> %1
861}
862
863define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
864; CHECK-LABEL: @avx2_psll_w_15_splat(
865; CHECK-NEXT:    ret <16 x i16> zeroinitializer
866;
867  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
868  ret <16 x i16> %1
869}
870
871define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
872; CHECK-LABEL: @avx2_psll_w_64(
873; CHECK-NEXT:    ret <16 x i16> zeroinitializer
874;
875  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
876  ret <16 x i16> %1
877}
878
879define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
880; CHECK-LABEL: @avx2_psll_d_0(
881; CHECK-NEXT:    ret <8 x i32> %v
882;
883  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
884  ret <8 x i32> %1
885}
886
887define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
888; CHECK-LABEL: @avx2_psll_d_15(
889; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
890; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
891;
892  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
893  ret <8 x i32> %1
894}
895
896define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
897; CHECK-LABEL: @avx2_psll_d_15_splat(
898; CHECK-NEXT:    ret <8 x i32> zeroinitializer
899;
900  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
901  ret <8 x i32> %1
902}
903
904define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
905; CHECK-LABEL: @avx2_psll_d_64(
906; CHECK-NEXT:    ret <8 x i32> zeroinitializer
907;
908  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
909  ret <8 x i32> %1
910}
911
912define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
913; CHECK-LABEL: @avx2_psll_q_0(
914; CHECK-NEXT:    ret <4 x i64> %v
915;
916  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
917  ret <4 x i64> %1
918}
919
920define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
921; CHECK-LABEL: @avx2_psll_q_15(
922; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
923; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
924;
925  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
926  ret <4 x i64> %1
927}
928
929define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
930; CHECK-LABEL: @avx2_psll_q_64(
931; CHECK-NEXT:    ret <4 x i64> zeroinitializer
932;
933  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
934  ret <4 x i64> %1
935}
936
937;
938; ASHR - Constant Per-Element Vector
939;
940
941define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
942; CHECK-LABEL: @avx2_psrav_d_128_0(
943; CHECK-NEXT:    ret <4 x i32> %v
944;
945  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
946  ret <4 x i32> %1
947}
948
949define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
950; CHECK-LABEL: @avx2_psrav_d_256_0(
951; CHECK-NEXT:    ret <8 x i32> %v
952;
953  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
954  ret <8 x i32> %1
955}
956
957define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
958; CHECK-LABEL: @avx2_psrav_d_128_var(
959; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
960; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
961;
962  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
963  ret <4 x i32> %1
964}
965
966define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
967; CHECK-LABEL: @avx2_psrav_d_256_var(
968; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
969; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
970;
971  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
972  ret <8 x i32> %1
973}
974
975define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
976; CHECK-LABEL: @avx2_psrav_d_128_allbig(
977; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef>
978; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
979;
980  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
981  ret <4 x i32> %1
982}
983
984define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
985; CHECK-LABEL: @avx2_psrav_d_256_allbig(
986; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
987; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
988;
989  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
990  ret <8 x i32> %1
991}
992
993define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
994; CHECK-LABEL: @avx2_psrav_d_128_undef(
995; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
996; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
997;
998  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
999  %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
1000  ret <4 x i32> %2
1001}
1002
1003define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
1004; CHECK-LABEL: @avx2_psrav_d_256_undef(
1005; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1006; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1007;
1008  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
1009  %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
1010  ret <8 x i32> %2
1011}
1012
1013;
1014; LSHR - Constant Per-Element Vector
1015;
1016
1017define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
1018; CHECK-LABEL: @avx2_psrlv_d_128_0(
1019; CHECK-NEXT:    ret <4 x i32> %v
1020;
1021  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
1022  ret <4 x i32> %1
1023}
1024
1025define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
1026; CHECK-LABEL: @avx2_psrlv_d_256_0(
1027; CHECK-NEXT:    ret <8 x i32> %v
1028;
1029  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
1030  ret <8 x i32> %1
1031}
1032
1033define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
1034; CHECK-LABEL: @avx2_psrlv_d_128_var(
1035; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
1036; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1037;
1038  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
1039  ret <4 x i32> %1
1040}
1041
1042define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
1043; CHECK-LABEL: @avx2_psrlv_d_256_var(
1044; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1045; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1046;
1047  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
1048  ret <8 x i32> %1
1049}
1050
1051define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
1052; CHECK-LABEL: @avx2_psrlv_d_128_big(
1053; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1054; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1055;
1056  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1057  ret <4 x i32> %1
1058}
1059
1060define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
1061; CHECK-LABEL: @avx2_psrlv_d_256_big(
1062; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
1063; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1064;
1065  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
1066  ret <8 x i32> %1
1067}
1068
1069define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
1070; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
1071; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
1072;
1073  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
1074  ret <4 x i32> %1
1075}
1076
1077define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
1078; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
1079; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1080;
1081  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
1082  ret <8 x i32> %1
1083}
1084
1085define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
1086; CHECK-LABEL: @avx2_psrlv_d_128_undef(
1087; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
1088; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1089;
1090  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
1091  %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
1092  ret <4 x i32> %2
1093}
1094
1095define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
1096; CHECK-LABEL: @avx2_psrlv_d_256_undef(
1097; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
1098; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1099;
1100  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
1101  %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
1102  ret <8 x i32> %2
1103}
1104
1105define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
1106; CHECK-LABEL: @avx2_psrlv_q_128_0(
1107; CHECK-NEXT:    ret <2 x i64> %v
1108;
1109  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
1110  ret <2 x i64> %1
1111}
1112
1113define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
1114; CHECK-LABEL: @avx2_psrlv_q_256_0(
1115; CHECK-NEXT:    ret <4 x i64> %v
1116;
1117  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
1118  ret <4 x i64> %1
1119}
1120
1121define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
1122; CHECK-LABEL: @avx2_psrlv_q_128_var(
1123; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 8>
1124; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1125;
1126  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
1127  ret <2 x i64> %1
1128}
1129
1130define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
1131; CHECK-LABEL: @avx2_psrlv_q_256_var(
1132; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
1133; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1134;
1135  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
1136  ret <4 x i64> %1
1137}
1138
1139define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
1140; CHECK-LABEL: @avx2_psrlv_q_128_big(
1141; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
1142; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1143;
1144  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
1145  ret <2 x i64> %1
1146}
1147
1148define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
1149; CHECK-LABEL: @avx2_psrlv_q_256_big(
1150; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
1151; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1152;
1153  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
1154  ret <4 x i64> %1
1155}
1156
1157define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
1158; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
1159; CHECK-NEXT:    ret <2 x i64> zeroinitializer
1160;
1161  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
1162  ret <2 x i64> %1
1163}
1164
1165define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
1166; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
1167; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
1168;
1169  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
1170  ret <4 x i64> %1
1171}
1172
1173define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) {
1174; CHECK-LABEL: @avx2_psrlv_q_128_undef(
1175; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 undef>
1176; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1177;
1178  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
1179  %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
1180  ret <2 x i64> %2
1181}
1182
1183define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) {
1184; CHECK-LABEL: @avx2_psrlv_q_256_undef(
1185; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
1186; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1187;
1188  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
1189  %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
1190  ret <4 x i64> %2
1191}
1192
1193;
1194; SHL - Constant Per-Element Vector
1195;
1196
1197define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
1198; CHECK-LABEL: @avx2_psllv_d_128_0(
1199; CHECK-NEXT:    ret <4 x i32> %v
1200;
1201  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
1202  ret <4 x i32> %1
1203}
1204
1205define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
1206; CHECK-LABEL: @avx2_psllv_d_256_0(
1207; CHECK-NEXT:    ret <8 x i32> %v
1208;
1209  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
1210  ret <8 x i32> %1
1211}
1212
1213define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
1214; CHECK-LABEL: @avx2_psllv_d_128_var(
1215; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
1216; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1217;
1218  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
1219  ret <4 x i32> %1
1220}
1221
1222define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
1223; CHECK-LABEL: @avx2_psllv_d_256_var(
1224; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1225; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1226;
1227  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
1228  ret <8 x i32> %1
1229}
1230
1231define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
1232; CHECK-LABEL: @avx2_psllv_d_128_big(
1233; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1234; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1235;
1236  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1237  ret <4 x i32> %1
1238}
1239
1240define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
1241; CHECK-LABEL: @avx2_psllv_d_256_big(
1242; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
1243; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1244;
1245  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
1246  ret <8 x i32> %1
1247}
1248
1249define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
1250; CHECK-LABEL: @avx2_psllv_d_128_allbig(
1251; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
1252;
1253  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
1254  ret <4 x i32> %1
1255}
1256
1257define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
1258; CHECK-LABEL: @avx2_psllv_d_256_allbig(
1259; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1260;
1261  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
1262  ret <8 x i32> %1
1263}
1264
1265define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
1266; CHECK-LABEL: @avx2_psllv_d_128_undef(
1267; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
1268; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
1269;
1270  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
1271  %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
1272  ret <4 x i32> %2
1273}
1274
1275define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
1276; CHECK-LABEL: @avx2_psllv_d_256_undef(
1277; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
1278; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
1279;
1280  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
1281  %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
1282  ret <8 x i32> %2
1283}
1284
1285define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
1286; CHECK-LABEL: @avx2_psllv_q_128_0(
1287; CHECK-NEXT:    ret <2 x i64> %v
1288;
1289  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
1290  ret <2 x i64> %1
1291}
1292
1293define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
1294; CHECK-LABEL: @avx2_psllv_q_256_0(
1295; CHECK-NEXT:    ret <4 x i64> %v
1296;
1297  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
1298  ret <4 x i64> %1
1299}
1300
1301define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
1302; CHECK-LABEL: @avx2_psllv_q_128_var(
1303; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 8>
1304; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1305;
1306  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
1307  ret <2 x i64> %1
1308}
1309
1310define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
1311; CHECK-LABEL: @avx2_psllv_q_256_var(
1312; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
1313; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1314;
1315  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
1316  ret <4 x i64> %1
1317}
1318
1319define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
1320; CHECK-LABEL: @avx2_psllv_q_128_big(
1321; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
1322; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1323;
1324  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
1325  ret <2 x i64> %1
1326}
1327
1328define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
1329; CHECK-LABEL: @avx2_psllv_q_256_big(
1330; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
1331; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1332;
1333  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
1334  ret <4 x i64> %1
1335}
1336
1337define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
1338; CHECK-LABEL: @avx2_psllv_q_128_allbig(
1339; CHECK-NEXT:    ret <2 x i64> zeroinitializer
1340;
1341  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
1342  ret <2 x i64> %1
1343}
1344
1345define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
1346; CHECK-LABEL: @avx2_psllv_q_256_allbig(
1347; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
1348;
1349  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
1350  ret <4 x i64> %1
1351}
1352
1353define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) {
1354; CHECK-LABEL: @avx2_psllv_q_128_undef(
1355; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 undef>
1356; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
1357;
1358  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
1359  %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
1360  ret <2 x i64> %2
1361}
1362
1363define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) {
1364; CHECK-LABEL: @avx2_psllv_q_256_undef(
1365; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
1366; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
1367;
1368  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
1369  %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
1370  ret <4 x i64> %2
1371}
1372
1373;
1374; Vector Demanded Bits
1375;
1376
1377define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
1378; CHECK-LABEL: @sse2_psra_w_var(
1379; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
1380; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
1381;
1382  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1383  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
1384  ret <8 x i16> %2
1385}
1386
1387define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
1388; CHECK-LABEL: @sse2_psra_w_var_bc(
1389; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <8 x i16>
1390; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> [[TMP1]])
1391; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
1392;
1393  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1394  %2 = bitcast <2 x i64> %1 to <8 x i16>
1395  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
1396  ret <8 x i16> %3
1397}
1398
define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psra_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psra_d_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> %a to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> [[TMP1]])
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
  ret <4 x i32> %3
}

define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psra_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psra_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psrl_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psrl_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psrl_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psrl_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
; CHECK-LABEL: @avx2_psrl_w_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> [[TMP1]])
; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
  ret <16 x i16> %3
}

define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psrl_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_d_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> [[TMP1]])
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
  ret <8 x i32> %3
}

define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psll_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psll_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psll_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psll_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psll_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psll_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

;
; Constant Folding
;
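; These chains of immediate (psrai) and vector-count (psra) arithmetic shifts
; use constant shift amounts only: when the total shift is zero the input is
; expected back unchanged, and when the input is constant the whole chain is
; expected to fold to a constant.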

define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
; CHECK-LABEL: @test_sse2_psra_w_0(
; CHECK-NEXT:    ret <8 x i16> %A
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
  ret <8 x i16> %3
}

define <8 x i16> @test_sse2_psra_w_8() {
; CHECK-LABEL: @test_sse2_psra_w_8(
; CHECK-NEXT:    ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
;
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
  ret <8 x i16> %4
}

define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
; CHECK-LABEL: @test_sse2_psra_d_0(
; CHECK-NEXT:    ret <4 x i32> %A
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0)
  ret <4 x i32> %3
}

define <4 x i32> @sse2_psra_d_8() {
; CHECK-LABEL: @sse2_psra_d_8(
; CHECK-NEXT:    ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
  ret <4 x i32> %4
}

define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_0(
; CHECK-NEXT:    ret <16 x i16> %A
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}

define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_8(
; CHECK-NEXT:    ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
;
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
  ret <16 x i16> %4
}

define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
; CHECK-LABEL: @test_avx2_psra_d_0(
; CHECK-NEXT:    ret <8 x i32> %A
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
  ret <8 x i32> %3
}

define <8 x i32> @test_avx2_psra_d_8() {
; CHECK-LABEL: @test_avx2_psra_d_8(
; CHECK-NEXT:    ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
  ret <8 x i32> %4
}

;
; Old Tests
;
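; Legacy constant-folding tests: each one builds a constant shift count, runs
; it through a chain of vector-count and immediate shift intrinsics, and
; expects the chain to fold to the constant in the CHECK line (zero once the
; count of 128 exceeds every element width).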

define <2 x i64> @test_sse2_1() {
; CHECK-LABEL: @test_sse2_1(
; CHECK-NEXT:    ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

define <4 x i64> @test_avx2_1() {
; CHECK-LABEL: @test_avx2_1(
; CHECK-NEXT:    ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

define <2 x i64> @test_sse2_0() {
; CHECK-LABEL: @test_sse2_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

define <4 x i64> @test_avx2_0() {
; CHECK-LABEL: @test_avx2_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

define <2 x i64> @test_sse2_psrl_1() {
; CHECK-LABEL: @test_sse2_psrl_1(
; CHECK-NEXT:    ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

define <4 x i64> @test_avx2_psrl_1() {
; CHECK-LABEL: @test_avx2_psrl_1(
; CHECK-NEXT:    ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

define <2 x i64> @test_sse2_psrl_0() {
; CHECK-LABEL: @test_sse2_psrl_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

define <4 x i64> @test_avx2_psrl_0() {
; CHECK-LABEL: @test_avx2_psrl_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1

declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1

declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1

declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1

declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1

declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1

attributes #1 = { nounwind readnone }