; (removed non-file HTML navigation artifacts introduced by a code-viewer scrape)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2              | FileCheck %s --check-prefixes=X86-SSE2
3; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2,+avx         | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
4; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2,+avx,+avx2   | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2            | FileCheck %s --check-prefixes=X64-SSE2
6; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+avx       | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
7; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+avx,+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
8
9; The mask is all-ones, potentially shifted.
10
11;------------------------------------------------------------------------------;
12; 128-bit vector; 8-bit elements = 16 elements
13;------------------------------------------------------------------------------;
14
15; lshr
16
17define <16 x i8> @test_128_i8_x_16_7_mask_lshr_1(<16 x i8> %a0) {
18; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_lshr_1:
19; X86-SSE2:       # %bb.0:
20; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
21; X86-SSE2-NEXT:    psrlw $1, %xmm0
22; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
23; X86-SSE2-NEXT:    retl
24;
25; X86-AVX-LABEL: test_128_i8_x_16_7_mask_lshr_1:
26; X86-AVX:       # %bb.0:
27; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
28; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
29; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
30; X86-AVX-NEXT:    retl
31;
32; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_lshr_1:
33; X64-SSE2:       # %bb.0:
34; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
35; X64-SSE2-NEXT:    psrlw $1, %xmm0
36; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
37; X64-SSE2-NEXT:    retq
38;
39; X64-AVX-LABEL: test_128_i8_x_16_7_mask_lshr_1:
40; X64-AVX:       # %bb.0:
41; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
42; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
43; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
44; X64-AVX-NEXT:    retq
45  %t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
46  %t1 = lshr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
47  ret <16 x i8> %t1
48}
49
; Mask 28 then lshr 1: per the CHECK lines only pand + psrlw $1 are emitted —
; no post-shift mask.
define <16 x i8> @test_128_i8_x_16_28_mask_lshr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = lshr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %t1
}
; Mask 28 then lshr 2: per the CHECK lines only pand + psrlw $2 are emitted —
; no post-shift mask.
define <16 x i8> @test_128_i8_x_16_28_mask_lshr_2(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $2, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_2:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $2, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_2:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $2, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $2, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = lshr <16 x i8> %t0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %t1
}
; Mask 28 then lshr 3: per the CHECK lines codegen is pand + psrlw $3 + pand —
; the shift amount exceeds the mask's trailing zero count, so a second mask follows.
define <16 x i8> @test_128_i8_x_16_28_mask_lshr_3(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_3:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $3, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_3:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_3:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $3, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_3:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = lshr <16 x i8> %t0, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %t1
}
; Mask 28 then lshr 4: per the CHECK lines codegen is pand + psrlw $4 + pand.
define <16 x i8> @test_128_i8_x_16_28_mask_lshr_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_4:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $4, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_4:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_4:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $4, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_4:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = lshr <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %t1
}
170
; High-bit mask 224 then lshr 1: per the CHECK lines only pand + psrlw $1 are
; emitted — no post-shift mask.
define <16 x i8> @test_128_i8_x_16_224_mask_lshr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
  %t1 = lshr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %t1
}
; High-bit mask 224 then lshr 4: per the CHECK lines only pand + psrlw $4 are
; emitted — no post-shift mask.
define <16 x i8> @test_128_i8_x_16_224_mask_lshr_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_4:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $4, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_4:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_4:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $4, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_4:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
  %t1 = lshr <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %t1
}
; Mask 224 then lshr 5 (shift >= mask's trailing zeros): per the CHECK lines the
; mask is folded past the shift — psrlw $5 first, then a single pand.
define <16 x i8> @test_128_i8_x_16_224_mask_lshr_5(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_5:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrlw $5, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_5:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrlw $5, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_5:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrlw $5, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_5:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlw $5, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
  %t1 = lshr <16 x i8> %t0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <16 x i8> %t1
}
; Mask 224 then lshr 6: per the CHECK lines the shift is performed first
; (psrlw $6) and a single pand follows.
define <16 x i8> @test_128_i8_x_16_224_mask_lshr_6(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_6:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrlw $6, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_6:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrlw $6, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_6:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrlw $6, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_6:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlw $6, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
  %t1 = lshr <16 x i8> %t0, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
  ret <16 x i8> %t1
}
283
284; ashr
285
; Mask 7 then ashr 1: sign bit is cleared by the mask, so per the CHECK lines
; the ashr lowers to the same logical-shift sequence as lshr (pand + psrlw + pand).
define <16 x i8> @test_128_i8_x_16_7_mask_ashr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_ashr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $1, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_7_mask_ashr_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_ashr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $1, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_7_mask_ashr_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %t1 = ashr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %t1
}
318
; Mask 28 then ashr 1: sign bit masked off, so per the CHECK lines this lowers
; like the lshr case — pand + psrlw $1, no extra mask.
define <16 x i8> @test_128_i8_x_16_28_mask_ashr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = ashr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %t1
}
; Mask 28 then ashr 2: per the CHECK lines, lowers to pand + psrlw $2 — identical
; to the corresponding lshr test.
define <16 x i8> @test_128_i8_x_16_28_mask_ashr_2(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $2, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_2:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $2, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_2:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $2, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $2, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = ashr <16 x i8> %t0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %t1
}
; Mask 28 then ashr 3: per the CHECK lines, lowers to pand + psrlw $3 + pand —
; identical to the corresponding lshr test.
define <16 x i8> @test_128_i8_x_16_28_mask_ashr_3(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_3:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $3, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_3:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_3:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $3, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_3:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = ashr <16 x i8> %t0, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %t1
}
; Mask 28 then ashr 4: per the CHECK lines, lowers to pand + psrlw $4 + pand —
; identical to the corresponding lshr test.
define <16 x i8> @test_128_i8_x_16_28_mask_ashr_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_4:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $4, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_4:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_4:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $4, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_4:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = ashr <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %t1
}
439
; Mask 224 (sign bit set) then ashr 1: per the CHECK lines, lowered as a logical
; shift followed by the pxor/psubb sign-extension fixup with bias constant 64.
define <16 x i8> @test_128_i8_x_16_224_mask_ashr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $1, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; X86-SSE2-NEXT:    pxor %xmm1, %xmm0
; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $1, %xmm0
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; X64-SSE2-NEXT:    pxor %xmm1, %xmm0
; X64-SSE2-NEXT:    psubb %xmm1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
  %t1 = ashr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %t1
}
; Mask 224 then ashr 4: per the CHECK lines, logical shift plus pxor/psubb
; sign-extension fixup with bias constant 8 (= 128 >> 4).
define <16 x i8> @test_128_i8_x_16_224_mask_ashr_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_4:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlw $4, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X86-SSE2-NEXT:    pxor %xmm1, %xmm0
; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_4:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_4:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlw $4, %xmm0
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X64-SSE2-NEXT:    pxor %xmm1, %xmm0
; X64-SSE2-NEXT:    psubb %xmm1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_4:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
  %t1 = ashr <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %t1
}
; Mask 224 then ashr 5: per the CHECK lines the shift is hoisted before the mask
; (psrlw $5 then pand), followed by the pxor/psubb fixup with bias constant 4.
define <16 x i8> @test_128_i8_x_16_224_mask_ashr_5(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_5:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrlw $5, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; X86-SSE2-NEXT:    pxor %xmm1, %xmm0
; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_5:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrlw $5, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_5:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrlw $5, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; X64-SSE2-NEXT:    pxor %xmm1, %xmm0
; X64-SSE2-NEXT:    psubb %xmm1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_5:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlw $5, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
  %t1 = ashr <16 x i8> %t0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <16 x i8> %t1
}
; Mask 224 then ashr 6: per the CHECK lines, psrlw $6 then pand, followed by the
; pxor/psubb fixup with bias constant 2.
define <16 x i8> @test_128_i8_x_16_224_mask_ashr_6(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_6:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrlw $6, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; X86-SSE2-NEXT:    pxor %xmm1, %xmm0
; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_6:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrlw $6, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_6:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrlw $6, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; X64-SSE2-NEXT:    pxor %xmm1, %xmm0
; X64-SSE2-NEXT:    psubb %xmm1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_6:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlw $6, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
  %t1 = ashr <16 x i8> %t0, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
  ret <16 x i8> %t1
}
600
601; shl
602
; Mask 7 then shl 1: per the CHECK lines the shift-by-one is emitted as
; paddb %xmm0, %xmm0 (x+x == x<<1) after a single pand.
define <16 x i8> @test_128_i8_x_16_7_mask_shl_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    paddb %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_7_mask_shl_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    paddb %xmm0, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_7_mask_shl_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %t1 = shl <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %t1
}
; Mask 7 then shl 4: per the CHECK lines, a single pand followed by psllw $4 —
; no post-shift mask.
define <16 x i8> @test_128_i8_x_16_7_mask_shl_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_4:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllw $4, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_7_mask_shl_4:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_4:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllw $4, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_7_mask_shl_4:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %t1 = shl <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %t1
}
; Mask 7 then shl 5: per the CHECK lines the mask is folded past the shift —
; psllw $5 first, then a single pand.
define <16 x i8> @test_128_i8_x_16_7_mask_shl_5(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_5:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psllw $5, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_7_mask_shl_5:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsllw $5, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_5:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psllw $5, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_7_mask_shl_5:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllw $5, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %t1 = shl <16 x i8> %t0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <16 x i8> %t1
}
; Mask 7 then shl 6: per the CHECK lines, psllw $6 first, then a single pand.
define <16 x i8> @test_128_i8_x_16_7_mask_shl_6(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_6:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psllw $6, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_7_mask_shl_6:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsllw $6, %xmm0, %xmm0
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_6:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psllw $6, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_7_mask_shl_6:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllw $6, %xmm0, %xmm0
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %t1 = shl <16 x i8> %t0, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
  ret <16 x i8> %t1
}
715
; Mask 28 then shl 1: per the CHECK lines, pand followed by paddb %xmm0, %xmm0
; (add-to-self used as shift-by-one).
define <16 x i8> @test_128_i8_x_16_28_mask_shl_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    paddb %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_shl_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    paddb %xmm0, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_shl_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
  %t1 = shl <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %t1
}
744define <16 x i8> @test_128_i8_x_16_28_mask_shl_2(<16 x i8> %a0) {
745; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_2:
746; X86-SSE2:       # %bb.0:
747; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
748; X86-SSE2-NEXT:    psllw $2, %xmm0
749; X86-SSE2-NEXT:    retl
750;
751; X86-AVX-LABEL: test_128_i8_x_16_28_mask_shl_2:
752; X86-AVX:       # %bb.0:
753; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
754; X86-AVX-NEXT:    vpsllw $2, %xmm0, %xmm0
755; X86-AVX-NEXT:    retl
756;
757; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_2:
758; X64-SSE2:       # %bb.0:
759; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
760; X64-SSE2-NEXT:    psllw $2, %xmm0
761; X64-SSE2-NEXT:    retq
762;
763; X64-AVX-LABEL: test_128_i8_x_16_28_mask_shl_2:
764; X64-AVX:       # %bb.0:
765; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
766; X64-AVX-NEXT:    vpsllw $2, %xmm0, %xmm0
767; X64-AVX-NEXT:    retq
768  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
769  %t1 = shl <16 x i8> %t0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
770  ret <16 x i8> %t1
771}
772define <16 x i8> @test_128_i8_x_16_28_mask_shl_3(<16 x i8> %a0) {
773; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_3:
774; X86-SSE2:       # %bb.0:
775; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
776; X86-SSE2-NEXT:    psllw $3, %xmm0
777; X86-SSE2-NEXT:    retl
778;
779; X86-AVX-LABEL: test_128_i8_x_16_28_mask_shl_3:
780; X86-AVX:       # %bb.0:
781; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
782; X86-AVX-NEXT:    vpsllw $3, %xmm0, %xmm0
783; X86-AVX-NEXT:    retl
784;
785; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_3:
786; X64-SSE2:       # %bb.0:
787; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
788; X64-SSE2-NEXT:    psllw $3, %xmm0
789; X64-SSE2-NEXT:    retq
790;
791; X64-AVX-LABEL: test_128_i8_x_16_28_mask_shl_3:
792; X64-AVX:       # %bb.0:
793; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
794; X64-AVX-NEXT:    vpsllw $3, %xmm0, %xmm0
795; X64-AVX-NEXT:    retq
796  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
797  %t1 = shl <16 x i8> %t0, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
798  ret <16 x i8> %t1
799}
800define <16 x i8> @test_128_i8_x_16_28_mask_shl_4(<16 x i8> %a0) {
801; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_4:
802; X86-SSE2:       # %bb.0:
803; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
804; X86-SSE2-NEXT:    psllw $4, %xmm0
805; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
806; X86-SSE2-NEXT:    retl
807;
808; X86-AVX-LABEL: test_128_i8_x_16_28_mask_shl_4:
809; X86-AVX:       # %bb.0:
810; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
811; X86-AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
812; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
813; X86-AVX-NEXT:    retl
814;
815; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_4:
816; X64-SSE2:       # %bb.0:
817; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
818; X64-SSE2-NEXT:    psllw $4, %xmm0
819; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
820; X64-SSE2-NEXT:    retq
821;
822; X64-AVX-LABEL: test_128_i8_x_16_28_mask_shl_4:
823; X64-AVX:       # %bb.0:
824; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
825; X64-AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
826; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
827; X64-AVX-NEXT:    retq
828  %t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
829  %t1 = shl <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
830  ret <16 x i8> %t1
831}
832
833define <16 x i8> @test_128_i8_x_16_224_mask_shl_1(<16 x i8> %a0) {
834; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_shl_1:
835; X86-SSE2:       # %bb.0:
836; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
837; X86-SSE2-NEXT:    paddb %xmm0, %xmm0
838; X86-SSE2-NEXT:    retl
839;
840; X86-AVX-LABEL: test_128_i8_x_16_224_mask_shl_1:
841; X86-AVX:       # %bb.0:
842; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
843; X86-AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
844; X86-AVX-NEXT:    retl
845;
846; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_shl_1:
847; X64-SSE2:       # %bb.0:
848; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
849; X64-SSE2-NEXT:    paddb %xmm0, %xmm0
850; X64-SSE2-NEXT:    retq
851;
852; X64-AVX-LABEL: test_128_i8_x_16_224_mask_shl_1:
853; X64-AVX:       # %bb.0:
854; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
855; X64-AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
856; X64-AVX-NEXT:    retq
857  %t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
858  %t1 = shl <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
859  ret <16 x i8> %t1
860}
861
862;------------------------------------------------------------------------------;
863; 128-bit vector; 16-bit elements = 8 elements
864;------------------------------------------------------------------------------;
865
866; lshr
867
868define <8 x i16> @test_128_i16_x_8_127_mask_lshr_1(<8 x i16> %a0) {
869; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_lshr_1:
870; X86-SSE2:       # %bb.0:
871; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
872; X86-SSE2-NEXT:    psrlw $1, %xmm0
873; X86-SSE2-NEXT:    retl
874;
875; X86-AVX-LABEL: test_128_i16_x_8_127_mask_lshr_1:
876; X86-AVX:       # %bb.0:
877; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
878; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
879; X86-AVX-NEXT:    retl
880;
881; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_lshr_1:
882; X64-SSE2:       # %bb.0:
883; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
884; X64-SSE2-NEXT:    psrlw $1, %xmm0
885; X64-SSE2-NEXT:    retq
886;
887; X64-AVX-LABEL: test_128_i16_x_8_127_mask_lshr_1:
888; X64-AVX:       # %bb.0:
889; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
890; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
891; X64-AVX-NEXT:    retq
892  %t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
893  %t1 = lshr <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
894  ret <8 x i16> %t1
895}
896
897define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_3(<8 x i16> %a0) {
898; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_3:
899; X86-SSE2:       # %bb.0:
900; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
901; X86-SSE2-NEXT:    psrlw $3, %xmm0
902; X86-SSE2-NEXT:    retl
903;
904; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_3:
905; X86-AVX:       # %bb.0:
906; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
907; X86-AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
908; X86-AVX-NEXT:    retl
909;
910; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_3:
911; X64-SSE2:       # %bb.0:
912; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
913; X64-SSE2-NEXT:    psrlw $3, %xmm0
914; X64-SSE2-NEXT:    retq
915;
916; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_3:
917; X64-AVX:       # %bb.0:
918; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
919; X64-AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
920; X64-AVX-NEXT:    retq
921  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
922  %t1 = lshr <8 x i16> %t0, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
923  ret <8 x i16> %t1
924}
925define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_4(<8 x i16> %a0) {
926; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_4:
927; X86-SSE2:       # %bb.0:
928; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
929; X86-SSE2-NEXT:    psrlw $4, %xmm0
930; X86-SSE2-NEXT:    retl
931;
932; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_4:
933; X86-AVX:       # %bb.0:
934; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
935; X86-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
936; X86-AVX-NEXT:    retl
937;
938; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_4:
939; X64-SSE2:       # %bb.0:
940; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
941; X64-SSE2-NEXT:    psrlw $4, %xmm0
942; X64-SSE2-NEXT:    retq
943;
944; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_4:
945; X64-AVX:       # %bb.0:
946; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
947; X64-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
948; X64-AVX-NEXT:    retq
949  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
950  %t1 = lshr <8 x i16> %t0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
951  ret <8 x i16> %t1
952}
953define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_5(<8 x i16> %a0) {
954; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_5:
955; X86-SSE2:       # %bb.0:
956; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
957; X86-SSE2-NEXT:    psrlw $5, %xmm0
958; X86-SSE2-NEXT:    retl
959;
960; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_5:
961; X86-AVX:       # %bb.0:
962; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
963; X86-AVX-NEXT:    vpsrlw $5, %xmm0, %xmm0
964; X86-AVX-NEXT:    retl
965;
966; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_5:
967; X64-SSE2:       # %bb.0:
968; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
969; X64-SSE2-NEXT:    psrlw $5, %xmm0
970; X64-SSE2-NEXT:    retq
971;
972; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_5:
973; X64-AVX:       # %bb.0:
974; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
975; X64-AVX-NEXT:    vpsrlw $5, %xmm0, %xmm0
976; X64-AVX-NEXT:    retq
977  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
978  %t1 = lshr <8 x i16> %t0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
979  ret <8 x i16> %t1
980}
981define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_6(<8 x i16> %a0) {
982; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_6:
983; X86-SSE2:       # %bb.0:
984; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
985; X86-SSE2-NEXT:    psrlw $6, %xmm0
986; X86-SSE2-NEXT:    retl
987;
988; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_6:
989; X86-AVX:       # %bb.0:
990; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
991; X86-AVX-NEXT:    vpsrlw $6, %xmm0, %xmm0
992; X86-AVX-NEXT:    retl
993;
994; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_6:
995; X64-SSE2:       # %bb.0:
996; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
997; X64-SSE2-NEXT:    psrlw $6, %xmm0
998; X64-SSE2-NEXT:    retq
999;
1000; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_6:
1001; X64-AVX:       # %bb.0:
1002; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1003; X64-AVX-NEXT:    vpsrlw $6, %xmm0, %xmm0
1004; X64-AVX-NEXT:    retq
1005  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
1006  %t1 = lshr <8 x i16> %t0, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
1007  ret <8 x i16> %t1
1008}
1009
1010define <8 x i16> @test_128_i16_x_8_65024_mask_lshr_1(<8 x i16> %a0) {
1011; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_1:
1012; X86-SSE2:       # %bb.0:
1013; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1014; X86-SSE2-NEXT:    psrlw $1, %xmm0
1015; X86-SSE2-NEXT:    retl
1016;
1017; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_1:
1018; X86-AVX:       # %bb.0:
1019; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1020; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
1021; X86-AVX-NEXT:    retl
1022;
1023; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_1:
1024; X64-SSE2:       # %bb.0:
1025; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1026; X64-SSE2-NEXT:    psrlw $1, %xmm0
1027; X64-SSE2-NEXT:    retq
1028;
1029; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_1:
1030; X64-AVX:       # %bb.0:
1031; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1032; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
1033; X64-AVX-NEXT:    retq
1034  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
1035  %t1 = lshr <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1036  ret <8 x i16> %t1
1037}
1038define <8 x i16> @test_128_i16_x_8_65024_mask_lshr_8(<8 x i16> %a0) {
1039; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_8:
1040; X86-SSE2:       # %bb.0:
1041; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1042; X86-SSE2-NEXT:    psrlw $8, %xmm0
1043; X86-SSE2-NEXT:    retl
1044;
1045; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_8:
1046; X86-AVX:       # %bb.0:
1047; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1048; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
1049; X86-AVX-NEXT:    retl
1050;
1051; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_8:
1052; X64-SSE2:       # %bb.0:
1053; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1054; X64-SSE2-NEXT:    psrlw $8, %xmm0
1055; X64-SSE2-NEXT:    retq
1056;
1057; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_8:
1058; X64-AVX:       # %bb.0:
1059; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1060; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
1061; X64-AVX-NEXT:    retq
1062  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
1063  %t1 = lshr <8 x i16> %t0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1064  ret <8 x i16> %t1
1065}
1066define <8 x i16> @test_128_i16_x_8_65024_mask_lshr_9(<8 x i16> %a0) {
1067; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_9:
1068; X86-SSE2:       # %bb.0:
1069; X86-SSE2-NEXT:    psrlw $9, %xmm0
1070; X86-SSE2-NEXT:    retl
1071;
1072; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_9:
1073; X86-AVX:       # %bb.0:
1074; X86-AVX-NEXT:    vpsrlw $9, %xmm0, %xmm0
1075; X86-AVX-NEXT:    retl
1076;
1077; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_9:
1078; X64-SSE2:       # %bb.0:
1079; X64-SSE2-NEXT:    psrlw $9, %xmm0
1080; X64-SSE2-NEXT:    retq
1081;
1082; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_9:
1083; X64-AVX:       # %bb.0:
1084; X64-AVX-NEXT:    vpsrlw $9, %xmm0, %xmm0
1085; X64-AVX-NEXT:    retq
1086  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
1087  %t1 = lshr <8 x i16> %t0, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
1088  ret <8 x i16> %t1
1089}
1090define <8 x i16> @test_128_i16_x_8_65024_mask_lshr_10(<8 x i16> %a0) {
1091; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_10:
1092; X86-SSE2:       # %bb.0:
1093; X86-SSE2-NEXT:    psrlw $10, %xmm0
1094; X86-SSE2-NEXT:    retl
1095;
1096; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_10:
1097; X86-AVX:       # %bb.0:
1098; X86-AVX-NEXT:    vpsrlw $10, %xmm0, %xmm0
1099; X86-AVX-NEXT:    retl
1100;
1101; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_10:
1102; X64-SSE2:       # %bb.0:
1103; X64-SSE2-NEXT:    psrlw $10, %xmm0
1104; X64-SSE2-NEXT:    retq
1105;
1106; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_10:
1107; X64-AVX:       # %bb.0:
1108; X64-AVX-NEXT:    vpsrlw $10, %xmm0, %xmm0
1109; X64-AVX-NEXT:    retq
1110  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
1111  %t1 = lshr <8 x i16> %t0, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
1112  ret <8 x i16> %t1
1113}
1114
1115; ashr
1116
1117define <8 x i16> @test_128_i16_x_8_127_mask_ashr_1(<8 x i16> %a0) {
1118; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_ashr_1:
1119; X86-SSE2:       # %bb.0:
1120; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1121; X86-SSE2-NEXT:    psrlw $1, %xmm0
1122; X86-SSE2-NEXT:    retl
1123;
1124; X86-AVX-LABEL: test_128_i16_x_8_127_mask_ashr_1:
1125; X86-AVX:       # %bb.0:
1126; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1127; X86-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
1128; X86-AVX-NEXT:    retl
1129;
1130; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_ashr_1:
1131; X64-SSE2:       # %bb.0:
1132; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1133; X64-SSE2-NEXT:    psrlw $1, %xmm0
1134; X64-SSE2-NEXT:    retq
1135;
1136; X64-AVX-LABEL: test_128_i16_x_8_127_mask_ashr_1:
1137; X64-AVX:       # %bb.0:
1138; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1139; X64-AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
1140; X64-AVX-NEXT:    retq
1141  %t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
1142  %t1 = ashr <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1143  ret <8 x i16> %t1
1144}
1145
1146define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_3(<8 x i16> %a0) {
1147; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_3:
1148; X86-SSE2:       # %bb.0:
1149; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1150; X86-SSE2-NEXT:    psrlw $3, %xmm0
1151; X86-SSE2-NEXT:    retl
1152;
1153; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_3:
1154; X86-AVX:       # %bb.0:
1155; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1156; X86-AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
1157; X86-AVX-NEXT:    retl
1158;
1159; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_3:
1160; X64-SSE2:       # %bb.0:
1161; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1162; X64-SSE2-NEXT:    psrlw $3, %xmm0
1163; X64-SSE2-NEXT:    retq
1164;
1165; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_3:
1166; X64-AVX:       # %bb.0:
1167; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1168; X64-AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
1169; X64-AVX-NEXT:    retq
1170  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
1171  %t1 = ashr <8 x i16> %t0, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
1172  ret <8 x i16> %t1
1173}
1174define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_4(<8 x i16> %a0) {
1175; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_4:
1176; X86-SSE2:       # %bb.0:
1177; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1178; X86-SSE2-NEXT:    psrlw $4, %xmm0
1179; X86-SSE2-NEXT:    retl
1180;
1181; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_4:
1182; X86-AVX:       # %bb.0:
1183; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1184; X86-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
1185; X86-AVX-NEXT:    retl
1186;
1187; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_4:
1188; X64-SSE2:       # %bb.0:
1189; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1190; X64-SSE2-NEXT:    psrlw $4, %xmm0
1191; X64-SSE2-NEXT:    retq
1192;
1193; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_4:
1194; X64-AVX:       # %bb.0:
1195; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1196; X64-AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
1197; X64-AVX-NEXT:    retq
1198  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
1199  %t1 = ashr <8 x i16> %t0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
1200  ret <8 x i16> %t1
1201}
1202define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_5(<8 x i16> %a0) {
1203; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_5:
1204; X86-SSE2:       # %bb.0:
1205; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1206; X86-SSE2-NEXT:    psrlw $5, %xmm0
1207; X86-SSE2-NEXT:    retl
1208;
1209; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_5:
1210; X86-AVX:       # %bb.0:
1211; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1212; X86-AVX-NEXT:    vpsrlw $5, %xmm0, %xmm0
1213; X86-AVX-NEXT:    retl
1214;
1215; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_5:
1216; X64-SSE2:       # %bb.0:
1217; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1218; X64-SSE2-NEXT:    psrlw $5, %xmm0
1219; X64-SSE2-NEXT:    retq
1220;
1221; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_5:
1222; X64-AVX:       # %bb.0:
1223; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1224; X64-AVX-NEXT:    vpsrlw $5, %xmm0, %xmm0
1225; X64-AVX-NEXT:    retq
1226  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
1227  %t1 = ashr <8 x i16> %t0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
1228  ret <8 x i16> %t1
1229}
1230define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_6(<8 x i16> %a0) {
1231; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_6:
1232; X86-SSE2:       # %bb.0:
1233; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1234; X86-SSE2-NEXT:    psrlw $6, %xmm0
1235; X86-SSE2-NEXT:    retl
1236;
1237; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_6:
1238; X86-AVX:       # %bb.0:
1239; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1240; X86-AVX-NEXT:    vpsrlw $6, %xmm0, %xmm0
1241; X86-AVX-NEXT:    retl
1242;
1243; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_6:
1244; X64-SSE2:       # %bb.0:
1245; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1246; X64-SSE2-NEXT:    psrlw $6, %xmm0
1247; X64-SSE2-NEXT:    retq
1248;
1249; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_6:
1250; X64-AVX:       # %bb.0:
1251; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1252; X64-AVX-NEXT:    vpsrlw $6, %xmm0, %xmm0
1253; X64-AVX-NEXT:    retq
1254  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
1255  %t1 = ashr <8 x i16> %t0, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
1256  ret <8 x i16> %t1
1257}
1258
1259define <8 x i16> @test_128_i16_x_8_65024_mask_ashr_1(<8 x i16> %a0) {
1260; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_1:
1261; X86-SSE2:       # %bb.0:
1262; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1263; X86-SSE2-NEXT:    psraw $1, %xmm0
1264; X86-SSE2-NEXT:    retl
1265;
1266; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_1:
1267; X86-AVX:       # %bb.0:
1268; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1269; X86-AVX-NEXT:    vpsraw $1, %xmm0, %xmm0
1270; X86-AVX-NEXT:    retl
1271;
1272; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_1:
1273; X64-SSE2:       # %bb.0:
1274; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1275; X64-SSE2-NEXT:    psraw $1, %xmm0
1276; X64-SSE2-NEXT:    retq
1277;
1278; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_1:
1279; X64-AVX:       # %bb.0:
1280; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1281; X64-AVX-NEXT:    vpsraw $1, %xmm0, %xmm0
1282; X64-AVX-NEXT:    retq
1283  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
1284  %t1 = ashr <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1285  ret <8 x i16> %t1
1286}
1287define <8 x i16> @test_128_i16_x_8_65024_mask_ashr_8(<8 x i16> %a0) {
1288; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_8:
1289; X86-SSE2:       # %bb.0:
1290; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1291; X86-SSE2-NEXT:    psraw $8, %xmm0
1292; X86-SSE2-NEXT:    retl
1293;
1294; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_8:
1295; X86-AVX:       # %bb.0:
1296; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1297; X86-AVX-NEXT:    vpsraw $8, %xmm0, %xmm0
1298; X86-AVX-NEXT:    retl
1299;
1300; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_8:
1301; X64-SSE2:       # %bb.0:
1302; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1303; X64-SSE2-NEXT:    psraw $8, %xmm0
1304; X64-SSE2-NEXT:    retq
1305;
1306; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_8:
1307; X64-AVX:       # %bb.0:
1308; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1309; X64-AVX-NEXT:    vpsraw $8, %xmm0, %xmm0
1310; X64-AVX-NEXT:    retq
1311  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
1312  %t1 = ashr <8 x i16> %t0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1313  ret <8 x i16> %t1
1314}
1315define <8 x i16> @test_128_i16_x_8_65024_mask_ashr_9(<8 x i16> %a0) {
1316; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_9:
1317; X86-SSE2:       # %bb.0:
1318; X86-SSE2-NEXT:    psraw $9, %xmm0
1319; X86-SSE2-NEXT:    retl
1320;
1321; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_9:
1322; X86-AVX:       # %bb.0:
1323; X86-AVX-NEXT:    vpsraw $9, %xmm0, %xmm0
1324; X86-AVX-NEXT:    retl
1325;
1326; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_9:
1327; X64-SSE2:       # %bb.0:
1328; X64-SSE2-NEXT:    psraw $9, %xmm0
1329; X64-SSE2-NEXT:    retq
1330;
1331; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_9:
1332; X64-AVX:       # %bb.0:
1333; X64-AVX-NEXT:    vpsraw $9, %xmm0, %xmm0
1334; X64-AVX-NEXT:    retq
1335  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
1336  %t1 = ashr <8 x i16> %t0, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
1337  ret <8 x i16> %t1
1338}
1339define <8 x i16> @test_128_i16_x_8_65024_mask_ashr_10(<8 x i16> %a0) {
1340; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_10:
1341; X86-SSE2:       # %bb.0:
1342; X86-SSE2-NEXT:    psraw $10, %xmm0
1343; X86-SSE2-NEXT:    retl
1344;
1345; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_10:
1346; X86-AVX:       # %bb.0:
1347; X86-AVX-NEXT:    vpsraw $10, %xmm0, %xmm0
1348; X86-AVX-NEXT:    retl
1349;
1350; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_10:
1351; X64-SSE2:       # %bb.0:
1352; X64-SSE2-NEXT:    psraw $10, %xmm0
1353; X64-SSE2-NEXT:    retq
1354;
1355; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_10:
1356; X64-AVX:       # %bb.0:
1357; X64-AVX-NEXT:    vpsraw $10, %xmm0, %xmm0
1358; X64-AVX-NEXT:    retq
1359  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
1360  %t1 = ashr <8 x i16> %t0, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
1361  ret <8 x i16> %t1
1362}
1363
1364; shl
1365
1366define <8 x i16> @test_128_i16_x_8_127_mask_shl_1(<8 x i16> %a0) {
1367; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_1:
1368; X86-SSE2:       # %bb.0:
1369; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1370; X86-SSE2-NEXT:    paddw %xmm0, %xmm0
1371; X86-SSE2-NEXT:    retl
1372;
1373; X86-AVX-LABEL: test_128_i16_x_8_127_mask_shl_1:
1374; X86-AVX:       # %bb.0:
1375; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1376; X86-AVX-NEXT:    vpaddw %xmm0, %xmm0, %xmm0
1377; X86-AVX-NEXT:    retl
1378;
1379; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_1:
1380; X64-SSE2:       # %bb.0:
1381; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1382; X64-SSE2-NEXT:    paddw %xmm0, %xmm0
1383; X64-SSE2-NEXT:    retq
1384;
1385; X64-AVX-LABEL: test_128_i16_x_8_127_mask_shl_1:
1386; X64-AVX:       # %bb.0:
1387; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1388; X64-AVX-NEXT:    vpaddw %xmm0, %xmm0, %xmm0
1389; X64-AVX-NEXT:    retq
1390  %t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
1391  %t1 = shl <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1392  ret <8 x i16> %t1
1393}
1394define <8 x i16> @test_128_i16_x_8_127_mask_shl_8(<8 x i16> %a0) {
1395; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_8:
1396; X86-SSE2:       # %bb.0:
1397; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1398; X86-SSE2-NEXT:    psllw $8, %xmm0
1399; X86-SSE2-NEXT:    retl
1400;
1401; X86-AVX-LABEL: test_128_i16_x_8_127_mask_shl_8:
1402; X86-AVX:       # %bb.0:
1403; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1404; X86-AVX-NEXT:    vpsllw $8, %xmm0, %xmm0
1405; X86-AVX-NEXT:    retl
1406;
1407; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_8:
1408; X64-SSE2:       # %bb.0:
1409; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1410; X64-SSE2-NEXT:    psllw $8, %xmm0
1411; X64-SSE2-NEXT:    retq
1412;
1413; X64-AVX-LABEL: test_128_i16_x_8_127_mask_shl_8:
1414; X64-AVX:       # %bb.0:
1415; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1416; X64-AVX-NEXT:    vpsllw $8, %xmm0, %xmm0
1417; X64-AVX-NEXT:    retq
1418  %t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
1419  %t1 = shl <8 x i16> %t0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
1420  ret <8 x i16> %t1
1421}
1422define <8 x i16> @test_128_i16_x_8_127_mask_shl_9(<8 x i16> %a0) {
1423; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_9:
1424; X86-SSE2:       # %bb.0:
1425; X86-SSE2-NEXT:    psllw $9, %xmm0
1426; X86-SSE2-NEXT:    retl
1427;
1428; X86-AVX-LABEL: test_128_i16_x_8_127_mask_shl_9:
1429; X86-AVX:       # %bb.0:
1430; X86-AVX-NEXT:    vpsllw $9, %xmm0, %xmm0
1431; X86-AVX-NEXT:    retl
1432;
1433; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_9:
1434; X64-SSE2:       # %bb.0:
1435; X64-SSE2-NEXT:    psllw $9, %xmm0
1436; X64-SSE2-NEXT:    retq
1437;
1438; X64-AVX-LABEL: test_128_i16_x_8_127_mask_shl_9:
1439; X64-AVX:       # %bb.0:
1440; X64-AVX-NEXT:    vpsllw $9, %xmm0, %xmm0
1441; X64-AVX-NEXT:    retq
1442  %t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
1443  %t1 = shl <8 x i16> %t0, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
1444  ret <8 x i16> %t1
1445}
1446define <8 x i16> @test_128_i16_x_8_127_mask_shl_10(<8 x i16> %a0) {
1447; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_10:
1448; X86-SSE2:       # %bb.0:
1449; X86-SSE2-NEXT:    psllw $10, %xmm0
1450; X86-SSE2-NEXT:    retl
1451;
1452; X86-AVX-LABEL: test_128_i16_x_8_127_mask_shl_10:
1453; X86-AVX:       # %bb.0:
1454; X86-AVX-NEXT:    vpsllw $10, %xmm0, %xmm0
1455; X86-AVX-NEXT:    retl
1456;
1457; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_10:
1458; X64-SSE2:       # %bb.0:
1459; X64-SSE2-NEXT:    psllw $10, %xmm0
1460; X64-SSE2-NEXT:    retq
1461;
1462; X64-AVX-LABEL: test_128_i16_x_8_127_mask_shl_10:
1463; X64-AVX:       # %bb.0:
1464; X64-AVX-NEXT:    vpsllw $10, %xmm0, %xmm0
1465; X64-AVX-NEXT:    retq
1466  %t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
1467  %t1 = shl <8 x i16> %t0, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
1468  ret <8 x i16> %t1
1469}
1470
1471define <8 x i16> @test_128_i16_x_8_2032_mask_shl_3(<8 x i16> %a0) {
1472; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_3:
1473; X86-SSE2:       # %bb.0:
1474; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
1475; X86-SSE2-NEXT:    psllw $3, %xmm0
1476; X86-SSE2-NEXT:    retl
1477;
1478; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_3:
1479; X86-AVX:       # %bb.0:
1480; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
1481; X86-AVX-NEXT:    vpsllw $3, %xmm0, %xmm0
1482; X86-AVX-NEXT:    retl
1483;
1484; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_3:
1485; X64-SSE2:       # %bb.0:
1486; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
1487; X64-SSE2-NEXT:    psllw $3, %xmm0
1488; X64-SSE2-NEXT:    retq
1489;
1490; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_3:
1491; X64-AVX:       # %bb.0:
1492; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1493; X64-AVX-NEXT:    vpsllw $3, %xmm0, %xmm0
1494; X64-AVX-NEXT:    retq
1495  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
1496  %t1 = shl <8 x i16> %t0, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
1497  ret <8 x i16> %t1
1498}
define <8 x i16> @test_128_i16_x_8_2032_mask_shl_4(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_4:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllw $4, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_4:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_4:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllw $4, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_4:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Mask 2032 = 0x7F0; per the CHECK lines above, the and is not folded into the
; shift: a pand/vpand is emitted ahead of psllw $4 on every configuration.
  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
  %t1 = shl <8 x i16> %t0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <8 x i16> %t1
}
define <8 x i16> @test_128_i16_x_8_2032_mask_shl_5(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_5:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllw $5, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_5:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsllw $5, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_5:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllw $5, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_5:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllw $5, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Mask 2032 = 0x7F0; per the CHECK lines above, the and is not folded into the
; shift: a pand/vpand is emitted ahead of psllw $5 on every configuration.
  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
  %t1 = shl <8 x i16> %t0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %t1
}
define <8 x i16> @test_128_i16_x_8_2032_mask_shl_6(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_6:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllw $6, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_6:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsllw $6, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_6:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllw $6, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_6:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllw $6, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Mask 2032 = 0x7F0; per the CHECK lines above, the and is not folded into the
; shift: a pand/vpand is emitted ahead of psllw $6 on every configuration.
  %t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
  %t1 = shl <8 x i16> %t0, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
  ret <8 x i16> %t1
}
1583
define <8 x i16> @test_128_i16_x_8_65024_mask_shl_1(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_shl_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    paddw %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_shl_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpaddw %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_shl_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    paddw %xmm0, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_shl_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddw %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Mask 65024 = 0xFE00; per the CHECK lines above the and is kept, and the
; shift-left-by-1 is lowered as paddw/vpaddw (x + x) rather than psllw $1.
  %t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
  %t1 = shl <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %t1
}
1612
1613;------------------------------------------------------------------------------;
1614; 128-bit vector; 32-bit elements = 4 elements
1615;------------------------------------------------------------------------------;
1616
1617; lshr
1618
define <4 x i32> @test_128_i32_x_4_32767_mask_lshr_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 32767 = 0x7FFF; per the CHECK lines above the and is kept ahead of
; psrld $1 (AVX2 materializes the splat mask with vpbroadcastd).
  %t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
  %t1 = lshr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %t1
}
1661
define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_7(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $7, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $7, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $7, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $7, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $7, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $7, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 8388352 = 0x7FFF00; per the CHECK lines above the and is kept ahead of
; psrld $7 (AVX2 materializes the splat mask with vpbroadcastd).
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = lshr <4 x i32> %t0, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_8(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $8, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $8, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $8, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $8, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $8, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $8, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 8388352 = 0x7FFF00; per the CHECK lines above the and is kept ahead of
; psrld $8 (AVX2 materializes the splat mask with vpbroadcastd).
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = lshr <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_9(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $9, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $9, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $9, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $9, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $9, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $9, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 8388352 = 0x7FFF00; per the CHECK lines above the and is kept ahead of
; psrld $9 (AVX2 materializes the splat mask with vpbroadcastd).
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = lshr <4 x i32> %t0, <i32 9, i32 9, i32 9, i32 9>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_10(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $10, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $10, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $10, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $10, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $10, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $10, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 8388352 = 0x7FFF00; per the CHECK lines above the and is kept ahead of
; psrld $10 (AVX2 materializes the splat mask with vpbroadcastd).
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = lshr <4 x i32> %t0, <i32 10, i32 10, i32 10, i32 10>
  ret <4 x i32> %t1
}
1830
define <4 x i32> @test_128_i32_x_4_4294836224_mask_lshr_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 4294836224 = 0xFFFE0000 (bits 17-31); shifting right by 1 still leaves
; masked bits live, so per the CHECK lines the and is kept ahead of psrld $1.
  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
  %t1 = lshr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_4294836224_mask_lshr_16(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $16, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 4294836224 = 0xFFFE0000 (bits 17-31); shifting right by 16 still leaves
; masked bits live, so per the CHECK lines the and is kept ahead of psrld $16.
  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
  %t1 = lshr <4 x i32> %t0, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_4294836224_mask_lshr_17(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_17:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrld $17, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i32_x_4_4294836224_mask_lshr_17:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrld $17, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_17:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrld $17, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i32_x_4_4294836224_mask_lshr_17:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrld $17, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Mask 0xFFFE0000 covers bits 17-31, and lshr by 17 discards every bit the
; mask could clear, so per the CHECK lines the and folds away: only psrld $17.
  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
  %t1 = lshr <4 x i32> %t0, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_4294836224_mask_lshr_18(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_18:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrld $18, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i32_x_4_4294836224_mask_lshr_18:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrld $18, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_18:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrld $18, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i32_x_4_4294836224_mask_lshr_18:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrld $18, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Mask 0xFFFE0000 covers bits 17-31, and lshr by 18 discards every bit the
; mask could clear, so per the CHECK lines the and folds away: only psrld $18.
  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
  %t1 = lshr <4 x i32> %t0, <i32 18, i32 18, i32 18, i32 18>
  ret <4 x i32> %t1
}
1963
1964; ashr
1965
define <4 x i32> @test_128_i32_x_4_32767_mask_ashr_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 32767 = 0x7FFF clears the sign bit, so per the CHECK lines this ashr is
; lowered with the logical psrld $1; the and itself is kept.
  %t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
  %t1 = ashr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %t1
}
2008
define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_7(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $7, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $7, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $7, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $7, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $7, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $7, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 8388352 = 0x7FFF00 clears the sign bit, so per the CHECK lines this
; ashr is lowered with the logical psrld $7; the and itself is kept.
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = ashr <4 x i32> %t0, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_8(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $8, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $8, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $8, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $8, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $8, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $8, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 8388352 = 0x7FFF00 clears the sign bit, so per the CHECK lines this
; ashr is lowered with the logical psrld $8; the and itself is kept.
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = ashr <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_9(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $9, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $9, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $9, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $9, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $9, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $9, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 8388352 = 0x7FFF00 clears the sign bit, so per the CHECK lines this
; ashr is lowered with the logical psrld $9; the and itself is kept.
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = ashr <4 x i32> %t0, <i32 9, i32 9, i32 9, i32 9>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_10(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrld $10, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrld $10, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrld $10, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrld $10, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrld $10, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrld $10, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 8388352 = 0x7FFF00 clears the sign bit, so per the CHECK lines this
; ashr is lowered with the logical psrld $10; the and itself is kept.
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = ashr <4 x i32> %t0, <i32 10, i32 10, i32 10, i32 10>
  ret <4 x i32> %t1
}
2177
define <4 x i32> @test_128_i32_x_4_4294836224_mask_ashr_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrad $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrad $1, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrad $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 4294836224 = 0xFFFE0000 keeps the sign bit, so per the CHECK lines the
; arithmetic psrad $1 is used and the and is kept.
  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
  %t1 = ashr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_4294836224_mask_ashr_16(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrad $16, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrad $16, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrad $16, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $16, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
; Mask 4294836224 = 0xFFFE0000 keeps the sign bit, so per the CHECK lines the
; arithmetic psrad $16 is used and the and is kept.
  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
  %t1 = ashr <4 x i32> %t0, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_4294836224_mask_ashr_17(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_17:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrad $17, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i32_x_4_4294836224_mask_ashr_17:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrad $17, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_17:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrad $17, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i32_x_4_4294836224_mask_ashr_17:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrad $17, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Mask 0xFFFE0000 covers bits 17-31, and ashr by 17 discards every bit the
; mask could clear, so per the CHECK lines the and folds away: only psrad $17.
  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
  %t1 = ashr <4 x i32> %t0, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %t1
}
2286define <4 x i32> @test_128_i32_x_4_4294836224_mask_ashr_18(<4 x i32> %a0) {
2287; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_18:
2288; X86-SSE2:       # %bb.0:
2289; X86-SSE2-NEXT:    psrad $18, %xmm0
2290; X86-SSE2-NEXT:    retl
2291;
2292; X86-AVX-LABEL: test_128_i32_x_4_4294836224_mask_ashr_18:
2293; X86-AVX:       # %bb.0:
2294; X86-AVX-NEXT:    vpsrad $18, %xmm0, %xmm0
2295; X86-AVX-NEXT:    retl
2296;
2297; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_18:
2298; X64-SSE2:       # %bb.0:
2299; X64-SSE2-NEXT:    psrad $18, %xmm0
2300; X64-SSE2-NEXT:    retq
2301;
2302; X64-AVX-LABEL: test_128_i32_x_4_4294836224_mask_ashr_18:
2303; X64-AVX:       # %bb.0:
2304; X64-AVX-NEXT:    vpsrad $18, %xmm0, %xmm0
2305; X64-AVX-NEXT:    retq
2306  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
2307  %t1 = ashr <4 x i32> %t0, <i32 18, i32 18, i32 18, i32 18>
2308  ret <4 x i32> %t1
2309}
2310
2311; shl
2312
; NOTE(review): mask 32767 = 0x7FFF keeps bits 0..14 of each i32 lane. A shl
; by 1 is emitted as padd/vpadd (x+x) rather than a shift. For shl by 17 or
; 18 every bit the mask clears (15..31) is shifted past bit 31 anyway, so the
; 'pand' folds away; for shl by 1 and 16 the cleared bits still reach the
; result and the 'pand' stays (AVX2 materializes the splat via vpbroadcastd).
define <4 x i32> @test_128_i32_x_4_32767_mask_shl_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    paddd %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    paddd %xmm0, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
  %t1 = shl <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_32767_mask_shl_16(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    pslld $16, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpslld $16, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpslld $16, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    pslld $16, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpslld $16, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpslld $16, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
  %t1 = shl <4 x i32> %t0, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_32767_mask_shl_17(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_17:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pslld $17, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i32_x_4_32767_mask_shl_17:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpslld $17, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_17:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pslld $17, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i32_x_4_32767_mask_shl_17:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpslld $17, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
  %t1 = shl <4 x i32> %t0, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_32767_mask_shl_18(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_18:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pslld $18, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i32_x_4_32767_mask_shl_18:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpslld $18, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_18:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pslld $18, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i32_x_4_32767_mask_shl_18:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpslld $18, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
  %t1 = shl <4 x i32> %t0, <i32 18, i32 18, i32 18, i32 18>
  ret <4 x i32> %t1
}
2445
; NOTE(review): mask 8388352 = 0x7FFF00 keeps bits 8..22 of each i32 lane.
; For shl by 7..10 bits cleared by the mask still land inside the 32-bit
; result, so every configuration keeps the 'pand' before the shift; AVX2
; materializes the splat constant with vpbroadcastd.
define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_7(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    pslld $7, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpslld $7, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpslld $7, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    pslld $7, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpslld $7, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpslld $7, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = shl <4 x i32> %t0, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_8(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    pslld $8, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpslld $8, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpslld $8, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    pslld $8, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpslld $8, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpslld $8, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = shl <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_9(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    pslld $9, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpslld $9, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpslld $9, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    pslld $9, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpslld $9, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpslld $9, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = shl <4 x i32> %t0, <i32 9, i32 9, i32 9, i32 9>
  ret <4 x i32> %t1
}
define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_10(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    pslld $10, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpslld $10, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpslld $10, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    pslld $10, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpslld $10, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpslld $10, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
  %t1 = shl <4 x i32> %t0, <i32 10, i32 10, i32 10, i32 10>
  ret <4 x i32> %t1
}
2614
; NOTE(review): mask 4294836224 = 0xFFFE0000 (bits 17..31). A shl by 1 still
; needs the 'pand' (cleared bit 16 would otherwise shift into bit 17), and
; the shift itself is emitted as padd/vpadd (x+x).
define <4 x i32> @test_128_i32_x_4_4294836224_mask_shl_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    paddd %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    paddd %xmm0, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
  %t1 = shl <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %t1
}
2657
2658;------------------------------------------------------------------------------;
2659; 128-bit vector; 64-bit elements = 2 elements
2660;------------------------------------------------------------------------------;
2661
2662; lshr
2663
; NOTE(review): mask 2147483647 = 0x7FFFFFFF keeps the low 31 bits of each
; i64 lane; lshr by 1 still needs the 'pand' (cleared bit 31 would shift into
; bit 30 of the result).
define <2 x i64> @test_128_i64_x_2_2147483647_mask_lshr_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
  %t1 = lshr <2 x i64> %t0, <i64 1, i64 1>
  ret <2 x i64> %t1
}
2692
; NOTE(review): mask 140737488289792 = 0x7FFFFFFF0000 keeps bits 16..46 of
; each i64 lane. For lshr by 15..18 the masked-off high bits (47..63) would
; still land inside the result, so the 'pand' is kept in every configuration.
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_15(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $15, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $15, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $15, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $15, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = lshr <2 x i64> %t0, <i64 15, i64 15>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_16(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $16, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $16, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = lshr <2 x i64> %t0, <i64 16, i64 16>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_17(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $17, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $17, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $17, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $17, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = lshr <2 x i64> %t0, <i64 17, i64 17>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_18(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $18, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $18, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $18, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $18, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = lshr <2 x i64> %t0, <i64 18, i64 18>
  ret <2 x i64> %t1
}
2805
; NOTE(review): mask 18446744065119617024 = 0xFFFFFFFE00000000 keeps bits
; 33..63 of each i64 lane. For lshr >= 33 every bit the mask clears is
; shifted out, so the 'pand' folds away; for lshr by 1 and 32 it remains.
; NOTE(review): in the lshr_32 X86-AVX2 check the constant is broadcast as
; four i32 lanes of 4294967294 (0xFFFFFFFE) — the low i32 half of each i64
; differs from the exact mask, presumably allowed because those bits are
; shifted out; regenerate with update_llc_test_checks.py rather than editing
; by hand.
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_lshr_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = lshr <2 x i64> %t0, <i64 1, i64 1>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_lshr_32(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $32, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $32, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = lshr <2 x i64> %t0, <i64 32, i64 32>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_lshr_33(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_33:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrlq $33, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_33:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_33:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrlq $33, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_33:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = lshr <2 x i64> %t0, <i64 33, i64 33>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_lshr_34(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_34:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psrlq $34, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_34:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrlq $34, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_34:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psrlq $34, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_34:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlq $34, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = lshr <2 x i64> %t0, <i64 34, i64 34>
  ret <2 x i64> %t1
}
2917
2918; ashr
2919
; NOTE(review): mask 0x7FFFFFFF clears bit 63, so the sign bit of each i64
; lane is known zero and the ashr lowers to a logical psrlq (SSE2 has no
; 64-bit arithmetic shift anyway).
define <2 x i64> @test_128_i64_x_2_2147483647_mask_ashr_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
  %t1 = ashr <2 x i64> %t0, <i64 1, i64 1>
  ret <2 x i64> %t1
}
2948
; NOTE(review): mask 140737488289792 = 0x7FFFFFFF0000 (bits 16..46) clears
; bit 63, so the sign bit is known zero and each ashr by 15..18 lowers to a
; logical psrlq/vpsrlq; the 'pand' is kept because cleared high bits would
; otherwise shift into the result.
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_15(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $15, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $15, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $15, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $15, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = ashr <2 x i64> %t0, <i64 15, i64 15>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_16(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $16, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $16, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = ashr <2 x i64> %t0, <i64 16, i64 16>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_17(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $17, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $17, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $17, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $17, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = ashr <2 x i64> %t0, <i64 17, i64 17>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_18(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrlq $18, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlq $18, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrlq $18, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlq $18, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = ashr <2 x i64> %t0, <i64 18, i64 18>
  ret <2 x i64> %t1
}
3061
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psrad $1, %xmm0
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrad $1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psrad $1, %xmm0
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; X64-AVX2-NEXT:    retq
; mask = 0xFFFFFFFE00000000 (bits 33..63): the expected lowering emulates the
; i64 ashr with a 32-bit psrad plus a zeroing blend (no 64-bit psraq pre-AVX512).
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = ashr <2 x i64> %t0, <i64 1, i64 1>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_32(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    psrad $31, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; X64-SSE2-NEXT:    psrad $31, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; X64-AVX2-NEXT:    retq
; mask = 0xFFFFFFFE00000000 (bits 33..63): ashr by exactly 32 should become a
; shuffle moving the high dwords down, blended with their psrad $31 sign fill.
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = ashr <2 x i64> %t0, <i64 32, i64 32>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_33(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_33:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrad $31, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    psrad $1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_33:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_33:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpsrad $1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_33:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrad $31, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X64-SSE2-NEXT:    psrad $1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_33:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_33:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpsrad $1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; X64-AVX2-NEXT:    retq
; mask = 0xFFFFFFFE00000000 covers exactly the bits that survive an ashr by 33,
; so the pand should fold away; the shift is emulated via psrad $1 / psrad $31.
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = ashr <2 x i64> %t0, <i64 33, i64 33>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_34(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_34:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrad $31, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    psrad $2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_34:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_34:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpsrad $2, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_34:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrad $31, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X64-SSE2-NEXT:    psrad $2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE2-NEXT:    retq
;
; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_34:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_34:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpsrad $2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; X64-AVX2-NEXT:    retq
; Like the ashr_33 case: every bit the 0xFFFFFFFE00000000 mask clears is shifted
; out by the ashr of 34 anyway, so the pand should fold away.
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = ashr <2 x i64> %t0, <i64 34, i64 34>
  ret <2 x i64> %t1
}

; shl

define <2 x i64> @test_128_i64_x_2_2147483647_mask_shl_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    paddq %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    paddq %xmm0, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; mask = 0x7FFFFFFF (bits 0..30); the shl by 1 should lower to paddq (x + x).
  %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
  %t1 = shl <2 x i64> %t0, <i64 1, i64 1>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_2147483647_mask_shl_32(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllq $32, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; X86-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllq $32, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; mask = 0x7FFFFFFF; pand then psllq $32. On x86-32 AVX2 the splat mask is
; materialized with vpbroadcastd instead of a constant-pool load.
  %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
  %t1 = shl <2 x i64> %t0, <i64 32, i64 32>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_2147483647_mask_shl_33(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_33:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psllq $33, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_33:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsllq $33, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_33:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psllq $33, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_33:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllq $33, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Every bit cleared by the 0x7FFFFFFF mask is shifted out by the shl of 33
; anyway, so the pand should fold away and only psllq $33 remain.
  %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
  %t1 = shl <2 x i64> %t0, <i64 33, i64 33>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_2147483647_mask_shl_34(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_34:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    psllq $34, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_34:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsllq $34, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_34:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    psllq $34, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_34:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllq $34, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; Like the shl_33 case: the 0x7FFFFFFF mask is redundant under a shl of 34,
; so only the psllq should remain.
  %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
  %t1 = shl <2 x i64> %t0, <i64 34, i64 34>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_15(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllq $15, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsllq $15, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllq $15, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $15, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; mask = 0x00007FFFFFFF0000 (bits 16..46); the pand must stay (it clears low
; bits that would survive the shl), followed by psllq $15.
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = shl <2 x i64> %t0, <i64 15, i64 15>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_16(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllq $16, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllq $16, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; mask = 0x00007FFFFFFF0000 (bits 16..46); pand retained, then psllq $16.
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = shl <2 x i64> %t0, <i64 16, i64 16>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_17(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllq $17, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsllq $17, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllq $17, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $17, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; mask = 0x00007FFFFFFF0000 (bits 16..46); pand retained, then psllq $17.
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = shl <2 x i64> %t0, <i64 17, i64 17>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_18(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    psllq $18, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsllq $18, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    psllq $18, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $18, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; mask = 0x00007FFFFFFF0000 (bits 16..46); pand retained, then psllq $18.
  %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
  %t1 = shl <2 x i64> %t0, <i64 18, i64 18>
  ret <2 x i64> %t1
}
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_shl_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    paddq %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT:    paddq %xmm0, %xmm0
; X64-SSE2-NEXT:    retq
;
; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
; mask = 0xFFFFFFFE00000000 (bits 33..63); shl by 1 should lower to paddq (x + x)
; after the mask.
  %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
  %t1 = shl <2 x i64> %t0, <i64 1, i64 1>
  ret <2 x i64> %t1
}