; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi2-builtins.c

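; A minimal regeneration sketch, assuming an LLVM checkout with a built llc
; (the build path and test path below are illustrative placeholders, not taken
; from this file):
;
;   llvm/utils/update_llc_test_checks.py --llc-binary <build-dir>/bin/llc <path-to-this-test>.ll
;
; The script reads the RUN lines above and rewrites the CHECK/X86/X64 assertion
; comments in place, which is why they should not be edited by hand.
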
7define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
8; X86-LABEL: test_mm_mask_compress_epi16:
9; X86:       # %bb.0: # %entry
10; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
11; X86-NEXT:    kmovd %eax, %k1
12; X86-NEXT:    vpcompressw %xmm1, %xmm0 {%k1}
13; X86-NEXT:    retl
14;
15; X64-LABEL: test_mm_mask_compress_epi16:
16; X64:       # %bb.0: # %entry
17; X64-NEXT:    kmovd %edi, %k1
18; X64-NEXT:    vpcompressw %xmm1, %xmm0 {%k1}
19; X64-NEXT:    retq
20entry:
21  %0 = bitcast <2 x i64> %__D to <8 x i16>
22  %1 = bitcast <2 x i64> %__S to <8 x i16>
23  %2 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
24  %3 = bitcast <8 x i16> %2 to <2 x i64>
25  ret <2 x i64> %3
26}
27
28define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) {
29; X86-LABEL: test_mm_maskz_compress_epi16:
30; X86:       # %bb.0: # %entry
31; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
32; X86-NEXT:    kmovd %eax, %k1
33; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z}
34; X86-NEXT:    retl
35;
36; X64-LABEL: test_mm_maskz_compress_epi16:
37; X64:       # %bb.0: # %entry
38; X64-NEXT:    kmovd %edi, %k1
39; X64-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z}
40; X64-NEXT:    retq
41entry:
42  %0 = bitcast <2 x i64> %__D to <8 x i16>
43  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
44  %2 = bitcast <8 x i16> %1 to <2 x i64>
45  ret <2 x i64> %2
46}
47
48define <2 x i64> @test_mm_mask_compress_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
49; X86-LABEL: test_mm_mask_compress_epi8:
50; X86:       # %bb.0: # %entry
51; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
52; X86-NEXT:    vpcompressb %xmm1, %xmm0 {%k1}
53; X86-NEXT:    retl
54;
55; X64-LABEL: test_mm_mask_compress_epi8:
56; X64:       # %bb.0: # %entry
57; X64-NEXT:    kmovd %edi, %k1
58; X64-NEXT:    vpcompressb %xmm1, %xmm0 {%k1}
59; X64-NEXT:    retq
60entry:
61  %0 = bitcast <2 x i64> %__D to <16 x i8>
62  %1 = bitcast <2 x i64> %__S to <16 x i8>
63  %2 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
64  %3 = bitcast <16 x i8> %2 to <2 x i64>
65  ret <2 x i64> %3
66}
67
68define <2 x i64> @test_mm_maskz_compress_epi8(i16 zeroext %__U, <2 x i64> %__D) {
69; X86-LABEL: test_mm_maskz_compress_epi8:
70; X86:       # %bb.0: # %entry
71; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
72; X86-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z}
73; X86-NEXT:    retl
74;
75; X64-LABEL: test_mm_maskz_compress_epi8:
76; X64:       # %bb.0: # %entry
77; X64-NEXT:    kmovd %edi, %k1
78; X64-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z}
79; X64-NEXT:    retq
80entry:
81  %0 = bitcast <2 x i64> %__D to <16 x i8>
82  %1 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
83  %2 = bitcast <16 x i8> %1 to <2 x i64>
84  ret <2 x i64> %2
85}
86
87define void @test_mm_mask_compressstoreu_epi16(i8* %__P, i8 zeroext %__U, <2 x i64> %__D) {
88; X86-LABEL: test_mm_mask_compressstoreu_epi16:
89; X86:       # %bb.0: # %entry
90; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
91; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
92; X86-NEXT:    kmovd %eax, %k1
93; X86-NEXT:    vpcompressw %xmm0, (%ecx) {%k1}
94; X86-NEXT:    retl
95;
96; X64-LABEL: test_mm_mask_compressstoreu_epi16:
97; X64:       # %bb.0: # %entry
98; X64-NEXT:    kmovd %esi, %k1
99; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1}
100; X64-NEXT:    retq
101entry:
102  %0 = bitcast <2 x i64> %__D to <8 x i16>
103  %1 = bitcast i8* %__P to i16*
104  %2 = bitcast i8 %__U to <8 x i1>
105  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %0, i16* %1, <8 x i1> %2)
106  ret void
107}
108
109define void @test_mm_mask_compressstoreu_epi8(i8* %__P, i16 zeroext %__U, <2 x i64> %__D) {
110; X86-LABEL: test_mm_mask_compressstoreu_epi8:
111; X86:       # %bb.0: # %entry
112; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
113; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
114; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1}
115; X86-NEXT:    retl
116;
117; X64-LABEL: test_mm_mask_compressstoreu_epi8:
118; X64:       # %bb.0: # %entry
119; X64-NEXT:    kmovd %esi, %k1
120; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1}
121; X64-NEXT:    retq
122entry:
123  %0 = bitcast <2 x i64> %__D to <16 x i8>
124  %1 = bitcast i16 %__U to <16 x i1>
125  tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %0, i8* %__P, <16 x i1> %1)
126  ret void
127}
128
129define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
130; X86-LABEL: test_mm_mask_expand_epi16:
131; X86:       # %bb.0: # %entry
132; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
133; X86-NEXT:    kmovd %eax, %k1
134; X86-NEXT:    vpexpandw %xmm1, %xmm0 {%k1}
135; X86-NEXT:    retl
136;
137; X64-LABEL: test_mm_mask_expand_epi16:
138; X64:       # %bb.0: # %entry
139; X64-NEXT:    kmovd %edi, %k1
140; X64-NEXT:    vpexpandw %xmm1, %xmm0 {%k1}
141; X64-NEXT:    retq
142entry:
143  %0 = bitcast <2 x i64> %__D to <8 x i16>
144  %1 = bitcast <2 x i64> %__S to <8 x i16>
145  %2 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
146  %3 = bitcast <8 x i16> %2 to <2 x i64>
147  ret <2 x i64> %3
148}
149
150define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) {
151; X86-LABEL: test_mm_maskz_expand_epi16:
152; X86:       # %bb.0: # %entry
153; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
154; X86-NEXT:    kmovd %eax, %k1
155; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z}
156; X86-NEXT:    retl
157;
158; X64-LABEL: test_mm_maskz_expand_epi16:
159; X64:       # %bb.0: # %entry
160; X64-NEXT:    kmovd %edi, %k1
161; X64-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z}
162; X64-NEXT:    retq
163entry:
164  %0 = bitcast <2 x i64> %__D to <8 x i16>
165  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
166  %2 = bitcast <8 x i16> %1 to <2 x i64>
167  ret <2 x i64> %2
168}
169
170define <2 x i64> @test_mm_mask_expand_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
171; X86-LABEL: test_mm_mask_expand_epi8:
172; X86:       # %bb.0: # %entry
173; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
174; X86-NEXT:    vpexpandb %xmm1, %xmm0 {%k1}
175; X86-NEXT:    retl
176;
177; X64-LABEL: test_mm_mask_expand_epi8:
178; X64:       # %bb.0: # %entry
179; X64-NEXT:    kmovd %edi, %k1
180; X64-NEXT:    vpexpandb %xmm1, %xmm0 {%k1}
181; X64-NEXT:    retq
182entry:
183  %0 = bitcast <2 x i64> %__D to <16 x i8>
184  %1 = bitcast <2 x i64> %__S to <16 x i8>
185  %2 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
186  %3 = bitcast <16 x i8> %2 to <2 x i64>
187  ret <2 x i64> %3
188}
189
190define <2 x i64> @test_mm_maskz_expand_epi8(i16 zeroext %__U, <2 x i64> %__D) {
191; X86-LABEL: test_mm_maskz_expand_epi8:
192; X86:       # %bb.0: # %entry
193; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
194; X86-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z}
195; X86-NEXT:    retl
196;
197; X64-LABEL: test_mm_maskz_expand_epi8:
198; X64:       # %bb.0: # %entry
199; X64-NEXT:    kmovd %edi, %k1
200; X64-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z}
201; X64-NEXT:    retq
202entry:
203  %0 = bitcast <2 x i64> %__D to <16 x i8>
204  %1 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
205  %2 = bitcast <16 x i8> %1 to <2 x i64>
206  ret <2 x i64> %2
207}
208
209define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U, i8* readonly %__P) {
210; X86-LABEL: test_mm_mask_expandloadu_epi16:
211; X86:       # %bb.0: # %entry
212; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
213; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
214; X86-NEXT:    kmovd %ecx, %k1
215; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1}
216; X86-NEXT:    retl
217;
218; X64-LABEL: test_mm_mask_expandloadu_epi16:
219; X64:       # %bb.0: # %entry
220; X64-NEXT:    kmovd %edi, %k1
221; X64-NEXT:    vpexpandw (%rsi), %xmm0 {%k1}
222; X64-NEXT:    retq
223entry:
224  %0 = bitcast <2 x i64> %__S to <8 x i16>
225  %1 = bitcast i8* %__P to i16*
226  %2 = bitcast i8 %__U to <8 x i1>
227  %3 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> %0)
228  %4 = bitcast <8 x i16> %3 to <2 x i64>
229  ret <2 x i64> %4
230}
231
232define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, i8* readonly %__P) {
233; X86-LABEL: test_mm_maskz_expandloadu_epi16:
234; X86:       # %bb.0: # %entry
235; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
236; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
237; X86-NEXT:    kmovd %ecx, %k1
238; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z}
239; X86-NEXT:    retl
240;
241; X64-LABEL: test_mm_maskz_expandloadu_epi16:
242; X64:       # %bb.0: # %entry
243; X64-NEXT:    kmovd %edi, %k1
244; X64-NEXT:    vpexpandw (%rsi), %xmm0 {%k1} {z}
245; X64-NEXT:    retq
246entry:
247  %0 = bitcast i8* %__P to i16*
248  %1 = bitcast i8 %__U to <8 x i1>
249  %2 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %0, <8 x i1> %1, <8 x i16> zeroinitializer)
250  %3 = bitcast <8 x i16> %2 to <2 x i64>
251  ret <2 x i64> %3
252}
253
254define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) {
255; X86-LABEL: test_mm_mask_expandloadu_epi8:
256; X86:       # %bb.0: # %entry
257; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
258; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
259; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1}
260; X86-NEXT:    retl
261;
262; X64-LABEL: test_mm_mask_expandloadu_epi8:
263; X64:       # %bb.0: # %entry
264; X64-NEXT:    kmovd %edi, %k1
265; X64-NEXT:    vpexpandb (%rsi), %xmm0 {%k1}
266; X64-NEXT:    retq
267entry:
268  %0 = bitcast <2 x i64> %__S to <16 x i8>
269  %1 = bitcast i16 %__U to <16 x i1>
270  %2 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %1, <16 x i8> %0)
271  %3 = bitcast <16 x i8> %2 to <2 x i64>
272  ret <2 x i64> %3
273}
274
275define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, i8* readonly %__P) {
276; X86-LABEL: test_mm_maskz_expandloadu_epi8:
277; X86:       # %bb.0: # %entry
278; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
279; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
280; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z}
281; X86-NEXT:    retl
282;
283; X64-LABEL: test_mm_maskz_expandloadu_epi8:
284; X64:       # %bb.0: # %entry
285; X64-NEXT:    kmovd %edi, %k1
286; X64-NEXT:    vpexpandb (%rsi), %xmm0 {%k1} {z}
287; X64-NEXT:    retq
288entry:
289  %0 = bitcast i16 %__U to <16 x i1>
290  %1 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %0, <16 x i8> zeroinitializer)
291  %2 = bitcast <16 x i8> %1 to <2 x i64>
292  ret <2 x i64> %2
293}
294
295define <4 x i64> @test_mm256_mask_compress_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
296; X86-LABEL: test_mm256_mask_compress_epi16:
297; X86:       # %bb.0: # %entry
298; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
299; X86-NEXT:    vpcompressw %ymm1, %ymm0 {%k1}
300; X86-NEXT:    retl
301;
302; X64-LABEL: test_mm256_mask_compress_epi16:
303; X64:       # %bb.0: # %entry
304; X64-NEXT:    kmovd %edi, %k1
305; X64-NEXT:    vpcompressw %ymm1, %ymm0 {%k1}
306; X64-NEXT:    retq
307entry:
308  %0 = bitcast <4 x i64> %__D to <16 x i16>
309  %1 = bitcast <4 x i64> %__S to <16 x i16>
310  %2 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
311  %3 = bitcast <16 x i16> %2 to <4 x i64>
312  ret <4 x i64> %3
313}
314
315define <4 x i64> @test_mm256_maskz_compress_epi16(i16 zeroext %__U, <4 x i64> %__D) {
316; X86-LABEL: test_mm256_maskz_compress_epi16:
317; X86:       # %bb.0: # %entry
318; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
319; X86-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z}
320; X86-NEXT:    retl
321;
322; X64-LABEL: test_mm256_maskz_compress_epi16:
323; X64:       # %bb.0: # %entry
324; X64-NEXT:    kmovd %edi, %k1
325; X64-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z}
326; X64-NEXT:    retq
327entry:
328  %0 = bitcast <4 x i64> %__D to <16 x i16>
329  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
330  %2 = bitcast <16 x i16> %1 to <4 x i64>
331  ret <4 x i64> %2
332}
333
334define <4 x i64> @test_mm256_mask_compress_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
335; X86-LABEL: test_mm256_mask_compress_epi8:
336; X86:       # %bb.0: # %entry
337; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
338; X86-NEXT:    vpcompressb %ymm1, %ymm0 {%k1}
339; X86-NEXT:    retl
340;
341; X64-LABEL: test_mm256_mask_compress_epi8:
342; X64:       # %bb.0: # %entry
343; X64-NEXT:    kmovd %edi, %k1
344; X64-NEXT:    vpcompressb %ymm1, %ymm0 {%k1}
345; X64-NEXT:    retq
346entry:
347  %0 = bitcast <4 x i64> %__D to <32 x i8>
348  %1 = bitcast <4 x i64> %__S to <32 x i8>
349  %2 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
350  %3 = bitcast <32 x i8> %2 to <4 x i64>
351  ret <4 x i64> %3
352}
353
354define <4 x i64> @test_mm256_maskz_compress_epi8(i32 %__U, <4 x i64> %__D) {
355; X86-LABEL: test_mm256_maskz_compress_epi8:
356; X86:       # %bb.0: # %entry
357; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
358; X86-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z}
359; X86-NEXT:    retl
360;
361; X64-LABEL: test_mm256_maskz_compress_epi8:
362; X64:       # %bb.0: # %entry
363; X64-NEXT:    kmovd %edi, %k1
364; X64-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z}
365; X64-NEXT:    retq
366entry:
367  %0 = bitcast <4 x i64> %__D to <32 x i8>
368  %1 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
369  %2 = bitcast <32 x i8> %1 to <4 x i64>
370  ret <4 x i64> %2
371}
372
373define void @test_mm256_mask_compressstoreu_epi16(i8* %__P, i16 zeroext %__U, <4 x i64> %__D) {
374; X86-LABEL: test_mm256_mask_compressstoreu_epi16:
375; X86:       # %bb.0: # %entry
376; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
377; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
378; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1}
379; X86-NEXT:    vzeroupper
380; X86-NEXT:    retl
381;
382; X64-LABEL: test_mm256_mask_compressstoreu_epi16:
383; X64:       # %bb.0: # %entry
384; X64-NEXT:    kmovd %esi, %k1
385; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1}
386; X64-NEXT:    vzeroupper
387; X64-NEXT:    retq
388entry:
389  %0 = bitcast <4 x i64> %__D to <16 x i16>
390  %1 = bitcast i8* %__P to i16*
391  %2 = bitcast i16 %__U to <16 x i1>
392  tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %0, i16* %1, <16 x i1> %2)
393  ret void
394}
395
396define void @test_mm256_mask_compressstoreu_epi8(i8* %__P, i32 %__U, <4 x i64> %__D) {
397; X86-LABEL: test_mm256_mask_compressstoreu_epi8:
398; X86:       # %bb.0: # %entry
399; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
400; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
401; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1}
402; X86-NEXT:    vzeroupper
403; X86-NEXT:    retl
404;
405; X64-LABEL: test_mm256_mask_compressstoreu_epi8:
406; X64:       # %bb.0: # %entry
407; X64-NEXT:    kmovd %esi, %k1
408; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1}
409; X64-NEXT:    vzeroupper
410; X64-NEXT:    retq
411entry:
412  %0 = bitcast <4 x i64> %__D to <32 x i8>
413  %1 = bitcast i32 %__U to <32 x i1>
414  tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %0, i8* %__P, <32 x i1> %1)
415  ret void
416}
417
418define <4 x i64> @test_mm256_mask_expand_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
419; X86-LABEL: test_mm256_mask_expand_epi16:
420; X86:       # %bb.0: # %entry
421; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
422; X86-NEXT:    vpexpandw %ymm1, %ymm0 {%k1}
423; X86-NEXT:    retl
424;
425; X64-LABEL: test_mm256_mask_expand_epi16:
426; X64:       # %bb.0: # %entry
427; X64-NEXT:    kmovd %edi, %k1
428; X64-NEXT:    vpexpandw %ymm1, %ymm0 {%k1}
429; X64-NEXT:    retq
430entry:
431  %0 = bitcast <4 x i64> %__D to <16 x i16>
432  %1 = bitcast <4 x i64> %__S to <16 x i16>
433  %2 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
434  %3 = bitcast <16 x i16> %2 to <4 x i64>
435  ret <4 x i64> %3
436}
437
438define <4 x i64> @test_mm256_maskz_expand_epi16(i16 zeroext %__U, <4 x i64> %__D) {
439; X86-LABEL: test_mm256_maskz_expand_epi16:
440; X86:       # %bb.0: # %entry
441; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
442; X86-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z}
443; X86-NEXT:    retl
444;
445; X64-LABEL: test_mm256_maskz_expand_epi16:
446; X64:       # %bb.0: # %entry
447; X64-NEXT:    kmovd %edi, %k1
448; X64-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z}
449; X64-NEXT:    retq
450entry:
451  %0 = bitcast <4 x i64> %__D to <16 x i16>
452  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
453  %2 = bitcast <16 x i16> %1 to <4 x i64>
454  ret <4 x i64> %2
455}
456
457define <4 x i64> @test_mm256_mask_expand_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
458; X86-LABEL: test_mm256_mask_expand_epi8:
459; X86:       # %bb.0: # %entry
460; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
461; X86-NEXT:    vpexpandb %ymm1, %ymm0 {%k1}
462; X86-NEXT:    retl
463;
464; X64-LABEL: test_mm256_mask_expand_epi8:
465; X64:       # %bb.0: # %entry
466; X64-NEXT:    kmovd %edi, %k1
467; X64-NEXT:    vpexpandb %ymm1, %ymm0 {%k1}
468; X64-NEXT:    retq
469entry:
470  %0 = bitcast <4 x i64> %__D to <32 x i8>
471  %1 = bitcast <4 x i64> %__S to <32 x i8>
472  %2 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
473  %3 = bitcast <32 x i8> %2 to <4 x i64>
474  ret <4 x i64> %3
475}
476
477define <4 x i64> @test_mm256_maskz_expand_epi8(i32 %__U, <4 x i64> %__D) {
478; X86-LABEL: test_mm256_maskz_expand_epi8:
479; X86:       # %bb.0: # %entry
480; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
481; X86-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z}
482; X86-NEXT:    retl
483;
484; X64-LABEL: test_mm256_maskz_expand_epi8:
485; X64:       # %bb.0: # %entry
486; X64-NEXT:    kmovd %edi, %k1
487; X64-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z}
488; X64-NEXT:    retq
489entry:
490  %0 = bitcast <4 x i64> %__D to <32 x i8>
491  %1 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
492  %2 = bitcast <32 x i8> %1 to <4 x i64>
493  ret <4 x i64> %2
494}
495
496define <4 x i64> @test_mm256_mask_expandloadu_epi16(<4 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) {
497; X86-LABEL: test_mm256_mask_expandloadu_epi16:
498; X86:       # %bb.0: # %entry
499; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
500; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
501; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1}
502; X86-NEXT:    retl
503;
504; X64-LABEL: test_mm256_mask_expandloadu_epi16:
505; X64:       # %bb.0: # %entry
506; X64-NEXT:    kmovd %edi, %k1
507; X64-NEXT:    vpexpandw (%rsi), %ymm0 {%k1}
508; X64-NEXT:    retq
509entry:
510  %0 = bitcast <4 x i64> %__S to <16 x i16>
511  %1 = bitcast i8* %__P to i16*
512  %2 = bitcast i16 %__U to <16 x i1>
513  %3 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> %0)
514  %4 = bitcast <16 x i16> %3 to <4 x i64>
515  ret <4 x i64> %4
516}
517
518define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, i8* readonly %__P) {
519; X86-LABEL: test_mm256_maskz_expandloadu_epi16:
520; X86:       # %bb.0: # %entry
521; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
522; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
523; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z}
524; X86-NEXT:    retl
525;
526; X64-LABEL: test_mm256_maskz_expandloadu_epi16:
527; X64:       # %bb.0: # %entry
528; X64-NEXT:    kmovd %edi, %k1
529; X64-NEXT:    vpexpandw (%rsi), %ymm0 {%k1} {z}
530; X64-NEXT:    retq
531entry:
532  %0 = bitcast i8* %__P to i16*
533  %1 = bitcast i16 %__U to <16 x i1>
534  %2 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %0, <16 x i1> %1, <16 x i16> zeroinitializer)
535  %3 = bitcast <16 x i16> %2 to <4 x i64>
536  ret <4 x i64> %3
537}
538
539define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, i8* readonly %__P) {
540; X86-LABEL: test_mm256_mask_expandloadu_epi8:
541; X86:       # %bb.0: # %entry
542; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
543; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
544; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1}
545; X86-NEXT:    retl
546;
547; X64-LABEL: test_mm256_mask_expandloadu_epi8:
548; X64:       # %bb.0: # %entry
549; X64-NEXT:    kmovd %edi, %k1
550; X64-NEXT:    vpexpandb (%rsi), %ymm0 {%k1}
551; X64-NEXT:    retq
552entry:
553  %0 = bitcast <4 x i64> %__S to <32 x i8>
554  %1 = bitcast i32 %__U to <32 x i1>
555  %2 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %1, <32 x i8> %0)
556  %3 = bitcast <32 x i8> %2 to <4 x i64>
557  ret <4 x i64> %3
558}
559
560define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, i8* readonly %__P) {
561; X86-LABEL: test_mm256_maskz_expandloadu_epi8:
562; X86:       # %bb.0: # %entry
563; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
564; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
565; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z}
566; X86-NEXT:    retl
567;
568; X64-LABEL: test_mm256_maskz_expandloadu_epi8:
569; X64:       # %bb.0: # %entry
570; X64-NEXT:    kmovd %edi, %k1
571; X64-NEXT:    vpexpandb (%rsi), %ymm0 {%k1} {z}
572; X64-NEXT:    retq
573entry:
574  %0 = bitcast i32 %__U to <32 x i1>
575  %1 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %0, <32 x i8> zeroinitializer)
576  %2 = bitcast <32 x i8> %1 to <4 x i64>
577  ret <4 x i64> %2
578}
579
580define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
581; X86-LABEL: test_mm256_mask_shldi_epi64:
582; X86:       # %bb.0: # %entry
583; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
584; X86-NEXT:    kmovd %eax, %k1
585; X86-NEXT:    vpshldq $127, %ymm2, %ymm1, %ymm0 {%k1}
586; X86-NEXT:    retl
587;
588; X64-LABEL: test_mm256_mask_shldi_epi64:
589; X64:       # %bb.0: # %entry
590; X64-NEXT:    kmovd %edi, %k1
591; X64-NEXT:    vpshldq $127, %ymm2, %ymm1, %ymm0 {%k1}
592; X64-NEXT:    retq
593entry:
594  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 127)
595  %1 = bitcast i8 %__U to <8 x i1>
596  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
597  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
598  ret <4 x i64> %2
599}
600
601declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32)
602
603define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
604; X86-LABEL: test_mm256_maskz_shldi_epi64:
605; X86:       # %bb.0: # %entry
606; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
607; X86-NEXT:    kmovd %eax, %k1
608; X86-NEXT:    vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
609; X86-NEXT:    retl
610;
611; X64-LABEL: test_mm256_maskz_shldi_epi64:
612; X64:       # %bb.0: # %entry
613; X64-NEXT:    kmovd %edi, %k1
614; X64-NEXT:    vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
615; X64-NEXT:    retq
616entry:
617  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 63)
618  %1 = bitcast i8 %__U to <8 x i1>
619  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
620  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
621  ret <4 x i64> %2
622}
623
624define <4 x i64> @test_mm256_shldi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
625; CHECK-LABEL: test_mm256_shldi_epi64:
626; CHECK:       # %bb.0: # %entry
627; CHECK-NEXT:    vpshldq $31, %ymm1, %ymm0, %ymm0
628; CHECK-NEXT:    ret{{[l|q]}}
629entry:
630  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 31)
631  ret <4 x i64> %0
632}
633
634define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
635; X86-LABEL: test_mm_mask_shldi_epi64:
636; X86:       # %bb.0: # %entry
637; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
638; X86-NEXT:    kmovd %eax, %k1
639; X86-NEXT:    vpshldq $127, %xmm2, %xmm1, %xmm0 {%k1}
640; X86-NEXT:    retl
641;
642; X64-LABEL: test_mm_mask_shldi_epi64:
643; X64:       # %bb.0: # %entry
644; X64-NEXT:    kmovd %edi, %k1
645; X64-NEXT:    vpshldq $127, %xmm2, %xmm1, %xmm0 {%k1}
646; X64-NEXT:    retq
647entry:
648  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 127)
649  %1 = bitcast i8 %__U to <8 x i1>
650  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
651  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
652  ret <2 x i64> %2
653}
654
655declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32) #3
656
657define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
658; X86-LABEL: test_mm_maskz_shldi_epi64:
659; X86:       # %bb.0: # %entry
660; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
661; X86-NEXT:    kmovd %eax, %k1
662; X86-NEXT:    vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
663; X86-NEXT:    retl
664;
665; X64-LABEL: test_mm_maskz_shldi_epi64:
666; X64:       # %bb.0: # %entry
667; X64-NEXT:    kmovd %edi, %k1
668; X64-NEXT:    vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
669; X64-NEXT:    retq
670entry:
671  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 63)
672  %1 = bitcast i8 %__U to <8 x i1>
673  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
674  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
675  ret <2 x i64> %2
676}
677
678define <2 x i64> @test_mm_shldi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
679; CHECK-LABEL: test_mm_shldi_epi64:
680; CHECK:       # %bb.0: # %entry
681; CHECK-NEXT:    vpshldq $31, %xmm1, %xmm0, %xmm0
682; CHECK-NEXT:    ret{{[l|q]}}
683entry:
684  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 31)
685  ret <2 x i64> %0
686}
687
688define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
689; X86-LABEL: test_mm256_mask_shldi_epi32:
690; X86:       # %bb.0: # %entry
691; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
692; X86-NEXT:    kmovd %eax, %k1
693; X86-NEXT:    vpshldd $127, %ymm2, %ymm1, %ymm0 {%k1}
694; X86-NEXT:    retl
695;
696; X64-LABEL: test_mm256_mask_shldi_epi32:
697; X64:       # %bb.0: # %entry
698; X64-NEXT:    kmovd %edi, %k1
699; X64-NEXT:    vpshldd $127, %ymm2, %ymm1, %ymm0 {%k1}
700; X64-NEXT:    retq
701entry:
702  %0 = bitcast <4 x i64> %__A to <8 x i32>
703  %1 = bitcast <4 x i64> %__B to <8 x i32>
704  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 127)
705  %3 = bitcast <4 x i64> %__S to <8 x i32>
706  %4 = bitcast i8 %__U to <8 x i1>
707  %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
708  %6 = bitcast <8 x i32> %5 to <4 x i64>
709  ret <4 x i64> %6
710}
711
712declare <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32)
713
714define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
715; X86-LABEL: test_mm256_maskz_shldi_epi32:
716; X86:       # %bb.0: # %entry
717; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
718; X86-NEXT:    kmovd %eax, %k1
719; X86-NEXT:    vpshldd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
720; X86-NEXT:    retl
721;
722; X64-LABEL: test_mm256_maskz_shldi_epi32:
723; X64:       # %bb.0: # %entry
724; X64-NEXT:    kmovd %edi, %k1
725; X64-NEXT:    vpshldd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
726; X64-NEXT:    retq
727entry:
728  %0 = bitcast <4 x i64> %__A to <8 x i32>
729  %1 = bitcast <4 x i64> %__B to <8 x i32>
730  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 63)
731  %3 = bitcast i8 %__U to <8 x i1>
732  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
733  %5 = bitcast <8 x i32> %4 to <4 x i64>
734  ret <4 x i64> %5
735}
736
737define <4 x i64> @test_mm256_shldi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
738; CHECK-LABEL: test_mm256_shldi_epi32:
739; CHECK:       # %bb.0: # %entry
740; CHECK-NEXT:    vpshldd $31, %ymm1, %ymm0, %ymm0
741; CHECK-NEXT:    ret{{[l|q]}}
742entry:
743  %0 = bitcast <4 x i64> %__A to <8 x i32>
744  %1 = bitcast <4 x i64> %__B to <8 x i32>
745  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 31)
746  %3 = bitcast <8 x i32> %2 to <4 x i64>
747  ret <4 x i64> %3
748}
749
750define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
751; X86-LABEL: test_mm_mask_shldi_epi32:
752; X86:       # %bb.0: # %entry
753; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
754; X86-NEXT:    kmovd %eax, %k1
755; X86-NEXT:    vpshldd $127, %xmm2, %xmm1, %xmm0 {%k1}
756; X86-NEXT:    retl
757;
758; X64-LABEL: test_mm_mask_shldi_epi32:
759; X64:       # %bb.0: # %entry
760; X64-NEXT:    kmovd %edi, %k1
761; X64-NEXT:    vpshldd $127, %xmm2, %xmm1, %xmm0 {%k1}
762; X64-NEXT:    retq
763entry:
764  %0 = bitcast <2 x i64> %__A to <4 x i32>
765  %1 = bitcast <2 x i64> %__B to <4 x i32>
766  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 127)
767  %3 = bitcast <2 x i64> %__S to <4 x i32>
768  %4 = bitcast i8 %__U to <8 x i1>
769  %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
770  %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
771  %6 = bitcast <4 x i32> %5 to <2 x i64>
772  ret <2 x i64> %6
773}
774
775declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32)
776
777define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
778; X86-LABEL: test_mm_maskz_shldi_epi32:
779; X86:       # %bb.0: # %entry
780; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
781; X86-NEXT:    kmovd %eax, %k1
782; X86-NEXT:    vpshldd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
783; X86-NEXT:    retl
784;
785; X64-LABEL: test_mm_maskz_shldi_epi32:
786; X64:       # %bb.0: # %entry
787; X64-NEXT:    kmovd %edi, %k1
788; X64-NEXT:    vpshldd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
789; X64-NEXT:    retq
790entry:
791  %0 = bitcast <2 x i64> %__A to <4 x i32>
792  %1 = bitcast <2 x i64> %__B to <4 x i32>
793  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 63)
794  %3 = bitcast i8 %__U to <8 x i1>
795  %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
796  %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
797  %5 = bitcast <4 x i32> %4 to <2 x i64>
798  ret <2 x i64> %5
799}
800
801define <2 x i64> @test_mm_shldi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
802; CHECK-LABEL: test_mm_shldi_epi32:
803; CHECK:       # %bb.0: # %entry
804; CHECK-NEXT:    vpshldd $31, %xmm1, %xmm0, %xmm0
805; CHECK-NEXT:    ret{{[l|q]}}
806entry:
807  %0 = bitcast <2 x i64> %__A to <4 x i32>
808  %1 = bitcast <2 x i64> %__B to <4 x i32>
809  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 31)
810  %3 = bitcast <4 x i32> %2 to <2 x i64>
811  ret <2 x i64> %3
812}
813
814define <4 x i64> @test_mm256_mask_shldi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
815; X86-LABEL: test_mm256_mask_shldi_epi16:
816; X86:       # %bb.0: # %entry
817; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
818; X86-NEXT:    vpshldw $127, %ymm2, %ymm1, %ymm0 {%k1}
819; X86-NEXT:    retl
820;
821; X64-LABEL: test_mm256_mask_shldi_epi16:
822; X64:       # %bb.0: # %entry
823; X64-NEXT:    kmovd %edi, %k1
824; X64-NEXT:    vpshldw $127, %ymm2, %ymm1, %ymm0 {%k1}
825; X64-NEXT:    retq
826entry:
827  %0 = bitcast <4 x i64> %__A to <16 x i16>
828  %1 = bitcast <4 x i64> %__B to <16 x i16>
829  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 127)
830  %3 = bitcast <4 x i64> %__S to <16 x i16>
831  %4 = bitcast i16 %__U to <16 x i1>
832  %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
833  %6 = bitcast <16 x i16> %5 to <4 x i64>
834  ret <4 x i64> %6
835}
836
837declare <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32)
838
839define <4 x i64> @test_mm256_maskz_shldi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
840; X86-LABEL: test_mm256_maskz_shldi_epi16:
841; X86:       # %bb.0: # %entry
842; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
843; X86-NEXT:    vpshldw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
844; X86-NEXT:    retl
845;
846; X64-LABEL: test_mm256_maskz_shldi_epi16:
847; X64:       # %bb.0: # %entry
848; X64-NEXT:    kmovd %edi, %k1
849; X64-NEXT:    vpshldw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
850; X64-NEXT:    retq
851entry:
852  %0 = bitcast <4 x i64> %__A to <16 x i16>
853  %1 = bitcast <4 x i64> %__B to <16 x i16>
854  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 63)
855  %3 = bitcast i16 %__U to <16 x i1>
856  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
857  %5 = bitcast <16 x i16> %4 to <4 x i64>
858  ret <4 x i64> %5
859}
860
861define <4 x i64> @test_mm256_shldi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
862; CHECK-LABEL: test_mm256_shldi_epi16:
863; CHECK:       # %bb.0: # %entry
864; CHECK-NEXT:    vpshldw $31, %ymm1, %ymm0, %ymm0
865; CHECK-NEXT:    ret{{[l|q]}}
866entry:
867  %0 = bitcast <4 x i64> %__A to <16 x i16>
868  %1 = bitcast <4 x i64> %__B to <16 x i16>
869  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 31)
870  %3 = bitcast <16 x i16> %2 to <4 x i64>
871  ret <4 x i64> %3
872}
873
874define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
875; X86-LABEL: test_mm_mask_shldi_epi16:
876; X86:       # %bb.0: # %entry
877; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
878; X86-NEXT:    kmovd %eax, %k1
879; X86-NEXT:    vpshldw $127, %xmm2, %xmm1, %xmm0 {%k1}
880; X86-NEXT:    retl
881;
882; X64-LABEL: test_mm_mask_shldi_epi16:
883; X64:       # %bb.0: # %entry
884; X64-NEXT:    kmovd %edi, %k1
885; X64-NEXT:    vpshldw $127, %xmm2, %xmm1, %xmm0 {%k1}
886; X64-NEXT:    retq
887entry:
888  %0 = bitcast <2 x i64> %__A to <8 x i16>
889  %1 = bitcast <2 x i64> %__B to <8 x i16>
890  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 127)
891  %3 = bitcast <2 x i64> %__S to <8 x i16>
892  %4 = bitcast i8 %__U to <8 x i1>
893  %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
894  %6 = bitcast <8 x i16> %5 to <2 x i64>
895  ret <2 x i64> %6
896}
897
898declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32)
899
900define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
901; X86-LABEL: test_mm_maskz_shldi_epi16:
902; X86:       # %bb.0: # %entry
903; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
904; X86-NEXT:    kmovd %eax, %k1
905; X86-NEXT:    vpshldw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
906; X86-NEXT:    retl
907;
908; X64-LABEL: test_mm_maskz_shldi_epi16:
909; X64:       # %bb.0: # %entry
910; X64-NEXT:    kmovd %edi, %k1
911; X64-NEXT:    vpshldw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
912; X64-NEXT:    retq
913entry:
914  %0 = bitcast <2 x i64> %__A to <8 x i16>
915  %1 = bitcast <2 x i64> %__B to <8 x i16>
916  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 63)
917  %3 = bitcast i8 %__U to <8 x i1>
918  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
919  %5 = bitcast <8 x i16> %4 to <2 x i64>
920  ret <2 x i64> %5
921}
922
923define <2 x i64> @test_mm_shldi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
924; CHECK-LABEL: test_mm_shldi_epi16:
925; CHECK:       # %bb.0: # %entry
926; CHECK-NEXT:    vpshldw $31, %xmm1, %xmm0, %xmm0
927; CHECK-NEXT:    ret{{[l|q]}}
928entry:
929  %0 = bitcast <2 x i64> %__A to <8 x i16>
930  %1 = bitcast <2 x i64> %__B to <8 x i16>
931  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 31)
932  %3 = bitcast <8 x i16> %2 to <2 x i64>
933  ret <2 x i64> %3
934}
935
936define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
937; X86-LABEL: test_mm256_mask_shrdi_epi64:
938; X86:       # %bb.0: # %entry
939; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
940; X86-NEXT:    kmovd %eax, %k1
941; X86-NEXT:    vpshrdq $127, %ymm2, %ymm1, %ymm0 {%k1}
942; X86-NEXT:    retl
943;
944; X64-LABEL: test_mm256_mask_shrdi_epi64:
945; X64:       # %bb.0: # %entry
946; X64-NEXT:    kmovd %edi, %k1
947; X64-NEXT:    vpshrdq $127, %ymm2, %ymm1, %ymm0 {%k1}
948; X64-NEXT:    retq
949entry:
950  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 127)
951  %1 = bitcast i8 %__U to <8 x i1>
952  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
953  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
954  ret <4 x i64> %2
955}
956
957declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32)
958
959define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
960; X86-LABEL: test_mm256_maskz_shrdi_epi64:
961; X86:       # %bb.0: # %entry
962; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
963; X86-NEXT:    kmovd %eax, %k1
964; X86-NEXT:    vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
965; X86-NEXT:    retl
966;
967; X64-LABEL: test_mm256_maskz_shrdi_epi64:
968; X64:       # %bb.0: # %entry
969; X64-NEXT:    kmovd %edi, %k1
970; X64-NEXT:    vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
971; X64-NEXT:    retq
972entry:
973  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 63)
974  %1 = bitcast i8 %__U to <8 x i1>
975  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
976  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
977  ret <4 x i64> %2
978}
979
980define <4 x i64> @test_mm256_shrdi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
981; CHECK-LABEL: test_mm256_shrdi_epi64:
982; CHECK:       # %bb.0: # %entry
983; CHECK-NEXT:    vpshrdq $31, %ymm1, %ymm0, %ymm0
984; CHECK-NEXT:    ret{{[l|q]}}
985entry:
986  %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 31)
987  ret <4 x i64> %0
988}
989
990define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
991; X86-LABEL: test_mm_mask_shrdi_epi64:
992; X86:       # %bb.0: # %entry
993; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
994; X86-NEXT:    kmovd %eax, %k1
995; X86-NEXT:    vpshrdq $127, %xmm2, %xmm1, %xmm0 {%k1}
996; X86-NEXT:    retl
997;
998; X64-LABEL: test_mm_mask_shrdi_epi64:
999; X64:       # %bb.0: # %entry
1000; X64-NEXT:    kmovd %edi, %k1
1001; X64-NEXT:    vpshrdq $127, %xmm2, %xmm1, %xmm0 {%k1}
1002; X64-NEXT:    retq
1003entry:
1004  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 127)
1005  %1 = bitcast i8 %__U to <8 x i1>
1006  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1007  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
1008  ret <2 x i64> %2
1009}
1010
1011declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32)
1012
1013define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1014; X86-LABEL: test_mm_maskz_shrdi_epi64:
1015; X86:       # %bb.0: # %entry
1016; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1017; X86-NEXT:    kmovd %eax, %k1
1018; X86-NEXT:    vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1019; X86-NEXT:    retl
1020;
1021; X64-LABEL: test_mm_maskz_shrdi_epi64:
1022; X64:       # %bb.0: # %entry
1023; X64-NEXT:    kmovd %edi, %k1
1024; X64-NEXT:    vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1025; X64-NEXT:    retq
1026entry:
1027  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 63)
1028  %1 = bitcast i8 %__U to <8 x i1>
1029  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1030  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
1031  ret <2 x i64> %2
1032}
1033
1034define <2 x i64> @test_mm_shrdi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
1035; CHECK-LABEL: test_mm_shrdi_epi64:
1036; CHECK:       # %bb.0: # %entry
1037; CHECK-NEXT:    vpshrdq $31, %xmm1, %xmm0, %xmm0
1038; CHECK-NEXT:    ret{{[l|q]}}
1039entry:
1040  %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 31)
1041  ret <2 x i64> %0
1042}
1043
1044define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1045; X86-LABEL: test_mm256_mask_shrdi_epi32:
1046; X86:       # %bb.0: # %entry
1047; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1048; X86-NEXT:    kmovd %eax, %k1
1049; X86-NEXT:    vpshrdd $127, %ymm2, %ymm1, %ymm0 {%k1}
1050; X86-NEXT:    retl
1051;
1052; X64-LABEL: test_mm256_mask_shrdi_epi32:
1053; X64:       # %bb.0: # %entry
1054; X64-NEXT:    kmovd %edi, %k1
1055; X64-NEXT:    vpshrdd $127, %ymm2, %ymm1, %ymm0 {%k1}
1056; X64-NEXT:    retq
1057entry:
1058  %0 = bitcast <4 x i64> %__A to <8 x i32>
1059  %1 = bitcast <4 x i64> %__B to <8 x i32>
1060  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 127)
1061  %3 = bitcast <4 x i64> %__S to <8 x i32>
1062  %4 = bitcast i8 %__U to <8 x i1>
1063  %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
1064  %6 = bitcast <8 x i32> %5 to <4 x i64>
1065  ret <4 x i64> %6
1066}
1067
1068declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32)
1069
1070define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1071; X86-LABEL: test_mm256_maskz_shrdi_epi32:
1072; X86:       # %bb.0: # %entry
1073; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1074; X86-NEXT:    kmovd %eax, %k1
1075; X86-NEXT:    vpshrdd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
1076; X86-NEXT:    retl
1077;
1078; X64-LABEL: test_mm256_maskz_shrdi_epi32:
1079; X64:       # %bb.0: # %entry
1080; X64-NEXT:    kmovd %edi, %k1
1081; X64-NEXT:    vpshrdd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
1082; X64-NEXT:    retq
1083entry:
1084  %0 = bitcast <4 x i64> %__A to <8 x i32>
1085  %1 = bitcast <4 x i64> %__B to <8 x i32>
1086  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 63)
1087  %3 = bitcast i8 %__U to <8 x i1>
1088  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
1089  %5 = bitcast <8 x i32> %4 to <4 x i64>
1090  ret <4 x i64> %5
1091}
1092
1093define <4 x i64> @test_mm256_shrdi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
1094; CHECK-LABEL: test_mm256_shrdi_epi32:
1095; CHECK:       # %bb.0: # %entry
1096; CHECK-NEXT:    vpshrdd $31, %ymm1, %ymm0, %ymm0
1097; CHECK-NEXT:    ret{{[l|q]}}
1098entry:
1099  %0 = bitcast <4 x i64> %__A to <8 x i32>
1100  %1 = bitcast <4 x i64> %__B to <8 x i32>
1101  %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 31)
1102  %3 = bitcast <8 x i32> %2 to <4 x i64>
1103  ret <4 x i64> %3
1104}
1105
1106define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1107; X86-LABEL: test_mm_mask_shrdi_epi32:
1108; X86:       # %bb.0: # %entry
1109; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1110; X86-NEXT:    kmovd %eax, %k1
1111; X86-NEXT:    vpshrdd $127, %xmm2, %xmm1, %xmm0 {%k1}
1112; X86-NEXT:    retl
1113;
1114; X64-LABEL: test_mm_mask_shrdi_epi32:
1115; X64:       # %bb.0: # %entry
1116; X64-NEXT:    kmovd %edi, %k1
1117; X64-NEXT:    vpshrdd $127, %xmm2, %xmm1, %xmm0 {%k1}
1118; X64-NEXT:    retq
1119entry:
1120  %0 = bitcast <2 x i64> %__A to <4 x i32>
1121  %1 = bitcast <2 x i64> %__B to <4 x i32>
1122  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 127)
1123  %3 = bitcast <2 x i64> %__S to <4 x i32>
1124  %4 = bitcast i8 %__U to <8 x i1>
1125  %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1126  %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
1127  %6 = bitcast <4 x i32> %5 to <2 x i64>
1128  ret <2 x i64> %6
1129}
1130
1131declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32)
1132
1133define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1134; X86-LABEL: test_mm_maskz_shrdi_epi32:
1135; X86:       # %bb.0: # %entry
1136; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1137; X86-NEXT:    kmovd %eax, %k1
1138; X86-NEXT:    vpshrdd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1139; X86-NEXT:    retl
1140;
1141; X64-LABEL: test_mm_maskz_shrdi_epi32:
1142; X64:       # %bb.0: # %entry
1143; X64-NEXT:    kmovd %edi, %k1
1144; X64-NEXT:    vpshrdd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1145; X64-NEXT:    retq
1146entry:
1147  %0 = bitcast <2 x i64> %__A to <4 x i32>
1148  %1 = bitcast <2 x i64> %__B to <4 x i32>
1149  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 63)
1150  %3 = bitcast i8 %__U to <8 x i1>
1151  %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1152  %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
1153  %5 = bitcast <4 x i32> %4 to <2 x i64>
1154  ret <2 x i64> %5
1155}
1156
1157define <2 x i64> @test_mm_shrdi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
1158; CHECK-LABEL: test_mm_shrdi_epi32:
1159; CHECK:       # %bb.0: # %entry
1160; CHECK-NEXT:    vpshrdd $31, %xmm1, %xmm0, %xmm0
1161; CHECK-NEXT:    ret{{[l|q]}}
1162entry:
1163  %0 = bitcast <2 x i64> %__A to <4 x i32>
1164  %1 = bitcast <2 x i64> %__B to <4 x i32>
1165  %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 31)
1166  %3 = bitcast <4 x i32> %2 to <2 x i64>
1167  ret <2 x i64> %3
1168}
1169
1170define <4 x i64> @test_mm256_mask_shrdi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1171; X86-LABEL: test_mm256_mask_shrdi_epi16:
1172; X86:       # %bb.0: # %entry
1173; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
1174; X86-NEXT:    vpshrdw $127, %ymm2, %ymm1, %ymm0 {%k1}
1175; X86-NEXT:    retl
1176;
1177; X64-LABEL: test_mm256_mask_shrdi_epi16:
1178; X64:       # %bb.0: # %entry
1179; X64-NEXT:    kmovd %edi, %k1
1180; X64-NEXT:    vpshrdw $127, %ymm2, %ymm1, %ymm0 {%k1}
1181; X64-NEXT:    retq
1182entry:
1183  %0 = bitcast <4 x i64> %__A to <16 x i16>
1184  %1 = bitcast <4 x i64> %__B to <16 x i16>
1185  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 127)
1186  %3 = bitcast <4 x i64> %__S to <16 x i16>
1187  %4 = bitcast i16 %__U to <16 x i1>
1188  %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
1189  %6 = bitcast <16 x i16> %5 to <4 x i64>
1190  ret <4 x i64> %6
1191}
1192
1193declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32)
1194
1195define <4 x i64> @test_mm256_maskz_shrdi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1196; X86-LABEL: test_mm256_maskz_shrdi_epi16:
1197; X86:       # %bb.0: # %entry
1198; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
1199; X86-NEXT:    vpshrdw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
1200; X86-NEXT:    retl
1201;
1202; X64-LABEL: test_mm256_maskz_shrdi_epi16:
1203; X64:       # %bb.0: # %entry
1204; X64-NEXT:    kmovd %edi, %k1
1205; X64-NEXT:    vpshrdw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
1206; X64-NEXT:    retq
1207entry:
1208  %0 = bitcast <4 x i64> %__A to <16 x i16>
1209  %1 = bitcast <4 x i64> %__B to <16 x i16>
1210  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 63)
1211  %3 = bitcast i16 %__U to <16 x i1>
1212  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
1213  %5 = bitcast <16 x i16> %4 to <4 x i64>
1214  ret <4 x i64> %5
1215}
1216
1217define <4 x i64> @test_mm256_shrdi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
1218; CHECK-LABEL: test_mm256_shrdi_epi16:
1219; CHECK:       # %bb.0: # %entry
1220; CHECK-NEXT:    vpshrdw $31, %ymm1, %ymm0, %ymm0
1221; CHECK-NEXT:    ret{{[l|q]}}
1222entry:
1223  %0 = bitcast <4 x i64> %__A to <16 x i16>
1224  %1 = bitcast <4 x i64> %__B to <16 x i16>
1225  %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 31)
1226  %3 = bitcast <16 x i16> %2 to <4 x i64>
1227  ret <4 x i64> %3
1228}
1229
1230define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1231; X86-LABEL: test_mm_mask_shrdi_epi16:
1232; X86:       # %bb.0: # %entry
1233; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1234; X86-NEXT:    kmovd %eax, %k1
1235; X86-NEXT:    vpshrdw $127, %xmm2, %xmm1, %xmm0 {%k1}
1236; X86-NEXT:    retl
1237;
1238; X64-LABEL: test_mm_mask_shrdi_epi16:
1239; X64:       # %bb.0: # %entry
1240; X64-NEXT:    kmovd %edi, %k1
1241; X64-NEXT:    vpshrdw $127, %xmm2, %xmm1, %xmm0 {%k1}
1242; X64-NEXT:    retq
1243entry:
1244  %0 = bitcast <2 x i64> %__A to <8 x i16>
1245  %1 = bitcast <2 x i64> %__B to <8 x i16>
1246  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 127)
1247  %3 = bitcast <2 x i64> %__S to <8 x i16>
1248  %4 = bitcast i8 %__U to <8 x i1>
1249  %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
1250  %6 = bitcast <8 x i16> %5 to <2 x i64>
1251  ret <2 x i64> %6
1252}
1253
1254declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32)
1255
1256define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1257; X86-LABEL: test_mm_maskz_shrdi_epi16:
1258; X86:       # %bb.0: # %entry
1259; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1260; X86-NEXT:    kmovd %eax, %k1
1261; X86-NEXT:    vpshrdw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1262; X86-NEXT:    retl
1263;
1264; X64-LABEL: test_mm_maskz_shrdi_epi16:
1265; X64:       # %bb.0: # %entry
1266; X64-NEXT:    kmovd %edi, %k1
1267; X64-NEXT:    vpshrdw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1268; X64-NEXT:    retq
1269entry:
1270  %0 = bitcast <2 x i64> %__A to <8 x i16>
1271  %1 = bitcast <2 x i64> %__B to <8 x i16>
1272  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 63)
1273  %3 = bitcast i8 %__U to <8 x i1>
1274  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
1275  %5 = bitcast <8 x i16> %4 to <2 x i64>
1276  ret <2 x i64> %5
1277}
1278
1279define <2 x i64> @test_mm_shrdi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
1280; CHECK-LABEL: test_mm_shrdi_epi16:
1281; CHECK:       # %bb.0: # %entry
1282; CHECK-NEXT:    vpshrdw $31, %xmm1, %xmm0, %xmm0
1283; CHECK-NEXT:    ret{{[l|q]}}
1284entry:
1285  %0 = bitcast <2 x i64> %__A to <8 x i16>
1286  %1 = bitcast <2 x i64> %__B to <8 x i16>
1287  %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 31)
1288  %3 = bitcast <8 x i16> %2 to <2 x i64>
1289  ret <2 x i64> %3
1290}
1291
1292define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1293; X86-LABEL: test_mm256_mask_shldv_epi64:
1294; X86:       # %bb.0: # %entry
1295; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1296; X86-NEXT:    kmovd %eax, %k1
1297; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
1298; X86-NEXT:    retl
1299;
1300; X64-LABEL: test_mm256_mask_shldv_epi64:
1301; X64:       # %bb.0: # %entry
1302; X64-NEXT:    kmovd %edi, %k1
1303; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
1304; X64-NEXT:    retq
1305entry:
1306  %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
1307  ret <4 x i64> %0
1308}
1309
1310define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1311; X86-LABEL: test_mm256_maskz_shldv_epi64:
1312; X86:       # %bb.0: # %entry
1313; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1314; X86-NEXT:    kmovd %eax, %k1
1315; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
1316; X86-NEXT:    retl
1317;
1318; X64-LABEL: test_mm256_maskz_shldv_epi64:
1319; X64:       # %bb.0: # %entry
1320; X64-NEXT:    kmovd %edi, %k1
1321; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
1322; X64-NEXT:    retq
1323entry:
1324  %0 = tail call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
1325  ret <4 x i64> %0
1326}
1327
1328define <4 x i64> @test_mm256_shldv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1329; CHECK-LABEL: test_mm256_shldv_epi64:
1330; CHECK:       # %bb.0: # %entry
1331; CHECK-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0
1332; CHECK-NEXT:    ret{{[l|q]}}
1333entry:
1334  %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 -1)
1335  ret <4 x i64> %0
1336}
1337
1338define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1339; X86-LABEL: test_mm_mask_shldv_epi64:
1340; X86:       # %bb.0: # %entry
1341; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1342; X86-NEXT:    kmovd %eax, %k1
1343; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
1344; X86-NEXT:    retl
1345;
1346; X64-LABEL: test_mm_mask_shldv_epi64:
1347; X64:       # %bb.0: # %entry
1348; X64-NEXT:    kmovd %edi, %k1
1349; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
1350; X64-NEXT:    retq
1351entry:
1352  %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
1353  ret <2 x i64> %0
1354}
1355
1356define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1357; X86-LABEL: test_mm_maskz_shldv_epi64:
1358; X86:       # %bb.0: # %entry
1359; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
1360; X86-NEXT:    kmovd %eax, %k1
1361; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
1362; X86-NEXT:    retl
1363;
1364; X64-LABEL: test_mm_maskz_shldv_epi64:
1365; X64:       # %bb.0: # %entry
1366; X64-NEXT:    kmovd %edi, %k1
1367; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
1368; X64-NEXT:    retq
1369entry:
1370  %0 = tail call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
1371  ret <2 x i64> %0
1372}
1373
define <2 x i64> @test_mm_shldv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 -1)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_shldv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 -1)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_shldv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 -1)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <4 x i64> @test_mm256_mask_shldv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_shldv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_shldv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 -1)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_shldv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 -1)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

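; Illustrative sketch (an assumption added for readability; not autogenerated
; and not matched by any FileCheck prefix): per lane, vpshrdv concatenates the
; second source and the destination, shifts right by the third source modulo
; the lane width, and keeps the lower half, i.e. a generic funnel shift right.
define <2 x i64> @shrdv_epi64_as_fshr_sketch(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
entry:
  ; result lane i = low64((__A[i] : __S[i]) >> (__B[i] mod 64))
  %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B)
  ret <2 x i64> %0
}
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
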
define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_shrdv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 -1)
  ret <4 x i64> %0
}

define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_shrdv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 -1)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_shrdv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 -1)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_shrdv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 -1)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <4 x i64> @test_mm256_mask_shrdv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_shrdv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_shrdv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 -1)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_shrdv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 -1)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8)
declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>)
declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8)
declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16)
declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>)
declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>)
declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16)
declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32)
declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>)
declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16)
declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32)
declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>)
declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>)
declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
