; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
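;
; This file exercises the legacy llvm.x86.avx512.mask.* intrinsics added with
; AVX512VBMI2: expand loads (vpexpandw/vpexpandb), compress stores and
; register-to-register compress (vpcompressw/vpcompressb), and the
; concat-and-shift-by-immediate operations (vpshld/vpshrd). Each test is
; checked on both 32-bit (X86) and 64-bit (X64) targets, down to the exact
; MC encodings.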

define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)

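; Unmasked variants are expressed through the same intrinsic with an all-ones
; mask (-1). As the checks below show, the backend still emits a masked
; vpexpandw and materializes the all-ones predicate with kxnord rather than
; using an unmasked load.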
define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 -1)
  ret <32 x i16> %res
}

define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)

define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 -1)
  ret <64 x i8> %res
}

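; Compress stores write only the mask-selected elements, packed contiguously,
; to memory. Note the vzeroupper before each return: these functions use
; 512-bit registers but return no vector value.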
define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)

define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)

define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 -1)
  ret void
}

define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)

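; i64 is not a legal type on 32-bit targets, so in the X86 checks below the
; i64 mask argument arrives as two i32 halves: each half is moved into a mask
; register with kmovd and the halves are recombined with kunpckdq. (The store
; test above could instead load the whole mask with kmovq's m64 form.)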
define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)

define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 -1)
  ret void
}

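; VPSHLD{D,Q,W} concatenates each element of the first source (high part)
; with the corresponding element of the second (low part), shifts the pair
; left by the immediate, and keeps the upper half: a funnel shift left,
; exposed in C as _mm512_shldi_epi32 and friends. Each test runs the
; intrinsic masked and unmasked and adds the two results so both lowerings
; stay live.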
define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define <8 x i64>@test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <32 x i16>@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)

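; VPSHRD{D,Q,W} is the right-shift counterpart: the concatenated element pair
; is shifted right by the immediate and the lower half is kept (a funnel
; shift right, _mm512_shrdi_epi32 and friends in C).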
define <16 x i32>@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define <8 x i64>@test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <32 x i16>@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)