; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
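;
; These tests cover AVX512VBMI2+VL code generation for 128-bit and 256-bit
; vectors on 32-bit (X86) and 64-bit (X64) targets: vpexpand{b,w} loads and
; register forms, vpcompress{b,w} stores and register forms (masked,
; zero-masked, and all-ones mask), and the concatenated shift instructions
; vpshld{w,d,q} / vpshrd{w,d,q}, checking both instruction selection and
; MC encodings.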

define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> %data)
  ret <8 x i16> %3
}

define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %data)
  ret <8 x i16> %2
}

define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}

define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* %addr, <16 x i1> %1, <16 x i8> %data)
  ret <16 x i8> %2
}

define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* %addr, <16 x i1> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}

define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) {
; X86-LABEL: test_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %data)
  ret <16 x i8> %1
}

define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_expand_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
  ret <16 x i8> %2
}

define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
  ret <16 x i8> %2
}

define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i8 %mask to <8 x i1>
  call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, i16* %1, <8 x i1> %2)
  ret void
}

define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) {
; X86-LABEL: test_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, i16* %1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_compress_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}

define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, i8* %addr, <16 x i1> %1)
  ret void
}

define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) {
; X86-LABEL: test_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, i8* %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
  ret <16 x i8> %2
}

define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
  ret <16 x i8> %2
}

define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_compress_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i8> %1
}

define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> %data)
  ret <16 x i16> %3
}

define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> zeroinitializer)
  ret <16 x i16> %3
}

define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) {
; X86-LABEL: test_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %data)
  ret <16 x i16> %2
}

define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
  ret <16 x i16> %2
}

define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
  ret <16 x i16> %2
}

define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* %addr, <32 x i1> %1, <32 x i8> %data)
  ret <32 x i8> %2
}

define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* %addr, <32 x i1> %1, <32 x i8> zeroinitializer)
  ret <32 x i8> %2
}

define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) {
; X86-LABEL: test_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %data)
  ret <32 x i8> %1
}

define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_expand_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
  ret <32 x i8> %2
}

define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
  ret <32 x i8> %2
}

define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, i16* %1, <16 x i1> %2)
  ret void
}

define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) {
; X86-LABEL: test_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, i16* %1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
  ret <16 x i16> %2
}

define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
  ret <16 x i16> %2
}

define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_compress_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i16> %1
}

define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, i8* %addr, <32 x i1> %1)
  ret void
}

define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) {
; X86-LABEL: test_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, i8* %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
  ret <32 x i8> %2
}

define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
  ret <32 x i8> %2
}

define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_compress_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i8> %1
}

define <4 x i32> @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
; X86-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X86-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
; X64-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X64-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 23, i32 23, i32 23, i32 23>)
  %5 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 24, i32 24, i32 24, i32 24>)
  %6 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
  %res3 = add <4 x i32> %3, %4
  %res4 = add <4 x i32> %res3, %7
  ret <4 x i32> %res4
}

define <8 x i32> @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X86-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc1,0x17]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X64-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc1,0x17]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %res2 = add <8 x i32> %3, %4
  ret <8 x i32> %res2
}

define <2 x i64> @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc1,0x17]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc1,0x17]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 23, i64 23>)
  %res2 = add <2 x i64> %3, %4
  ret <2 x i64> %res2
}

define <4 x i64> @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc1,0x17]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc1,0x17]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
  %res2 = add <4 x i64> %3, %4
  ret <4 x i64> %res2
}

define <8 x i16> @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc1,0x07]
; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc1,0x07]
; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %res2 = add <8 x i16> %3, %4
  ret <8 x i16> %res2
}

define <16 x i16> @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc1,0x07]
; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc1,0x07]
; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %res2 = add <16 x i16> %3, %4
  ret <16 x i16> %res2
}

define <4 x i32> @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X86-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X64-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 23, i32 23, i32 23, i32 23>)
  %5 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 24, i32 24, i32 24, i32 24>)
  %6 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
  %res3 = add <4 x i32> %3, %4
  %res4 = add <4 x i32> %res3, %7
  ret <4 x i32> %res4
}

define <8 x i32> @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc1,0x17]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc1,0x17]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %res2 = add <8 x i32> %3, %4
  ret <8 x i32> %res2
}

define <2 x i64> @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc1,0x17]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc1,0x17]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 23, i64 23>)
  %res2 = add <2 x i64> %3, %4
  ret <2 x i64> %res2
}

define <4 x i64> @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc1,0x17]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc1,0x17]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
  %res2 = add <4 x i64> %3, %4
  ret <4 x i64> %res2
}

define <8 x i16> @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc1,0x07]
; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc1,0x07]
; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %res2 = add <8 x i16> %3, %4
  ret <8 x i16> %res2
}

define <16 x i16> @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc1,0x07]
; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc1,0x07]
; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
1048  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
1049  %2 = bitcast i16 %x4 to <16 x i1>
1050  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
1051  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
1052  %res2 = add <16 x i16> %3, %4
1053  ret <16 x i16> %res2
1054}
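; Illustrative note, not part of the autogenerated assertions: the 16-lane word tests take an
; i16 mask and bitcast it directly to the full predicate vector, with no lane extraction needed:
;   %m = bitcast i16 %mask to <16 x i1>
; On the 32-bit target the mask is loaded from the stack with kmovw, while on the 64-bit
; target it arrives in %edi and is moved into %k1 with kmovd, as in the checks above.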
1055
1056define <8 x i32> @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
1057; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
1058; X86:       # %bb.0:
1059; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1060; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1061; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1062; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1063; X86-NEXT:    vpshrdvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x18]
1064; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2]
1065; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
1066; X86-NEXT:    retl # encoding: [0xc3]
1067;
1068; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
1069; X64:       # %bb.0:
1070; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1071; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1072; X64-NEXT:    vpshrdvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x1f]
1073; X64-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2]
1074; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
1075; X64-NEXT:    retq # encoding: [0xc3]
1076  %x2 = load <8 x i32>, <8 x i32>* %x2p
1077  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1078  %2 = bitcast i8 %x3 to <8 x i1>
1079  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
1080  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x4)
1081  %5 = bitcast i8 %x3 to <8 x i1>
1082  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
1083  %res3 = add <8 x i32> %3, %6
1084  ret <8 x i32> %res3
1085}
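; Illustrative note, not part of the autogenerated assertions: the variable-count VPSHRDV
; tests pass a per-element shift amount, here loaded from %x2p and folded into the
; instruction's memory operand. The destination register also supplies %x0, so the
; merge-masked use first copies %x0 into a scratch register (the vmovdqa above) while the
; zero-masked use clobbers %x0 in place. A sketch of the intrinsic call, with %amounts as a
; placeholder for the loaded per-element counts:
;   %r = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %amounts)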
1086
1087define <4 x i32> @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
1088; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
1089; X86:       # %bb.0:
1090; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1091; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1092; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1093; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1094; X86-NEXT:    vpshrdvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x18]
1095; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2]
1096; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1097; X86-NEXT:    retl # encoding: [0xc3]
1098;
1099; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
1100; X64:       # %bb.0:
1101; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1102; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1103; X64-NEXT:    vpshrdvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x1f]
1104; X64-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2]
1105; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1106; X64-NEXT:    retq # encoding: [0xc3]
1107  %x2 = load <4 x i32>, <4 x i32>* %x2p
1108  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1109  %2 = bitcast i8 %x3 to <8 x i1>
1110  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1111  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
1112  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x4)
1113  %5 = bitcast i8 %x3 to <8 x i1>
1114  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1115  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
1116  %res3 = add <4 x i32> %3, %6
1117  ret <4 x i32> %res3
1118}
1119
1120define <4 x i64> @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) {
1121; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
1122; X86:       # %bb.0:
1123; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1124; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1125; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1126; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1127; X86-NEXT:    vpshrdvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x18]
1128; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2]
1129; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
1130; X86-NEXT:    retl # encoding: [0xc3]
1131;
1132; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
1133; X64:       # %bb.0:
1134; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1135; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1136; X64-NEXT:    vpshrdvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x1f]
1137; X64-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2]
1138; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
1139; X64-NEXT:    retq # encoding: [0xc3]
1140  %x2 = load <4 x i64>, <4 x i64>* %x2p
1141  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1142  %2 = bitcast i8 %x3 to <8 x i1>
1143  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1144  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
1145  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x4)
1146  %5 = bitcast i8 %x3 to <8 x i1>
1147  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1148  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
1149  %res3 = add <4 x i64> %3, %6
1150  ret <4 x i64> %res3
1151}
1152
1153define <2 x i64> @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) {
1154; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
1155; X86:       # %bb.0:
1156; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1157; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1158; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1159; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1160; X86-NEXT:    vpshrdvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x18]
1161; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2]
1162; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
1163; X86-NEXT:    retl # encoding: [0xc3]
1164;
1165; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
1166; X64:       # %bb.0:
1167; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1168; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1169; X64-NEXT:    vpshrdvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x1f]
1170; X64-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2]
1171; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
1172; X64-NEXT:    retq # encoding: [0xc3]
1173  %x2 = load <2 x i64>, <2 x i64>* %x2p
1174  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1175  %2 = bitcast i8 %x3 to <8 x i1>
1176  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1177  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
1178  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x4)
1179  %5 = bitcast i8 %x3 to <8 x i1>
1180  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
1181  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
1182  %res3 = add <2 x i64> %3, %6
1183  ret <2 x i64> %res3
1184}
1185
1186define <16 x i16> @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) {
1187; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
1188; X86:       # %bb.0:
1189; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1190; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1191; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1192; X86-NEXT:    vpshrdvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x18]
1193; X86-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2]
1194; X86-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
1195; X86-NEXT:    retl # encoding: [0xc3]
1196;
1197; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
1198; X64:       # %bb.0:
1199; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1200; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1201; X64-NEXT:    vpshrdvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x1f]
1202; X64-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2]
1203; X64-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
1204; X64-NEXT:    retq # encoding: [0xc3]
1205  %x2 = load <16 x i16>, <16 x i16>* %x2p
1206  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
1207  %2 = bitcast i16 %x3 to <16 x i1>
1208  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
1209  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x4)
1210  %5 = bitcast i16 %x3 to <16 x i1>
1211  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
1212  %res3 = add <16 x i16> %3, %4
1213  ret <16 x i16> %res3
1214}
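; Note: in the test above the final add uses %4, the unmasked funnel-shift result, so the
; zero-masked %6 is dead and the second vpshrdvw is emitted without a mask, matching the
; unmasked encoding in the checks.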
1215
1216define <8 x i16> @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) {
1217; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
1218; X86:       # %bb.0:
1219; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1220; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1221; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1222; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1223; X86-NEXT:    vpshrdvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x18]
1224; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2]
1225; X86-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
1226; X86-NEXT:    retl # encoding: [0xc3]
1227;
1228; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
1229; X64:       # %bb.0:
1230; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1231; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1232; X64-NEXT:    vpshrdvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x1f]
1233; X64-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2]
1234; X64-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
1235; X64-NEXT:    retq # encoding: [0xc3]
1236  %x2 = load <8 x i16>, <8 x i16>* %x2p
1237  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
1238  %2 = bitcast i8 %x3 to <8 x i1>
1239  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
1240  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x4)
1241  %5 = bitcast i8 %x3 to <8 x i1>
1242  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
1243  %res3 = add <8 x i16> %3, %6
1244  ret <8 x i16> %res3
1245}
1246
1247define <8 x i32> @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
1248; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
1249; X86:       # %bb.0:
1250; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1251; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1252; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1253; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1254; X86-NEXT:    vpshldvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x18]
1255; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2]
1256; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
1257; X86-NEXT:    retl # encoding: [0xc3]
1258;
1259; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
1260; X64:       # %bb.0:
1261; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1262; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1263; X64-NEXT:    vpshldvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x1f]
1264; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2]
1265; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
1266; X64-NEXT:    retq # encoding: [0xc3]
1267  %x2 = load <8 x i32>, <8 x i32>* %x2p
1268  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
1269  %2 = bitcast i8 %x3 to <8 x i1>
1270  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
1271  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
1272  %5 = bitcast i8 %x3 to <8 x i1>
1273  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
1274  %res3 = add <8 x i32> %3, %6
1275  ret <8 x i32> %res3
1276}
1277
1278define <4 x i32> @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
1279; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
1280; X86:       # %bb.0:
1281; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1282; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1283; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1284; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1285; X86-NEXT:    vpshldvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x18]
1286; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2]
1287; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1288; X86-NEXT:    retl # encoding: [0xc3]
1289;
1290; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
1291; X64:       # %bb.0:
1292; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1293; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1294; X64-NEXT:    vpshldvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x1f]
1295; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2]
1296; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
1297; X64-NEXT:    retq # encoding: [0xc3]
1298  %x2 = load <4 x i32>, <4 x i32>* %x2p
1299  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
1300  %2 = bitcast i8 %x3 to <8 x i1>
1301  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1302  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
1303  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
1304  %5 = bitcast i8 %x3 to <8 x i1>
1305  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1306  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
1307  %res3 = add <4 x i32> %3, %6
1308  ret <4 x i32> %res3
1309}
1310
1311define <4 x i64> @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) {
1312; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
1313; X86:       # %bb.0:
1314; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1315; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1316; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1317; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1318; X86-NEXT:    vpshldvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x18]
1319; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2]
1320; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
1321; X86-NEXT:    retl # encoding: [0xc3]
1322;
1323; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
1324; X64:       # %bb.0:
1325; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1326; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1327; X64-NEXT:    vpshldvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x1f]
1328; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2]
1329; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
1330; X64-NEXT:    retq # encoding: [0xc3]
1331  %x2 = load <4 x i64>, <4 x i64>* %x2p
1332  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
1333  %2 = bitcast i8 %x3 to <8 x i1>
1334  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1335  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
1336  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4)
1337  %5 = bitcast i8 %x3 to <8 x i1>
1338  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1339  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
1340  %res3 = add <4 x i64> %3, %6
1341  ret <4 x i64> %res3
1342}
1343
1344define <2 x i64> @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) {
1345; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
1346; X86:       # %bb.0:
1347; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1348; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1349; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1350; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1351; X86-NEXT:    vpshldvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x18]
1352; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2]
1353; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
1354; X86-NEXT:    retl # encoding: [0xc3]
1355;
1356; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
1357; X64:       # %bb.0:
1358; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1359; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1360; X64-NEXT:    vpshldvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x1f]
1361; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2]
1362; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
1363; X64-NEXT:    retq # encoding: [0xc3]
1364  %x2 = load <2 x i64>, <2 x i64>* %x2p
1365  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
1366  %2 = bitcast i8 %x3 to <8 x i1>
1367  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1368  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
1369  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4)
1370  %5 = bitcast i8 %x3 to <8 x i1>
1371  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
1372  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
1373  %res3 = add <2 x i64> %3, %6
1374  ret <2 x i64> %res3
1375}
1376
1377define <16 x i16> @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) {
1378; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
1379; X86:       # %bb.0:
1380; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1381; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1382; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1383; X86-NEXT:    vpshldvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x18]
1384; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2]
1385; X86-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
1386; X86-NEXT:    retl # encoding: [0xc3]
1387;
1388; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
1389; X64:       # %bb.0:
1390; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1391; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1392; X64-NEXT:    vpshldvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x1f]
1393; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2]
1394; X64-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
1395; X64-NEXT:    retq # encoding: [0xc3]
1396  %x2 = load <16 x i16>, <16 x i16>* %x2p
1397  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
1398  %2 = bitcast i16 %x3 to <16 x i1>
1399  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
1400  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4)
1401  %5 = bitcast i16 %x3 to <16 x i1>
1402  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
1403  %res3 = add <16 x i16> %3, %6
1404  ret <16 x i16> %res3
1405}
1406
1407define <8 x i16> @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) {
1408; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
1409; X86:       # %bb.0:
1410; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1411; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1412; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1413; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1414; X86-NEXT:    vpshldvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x18]
1415; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2]
1416; X86-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
1417; X86-NEXT:    retl # encoding: [0xc3]
1418;
1419; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
1420; X64:       # %bb.0:
1421; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1422; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1423; X64-NEXT:    vpshldvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x1f]
1424; X64-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2]
1425; X64-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
1426; X64-NEXT:    retq # encoding: [0xc3]
1427  %x2 = load <8 x i16>, <8 x i16>* %x2p
1428  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
1429  %2 = bitcast i8 %x3 to <8 x i1>
1430  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
1431  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4)
1432  %5 = bitcast i8 %x3 to <8 x i1>
1433  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
1434  %res3 = add <8 x i16> %3, %4
1435  ret <8 x i16> %res3
1436}
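; Illustrative note, not part of the autogenerated assertions: the VPSHLDV tests call
; llvm.fshl with the function arguments in their original order, since fshl returns the
; high half of the concatenation and %x0 supplies that half (%amounts below is a
; placeholder for the per-element counts):
;   %r = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %amounts)
; As in the vpshrdv word test above, this last function adds %3 and %4, so the zero-masked
; %6 is dead and the second vpshldvw is emitted unmasked.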
1437
1438declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
1439declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
1440declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
1441declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
1442declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
1443declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
1444declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
1445declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
1446declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
1447declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
1448declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
1449declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
1450declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>)
1451declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>)
1452declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>)
1453declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>)
1454declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>)
1455declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>)
1456declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>)
1457declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>)
1458declare <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
1459declare <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
1460declare <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
1461declare <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
1462declare <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
1463declare <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)
1464declare <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
1465declare <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)
1466