; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X64 --check-prefix=X64-AVX512VL
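; These tests check that each AVX2 intrinsic lowers to the expected instruction
; and encoding on 32-bit and 64-bit targets, both with plain AVX2 and with
; AVX512VL (where EVEX-to-VEX compression applies).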

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_packssdw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packssdw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packssdw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packssdw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone


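; Constant arguments should be constant folded, leaving only a single load of
; the packed result from the constant pool.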
define <16 x i16> @test_x86_avx2_packssdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovaps LCPI1_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI1_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI1_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}


define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_packsswb:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packsswb:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packsswb:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packsswb:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packsswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovaps LCPI3_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI3_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI3_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}


define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_packuswb:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x67,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packuswb:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packuswb:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x67,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packuswb:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packuswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovaps LCPI5_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI5_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI5_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}


define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_padds_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xec,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_padds_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_padds_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xec,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_padds_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_padds_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xed,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_padds_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_padds_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xed,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_padds_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_paddus_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdc,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_paddus_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_paddus_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdc,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_paddus_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_paddus_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdd,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_paddus_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_paddus_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdd,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_paddus_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_wd:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf5,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_wd:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmadd_wd:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf5,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_wd:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxs_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xee,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxs_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xee,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxs_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xee,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxs_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xee,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxu_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xde,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxu_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxu_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xde,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxu_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmins_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xea,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmins_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xea,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmins_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xea,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmins_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xea,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminu_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xda,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminu_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminu_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xda,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminu_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone


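; The result here is a scalar, so vzeroupper is expected before the return:
; no ymm value is live out of the function.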
define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovmskb:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovmskb %ymm0, %eax ## encoding: [0xc5,0xfd,0xd7,0xc0]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pmovmskb:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovmskb %ymm0, %eax ## encoding: [0xc5,0xfd,0xd7,0xc0]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmulh_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe5,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmulh_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmulh_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe5,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmulh_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmulhu_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe4,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmulhu_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmulhu_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe4,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmulhu_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psad_bw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf6,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psad_bw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psad_bw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf6,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psad_bw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psll_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf2,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psll_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psll_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf2,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psll_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psll_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf3,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psll_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psll_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf3,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psll_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psll_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf1,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psll_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psll_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf1,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psll_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
; X86-AVX-LABEL: test_x86_avx2_pslli_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpslld $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pslli_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpslld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pslli_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpslld $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pslli_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpslld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
; X86-AVX-LABEL: test_x86_avx2_pslli_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllq $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pslli_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pslli_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllq $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pslli_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
; X86-AVX-LABEL: test_x86_avx2_pslli_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pslli_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pslli_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pslli_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psra_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe2,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psra_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psra_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe2,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psra_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psra_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe1,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psra_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psra_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe1,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psra_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrai_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrad $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrai_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrad $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrai_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrad $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrai_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrad $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrai_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsraw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrai_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsraw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrai_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsraw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrai_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsraw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrl_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd2,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd2,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrl_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd3,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd3,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrl_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd1,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd1,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrli_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrld $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrli_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrli_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrld $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrli_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrli_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlq $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrli_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrli_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlq $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrli_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrli_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrli_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrli_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrli_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone


define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psubs_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe8,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psubs_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psubs_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe8,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psubs_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psubs_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe9,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psubs_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psubs_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe9,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psubs_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psubus_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd8,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psubus_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psubus_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd8,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psubus_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psubus_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd9,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psubus_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psubus_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd9,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psubus_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_phadd_d:
; X86:       ## %bb.0:
; X86-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x02,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phadd_d:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x02,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_phadd_sw:
; X86:       ## %bb.0:
; X86-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x03,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phadd_sw:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x03,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_phadd_w:
; X86:       ## %bb.0:
; X86-NEXT:    vphaddw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x01,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phadd_w:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x01,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_phsub_d:
; X86:       ## %bb.0:
; X86-NEXT:    vphsubd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x06,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phsub_d:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x06,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_phsub_sw:
; X86:       ## %bb.0:
; X86-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x07,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phsub_sw:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x07,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_phsub_w:
; X86:       ## %bb.0:
; X86-NEXT:    vphsubw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x05,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phsub_w:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x05,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_ub_sw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmadd_ub_sw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone

; Make sure we don't commute this operation.
define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovdqa (%eax), %ymm1 ## encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovdqa (%eax), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX512VL-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovdqa (%rdi), %ymm1 ## encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX512VL-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %a0 = load <32 x i8>, <32 x i8>* %ptr
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}

1186define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
1187; X86-AVX-LABEL: test_x86_avx2_pmul_hr_sw:
1188; X86-AVX:       ## %bb.0:
1189; X86-AVX-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
1190; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1191;
1192; X86-AVX512VL-LABEL: test_x86_avx2_pmul_hr_sw:
1193; X86-AVX512VL:       ## %bb.0:
1194; X86-AVX512VL-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
1195; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1196;
1197; X64-AVX-LABEL: test_x86_avx2_pmul_hr_sw:
1198; X64-AVX:       ## %bb.0:
1199; X64-AVX-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
1200; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1201;
1202; X64-AVX512VL-LABEL: test_x86_avx2_pmul_hr_sw:
1203; X64-AVX512VL:       ## %bb.0:
1204; X64-AVX512VL-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
1205; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
1206  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
1207  ret <16 x i16> %res
1208}
1209declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
1210
1211
1212define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
1213; X86-AVX-LABEL: test_x86_avx2_pshuf_b:
1214; X86-AVX:       ## %bb.0:
1215; X86-AVX-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
1216; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1217;
1218; X86-AVX512VL-LABEL: test_x86_avx2_pshuf_b:
1219; X86-AVX512VL:       ## %bb.0:
1220; X86-AVX512VL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
1221; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1222;
1223; X64-AVX-LABEL: test_x86_avx2_pshuf_b:
1224; X64-AVX:       ## %bb.0:
1225; X64-AVX-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
1226; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1227;
1228; X64-AVX512VL-LABEL: test_x86_avx2_pshuf_b:
1229; X64-AVX512VL:       ## %bb.0:
1230; X64-AVX512VL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
1231; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone


define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_x86_avx2_psign_b:
; X86:       ## %bb.0:
; X86-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x08,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_psign_b:
; X64:       ## %bb.0:
; X64-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x08,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_psign_d:
; X86:       ## %bb.0:
; X86-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0a,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_psign_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0a,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_psign_w:
; X86:       ## %bb.0:
; X86-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x09,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_psign_w:
; X64:       ## %bb.0:
; X64-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x09,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_x86_avx2_mpsadbw:
; X86:       ## %bb.0:
; X86-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x42,0xc1,0x07]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_mpsadbw:
; X64:       ## %bb.0:
; X64-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x42,0xc1,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_packusdw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packusdw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packusdw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packusdw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI54_0, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovaps LCPI54_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI54_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI54_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI54_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; X86-LABEL: test_x86_avx2_pblendvb:
; X86:       ## %bb.0:
; X86-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x4c,0xc1,0x20]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pblendvb:
; X64:       ## %bb.0:
; X64-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x4c,0xc1,0x20]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_pblendw:
; X86:       ## %bb.0:
; X86-NEXT:    vpblendw $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0e,0xc1,0x07]
; X86-NEXT:    ## ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pblendw:
; X64:       ## %bb.0:
; X64-NEXT:    vpblendw $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0e,0xc1,0x07]
; X64-NEXT:    ## ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxsb:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxsb:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxsb:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxsb:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxsd:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxsd:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxsd:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxsd:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxud:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxud:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxud:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxud:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxuw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxuw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxuw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxuw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminsb:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminsb:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminsb:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminsb:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminsd:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminsd:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminsd:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminsd:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminud:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminud:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminud:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminud:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminuw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminuw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminuw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminuw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_128:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; X86-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pblendd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; X64-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; X86-NEXT:    ## ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pblendd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; X64-NEXT:    ## ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone

; Check that the arguments are swapped between the intrinsic definition and
; its lowering: the permutation indices are the first source operand of the
; instruction.
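; Concretely (an illustrative sketch, not checked output): for
;   %r = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %data, <8 x i32> %idx)
; the lowering below selects "vpermps %ymm_data, %ymm_idx, %ymm_r" (AT&T
; syntax), i.e. %idx lands in the ISA's first source slot (VEX.vvvv) even
; though it is the intrinsic's second argument.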
define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_permd:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_permd:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_permd:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_permd:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly

; Check that the arguments are swapped between the intrinsic definition and
; its lowering, as illustrated above for permd: the permutation indices are
; the first source operand of the instruction.
1671define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) {
1672; X86-AVX-LABEL: test_x86_avx2_permps:
1673; X86-AVX:       ## %bb.0:
1674; X86-AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x16,0xc0]
1675; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1676;
1677; X86-AVX512VL-LABEL: test_x86_avx2_permps:
1678; X86-AVX512VL:       ## %bb.0:
1679; X86-AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
1680; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1681;
1682; X64-AVX-LABEL: test_x86_avx2_permps:
1683; X64-AVX:       ## %bb.0:
1684; X64-AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x16,0xc0]
1685; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1686;
1687; X64-AVX512VL-LABEL: test_x86_avx2_permps:
1688; X64-AVX512VL:       ## %bb.0:
1689; X64-AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
1690; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
1691  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
1692  ret <8 x float> %res
1693}
1694declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
1695
1696
1697define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
1698; X86-LABEL: test_x86_avx2_maskload_q:
1699; X86:       ## %bb.0:
1700; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1701; X86-NEXT:    vpmaskmovq (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x8c,0x00]
1702; X86-NEXT:    retl ## encoding: [0xc3]
1703;
1704; X64-LABEL: test_x86_avx2_maskload_q:
1705; X64:       ## %bb.0:
1706; X64-NEXT:    vpmaskmovq (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x8c,0x07]
1707; X64-NEXT:    retq ## encoding: [0xc3]
1708  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1709  ret <2 x i64> %res
1710}
1711declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
1712
1713
1714define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
1715; X86-LABEL: test_x86_avx2_maskload_q_256:
1716; X86:       ## %bb.0:
1717; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1718; X86-NEXT:    vpmaskmovq (%eax), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x8c,0x00]
1719; X86-NEXT:    retl ## encoding: [0xc3]
1720;
1721; X64-LABEL: test_x86_avx2_maskload_q_256:
1722; X64:       ## %bb.0:
1723; X64-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x8c,0x07]
1724; X64-NEXT:    retq ## encoding: [0xc3]
1725  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1726  ret <4 x i64> %res
1727}
1728declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
1729
1730
1731define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
1732; X86-LABEL: test_x86_avx2_maskload_d:
1733; X86:       ## %bb.0:
1734; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1735; X86-NEXT:    vpmaskmovd (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x8c,0x00]
1736; X86-NEXT:    retl ## encoding: [0xc3]
1737;
1738; X64-LABEL: test_x86_avx2_maskload_d:
1739; X64:       ## %bb.0:
1740; X64-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x8c,0x07]
1741; X64-NEXT:    retq ## encoding: [0xc3]
1742  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1743  ret <4 x i32> %res
1744}
1745declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
1746
1747
1748define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
1749; X86-LABEL: test_x86_avx2_maskload_d_256:
1750; X86:       ## %bb.0:
1751; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1752; X86-NEXT:    vpmaskmovd (%eax), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x8c,0x00]
1753; X86-NEXT:    retl ## encoding: [0xc3]
1754;
1755; X64-LABEL: test_x86_avx2_maskload_d_256:
1756; X64:       ## %bb.0:
1757; X64-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x8c,0x07]
1758; X64-NEXT:    retq ## encoding: [0xc3]
1759  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1760  ret <8 x i32> %res
1761}
1762declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
1763
1764
1765define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
1766; X86-LABEL: test_x86_avx2_maskstore_q:
1767; X86:       ## %bb.0:
1768; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1769; X86-NEXT:    vpmaskmovq %xmm1, %xmm0, (%eax) ## encoding: [0xc4,0xe2,0xf9,0x8e,0x08]
1770; X86-NEXT:    retl ## encoding: [0xc3]
1771;
1772; X64-LABEL: test_x86_avx2_maskstore_q:
1773; X64:       ## %bb.0:
1774; X64-NEXT:    vpmaskmovq %xmm1, %xmm0, (%rdi) ## encoding: [0xc4,0xe2,0xf9,0x8e,0x0f]
1775; X64-NEXT:    retq ## encoding: [0xc3]
1776  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
1777  ret void
1778}
1779declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
1780
1781
1782define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
1783; X86-LABEL: test_x86_avx2_maskstore_q_256:
1784; X86:       ## %bb.0:
1785; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1786; X86-NEXT:    vpmaskmovq %ymm1, %ymm0, (%eax) ## encoding: [0xc4,0xe2,0xfd,0x8e,0x08]
1787; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1788; X86-NEXT:    retl ## encoding: [0xc3]
1789;
1790; X64-LABEL: test_x86_avx2_maskstore_q_256:
1791; X64:       ## %bb.0:
1792; X64-NEXT:    vpmaskmovq %ymm1, %ymm0, (%rdi) ## encoding: [0xc4,0xe2,0xfd,0x8e,0x0f]
1793; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1794; X64-NEXT:    retq ## encoding: [0xc3]
1795  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
1796  ret void
1797}
1798declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
1799
1800
1801define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
1802; X86-LABEL: test_x86_avx2_maskstore_d:
1803; X86:       ## %bb.0:
1804; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1805; X86-NEXT:    vpmaskmovd %xmm1, %xmm0, (%eax) ## encoding: [0xc4,0xe2,0x79,0x8e,0x08]
1806; X86-NEXT:    retl ## encoding: [0xc3]
1807;
1808; X64-LABEL: test_x86_avx2_maskstore_d:
1809; X64:       ## %bb.0:
1810; X64-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi) ## encoding: [0xc4,0xe2,0x79,0x8e,0x0f]
1811; X64-NEXT:    retq ## encoding: [0xc3]
1812  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
1813  ret void
1814}
1815declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
1816
1817
1818define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
1819; X86-LABEL: test_x86_avx2_maskstore_d_256:
1820; X86:       ## %bb.0:
1821; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1822; X86-NEXT:    vpmaskmovd %ymm1, %ymm0, (%eax) ## encoding: [0xc4,0xe2,0x7d,0x8e,0x08]
1823; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1824; X86-NEXT:    retl ## encoding: [0xc3]
1825;
1826; X64-LABEL: test_x86_avx2_maskstore_d_256:
1827; X64:       ## %bb.0:
1828; X64-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi) ## encoding: [0xc4,0xe2,0x7d,0x8e,0x0f]
1829; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1830; X64-NEXT:    retq ## encoding: [0xc3]
1831  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
1832  ret void
1833}
1834declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
1835
1836
1837define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
1838; X86-AVX-LABEL: test_x86_avx2_psllv_d:
1839; X86-AVX:       ## %bb.0:
1840; X86-AVX-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x47,0xc1]
1841; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1842;
1843; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d:
1844; X86-AVX512VL:       ## %bb.0:
1845; X86-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1]
1846; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1847;
1848; X64-AVX-LABEL: test_x86_avx2_psllv_d:
1849; X64-AVX:       ## %bb.0:
1850; X64-AVX-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x47,0xc1]
1851; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1852;
1853; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d:
1854; X64-AVX512VL:       ## %bb.0:
1855; X64-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1]
1856; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
1857  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1858  ret <4 x i32> %res
1859}
1860declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
1861
1862
1863define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
1864; X86-AVX-LABEL: test_x86_avx2_psllv_d_256:
1865; X86-AVX:       ## %bb.0:
1866; X86-AVX-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
1867; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1868;
1869; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256:
1870; X86-AVX512VL:       ## %bb.0:
1871; X86-AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
1872; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1873;
1874; X64-AVX-LABEL: test_x86_avx2_psllv_d_256:
1875; X64-AVX:       ## %bb.0:
1876; X64-AVX-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
1877; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1878;
1879; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256:
1880; X64-AVX512VL:       ## %bb.0:
1881; X64-AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
1882; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
1883  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1884  ret <8 x i32> %res
1885}
1886declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1887
1888
1889define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
1890; X86-AVX-LABEL: test_x86_avx2_psllv_q:
1891; X86-AVX:       ## %bb.0:
1892; X86-AVX-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
1893; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1894;
1895; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q:
1896; X86-AVX512VL:       ## %bb.0:
1897; X86-AVX512VL-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
1898; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1899;
1900; X64-AVX-LABEL: test_x86_avx2_psllv_q:
1901; X64-AVX:       ## %bb.0:
1902; X64-AVX-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
1903; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1904;
1905; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q:
1906; X64-AVX512VL:       ## %bb.0:
1907; X64-AVX512VL-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
1908; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
1909  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1910  ret <2 x i64> %res
1911}
1912declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
1913
1914
1915define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
1916; X86-AVX-LABEL: test_x86_avx2_psllv_q_256:
1917; X86-AVX:       ## %bb.0:
1918; X86-AVX-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
1919; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1920;
1921; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_256:
1922; X86-AVX512VL:       ## %bb.0:
1923; X86-AVX512VL-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
1924; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1925;
1926; X64-AVX-LABEL: test_x86_avx2_psllv_q_256:
1927; X64-AVX:       ## %bb.0:
1928; X64-AVX-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
1929; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1930;
1931; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256:
1932; X64-AVX512VL:       ## %bb.0:
1933; X64-AVX512VL-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
1934; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
1935  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1936  ret <4 x i64> %res
1937}
1938declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
1939
1940
1941define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
1942; X86-AVX-LABEL: test_x86_avx2_psrlv_d:
1943; X86-AVX:       ## %bb.0:
1944; X86-AVX-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x45,0xc1]
1945; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1946;
1947; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d:
1948; X86-AVX512VL:       ## %bb.0:
1949; X86-AVX512VL-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1]
1950; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1951;
1952; X64-AVX-LABEL: test_x86_avx2_psrlv_d:
1953; X64-AVX:       ## %bb.0:
1954; X64-AVX-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x45,0xc1]
1955; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1956;
1957; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d:
1958; X64-AVX512VL:       ## %bb.0:
1959; X64-AVX512VL-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1]
1960; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
1961  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1962  ret <4 x i32> %res
1963}
1964declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
1965
1966
1967define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
1968; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256:
1969; X86-AVX:       ## %bb.0:
1970; X86-AVX-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
1971; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1972;
1973; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256:
1974; X86-AVX512VL:       ## %bb.0:
1975; X86-AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
1976; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
1977;
1978; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256:
1979; X64-AVX:       ## %bb.0:
1980; X64-AVX-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
1981; X64-AVX-NEXT:    retq ## encoding: [0xc3]
1982;
1983; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256:
1984; X64-AVX512VL:       ## %bb.0:
1985; X64-AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
1986; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
1987  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1988  ret <8 x i32> %res
1989}
1990declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1991
1992
1993define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
1994; X86-AVX-LABEL: test_x86_avx2_psrlv_q:
1995; X86-AVX:       ## %bb.0:
1996; X86-AVX-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
1997; X86-AVX-NEXT:    retl ## encoding: [0xc3]
1998;
1999; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q:
2000; X86-AVX512VL:       ## %bb.0:
2001; X86-AVX512VL-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
2002; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
2003;
2004; X64-AVX-LABEL: test_x86_avx2_psrlv_q:
2005; X64-AVX:       ## %bb.0:
2006; X64-AVX-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
2007; X64-AVX-NEXT:    retq ## encoding: [0xc3]
2008;
2009; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q:
2010; X64-AVX512VL:       ## %bb.0:
2011; X64-AVX512VL-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
2012; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
2013  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
2014  ret <2 x i64> %res
2015}
2016declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
2017
2018
2019define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
2020; X86-AVX-LABEL: test_x86_avx2_psrlv_q_256:
2021; X86-AVX:       ## %bb.0:
2022; X86-AVX-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
2023; X86-AVX-NEXT:    retl ## encoding: [0xc3]
2024;
2025; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256:
2026; X86-AVX512VL:       ## %bb.0:
2027; X86-AVX512VL-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
2028; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
2029;
2030; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256:
2031; X64-AVX:       ## %bb.0:
2032; X64-AVX-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
2033; X64-AVX-NEXT:    retq ## encoding: [0xc3]
2034;
2035; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256:
2036; X64-AVX512VL:       ## %bb.0:
2037; X64-AVX512VL-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
2038; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
2039  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
2040  ret <4 x i64> %res
2041}
2042declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
2043
2044
2045define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
2046; X86-AVX-LABEL: test_x86_avx2_psrav_d:
2047; X86-AVX:       ## %bb.0:
2048; X86-AVX-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0xc1]
2049; X86-AVX-NEXT:    retl ## encoding: [0xc3]
2050;
2051; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d:
2052; X86-AVX512VL:       ## %bb.0:
2053; X86-AVX512VL-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1]
2054; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
2055;
2056; X64-AVX-LABEL: test_x86_avx2_psrav_d:
2057; X64-AVX:       ## %bb.0:
2058; X64-AVX-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0xc1]
2059; X64-AVX-NEXT:    retq ## encoding: [0xc3]
2060;
2061; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d:
2062; X64-AVX512VL:       ## %bb.0:
2063; X64-AVX512VL-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1]
2064; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
2065  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2066  ret <4 x i32> %res
2067}
2068
2069define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) {
2070; X86-AVX-LABEL: test_x86_avx2_psrav_d_const:
2071; X86-AVX:       ## %bb.0:
2072; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
2073; X86-AVX-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
2074; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4
2075; X86-AVX-NEXT:    vpsravd LCPI86_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
2076; X86-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4
2077; X86-AVX-NEXT:    retl ## encoding: [0xc3]
2078;
2079; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
2080; X86-AVX512VL:       ## %bb.0:
2081; X86-AVX512VL-NEXT:    vmovdqa LCPI86_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
2082; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
2083; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4
2084; X86-AVX512VL-NEXT:    vpsravd LCPI86_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
2085; X86-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4
2086; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
2087;
2088; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:
2089; X64-AVX:       ## %bb.0:
2090; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
2091; X64-AVX-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
2092; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI86_0-4, kind: reloc_riprel_4byte
2093; X64-AVX-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
2094; X64-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI86_1-4, kind: reloc_riprel_4byte
2095; X64-AVX-NEXT:    retq ## encoding: [0xc3]
2096;
2097; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
2098; X64-AVX512VL:       ## %bb.0:
2099; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
2100; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
2101; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI86_0-4, kind: reloc_riprel_4byte
2102; X64-AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
2103; X64-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI86_1-4, kind: reloc_riprel_4byte
2104; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
2105  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
2106  ret <4 x i32> %res
2107}
2108declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
2109
2110define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
2111; X86-AVX-LABEL: test_x86_avx2_psrav_d_256:
2112; X86-AVX:       ## %bb.0:
2113; X86-AVX-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
2114; X86-AVX-NEXT:    retl ## encoding: [0xc3]
2115;
2116; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256:
2117; X86-AVX512VL:       ## %bb.0:
2118; X86-AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
2119; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
2120;
2121; X64-AVX-LABEL: test_x86_avx2_psrav_d_256:
2122; X64-AVX:       ## %bb.0:
2123; X64-AVX-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
2124; X64-AVX-NEXT:    retq ## encoding: [0xc3]
2125;
2126; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256:
2127; X64-AVX512VL:       ## %bb.0:
2128; X64-AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
2129; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
2130  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
2131  ret <8 x i32> %res
2132}
2133
2134define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) {
2135; X86-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
2136; X86-AVX:       ## %bb.0:
2137; X86-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
2138; X86-AVX-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
2139; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4
2140; X86-AVX-NEXT:    vpsravd LCPI88_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
2141; X86-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4
2142; X86-AVX-NEXT:    retl ## encoding: [0xc3]
2143;
2144; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
2145; X86-AVX512VL:       ## %bb.0:
2146; X86-AVX512VL-NEXT:    vmovdqa LCPI88_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
2147; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
2148; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4
2149; X86-AVX512VL-NEXT:    vpsravd LCPI88_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
2150; X86-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4
2151; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
2152;
2153; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
2154; X64-AVX:       ## %bb.0:
2155; X64-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
2156; X64-AVX-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
2157; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI88_0-4, kind: reloc_riprel_4byte
2158; X64-AVX-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
2159; X64-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI88_1-4, kind: reloc_riprel_4byte
2160; X64-AVX-NEXT:    retq ## encoding: [0xc3]
2161;
2162; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
2163; X64-AVX512VL:       ## %bb.0:
2164; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
2165; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
2166; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI88_0-4, kind: reloc_riprel_4byte
2167; X64-AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
2168; X64-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI88_1-4, kind: reloc_riprel_4byte
2169; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
2170  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
2171  ret <8 x i32> %res
2172}
2173declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
2174
2175define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
2176; X86-LABEL: test_x86_avx2_gather_d_pd:
2177; X86:       ## %bb.0:
2178; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2179; X86-NEXT:    vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x48]
2180; X86-NEXT:    retl ## encoding: [0xc3]
2181;
2182; X64-LABEL: test_x86_avx2_gather_d_pd:
2183; X64:       ## %bb.0:
2184; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
2185; X64-NEXT:    retq ## encoding: [0xc3]
2186  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
2187                            i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
2188  ret <2 x double> %res
2189}
2190declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
2191                      <4 x i32>, <2 x double>, i8) nounwind readonly
2192
2193define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
2194; X86-LABEL: test_x86_avx2_gather_d_pd_256:
2195; X86:       ## %bb.0:
2196; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2197; X86-NEXT:    vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x92,0x04,0x48]
2198; X86-NEXT:    retl ## encoding: [0xc3]
2199;
2200; X64-LABEL: test_x86_avx2_gather_d_pd_256:
2201; X64:       ## %bb.0:
2202; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f]
2203; X64-NEXT:    retq ## encoding: [0xc3]
2204  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
2205                            i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
2206  ret <4 x double> %res
2207}
2208declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
2209                      <4 x i32>, <4 x double>, i8) nounwind readonly
2210
2211define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, <2 x i64> %idx, <2 x double> %mask) {
2212; X86-LABEL: test_x86_avx2_gather_q_pd:
2213; X86:       ## %bb.0:
2214; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2215; X86-NEXT:    vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x48]
2216; X86-NEXT:    retl ## encoding: [0xc3]
2217;
2218; X64-LABEL: test_x86_avx2_gather_q_pd:
2219; X64:       ## %bb.0:
2220; X64-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x4f]
2221; X64-NEXT:    retq ## encoding: [0xc3]
2222  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
2223                            i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
2224  ret <2 x double> %res
2225}
2226declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*,
2227                      <2 x i64>, <2 x double>, i8) nounwind readonly
2228
define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1, <4 x i64> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x93,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x93,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
                      <4 x i64>, <4 x double>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x92,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x92,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
                      <4 x i32>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                            i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
                      <8 x i32>, <8 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, <2 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x93,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x93,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
                            i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
                      <2 x i64>, <4 x float>, i8) nounwind readonly

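; Note: the 256-bit q_ps/q_d forms below take qword indices in a ymm register
; but produce only four dword-sized elements, so the destination and mask stay
; in xmm registers. Since no ymm value is live on return, vzeroupper is
; emitted to avoid AVX-SSE transition penalties in the caller.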
define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, <4 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x48]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps_256:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x4f]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
                      <4 x i64>, <4 x float>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, <4 x i32> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
                            i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
                      <4 x i32>, <2 x i64>, i8) nounwind readonly

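; Note: the 256-bit d_q form is the converse: four dword indices fit in an
; xmm register while the four qword results fill a ymm register, and since
; the ymm result is live on return no vzeroupper is needed.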
define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, <4 x i32> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x90,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
                      <4 x i32>, <4 x i64>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, <2 x i64> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                            i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
                      <2 x i64>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, <4 x i64> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x91,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x91,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
                      <4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, <4 x i32> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x90,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x90,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
                      <4 x i32>, <4 x i32>, i8) nounwind readonly

define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, <8 x i32> %idx, <8 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
                            i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
                      <8 x i32>, <8 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, <2 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x91,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x91,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
                            i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
                      <2 x i64>, <4 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, <4 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x48]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x4f]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
                      <4 x i64>, <4 x i32>, i8) nounwind readonly

; PR13298
define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx, <8 x float> %mask, float* nocapture %out) {
;; gather with mask
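;; The gather instruction zeroes its mask operand as it executes, so codegen
;; first copies %mask (ymm2) to a scratch register (ymm3) for the gather;
;; the original value in ymm2 stays live for the mask store below.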
; X86-AVX-LABEL: test_gather_mask:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX-NEXT:    vmovaps %ymm2, %ymm3 ## encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX-NEXT:    vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX-NEXT:    vmovups %ymm2, (%eax) ## encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_gather_mask:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX512VL-NEXT:    vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX512VL-NEXT:    vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_gather_mask:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps %ymm2, %ymm3 ## encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX-NEXT:    vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX-NEXT:    vmovups %ymm2, (%rsi) ## encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_gather_mask:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX512VL-NEXT:    vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX512VL-NEXT:    vmovups %ymm2, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %a_i8 = bitcast float* %a to i8*
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                           i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ;

;; for debugging, we'll just dump out the mask
  %out_ptr = bitcast float* %out to <8 x float>*
  store <8 x float> %mask, <8 x float>* %out_ptr, align 4

  ret <8 x float> %res
}
