; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; This test works just like the non-upgrade one except that it only checks
; forms which require auto-upgrading.

12define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
13; SSE-LABEL: test_x86_sse41_blendpd:
14; SSE:       ## %bb.0:
15; SSE-NEXT:    blendps $12, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x0c]
16; SSE-NEXT:    ## xmm0 = xmm0[0,1],xmm1[2,3]
17; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
18;
19; AVX-LABEL: test_x86_sse41_blendpd:
20; AVX:       ## %bb.0:
21; AVX-NEXT:    vblendps $3, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
22; AVX-NEXT:    ## xmm0 = xmm0[0,1],xmm1[2,3]
23; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
24  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 6) ; <<2 x double>> [#uses=1]
25  ret <2 x double> %res
26}
27declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
28
29
; Legacy i32-immediate form of @llvm.x86.sse41.blendps; requires auto-upgrade.
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_blendps:
; SSE:       ## %bb.0:
; SSE-NEXT:    blendps $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x07]
; SSE-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_blendps:
; AVX:       ## %bb.0:
; AVX-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; AVX-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone


; Legacy i32-immediate form of @llvm.x86.sse41.dppd; requires auto-upgrade.
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse41_dppd:
; SSE:       ## %bb.0:
; SSE-NEXT:    dppd $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x41,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_dppd:
; AVX:       ## %bb.0:
; AVX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x41,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone


; Legacy i32-immediate form of @llvm.x86.sse41.dpps; requires auto-upgrade.
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_dpps:
; SSE:       ## %bb.0:
; SSE-NEXT:    dpps $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x40,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_dpps:
; AVX:       ## %bb.0:
; AVX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x40,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone


; Legacy i32-immediate form of @llvm.x86.sse41.insertps; requires auto-upgrade.
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_insertps:
; SSE:       ## %bb.0:
; SSE-NEXT:    insertps $17, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x21,0xc1,0x11]
; SSE-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_insertps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
; AVX1-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_insertps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
; AVX512-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 17) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone


; Legacy i8*-operand form of @llvm.x86.sse41.movntdqa; requires auto-upgrade.
define <2 x i64> @test_x86_sse41_movntdqa(<2 x i64>* %a0) {
; X86-SSE-LABEL: test_x86_sse41_movntdqa:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movntdqa (%eax), %xmm0 ## encoding: [0x66,0x0f,0x38,0x2a,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse41_movntdqa:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovntdqa (%eax), %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2a,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse41_movntdqa:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovntdqa (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2a,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse41_movntdqa:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movntdqa (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x38,0x2a,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse41_movntdqa:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovntdqa (%rdi), %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2a,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse41_movntdqa:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovntdqa (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2a,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %arg0 = bitcast <2 x i64>* %a0 to i8*
  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone


; Legacy i32-immediate form of @llvm.x86.sse41.mpsadbw; requires auto-upgrade.
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse41_mpsadbw:
; SSE:       ## %bb.0:
; SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_mpsadbw:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone


; Legacy i32-immediate form of @llvm.x86.sse41.pblendw; requires auto-upgrade.
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse41_pblendw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pblendw $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc1,0x07]
; SSE-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_pblendw:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpblendw $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
; AVX-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone


; Legacy @llvm.x86.sse41.pmovsxbd intrinsic; requires auto-upgrade.
define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxbd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxbd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x21,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxbd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxbd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxbd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovsxbq intrinsic; requires auto-upgrade.
define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxbq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxbq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x22,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxbq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxbq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxbq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxbq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovsxbw intrinsic; requires auto-upgrade.
define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxbw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxbw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x20,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxbw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxbw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxbw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovsxdq intrinsic; requires auto-upgrade.
define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxdq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxdq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x25,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxdq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxdq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxdq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovsxwd intrinsic; requires auto-upgrade.
define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxwd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxwd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x23,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxwd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxwd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxwd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovsxwq intrinsic; requires auto-upgrade.
define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxwq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxwq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x24,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxwq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxwq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovzxbd intrinsic; requires auto-upgrade.
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxbd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxbd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x31,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxbd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxbd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxbd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxbd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovzxbq intrinsic; requires auto-upgrade.
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxbq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxbq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x32,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxbq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxbq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxbq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxbq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovzxbw intrinsic; requires auto-upgrade.
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxbw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxbw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x30,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxbw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxbw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxbw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxbw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovzxdq intrinsic; requires auto-upgrade.
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxdq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxdq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x35,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxdq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxdq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxdq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxdq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovzxwd intrinsic; requires auto-upgrade.
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxwd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxwd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x33,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxwd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxwd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxwd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


; Legacy @llvm.x86.sse41.pmovzxwq intrinsic; requires auto-upgrade.
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxwq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxwq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x34,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxwq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxwq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone

; Legacy @llvm.x86.sse41.pmaxsb intrinsic; requires auto-upgrade.
define <16 x i8> @max_epi8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: max_epi8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epi8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epi8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone

; Legacy @llvm.x86.sse41.pminsb intrinsic; requires auto-upgrade.
define <16 x i8> @min_epi8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: min_epi8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x38,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epi8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epi8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone

; Legacy @llvm.x86.sse41.pmaxuw intrinsic; requires auto-upgrade.
define <8 x i16> @max_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: max_epu16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epu16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epu16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone

; Legacy @llvm.x86.sse41.pminuw intrinsic; requires auto-upgrade.
define <8 x i16> @min_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: min_epu16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epu16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epu16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone

; Legacy @llvm.x86.sse41.pmaxsd intrinsic; requires auto-upgrade.
define <4 x i32> @max_epi32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: max_epi32:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3d,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epi32:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epi32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone

; Legacy @llvm.x86.sse41.pminsd intrinsic; requires auto-upgrade.
define <4 x i32> @min_epi32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: min_epi32:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x39,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epi32:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epi32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

; Legacy @llvm.x86.sse41.pmaxud intrinsic; requires auto-upgrade.
define <4 x i32> @max_epu32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: max_epu32:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epu32:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epu32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

; Legacy @llvm.x86.sse41.pminud intrinsic; requires auto-upgrade.
define <4 x i32> @min_epu32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: min_epu32:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epu32:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epu32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


; Legacy @llvm.x86.sse41.pmuldq intrinsic; requires auto-upgrade.
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pmuldq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmuldq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmuldq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x28,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmuldq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
