; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX512,X64-AVX512


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sqrt_pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sqrt_pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sqrt_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sqrt_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00]
; X86-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
; X86-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07]
; X64-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
; X64-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a0, align 16
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq_bs:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x07]
; SSE-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_dq_bs:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x07]
; AVX1-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_dq_bs:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x07]
; AVX512-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrl_dq_bs:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x07]
; SSE-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_dq_bs:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x07]
; AVX1-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_dq_bs:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x07]
; AVX512-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x01]
; SSE-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX1-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX512-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrl_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x01]
; SSE-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX1-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX512-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse2_cvtps2pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0 ## encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtps2pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtps2pd %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtps2pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtps2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; X86-SSE-LABEL: test_x86_sse2_storel_dq:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlps %xmm0, (%eax) ## encoding: [0x0f,0x13,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storel_dq:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storel_dq:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storel_dq:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movlps %xmm0, (%rdi) ## encoding: [0x0f,0x13,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storel_dq:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovlps %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storel_dq:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovlps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
; X86-SSE-LABEL: test_x86_sse2_storeu_dq:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
; X86-SSE-NEXT:    psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
; X86-SSE-NEXT:    movdqu %xmm0, (%eax) ## encoding: [0xf3,0x0f,0x7f,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX1-NEXT:    vmovdqu %xmm0, (%eax) ## encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX512-NEXT:    vmovdqu %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storeu_dq:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
; X64-SSE-NEXT:    psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
; X64-SSE-NEXT:    movdqu %xmm0, (%rdi) ## encoding: [0xf3,0x0f,0x7f,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX1-NEXT:    vmovdqu %xmm0, (%rdi) ## encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX512-NEXT:    vmovdqu %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; fadd operation forces the execution domain.
; X86-SSE-LABEL: test_x86_sse2_storeu_pd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X86-SSE-NEXT:    movhpd LCPI11_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-SSE-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-SSE-NEXT:    addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X86-SSE-NEXT:    movupd %xmm1, (%eax) ## encoding: [0x66,0x0f,0x11,0x08]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX1-NEXT:    vmovhpd LCPI11_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX1-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX1-NEXT:    vmovupd %xmm0, (%eax) ## encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX512-NEXT:    vmovhpd LCPI11_0, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX512-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX512-NEXT:    vmovupd %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storeu_pd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X64-SSE-NEXT:    movhpd {{.*}}(%rip), %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-SSE-NEXT:    addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X64-SSE-NEXT:    movupd %xmm1, (%rdi) ## encoding: [0x66,0x0f,0x11,0x0f]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX1-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX1-NEXT:    vmovupd %xmm0, (%rdi) ## encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX512-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX512-NEXT:    vmovupd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind

define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; SSE-LABEL: test_x86_sse2_pshuf_d:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshufd $27, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshuf_d:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpermilps $27, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshuf_d:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpermilps $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; SSE-LABEL: test_x86_sse2_pshufl_w:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshuflw $27, %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshufl_w:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpshuflw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshufl_w:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpshuflw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; SSE-LABEL: test_x86_sse2_pshufh_w:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshufhw $27, %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshufh_w:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpshufhw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshufh_w:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpshufhw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone

define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: max_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xde,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xde,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: min_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: max_epi16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xee,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epi16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xee,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epi16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: min_epi16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epi16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epi16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_add_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_add_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_add_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_sub_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sub_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sub_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_mul_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_mul_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_mul_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_div_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_div_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_div_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_pmulu_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmuludq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmulu_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmulu_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


; Verify llvm.x86.sse2.cvtsi2sd lowers to cvtsi2sd(l) / vcvtsi2sd(l).
; The i32 argument comes from the stack on i386 (memory operand) and from
; %edi on x86-64 (register operand), so X86-* and X64-* prefixes diverge here.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; X86-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtsi2sd %edi, %xmm0 ## encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
687
688
; Verify llvm.x86.sse2.cvtss2sd (scalar float->double, register form) lowers
; to cvtss2sd / vcvtss2sd with the expected encodings.
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtss2sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtss2sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
708
709
; Same intrinsic but with the float vector loaded from memory. At default
; optimization the load is NOT folded into cvtss2sd: a separate (v)movss load
; is emitted, then a register-register convert, then a blend/movsd to merge
; the low element into %a0.
define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x08]
; X86-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovss (%eax), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovss (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movss (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x0f]
; X64-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovss (%rdi), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovss (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
771
772
; Same as the _load test but with the optsize attribute: here the load IS
; folded into the cvtss2sd memory operand (smaller code), followed by a
; (v)movsd to merge the converted low element into %a0.
define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x float>* %p1) optsize {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
822
823
; Verify llvm.x86.sse2.cvtdq2ps (packed i32 -> float) lowers to
; cvtdq2ps / vcvtdq2ps with the expected encodings.
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2ps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2ps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
843
844
; Verify the generic llvm.sadd.sat.v16i8 intrinsic (the auto-upgrade target of
; the old x86 padds.b intrinsic) lowers to paddsb / vpaddsb.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_padds_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    paddsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xec,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_padds_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xec,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_padds_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
864
865
; Verify llvm.sadd.sat.v8i16 (signed saturating i16 add) lowers to
; paddsw / vpaddsw.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_padds_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    paddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xed,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_padds_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xed,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_padds_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
885
886
; Verify llvm.x86.sse2.paddus.b (unsigned saturating i8 add) lowers to
; paddusb / vpaddusb.
; NOTE: stale AVX2-/SKX-prefixed check lines were removed; those prefixes
; appear in no RUN line of this file, so FileCheck never evaluated them.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdc,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_paddus_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_paddus_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
914
915
; Verify llvm.x86.sse2.paddus.w (unsigned saturating i16 add) lowers to
; paddusw / vpaddusw.
; NOTE: stale AVX2-/SKX-prefixed check lines were removed; those prefixes
; appear in no RUN line of this file, so FileCheck never evaluated them.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_paddus_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_paddus_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
943
944
; Verify llvm.ssub.sat.v16i8 (signed saturating i8 subtract) lowers to
; psubsb / vpsubsb.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubs_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubs_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
964
965
; Verify llvm.ssub.sat.v8i16 (signed saturating i16 subtract) lowers to
; psubsw / vpsubsw.
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubs_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubs_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
985
986
; Verify llvm.x86.sse2.psubus.b (unsigned saturating i8 subtract) lowers to
; psubusb / vpsubusb.
; NOTE: stale AVX2-/SKX-prefixed check lines were removed; those prefixes
; appear in no RUN line of this file, so FileCheck never evaluated them.
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubus_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubus_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
1014
1015
; Verify llvm.x86.sse2.psubus.w (unsigned saturating i16 subtract) lowers to
; psubusw / vpsubusw.
; NOTE: stale AVX2-/SKX-prefixed check lines were removed; those prefixes
; appear in no RUN line of this file, so FileCheck never evaluated them.
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubus_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubus_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
1043