; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
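; Each test below is run in six configurations: i386 and x86_64 Darwin, each
; with SSE2-only, AVX, and AVX512 (F/BW/DQ/VL) features. -show-mc-encoding
; verifies the exact instruction encodings; under AVX512, "EVEX TO VEX
; Compression" marks instructions the backend re-encodes with the shorter VEX
; prefix (possible when only xmm0-xmm15 and no EVEX-only features are used).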


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sqrt_pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sqrt_pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sqrt_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sqrt_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00]
; X86-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
; X86-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07]
; X64-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
; X64-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a0, align 16
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}

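; The llvm.x86.sse2.psll.dq.bs / psrl.dq.bs intrinsics take the shift amount
; in bytes, so i32 7 lowers directly to pslldq/psrldq $7.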
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq_bs:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x07]
; SSE-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_dq_bs:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x07]
; AVX1-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_dq_bs:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x07]
; AVX512-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrl_dq_bs:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x07]
; SSE-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_dq_bs:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x07]
; AVX1-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_dq_bs:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x07]
; AVX512-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

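; In contrast, llvm.x86.sse2.psll.dq / psrl.dq take the shift amount in bits,
; so i32 8 (one byte) lowers to pslldq/psrldq $1.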
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x01]
; SSE-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX1-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX512-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrl_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x01]
; SSE-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX1-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX512-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


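; The two conversions below widen only the low two elements of their source
; vector to doubles.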
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse2_cvtps2pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0 ## encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtps2pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtps2pd %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtps2pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtps2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


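; Despite the name, storel.dq stores the low 64 bits of the vector; it is
; emitted as (v)movlps.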
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; X86-SSE-LABEL: test_x86_sse2_storel_dq:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlps %xmm0, (%eax) ## encoding: [0x0f,0x13,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storel_dq:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storel_dq:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storel_dq:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movlps %xmm0, (%rdi) ## encoding: [0x0f,0x13,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storel_dq:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovlps %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storel_dq:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovlps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; The add operation forces the integer execution domain.
; X86-SSE-LABEL: test_x86_sse2_storeu_dq:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
; X86-SSE-NEXT:    psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
; X86-SSE-NEXT:    movdqu %xmm0, (%eax) ## encoding: [0xf3,0x0f,0x7f,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX1-NEXT:    vmovdqu %xmm0, (%eax) ## encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX512-NEXT:    vmovdqu %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storeu_dq:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
; X64-SSE-NEXT:    psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
; X64-SSE-NEXT:    movdqu %xmm0, (%rdi) ## encoding: [0xf3,0x0f,0x7f,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX1-NEXT:    vmovdqu %xmm0, (%rdi) ## encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX512-NEXT:    vmovdqu %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; The fadd operation forces the floating-point execution domain.
; X86-SSE-LABEL: test_x86_sse2_storeu_pd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X86-SSE-NEXT:    movhpd LCPI11_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-SSE-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-SSE-NEXT:    addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X86-SSE-NEXT:    movupd %xmm1, (%eax) ## encoding: [0x66,0x0f,0x11,0x08]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX1-NEXT:    vmovhpd LCPI11_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX1-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX1-NEXT:    vmovupd %xmm0, (%eax) ## encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd LCPI11_0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX512-NEXT:    ## xmm1 = mem[0],zero
; X86-AVX512-NEXT:    vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X86-AVX512-NEXT:    ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX512-NEXT:    vmovupd %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storeu_pd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X64-SSE-NEXT:    movhpd {{.*}}(%rip), %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-SSE-NEXT:    addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X64-SSE-NEXT:    movupd %xmm1, (%rdi) ## encoding: [0x66,0x0f,0x11,0x0f]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX1-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX1-NEXT:    vmovupd %xmm0, (%rdi) ## encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovsd {{.*}}(%rip), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    ## xmm1 = mem[0],zero
; X64-AVX512-NEXT:    vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X64-AVX512-NEXT:    ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX512-NEXT:    vmovupd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind

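; Immediate 27 (0x1B) reverses the four elements. Note that with AVX the
; pshufd pattern is emitted as vpermilps, presumably so the shuffle can
; execute in the FP domain when no integer op consumes the result.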
define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; SSE-LABEL: test_x86_sse2_pshuf_d:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshufd $27, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshuf_d:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpermilps $27, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshuf_d:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpermilps $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; SSE-LABEL: test_x86_sse2_pshufl_w:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshuflw $27, %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshufl_w:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpshuflw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshufl_w:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpshuflw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; SSE-LABEL: test_x86_sse2_pshufh_w:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshufhw $27, %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshufh_w:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpshufhw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshufh_w:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpshufhw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone

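; SSE2 only provides the unsigned-byte and signed-word forms of vector
; min/max (pmaxub/pminub, pmaxsw/pminsw); the remaining combinations arrived
; with SSE4.1.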
define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: max_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xde,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xde,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: min_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: max_epi16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xee,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epi16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xee,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epi16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: min_epi16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epi16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epi16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_add_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_add_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_add_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_sub_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sub_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sub_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_mul_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_mul_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_mul_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_div_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_div_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_div_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

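; pavgb/pavgw compute the rounded unsigned average: (a + b + 1) >> 1.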
define <16 x i8> @mm_avg_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: mm_avg_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pavgb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: mm_avg_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: mm_avg_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @mm_avg_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: mm_avg_epu16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pavgw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: mm_avg_epu16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: mm_avg_epu16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


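; pmuludq multiplies the even-indexed (0 and 2) unsigned 32-bit elements,
; producing two 64-bit results.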
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_pmulu_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmuludq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmulu_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmulu_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; X86-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtsi2sdl %edi, %xmm0 ## encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtss2sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtss2sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


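; The conversion writes only the low double; the upper element of %a0 must be
; preserved, hence the movsd/vblendps merge after the cvtss2sd.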
define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x08]
; X86-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovss (%eax), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovss (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movss (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x0f]
; X64-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovss (%rdi), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovss (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


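; Under optsize the load is folded into cvtss2sd; the non-optsize version
; above keeps a separate (v)movss, presumably to avoid the memory form's
; partial register update.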
define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x float>* %p1) optsize {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2ps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2ps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
