1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
3; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
4; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
5; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
6; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
7; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
8
9define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
10; SSE-LABEL: test_x86_sse2_cmp_pd:
11; SSE:       ## %bb.0:
12; SSE-NEXT:    cmpordpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xc2,0xc1,0x07]
13; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
14;
15; AVX-LABEL: test_x86_sse2_cmp_pd:
16; AVX:       ## %bb.0:
17; AVX-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
18; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
19  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
20  ret <2 x double> %res
21}
22declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
23
24
25define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
26; SSE-LABEL: test_x86_sse2_cmp_sd:
27; SSE:       ## %bb.0:
28; SSE-NEXT:    cmpordsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
29; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
30;
31; AVX-LABEL: test_x86_sse2_cmp_sd:
32; AVX:       ## %bb.0:
33; AVX-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
34; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
35  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
36  ret <2 x double> %res
37}
38declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
39
40
41define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
42; SSE-LABEL: test_x86_sse2_comieq_sd:
43; SSE:       ## %bb.0:
44; SSE-NEXT:    comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
45; SSE-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
46; SSE-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
47; SSE-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
48; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
49; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
50;
51; AVX1-LABEL: test_x86_sse2_comieq_sd:
52; AVX1:       ## %bb.0:
53; AVX1-NEXT:    vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
54; AVX1-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
55; AVX1-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
56; AVX1-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
57; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
58; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
59;
60; AVX512-LABEL: test_x86_sse2_comieq_sd:
61; AVX512:       ## %bb.0:
62; AVX512-NEXT:    vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
63; AVX512-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
64; AVX512-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
65; AVX512-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
66; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
67; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
68  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
69  ret i32 %res
70}
71declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
72
73
74define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
75; SSE-LABEL: test_x86_sse2_comige_sd:
76; SSE:       ## %bb.0:
77; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
78; SSE-NEXT:    comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
79; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
80; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
81;
82; AVX1-LABEL: test_x86_sse2_comige_sd:
83; AVX1:       ## %bb.0:
84; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
85; AVX1-NEXT:    vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
86; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
87; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
88;
89; AVX512-LABEL: test_x86_sse2_comige_sd:
90; AVX512:       ## %bb.0:
91; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
92; AVX512-NEXT:    vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
93; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
94; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
95  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
96  ret i32 %res
97}
98declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
99
100
101define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
102; SSE-LABEL: test_x86_sse2_comigt_sd:
103; SSE:       ## %bb.0:
104; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
105; SSE-NEXT:    comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
106; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
107; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
108;
109; AVX1-LABEL: test_x86_sse2_comigt_sd:
110; AVX1:       ## %bb.0:
111; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
112; AVX1-NEXT:    vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
113; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
114; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
115;
116; AVX512-LABEL: test_x86_sse2_comigt_sd:
117; AVX512:       ## %bb.0:
118; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
119; AVX512-NEXT:    vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
120; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
121; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
122  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
123  ret i32 %res
124}
125declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
126
127
128define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
129; SSE-LABEL: test_x86_sse2_comile_sd:
130; SSE:       ## %bb.0:
131; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
132; SSE-NEXT:    comisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2f,0xc8]
133; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
134; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
135;
136; AVX1-LABEL: test_x86_sse2_comile_sd:
137; AVX1:       ## %bb.0:
138; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
139; AVX1-NEXT:    vcomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2f,0xc8]
140; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
141; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
142;
143; AVX512-LABEL: test_x86_sse2_comile_sd:
144; AVX512:       ## %bb.0:
145; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
146; AVX512-NEXT:    vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
147; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
148; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
149  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
150  ret i32 %res
151}
152declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
153
154
155define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
156; SSE-LABEL: test_x86_sse2_comilt_sd:
157; SSE:       ## %bb.0:
158; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
159; SSE-NEXT:    comisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2f,0xc8]
160; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
161; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
162;
163; AVX1-LABEL: test_x86_sse2_comilt_sd:
164; AVX1:       ## %bb.0:
165; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
166; AVX1-NEXT:    vcomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2f,0xc8]
167; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
168; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
169;
170; AVX512-LABEL: test_x86_sse2_comilt_sd:
171; AVX512:       ## %bb.0:
172; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
173; AVX512-NEXT:    vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
174; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
175; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
176  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
177  ret i32 %res
178}
179declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
180
181
182define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
183; SSE-LABEL: test_x86_sse2_comineq_sd:
184; SSE:       ## %bb.0:
185; SSE-NEXT:    comisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2f,0xc1]
186; SSE-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
187; SSE-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
188; SSE-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
189; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
190; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
191;
192; AVX1-LABEL: test_x86_sse2_comineq_sd:
193; AVX1:       ## %bb.0:
194; AVX1-NEXT:    vcomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2f,0xc1]
195; AVX1-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
196; AVX1-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
197; AVX1-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
198; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
199; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
200;
201; AVX512-LABEL: test_x86_sse2_comineq_sd:
202; AVX512:       ## %bb.0:
203; AVX512-NEXT:    vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
204; AVX512-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
205; AVX512-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
206; AVX512-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
207; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
208; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
209  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
210  ret i32 %res
211}
212declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
213
214
215define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
216; SSE-LABEL: test_x86_sse2_cvtpd2dq:
217; SSE:       ## %bb.0:
218; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0xe6,0xc0]
219; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
220;
221; AVX1-LABEL: test_x86_sse2_cvtpd2dq:
222; AVX1:       ## %bb.0:
223; AVX1-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0]
224; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
225;
226; AVX512-LABEL: test_x86_sse2_cvtpd2dq:
227; AVX512:       ## %bb.0:
228; AVX512-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
229; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
230  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
231  ret <4 x i32> %res
232}
233declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
234
235
236define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind {
237; SSE-LABEL: test_mm_cvtpd_epi32_zext:
238; SSE:       ## %bb.0:
239; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0xe6,0xc0]
240; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
241;
242; AVX1-LABEL: test_mm_cvtpd_epi32_zext:
243; AVX1:       ## %bb.0:
244; AVX1-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0]
245; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
246;
247; AVX512-LABEL: test_mm_cvtpd_epi32_zext:
248; AVX512:       ## %bb.0:
249; AVX512-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
250; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
251  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
252  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
253  %bc = bitcast <4 x i32> %res to <2 x i64>
254  ret <2 x i64> %bc
255}
256
257
258define <2 x i64> @test_mm_cvtpd_epi32_zext_load(<2 x double>* %p0) nounwind {
259; X86-SSE-LABEL: test_mm_cvtpd_epi32_zext_load:
260; X86-SSE:       ## %bb.0:
261; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
262; X86-SSE-NEXT:    cvtpd2dq (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xe6,0x00]
263; X86-SSE-NEXT:    retl ## encoding: [0xc3]
264;
265; X86-AVX1-LABEL: test_mm_cvtpd_epi32_zext_load:
266; X86-AVX1:       ## %bb.0:
267; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
268; X86-AVX1-NEXT:    vcvtpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xe6,0x00]
269; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
270;
271; X86-AVX512-LABEL: test_mm_cvtpd_epi32_zext_load:
272; X86-AVX512:       ## %bb.0:
273; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
274; X86-AVX512-NEXT:    vcvtpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0x00]
275; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
276;
277; X64-SSE-LABEL: test_mm_cvtpd_epi32_zext_load:
278; X64-SSE:       ## %bb.0:
279; X64-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0xe6,0x07]
280; X64-SSE-NEXT:    retq ## encoding: [0xc3]
281;
282; X64-AVX1-LABEL: test_mm_cvtpd_epi32_zext_load:
283; X64-AVX1:       ## %bb.0:
284; X64-AVX1-NEXT:    vcvtpd2dqx (%rdi), %xmm0 ## encoding: [0xc5,0xfb,0xe6,0x07]
285; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
286;
287; X64-AVX512-LABEL: test_mm_cvtpd_epi32_zext_load:
288; X64-AVX512:       ## %bb.0:
289; X64-AVX512-NEXT:    vcvtpd2dqx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0x07]
290; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
291  %a0 = load <2 x double>, <2 x double>* %p0
292  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
293  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
294  %bc = bitcast <4 x i32> %res to <2 x i64>
295  ret <2 x i64> %bc
296}
297
298
299define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
300; SSE-LABEL: test_x86_sse2_cvtpd2ps:
301; SSE:       ## %bb.0:
302; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5a,0xc0]
303; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
304;
305; AVX1-LABEL: test_x86_sse2_cvtpd2ps:
306; AVX1:       ## %bb.0:
307; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5a,0xc0]
308; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
309;
310; AVX512-LABEL: test_x86_sse2_cvtpd2ps:
311; AVX512:       ## %bb.0:
312; AVX512-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
313; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
314  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
315  ret <4 x float> %res
316}
317declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
318
319define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind {
320; SSE-LABEL: test_x86_sse2_cvtpd2ps_zext:
321; SSE:       ## %bb.0:
322; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5a,0xc0]
323; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
324;
325; AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext:
326; AVX1:       ## %bb.0:
327; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5a,0xc0]
328; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
329;
330; AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext:
331; AVX512:       ## %bb.0:
332; AVX512-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
333; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
334  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
335  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
336  ret <4 x float> %res
337}
338
339define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(<2 x double>* %p0) nounwind {
340; X86-SSE-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
341; X86-SSE:       ## %bb.0:
342; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
343; X86-SSE-NEXT:    cvtpd2ps (%eax), %xmm0 ## encoding: [0x66,0x0f,0x5a,0x00]
344; X86-SSE-NEXT:    retl ## encoding: [0xc3]
345;
346; X86-AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
347; X86-AVX1:       ## %bb.0:
348; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
349; X86-AVX1-NEXT:    vcvtpd2psx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x5a,0x00]
350; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
351;
352; X86-AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
353; X86-AVX512:       ## %bb.0:
354; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
355; X86-AVX512-NEXT:    vcvtpd2psx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0x00]
356; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
357;
358; X64-SSE-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
359; X64-SSE:       ## %bb.0:
360; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x5a,0x07]
361; X64-SSE-NEXT:    retq ## encoding: [0xc3]
362;
363; X64-AVX1-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
364; X64-AVX1:       ## %bb.0:
365; X64-AVX1-NEXT:    vcvtpd2psx (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x5a,0x07]
366; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
367;
368; X64-AVX512-LABEL: test_x86_sse2_cvtpd2ps_zext_load:
369; X64-AVX512:       ## %bb.0:
370; X64-AVX512-NEXT:    vcvtpd2psx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0x07]
371; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
372  %a0 = load <2 x double>, <2 x double>* %p0
373  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
374  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
375  ret <4 x float> %res
376}
377
378define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
379; SSE-LABEL: test_x86_sse2_cvtps2dq:
380; SSE:       ## %bb.0:
381; SSE-NEXT:    cvtps2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5b,0xc0]
382; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
383;
384; AVX1-LABEL: test_x86_sse2_cvtps2dq:
385; AVX1:       ## %bb.0:
386; AVX1-NEXT:    vcvtps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5b,0xc0]
387; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
388;
389; AVX512-LABEL: test_x86_sse2_cvtps2dq:
390; AVX512:       ## %bb.0:
391; AVX512-NEXT:    vcvtps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
392; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
393  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
394  ret <4 x i32> %res
395}
396declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
397
398
399define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
400; SSE-LABEL: test_x86_sse2_cvtsd2si:
401; SSE:       ## %bb.0:
402; SSE-NEXT:    cvtsd2si %xmm0, %eax ## encoding: [0xf2,0x0f,0x2d,0xc0]
403; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
404;
405; AVX1-LABEL: test_x86_sse2_cvtsd2si:
406; AVX1:       ## %bb.0:
407; AVX1-NEXT:    vcvtsd2si %xmm0, %eax ## encoding: [0xc5,0xfb,0x2d,0xc0]
408; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
409;
410; AVX512-LABEL: test_x86_sse2_cvtsd2si:
411; AVX512:       ## %bb.0:
412; AVX512-NEXT:    vcvtsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
413; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
414  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
415  ret i32 %res
416}
417declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
418
419
420define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
421; SSE-LABEL: test_x86_sse2_cvtsd2ss:
422; SSE:       ## %bb.0:
423; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
424; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
425;
426; AVX-LABEL: test_x86_sse2_cvtsd2ss:
427; AVX:       ## %bb.0:
428; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1]
429; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
430  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
431  ret <4 x float> %res
432}
433declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
434
435
436define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) {
437; X86-SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
438; X86-SSE:       ## %bb.0:
439; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
440; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
441; X86-SSE-NEXT:    retl ## encoding: [0xc3]
442;
443; X86-AVX-LABEL: test_x86_sse2_cvtsd2ss_load:
444; X86-AVX:       ## %bb.0:
445; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
446; X86-AVX-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
447; X86-AVX-NEXT:    retl ## encoding: [0xc3]
448;
449; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
450; X64-SSE:       ## %bb.0:
451; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
452; X64-SSE-NEXT:    retq ## encoding: [0xc3]
453;
454; X64-AVX-LABEL: test_x86_sse2_cvtsd2ss_load:
455; X64-AVX:       ## %bb.0:
456; X64-AVX-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
457; X64-AVX-NEXT:    retq ## encoding: [0xc3]
458  %a1 = load <2 x double>, <2 x double>* %p1
459  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
460  ret <4 x float> %res
461}
462
463
464define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, <2 x double>* %p1) optsize {
465; X86-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
466; X86-SSE:       ## %bb.0:
467; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
468; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
469; X86-SSE-NEXT:    retl ## encoding: [0xc3]
470;
471; X86-AVX-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
472; X86-AVX:       ## %bb.0:
473; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
474; X86-AVX-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
475; X86-AVX-NEXT:    retl ## encoding: [0xc3]
476;
477; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
478; X64-SSE:       ## %bb.0:
479; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
480; X64-SSE-NEXT:    retq ## encoding: [0xc3]
481;
482; X64-AVX-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
483; X64-AVX:       ## %bb.0:
484; X64-AVX-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
485; X64-AVX-NEXT:    retq ## encoding: [0xc3]
486  %a1 = load <2 x double>, <2 x double>* %p1
487  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
488  ret <4 x float> %res
489}
490
491
492define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
493; SSE-LABEL: test_x86_sse2_cvttpd2dq:
494; SSE:       ## %bb.0:
495; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
496; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
497;
498; AVX1-LABEL: test_x86_sse2_cvttpd2dq:
499; AVX1:       ## %bb.0:
500; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
501; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
502;
503; AVX512-LABEL: test_x86_sse2_cvttpd2dq:
504; AVX512:       ## %bb.0:
505; AVX512-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
506; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
507  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
508  ret <4 x i32> %res
509}
510declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
511
512
513define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind {
514; SSE-LABEL: test_mm_cvttpd_epi32_zext:
515; SSE:       ## %bb.0:
516; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0]
517; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
518;
519; AVX1-LABEL: test_mm_cvttpd_epi32_zext:
520; AVX1:       ## %bb.0:
521; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0]
522; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
523;
524; AVX512-LABEL: test_mm_cvttpd_epi32_zext:
525; AVX512:       ## %bb.0:
526; AVX512-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
527; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
528  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
529  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
530  %bc = bitcast <4 x i32> %res to <2 x i64>
531  ret <2 x i64> %bc
532}
533
534
535define <2 x i64> @test_mm_cvttpd_epi32_zext_load(<2 x double>* %p0) nounwind {
536; X86-SSE-LABEL: test_mm_cvttpd_epi32_zext_load:
537; X86-SSE:       ## %bb.0:
538; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
539; X86-SSE-NEXT:    cvttpd2dq (%eax), %xmm0 ## encoding: [0x66,0x0f,0xe6,0x00]
540; X86-SSE-NEXT:    retl ## encoding: [0xc3]
541;
542; X86-AVX1-LABEL: test_mm_cvttpd_epi32_zext_load:
543; X86-AVX1:       ## %bb.0:
544; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
545; X86-AVX1-NEXT:    vcvttpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0xe6,0x00]
546; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
547;
548; X86-AVX512-LABEL: test_mm_cvttpd_epi32_zext_load:
549; X86-AVX512:       ## %bb.0:
550; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
551; X86-AVX512-NEXT:    vcvttpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0x00]
552; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
553;
554; X64-SSE-LABEL: test_mm_cvttpd_epi32_zext_load:
555; X64-SSE:       ## %bb.0:
556; X64-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 ## encoding: [0x66,0x0f,0xe6,0x07]
557; X64-SSE-NEXT:    retq ## encoding: [0xc3]
558;
559; X64-AVX1-LABEL: test_mm_cvttpd_epi32_zext_load:
560; X64-AVX1:       ## %bb.0:
561; X64-AVX1-NEXT:    vcvttpd2dqx (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0xe6,0x07]
562; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
563;
564; X64-AVX512-LABEL: test_mm_cvttpd_epi32_zext_load:
565; X64-AVX512:       ## %bb.0:
566; X64-AVX512-NEXT:    vcvttpd2dqx (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0x07]
567; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
568  %a0 = load <2 x double>, <2 x double>* %p0
569  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
570  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
571  %bc = bitcast <4 x i32> %res to <2 x i64>
572  ret <2 x i64> %bc
573}
574
575
576define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
577; SSE-LABEL: test_x86_sse2_cvttps2dq:
578; SSE:       ## %bb.0:
579; SSE-NEXT:    cvttps2dq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x5b,0xc0]
580; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
581;
582; AVX1-LABEL: test_x86_sse2_cvttps2dq:
583; AVX1:       ## %bb.0:
584; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
585; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
586;
587; AVX512-LABEL: test_x86_sse2_cvttps2dq:
588; AVX512:       ## %bb.0:
589; AVX512-NEXT:    vcvttps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
590; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
591  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
592  ret <4 x i32> %res
593}
594declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
595
596
597define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
598; SSE-LABEL: test_x86_sse2_cvttsd2si:
599; SSE:       ## %bb.0:
600; SSE-NEXT:    cvttsd2si %xmm0, %eax ## encoding: [0xf2,0x0f,0x2c,0xc0]
601; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
602;
603; AVX1-LABEL: test_x86_sse2_cvttsd2si:
604; AVX1:       ## %bb.0:
605; AVX1-NEXT:    vcvttsd2si %xmm0, %eax ## encoding: [0xc5,0xfb,0x2c,0xc0]
606; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
607;
608; AVX512-LABEL: test_x86_sse2_cvttsd2si:
609; AVX512:       ## %bb.0:
610; AVX512-NEXT:    vcvttsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
611; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
612  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
613  ret i32 %res
614}
615declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
616
617
618define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
619; SSE-LABEL: test_x86_sse2_max_pd:
620; SSE:       ## %bb.0:
621; SSE-NEXT:    maxpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x5f,0xc1]
622; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
623;
624; AVX1-LABEL: test_x86_sse2_max_pd:
625; AVX1:       ## %bb.0:
626; AVX1-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5f,0xc1]
627; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
628;
629; AVX512-LABEL: test_x86_sse2_max_pd:
630; AVX512:       ## %bb.0:
631; AVX512-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
632; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
633  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
634  ret <2 x double> %res
635}
636declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
637
638
639define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
640; SSE-LABEL: test_x86_sse2_max_sd:
641; SSE:       ## %bb.0:
642; SSE-NEXT:    maxsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5f,0xc1]
643; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
644;
645; AVX1-LABEL: test_x86_sse2_max_sd:
646; AVX1:       ## %bb.0:
647; AVX1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5f,0xc1]
648; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
649;
650; AVX512-LABEL: test_x86_sse2_max_sd:
651; AVX512:       ## %bb.0:
652; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
653; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
654  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
655  ret <2 x double> %res
656}
657declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
658
659
660define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
661; SSE-LABEL: test_x86_sse2_min_pd:
662; SSE:       ## %bb.0:
663; SSE-NEXT:    minpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x5d,0xc1]
664; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
665;
666; AVX1-LABEL: test_x86_sse2_min_pd:
667; AVX1:       ## %bb.0:
668; AVX1-NEXT:    vminpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5d,0xc1]
669; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
670;
671; AVX512-LABEL: test_x86_sse2_min_pd:
672; AVX512:       ## %bb.0:
673; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
674; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
675  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
676  ret <2 x double> %res
677}
678declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
679
680
681define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
682; SSE-LABEL: test_x86_sse2_min_sd:
683; SSE:       ## %bb.0:
684; SSE-NEXT:    minsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5d,0xc1]
685; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
686;
687; AVX1-LABEL: test_x86_sse2_min_sd:
688; AVX1:       ## %bb.0:
689; AVX1-NEXT:    vminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5d,0xc1]
690; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
691;
692; AVX512-LABEL: test_x86_sse2_min_sd:
693; AVX512:       ## %bb.0:
694; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
695; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
696  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
697  ret <2 x double> %res
698}
699declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
700
701
702define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
703; SSE-LABEL: test_x86_sse2_movmsk_pd:
704; SSE:       ## %bb.0:
705; SSE-NEXT:    movmskpd %xmm0, %eax ## encoding: [0x66,0x0f,0x50,0xc0]
706; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
707;
708; AVX-LABEL: test_x86_sse2_movmsk_pd:
709; AVX:       ## %bb.0:
710; AVX-NEXT:    vmovmskpd %xmm0, %eax ## encoding: [0xc5,0xf9,0x50,0xc0]
711; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
712  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
713  ret i32 %res
714}
715declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
716
717
718define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
719; SSE-LABEL: test_x86_sse2_packssdw_128:
720; SSE:       ## %bb.0:
721; SSE-NEXT:    packssdw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6b,0xc1]
722; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
723;
724; AVX1-LABEL: test_x86_sse2_packssdw_128:
725; AVX1:       ## %bb.0:
726; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
727; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
728;
729; AVX512-LABEL: test_x86_sse2_packssdw_128:
730; AVX512:       ## %bb.0:
731; AVX512-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
732; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
733  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
734  ret <8 x i16> %res
735}
736declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
737
738
739define <8 x i16> @test_x86_sse2_packssdw_128_fold() {
740; X86-SSE-LABEL: test_x86_sse2_packssdw_128_fold:
741; X86-SSE:       ## %bb.0:
742; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
743; X86-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
744; X86-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI30_0, kind: FK_Data_4
745; X86-SSE-NEXT:    retl ## encoding: [0xc3]
746;
747; X86-AVX1-LABEL: test_x86_sse2_packssdw_128_fold:
748; X86-AVX1:       ## %bb.0:
749; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
750; X86-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
751; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI30_0, kind: FK_Data_4
752; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
753;
754; X86-AVX512-LABEL: test_x86_sse2_packssdw_128_fold:
755; X86-AVX512:       ## %bb.0:
756; X86-AVX512-NEXT:    vmovaps LCPI30_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
757; X86-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
758; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI30_0, kind: FK_Data_4
759; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
760;
761; X64-SSE-LABEL: test_x86_sse2_packssdw_128_fold:
762; X64-SSE:       ## %bb.0:
763; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
764; X64-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
765; X64-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI30_0-4, kind: reloc_riprel_4byte
766; X64-SSE-NEXT:    retq ## encoding: [0xc3]
767;
768; X64-AVX1-LABEL: test_x86_sse2_packssdw_128_fold:
769; X64-AVX1:       ## %bb.0:
770; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
771; X64-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
772; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI30_0-4, kind: reloc_riprel_4byte
773; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
774;
775; X64-AVX512-LABEL: test_x86_sse2_packssdw_128_fold:
776; X64-AVX512:       ## %bb.0:
777; X64-AVX512-NEXT:    vmovaps {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
778; X64-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
779; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI30_0-4, kind: reloc_riprel_4byte
780; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
781  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
782  ret <8 x i16> %res
783}
784
785
786define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
787; SSE-LABEL: test_x86_sse2_packsswb_128:
788; SSE:       ## %bb.0:
789; SSE-NEXT:    packsswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x63,0xc1]
790; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
791;
792; AVX1-LABEL: test_x86_sse2_packsswb_128:
793; AVX1:       ## %bb.0:
794; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
795; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
796;
797; AVX512-LABEL: test_x86_sse2_packsswb_128:
798; AVX512:       ## %bb.0:
799; AVX512-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
800; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
801  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
802  ret <16 x i8> %res
803}
804declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
805
806
807define <16 x i8> @test_x86_sse2_packsswb_128_fold() {
808; X86-SSE-LABEL: test_x86_sse2_packsswb_128_fold:
809; X86-SSE:       ## %bb.0:
810; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
811; X86-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
812; X86-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI32_0, kind: FK_Data_4
813; X86-SSE-NEXT:    retl ## encoding: [0xc3]
814;
815; X86-AVX1-LABEL: test_x86_sse2_packsswb_128_fold:
816; X86-AVX1:       ## %bb.0:
817; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
818; X86-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
819; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4
820; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
821;
822; X86-AVX512-LABEL: test_x86_sse2_packsswb_128_fold:
823; X86-AVX512:       ## %bb.0:
824; X86-AVX512-NEXT:    vmovaps LCPI32_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
825; X86-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
826; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4
827; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
828;
829; X64-SSE-LABEL: test_x86_sse2_packsswb_128_fold:
830; X64-SSE:       ## %bb.0:
831; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
832; X64-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
833; X64-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI32_0-4, kind: reloc_riprel_4byte
834; X64-SSE-NEXT:    retq ## encoding: [0xc3]
835;
836; X64-AVX1-LABEL: test_x86_sse2_packsswb_128_fold:
837; X64-AVX1:       ## %bb.0:
838; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
839; X64-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
840; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI32_0-4, kind: reloc_riprel_4byte
841; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
842;
843; X64-AVX512-LABEL: test_x86_sse2_packsswb_128_fold:
844; X64-AVX512:       ## %bb.0:
845; X64-AVX512-NEXT:    vmovaps {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
846; X64-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
847; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI32_0-4, kind: reloc_riprel_4byte
848; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
849  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
850  ret <16 x i8> %res
851}
852
853
854define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
855; SSE-LABEL: test_x86_sse2_packuswb_128:
856; SSE:       ## %bb.0:
857; SSE-NEXT:    packuswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x67,0xc1]
858; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
859;
860; AVX1-LABEL: test_x86_sse2_packuswb_128:
861; AVX1:       ## %bb.0:
862; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x67,0xc1]
863; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
864;
865; AVX512-LABEL: test_x86_sse2_packuswb_128:
866; AVX512:       ## %bb.0:
867; AVX512-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
868; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
869  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
870  ret <16 x i8> %res
871}
872declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
873
874
875define <16 x i8> @test_x86_sse2_packuswb_128_fold() {
876; X86-SSE-LABEL: test_x86_sse2_packuswb_128_fold:
877; X86-SSE:       ## %bb.0:
878; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
879; X86-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
880; X86-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI34_0, kind: FK_Data_4
881; X86-SSE-NEXT:    retl ## encoding: [0xc3]
882;
883; X86-AVX1-LABEL: test_x86_sse2_packuswb_128_fold:
884; X86-AVX1:       ## %bb.0:
885; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
886; X86-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
887; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
888; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
889;
890; X86-AVX512-LABEL: test_x86_sse2_packuswb_128_fold:
891; X86-AVX512:       ## %bb.0:
892; X86-AVX512-NEXT:    vmovaps LCPI34_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
893; X86-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
894; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
895; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
896;
897; X64-SSE-LABEL: test_x86_sse2_packuswb_128_fold:
898; X64-SSE:       ## %bb.0:
899; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
900; X64-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
901; X64-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI34_0-4, kind: reloc_riprel_4byte
902; X64-SSE-NEXT:    retq ## encoding: [0xc3]
903;
904; X64-AVX1-LABEL: test_x86_sse2_packuswb_128_fold:
905; X64-AVX1:       ## %bb.0:
906; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
907; X64-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
908; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI34_0-4, kind: reloc_riprel_4byte
909; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
910;
911; X64-AVX512-LABEL: test_x86_sse2_packuswb_128_fold:
912; X64-AVX512:       ## %bb.0:
913; X64-AVX512-NEXT:    vmovaps {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
914; X64-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
915; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI34_0-4, kind: reloc_riprel_4byte
916; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
917  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
918  ret <16 x i8> %res
919}
920
921
922define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
923; SSE-LABEL: test_x86_sse2_padds_b:
924; SSE:       ## %bb.0:
925; SSE-NEXT:    paddsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xec,0xc1]
926; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
927;
928; AVX1-LABEL: test_x86_sse2_padds_b:
929; AVX1:       ## %bb.0:
930; AVX1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xec,0xc1]
931; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
932;
933; AVX512-LABEL: test_x86_sse2_padds_b:
934; AVX512:       ## %bb.0:
935; AVX512-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
936; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
937  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
938  ret <16 x i8> %res
939}
940declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
941
942
943define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
944; SSE-LABEL: test_x86_sse2_padds_w:
945; SSE:       ## %bb.0:
946; SSE-NEXT:    paddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xed,0xc1]
947; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
948;
949; AVX1-LABEL: test_x86_sse2_padds_w:
950; AVX1:       ## %bb.0:
951; AVX1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xed,0xc1]
952; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
953;
954; AVX512-LABEL: test_x86_sse2_padds_w:
955; AVX512:       ## %bb.0:
956; AVX512-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
957; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
958  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
959  ret <8 x i16> %res
960}
961declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
962
963
964define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
965; SSE-LABEL: test_x86_sse2_paddus_b:
966; SSE:       ## %bb.0:
967; SSE-NEXT:    paddusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdc,0xc1]
968; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
969;
970; AVX1-LABEL: test_x86_sse2_paddus_b:
971; AVX1:       ## %bb.0:
972; AVX1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdc,0xc1]
973; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
974;
975; AVX512-LABEL: test_x86_sse2_paddus_b:
976; AVX512:       ## %bb.0:
977; AVX512-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
978; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
979  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
980  ret <16 x i8> %res
981}
982declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
983
984
985define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
986; SSE-LABEL: test_x86_sse2_paddus_w:
987; SSE:       ## %bb.0:
988; SSE-NEXT:    paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
989; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
990;
991; AVX1-LABEL: test_x86_sse2_paddus_w:
992; AVX1:       ## %bb.0:
993; AVX1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
994; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
995;
996; AVX512-LABEL: test_x86_sse2_paddus_w:
997; AVX512:       ## %bb.0:
998; AVX512-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
999; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1000  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1001  ret <8 x i16> %res
1002}
1003declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
1004
1005
1006define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
1007; SSE-LABEL: test_x86_sse2_pmadd_wd:
1008; SSE:       ## %bb.0:
1009; SSE-NEXT:    pmaddwd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf5,0xc1]
1010; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1011;
1012; AVX1-LABEL: test_x86_sse2_pmadd_wd:
1013; AVX1:       ## %bb.0:
1014; AVX1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf5,0xc1]
1015; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1016;
1017; AVX512-LABEL: test_x86_sse2_pmadd_wd:
1018; AVX512:       ## %bb.0:
1019; AVX512-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
1020; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1021  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
1022  ret <4 x i32> %res
1023}
1024declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1025
1026
1027define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
1028; SSE-LABEL: test_x86_sse2_pmaxs_w:
1029; SSE:       ## %bb.0:
1030; SSE-NEXT:    pmaxsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xee,0xc1]
1031; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1032;
1033; AVX1-LABEL: test_x86_sse2_pmaxs_w:
1034; AVX1:       ## %bb.0:
1035; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xee,0xc1]
1036; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1037;
1038; AVX512-LABEL: test_x86_sse2_pmaxs_w:
1039; AVX512:       ## %bb.0:
1040; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
1041; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1042  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1043  ret <8 x i16> %res
1044}
1045declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
1046
1047
1048define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
1049; SSE-LABEL: test_x86_sse2_pmaxu_b:
1050; SSE:       ## %bb.0:
1051; SSE-NEXT:    pmaxub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xde,0xc1]
1052; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1053;
1054; AVX1-LABEL: test_x86_sse2_pmaxu_b:
1055; AVX1:       ## %bb.0:
1056; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xde,0xc1]
1057; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1058;
1059; AVX512-LABEL: test_x86_sse2_pmaxu_b:
1060; AVX512:       ## %bb.0:
1061; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
1062; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1063  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1064  ret <16 x i8> %res
1065}
1066declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
1067
1068
1069define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
1070; SSE-LABEL: test_x86_sse2_pmins_w:
1071; SSE:       ## %bb.0:
1072; SSE-NEXT:    pminsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xea,0xc1]
1073; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1074;
1075; AVX1-LABEL: test_x86_sse2_pmins_w:
1076; AVX1:       ## %bb.0:
1077; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xea,0xc1]
1078; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1079;
1080; AVX512-LABEL: test_x86_sse2_pmins_w:
1081; AVX512:       ## %bb.0:
1082; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
1083; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1084  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1085  ret <8 x i16> %res
1086}
1087declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
1088
1089
1090define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
1091; SSE-LABEL: test_x86_sse2_pminu_b:
1092; SSE:       ## %bb.0:
1093; SSE-NEXT:    pminub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xda,0xc1]
1094; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1095;
1096; AVX1-LABEL: test_x86_sse2_pminu_b:
1097; AVX1:       ## %bb.0:
1098; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xda,0xc1]
1099; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1100;
1101; AVX512-LABEL: test_x86_sse2_pminu_b:
1102; AVX512:       ## %bb.0:
1103; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
1104; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1105  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1106  ret <16 x i8> %res
1107}
1108declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
1109
1110
1111define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
1112; SSE-LABEL: test_x86_sse2_pmovmskb_128:
1113; SSE:       ## %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax ## encoding: [0x66,0x0f,0xd7,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse2_pmovmskb_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax ## encoding: [0xc5,0xf9,0xd7,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulh_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmulhw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmulh_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmulh_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulhu_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmulhuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmulhu_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmulhu_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psad_bw:
; SSE:       ## %bb.0:
; SSE-NEXT:    psadbw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psad_bw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psad_bw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psll_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslld %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psll_q:
; SSE:       ## %bb.0:
; SSE-NEXT:    psllq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_q:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_q:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psll_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psllw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslld $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xf0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pslli_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslld $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xf0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pslli_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_q:
; SSE:       ## %bb.0:
; SSE-NEXT:    psllq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pslli_q:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pslli_q:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsllq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psllw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xf0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pslli_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xf0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pslli_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psra_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrad %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psra_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psra_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psra_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psraw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psra_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psra_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrad $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xe0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrai_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrad $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xe0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrai_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrad $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psraw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xe0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrai_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsraw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xe0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrai_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsraw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrld %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_q:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrlq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_q:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_q:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrlw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_d:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrld $7, %xmm0 ## encoding: [0x66,0x0f,0x72,0xd0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_d:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrld $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x72,0xd0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_d:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_q:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrlq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_q:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_q:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrlq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrlw $7, %xmm0 ## encoding: [0x66,0x0f,0x71,0xd0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrli_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x71,0xd0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrli_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrlw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubs_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubs_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubs_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubs_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubus_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubus_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubus_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubus_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomieq_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomieq_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setnp %al ## encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl ## encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl ## encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomige_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomige_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomige_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomigt_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomigt_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomigt_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomile_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomile_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomile_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT:    setae %al ## encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomilt_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomilt_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm0, %xmm1 ## encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomilt_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomineq_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_ucomineq_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 ## encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_ucomineq_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

define void @test_x86_sse2_pause() {
; CHECK-LABEL: test_x86_sse2_pause:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pause ## encoding: [0xf3,0x90]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind

define void @lfence() nounwind {
; CHECK-LABEL: lfence:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    lfence ## encoding: [0x0f,0xae,0xe8]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

define void @mfence() nounwind {
; CHECK-LABEL: mfence:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    mfence ## encoding: [0x0f,0xae,0xf0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind

define void @clflush(i8* %p) nounwind {
; X86-LABEL: clflush:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    clflush (%eax) ## encoding: [0x0f,0xae,0x38]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: clflush:
; X64:       ## %bb.0:
; X64-NEXT:    clflush (%rdi) ## encoding: [0x0f,0xae,0x3f]
; X64-NEXT:    retq ## encoding: [0xc3]
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind
