; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX512VL,X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512VL,X64-AVX512VL

; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.

; The legacy 256-bit sqrt intrinsics lower directly to vsqrtpd / vsqrtps.
define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_sqrt_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_sqrt_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone

; vinsertf128 with a non-zero (after masking) immediate must stay a real insert.
define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone

define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone

define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_si_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_si_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
  ret <8 x i32> %res
}

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
; not a vinsertf128 $1.
define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; CHECK-NEXT:    vblendps $240, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xf0]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone

; We don't check any vextractf128 variant with immediate 0 because that's just a move.

define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_pd_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_pd_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone

define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_ps_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_ps_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone

define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_si_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_si_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vextractf128 $0 which should be optimized away, so just check that it's
; not a vextractf128 of any kind.
define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
  ret <2 x double> %res
}

; 128-bit broadcast intrinsics; pointer argument handling differs between the
; i686 and x86_64 run lines, hence the four per-target prefixes below.
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


; Legacy 256-bit blend / dot-product intrinsics.
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_blend_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $192, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xc0]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_blend_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


; Whole-register byte shifts; the i32 argument is a bit count (8 bits -> $1 byte).
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; AVX-LABEL: test_x86_sse2_psll_dq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX-NEXT:    # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_psll_dq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX512VL-NEXT:    # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; AVX-LABEL: test_x86_sse2_psrl_dq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX-NEXT:    # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_psrl_dq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX512VL-NEXT:    # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


; SSE4.1 blend intrinsics with constant masks.
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $3, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
; CHECK-NEXT:    # xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone


define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blendps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone


define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendw $7, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone


; SSE4.1 sign-extension intrinsics (pmovsx*).
define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxdq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxdq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxwd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxwd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxwq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxwq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone


; SSE4.1 zero-extension intrinsics (pmovzx*); the shuffle comments come from llc.
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxdq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxdq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxwd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxwd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxwq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxwq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone


; Integer/float to double conversion intrinsics (128-bit and 256-bit forms).
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse2_cvtdq2pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # encoding: [0xc5,0xfe,0xe6,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; AVX-LABEL: test_x86_sse2_cvtps2pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
; X86-AVX-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX-NEXT:    vmovdqu %xmm0, (%eax) # encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX512VL-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX-NEXT:    vmovdqu %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX512VL-NEXT:    vmovdqu %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; The fadd operation forces the FP (double) execution domain, so the
  ; unaligned store must be selected as vmovupd.
  ; Note: the AVX checks build the <0.0, C> constant via vxorpd+vmovhpd,
  ; while the AVX512VL checks use vmovsd+vpslldq.
; X86-AVX-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX-NEXT:    vmovhpd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT:    # xmm1 = xmm1[0],mem[0]
; X86-AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX-NEXT:    vmovupd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovsd {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    # xmm1 = mem[0],zero
; X86-AVX512VL-NEXT:    vpslldq $8, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X86-AVX512VL-NEXT:    # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-AVX512VL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX512VL-NEXT:    vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    # xmm1 = xmm1[0],mem[0]
; X64-AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX-NEXT:    vmovupd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovsd {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    # xmm1 = mem[0],zero
; X64-AVX512VL-NEXT:    vpslldq $8, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X64-AVX512VL-NEXT:    # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX512VL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
692
693
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
  ; No domain-forcing arithmetic here: the intrinsic lowers directly to a
  ; single unaligned vmovups store.
; X86-AVX-LABEL: test_x86_sse_storeu_ps:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse_storeu_ps:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse_storeu_ps:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse_storeu_ps:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
720
721
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
  ; add operation forces the execution domain.
  ; Per the checks: without AVX2 the 256-bit byte add is split into two
  ; 128-bit vpsubb halves (extract/insertf128) and stored with vmovups,
  ; while AVX512VL keeps a single 256-bit vpsubb and stores with vmovdqu.
; X86-AVX-LABEL: test_x86_avx_storeu_dq_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
; X86-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; X86-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
; X86-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
; X86-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; X86-AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
; X86-AVX512VL-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_storeu_dq_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
; X64-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; X64-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
; X64-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; X64-AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
; X64-AVX512VL-NEXT:    vmovdqu %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
769
770
define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
  ; fadd operation forces the execution domain. (The IR op below is fadd,
  ; not an integer add: +0.0 is added so the store stays in the double
  ; domain and is selected as vmovupd.)
; X86-AVX-LABEL: test_x86_avx_storeu_pd_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX-NEXT:    vmovupd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x11,0x00]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX512VL-NEXT:    vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_storeu_pd_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX-NEXT:    vmovupd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x11,0x07]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX512VL-NEXT:    vmovupd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
811
812
define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
  ; No domain-forcing arithmetic: the 256-bit intrinsic lowers directly to
  ; an unaligned vmovups store followed by vzeroupper.
; X86-AVX-LABEL: test_x86_avx_storeu_ps_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_storeu_ps_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
843
844
define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
  ; Constant-mask vpermil.pd lowers to an immediate vpermilpd; imm 1
  ; swaps the two doubles (xmm0 = xmm0[1,0] per the shuffle comment).
; AVX-LABEL: test_x86_avx_vpermil_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; AVX-NEXT:    # xmm0 = xmm0[1,0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_pd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; AVX512VL-NEXT:    # xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
861
862
define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
  ; 256-bit constant-mask vpermil.pd.256 lowers to vpermilpd with imm 7,
  ; giving the per-lane shuffle ymm0 = ymm0[1,1,3,2].
; AVX-LABEL: test_x86_avx_vpermil_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
; AVX-NEXT:    # ymm0 = ymm0[1,1,3,2]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
; AVX512VL-NEXT:    # ymm0 = ymm0[1,1,3,2]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
879
880
define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
  ; Constant-mask vpermil.ps lowers to vpermilps with imm 7, producing
  ; the shuffle xmm0 = xmm0[3,1,0,0].
; AVX-LABEL: test_x86_avx_vpermil_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps $7, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
; AVX-NEXT:    # xmm0 = xmm0[3,1,0,0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_ps:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilps $7, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
; AVX512VL-NEXT:    # xmm0 = xmm0[3,1,0,0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
897
898
define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
  ; 256-bit vpermilps applies the same imm-7 pattern to each 128-bit lane:
  ; ymm0 = ymm0[3,1,0,0,7,5,4,4].
; AVX-LABEL: test_x86_avx_vpermil_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
; AVX-NEXT:    # ymm0 = ymm0[3,1,0,0,7,5,4,4]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
; AVX512VL-NEXT:    # ymm0 = ymm0[3,1,0,0,7,5,4,4]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
915
916
define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; Intrinsic imm 3 is re-encoded as vperm2f128 imm 33 with swapped
  ; operands: ymm0 = ymm1[2,3],ymm0[0,1].
; AVX-LABEL: test_x86_avx_vperm2f128_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vperm2f128_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
933
934
define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; Same lane-swap as the pd variant: imm 3 becomes vperm2f128 imm 33 with
  ; swapped operands, ymm0 = ymm1[2,3],ymm0[0,1].
; AVX-LABEL: test_x86_avx_vperm2f128_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vperm2f128_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
951
952
define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; Integer variant: plain AVX still uses vperm2f128, but with AVX512VL
  ; (implying AVX2) the same shuffle is selected as vperm2i128.
; AVX-LABEL: test_x86_avx_vperm2f128_si_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vperm2f128_si_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vperm2i128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x46,0xc0,0x21]
; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
969
970
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
  ; The cvtdq2.ps.256 intrinsic lowers to a single vcvtdq2ps
  ; (<8 x i32> -> <8 x float> conversion) on both configurations.
; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5b,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
985