1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
3; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
4; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
5; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
6; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
7; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
8
9; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
10
11define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
12; SSE-LABEL: test_mm_add_epi8:
13; SSE:       # %bb.0:
14; SSE-NEXT:    paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
15; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
16;
17; AVX1-LABEL: test_mm_add_epi8:
18; AVX1:       # %bb.0:
19; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
20; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
21;
22; AVX512-LABEL: test_mm_add_epi8:
23; AVX512:       # %bb.0:
24; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
25; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
26  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
27  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
28  %res = add <16 x i8> %arg0, %arg1
29  %bc = bitcast <16 x i8> %res to <2 x i64>
30  ret <2 x i64> %bc
31}
32
33define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
34; SSE-LABEL: test_mm_add_epi16:
35; SSE:       # %bb.0:
36; SSE-NEXT:    paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
37; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
38;
39; AVX1-LABEL: test_mm_add_epi16:
40; AVX1:       # %bb.0:
41; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
42; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
43;
44; AVX512-LABEL: test_mm_add_epi16:
45; AVX512:       # %bb.0:
46; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
47; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
48  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
49  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
50  %res = add <8 x i16> %arg0, %arg1
51  %bc = bitcast <8 x i16> %res to <2 x i64>
52  ret <2 x i64> %bc
53}
54
55define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
56; SSE-LABEL: test_mm_add_epi32:
57; SSE:       # %bb.0:
58; SSE-NEXT:    paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
59; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
60;
61; AVX1-LABEL: test_mm_add_epi32:
62; AVX1:       # %bb.0:
63; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
64; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
65;
66; AVX512-LABEL: test_mm_add_epi32:
67; AVX512:       # %bb.0:
68; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
69; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
70  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
71  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
72  %res = add <4 x i32> %arg0, %arg1
73  %bc = bitcast <4 x i32> %res to <2 x i64>
74  ret <2 x i64> %bc
75}
76
77define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
78; SSE-LABEL: test_mm_add_epi64:
79; SSE:       # %bb.0:
80; SSE-NEXT:    paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
81; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
82;
83; AVX1-LABEL: test_mm_add_epi64:
84; AVX1:       # %bb.0:
85; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
86; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
87;
88; AVX512-LABEL: test_mm_add_epi64:
89; AVX512:       # %bb.0:
90; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
91; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
92  %res = add <2 x i64> %a0, %a1
93  ret <2 x i64> %res
94}
95
96define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
97; SSE-LABEL: test_mm_add_pd:
98; SSE:       # %bb.0:
99; SSE-NEXT:    addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
100; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
101;
102; AVX1-LABEL: test_mm_add_pd:
103; AVX1:       # %bb.0:
104; AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
105; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
106;
107; AVX512-LABEL: test_mm_add_pd:
108; AVX512:       # %bb.0:
109; AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
110; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
111  %res = fadd <2 x double> %a0, %a1
112  ret <2 x double> %res
113}
114
115define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
116; SSE-LABEL: test_mm_add_sd:
117; SSE:       # %bb.0:
118; SSE-NEXT:    addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
119; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
120;
121; AVX1-LABEL: test_mm_add_sd:
122; AVX1:       # %bb.0:
123; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
124; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
125;
126; AVX512-LABEL: test_mm_add_sd:
127; AVX512:       # %bb.0:
128; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
129; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
130  %ext0 = extractelement <2 x double> %a0, i32 0
131  %ext1 = extractelement <2 x double> %a1, i32 0
132  %fadd = fadd double %ext0, %ext1
133  %res = insertelement <2 x double> %a0, double %fadd, i32 0
134  ret <2 x double> %res
135}
136
137define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
138; SSE-LABEL: test_mm_adds_epi8:
139; SSE:       # %bb.0:
140; SSE-NEXT:    paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
141; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
142;
143; AVX1-LABEL: test_mm_adds_epi8:
144; AVX1:       # %bb.0:
145; AVX1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
146; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
147;
148; AVX512-LABEL: test_mm_adds_epi8:
149; AVX512:       # %bb.0:
150; AVX512-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
151; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
152  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
153  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
154  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
155  %bc = bitcast <16 x i8> %res to <2 x i64>
156  ret <2 x i64> %bc
157}
158declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
159
160define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
161; SSE-LABEL: test_mm_adds_epi16:
162; SSE:       # %bb.0:
163; SSE-NEXT:    paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
164; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
165;
166; AVX1-LABEL: test_mm_adds_epi16:
167; AVX1:       # %bb.0:
168; AVX1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
169; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
170;
171; AVX512-LABEL: test_mm_adds_epi16:
172; AVX512:       # %bb.0:
173; AVX512-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
174; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
175  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
176  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
177  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
178  %bc = bitcast <8 x i16> %res to <2 x i64>
179  ret <2 x i64> %bc
180}
181declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
182
183define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
184; SSE-LABEL: test_mm_adds_epu8:
185; SSE:       # %bb.0:
186; SSE-NEXT:    paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
187; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
188;
189; AVX1-LABEL: test_mm_adds_epu8:
190; AVX1:       # %bb.0:
191; AVX1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
192; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
193;
194; AVX512-LABEL: test_mm_adds_epu8:
195; AVX512:       # %bb.0:
196; AVX512-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
197; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
198  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
199  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
200  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
201  %bc = bitcast <16 x i8> %res to <2 x i64>
202  ret <2 x i64> %bc
203}
204declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
205
206define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
207; SSE-LABEL: test_mm_adds_epu16:
208; SSE:       # %bb.0:
209; SSE-NEXT:    paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
210; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
211;
212; AVX1-LABEL: test_mm_adds_epu16:
213; AVX1:       # %bb.0:
214; AVX1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
215; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
216;
217; AVX512-LABEL: test_mm_adds_epu16:
218; AVX512:       # %bb.0:
219; AVX512-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
220; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
221  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
222  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
223  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
224  %bc = bitcast <8 x i16> %res to <2 x i64>
225  ret <2 x i64> %bc
226}
227declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
228
229define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
230; SSE-LABEL: test_mm_and_pd:
231; SSE:       # %bb.0:
232; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
233; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
234;
235; AVX1-LABEL: test_mm_and_pd:
236; AVX1:       # %bb.0:
237; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
238; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
239;
240; AVX512-LABEL: test_mm_and_pd:
241; AVX512:       # %bb.0:
242; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
243; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
244  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
245  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
246  %res = and <4 x i32> %arg0, %arg1
247  %bc = bitcast <4 x i32> %res to <2 x double>
248  ret <2 x double> %bc
249}
250
251define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
252; SSE-LABEL: test_mm_and_si128:
253; SSE:       # %bb.0:
254; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
255; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
256;
257; AVX1-LABEL: test_mm_and_si128:
258; AVX1:       # %bb.0:
259; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
260; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
261;
262; AVX512-LABEL: test_mm_and_si128:
263; AVX512:       # %bb.0:
264; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
265; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
266  %res = and <2 x i64> %a0, %a1
267  ret <2 x i64> %res
268}
269
270define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
271; SSE-LABEL: test_mm_andnot_pd:
272; SSE:       # %bb.0:
273; SSE-NEXT:    andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
274; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
275;
276; AVX1-LABEL: test_mm_andnot_pd:
277; AVX1:       # %bb.0:
278; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
279; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
280;
281; AVX512-LABEL: test_mm_andnot_pd:
282; AVX512:       # %bb.0:
283; AVX512-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
284; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
285  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
286  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
287  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
288  %res = and <4 x i32> %not, %arg1
289  %bc = bitcast <4 x i32> %res to <2 x double>
290  ret <2 x double> %bc
291}
292
293define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
294; SSE-LABEL: test_mm_andnot_si128:
295; SSE:       # %bb.0:
296; SSE-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
297; SSE-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
298; SSE-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
299; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
300;
301; AVX1-LABEL: test_mm_andnot_si128:
302; AVX1:       # %bb.0:
303; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
304; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
305; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
306; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
307;
308; AVX512-LABEL: test_mm_andnot_si128:
309; AVX512:       # %bb.0:
310; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
311; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
312; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
313  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
314  %res = and <2 x i64> %not, %a1
315  ret <2 x i64> %res
316}
317
318define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
319; SSE-LABEL: test_mm_avg_epu8:
320; SSE:       # %bb.0:
321; SSE-NEXT:    pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
322; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
323;
324; AVX1-LABEL: test_mm_avg_epu8:
325; AVX1:       # %bb.0:
326; AVX1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
327; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
328;
329; AVX512-LABEL: test_mm_avg_epu8:
330; AVX512:       # %bb.0:
331; AVX512-NEXT:    vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
332; AVX512-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
333; AVX512-NEXT:    vpmovzxbw %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc9]
334; AVX512-NEXT:    # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
335; AVX512-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
336; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
337; AVX512-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
338; AVX512-NEXT:    vpsrlw $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x01]
339; AVX512-NEXT:    vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
340; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
341; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
342  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
343  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
344  %zext0 = zext <16 x i8> %arg0 to <16 x i16>
345  %zext1 = zext <16 x i8> %arg1 to <16 x i16>
346  %add = add <16 x i16> %zext0, %zext1
347  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
348  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
349  %res = trunc <16 x i16> %lshr to <16 x i8>
350  %bc = bitcast <16 x i8> %res to <2 x i64>
351  ret <2 x i64> %bc
352}
353
354define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
355; SSE-LABEL: test_mm_avg_epu16:
356; SSE:       # %bb.0:
357; SSE-NEXT:    pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
358; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
359;
360; AVX1-LABEL: test_mm_avg_epu16:
361; AVX1:       # %bb.0:
362; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
363; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
364;
365; AVX512-LABEL: test_mm_avg_epu16:
366; AVX512:       # %bb.0:
367; AVX512-NEXT:    vpmovzxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc0]
368; AVX512-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
369; AVX512-NEXT:    vpmovzxwd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc9]
370; AVX512-NEXT:    # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
371; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
372; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
373; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
374; AVX512-NEXT:    vpsrld $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x01]
375; AVX512-NEXT:    vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
376; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
377; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
378  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
379  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
380  %zext0 = zext <8 x i16> %arg0 to <8 x i32>
381  %zext1 = zext <8 x i16> %arg1 to <8 x i32>
382  %add = add <8 x i32> %zext0, %zext1
383  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
384  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
385  %res = trunc <8 x i32> %lshr to <8 x i16>
386  %bc = bitcast <8 x i16> %res to <2 x i64>
387  ret <2 x i64> %bc
388}
389
390define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
391; SSE-LABEL: test_mm_bslli_si128:
392; SSE:       # %bb.0:
393; SSE-NEXT:    pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
394; SSE-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
395; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
396;
397; AVX1-LABEL: test_mm_bslli_si128:
398; AVX1:       # %bb.0:
399; AVX1-NEXT:    vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
400; AVX1-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
401; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
402;
403; AVX512-LABEL: test_mm_bslli_si128:
404; AVX512:       # %bb.0:
405; AVX512-NEXT:    vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
406; AVX512-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
407; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
408  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
409  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
410  %bc = bitcast <16 x i8> %res to <2 x i64>
411  ret <2 x i64> %bc
412}
413
414define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
415; SSE-LABEL: test_mm_bsrli_si128:
416; SSE:       # %bb.0:
417; SSE-NEXT:    psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
418; SSE-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
419; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
420;
421; AVX1-LABEL: test_mm_bsrli_si128:
422; AVX1:       # %bb.0:
423; AVX1-NEXT:    vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
424; AVX1-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
425; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
426;
427; AVX512-LABEL: test_mm_bsrli_si128:
428; AVX512:       # %bb.0:
429; AVX512-NEXT:    vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
430; AVX512-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
431; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
432  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
433  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
434  %bc = bitcast <16 x i8> %res to <2 x i64>
435  ret <2 x i64> %bc
436}
437
438define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
439; CHECK-LABEL: test_mm_castpd_ps:
440; CHECK:       # %bb.0:
441; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
442  %res = bitcast <2 x double> %a0 to <4 x float>
443  ret <4 x float> %res
444}
445
446define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
447; CHECK-LABEL: test_mm_castpd_si128:
448; CHECK:       # %bb.0:
449; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
450  %res = bitcast <2 x double> %a0 to <2 x i64>
451  ret <2 x i64> %res
452}
453
454define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
455; CHECK-LABEL: test_mm_castps_pd:
456; CHECK:       # %bb.0:
457; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
458  %res = bitcast <4 x float> %a0 to <2 x double>
459  ret <2 x double> %res
460}
461
462define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
463; CHECK-LABEL: test_mm_castps_si128:
464; CHECK:       # %bb.0:
465; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
466  %res = bitcast <4 x float> %a0 to <2 x i64>
467  ret <2 x i64> %res
468}
469
470define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
471; CHECK-LABEL: test_mm_castsi128_pd:
472; CHECK:       # %bb.0:
473; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
474  %res = bitcast <2 x i64> %a0 to <2 x double>
475  ret <2 x double> %res
476}
477
478define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
479; CHECK-LABEL: test_mm_castsi128_ps:
480; CHECK:       # %bb.0:
481; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
482  %res = bitcast <2 x i64> %a0 to <4 x float>
483  ret <4 x float> %res
484}
485
486define void @test_mm_clflush(i8* %a0) nounwind {
487; X86-LABEL: test_mm_clflush:
488; X86:       # %bb.0:
489; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
490; X86-NEXT:    clflush (%eax) # encoding: [0x0f,0xae,0x38]
491; X86-NEXT:    retl # encoding: [0xc3]
492;
493; X64-LABEL: test_mm_clflush:
494; X64:       # %bb.0:
495; X64-NEXT:    clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
496; X64-NEXT:    retq # encoding: [0xc3]
497  call void @llvm.x86.sse2.clflush(i8* %a0)
498  ret void
499}
500declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
501
502define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
503; SSE-LABEL: test_mm_cmpeq_epi8:
504; SSE:       # %bb.0:
505; SSE-NEXT:    pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
506; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
507;
508; AVX1-LABEL: test_mm_cmpeq_epi8:
509; AVX1:       # %bb.0:
510; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
511; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
512;
513; AVX512-LABEL: test_mm_cmpeq_epi8:
514; AVX512:       # %bb.0:
515; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
516; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
517; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
518  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
519  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
520  %cmp = icmp eq <16 x i8> %arg0, %arg1
521  %res = sext <16 x i1> %cmp to <16 x i8>
522  %bc = bitcast <16 x i8> %res to <2 x i64>
523  ret <2 x i64> %bc
524}
525
526define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
527; SSE-LABEL: test_mm_cmpeq_epi16:
528; SSE:       # %bb.0:
529; SSE-NEXT:    pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
530; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
531;
532; AVX1-LABEL: test_mm_cmpeq_epi16:
533; AVX1:       # %bb.0:
534; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
535; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
536;
537; AVX512-LABEL: test_mm_cmpeq_epi16:
538; AVX512:       # %bb.0:
539; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
540; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
541; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
542  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
543  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
544  %cmp = icmp eq <8 x i16> %arg0, %arg1
545  %res = sext <8 x i1> %cmp to <8 x i16>
546  %bc = bitcast <8 x i16> %res to <2 x i64>
547  ret <2 x i64> %bc
548}
549
550define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
551; SSE-LABEL: test_mm_cmpeq_epi32:
552; SSE:       # %bb.0:
553; SSE-NEXT:    pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
554; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
555;
556; AVX1-LABEL: test_mm_cmpeq_epi32:
557; AVX1:       # %bb.0:
558; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
559; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
560;
561; AVX512-LABEL: test_mm_cmpeq_epi32:
562; AVX512:       # %bb.0:
563; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
564; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
565; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
566  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
567  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
568  %cmp = icmp eq <4 x i32> %arg0, %arg1
569  %res = sext <4 x i1> %cmp to <4 x i32>
570  %bc = bitcast <4 x i32> %res to <2 x i64>
571  ret <2 x i64> %bc
572}
573
574define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
575; SSE-LABEL: test_mm_cmpeq_pd:
576; SSE:       # %bb.0:
577; SSE-NEXT:    cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
578; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
579;
580; AVX1-LABEL: test_mm_cmpeq_pd:
581; AVX1:       # %bb.0:
582; AVX1-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
583; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
584;
585; AVX512-LABEL: test_mm_cmpeq_pd:
586; AVX512:       # %bb.0:
587; AVX512-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
588; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
589; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
590  %fcmp = fcmp oeq <2 x double> %a0, %a1
591  %sext = sext <2 x i1> %fcmp to <2 x i64>
592  %res = bitcast <2 x i64> %sext to <2 x double>
593  ret <2 x double> %res
594}
595
596define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
597; SSE-LABEL: test_mm_cmpeq_sd:
598; SSE:       # %bb.0:
599; SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
600; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
601;
602; AVX-LABEL: test_mm_cmpeq_sd:
603; AVX:       # %bb.0:
604; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
605; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
606  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
607  ret <2 x double> %res
608}
609declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
610
611define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
612; SSE-LABEL: test_mm_cmpge_pd:
613; SSE:       # %bb.0:
614; SSE-NEXT:    cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
615; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
616; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
617;
618; AVX1-LABEL: test_mm_cmpge_pd:
619; AVX1:       # %bb.0:
620; AVX1-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
621; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
622;
623; AVX512-LABEL: test_mm_cmpge_pd:
624; AVX512:       # %bb.0:
625; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
626; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
627; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
628  %fcmp = fcmp ole <2 x double> %a1, %a0
629  %sext = sext <2 x i1> %fcmp to <2 x i64>
630  %res = bitcast <2 x i64> %sext to <2 x double>
631  ret <2 x double> %res
632}
633
634define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
635; SSE-LABEL: test_mm_cmpge_sd:
636; SSE:       # %bb.0:
637; SSE-NEXT:    cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
638; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
639; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
640; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
641;
642; AVX-LABEL: test_mm_cmpge_sd:
643; AVX:       # %bb.0:
644; AVX-NEXT:    vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
645; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
646; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
647; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
648  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
649  %ext0 = extractelement <2 x double> %cmp, i32 0
650  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
651  %ext1 = extractelement <2 x double> %a0, i32 1
652  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
653  ret <2 x double> %ins1
654}
655
656define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
657; SSE-LABEL: test_mm_cmpgt_epi8:
658; SSE:       # %bb.0:
659; SSE-NEXT:    pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
660; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
661;
662; AVX1-LABEL: test_mm_cmpgt_epi8:
663; AVX1:       # %bb.0:
664; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
665; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
666;
667; AVX512-LABEL: test_mm_cmpgt_epi8:
668; AVX512:       # %bb.0:
669; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
670; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
671; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
672  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
673  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
674  %cmp = icmp sgt <16 x i8> %arg0, %arg1
675  %res = sext <16 x i1> %cmp to <16 x i8>
676  %bc = bitcast <16 x i8> %res to <2 x i64>
677  ret <2 x i64> %bc
678}
679
680define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
681; SSE-LABEL: test_mm_cmpgt_epi16:
682; SSE:       # %bb.0:
683; SSE-NEXT:    pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
684; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
685;
686; AVX1-LABEL: test_mm_cmpgt_epi16:
687; AVX1:       # %bb.0:
688; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
689; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
690;
691; AVX512-LABEL: test_mm_cmpgt_epi16:
692; AVX512:       # %bb.0:
693; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
694; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
695; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
696  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
697  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
698  %cmp = icmp sgt <8 x i16> %arg0, %arg1
699  %res = sext <8 x i1> %cmp to <8 x i16>
700  %bc = bitcast <8 x i16> %res to <2 x i64>
701  ret <2 x i64> %bc
702}
703
704define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
705; SSE-LABEL: test_mm_cmpgt_epi32:
706; SSE:       # %bb.0:
707; SSE-NEXT:    pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
708; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
709;
710; AVX1-LABEL: test_mm_cmpgt_epi32:
711; AVX1:       # %bb.0:
712; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
713; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
714;
715; AVX512-LABEL: test_mm_cmpgt_epi32:
716; AVX512:       # %bb.0:
717; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
718; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
719; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
720  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
721  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
722  %cmp = icmp sgt <4 x i32> %arg0, %arg1
723  %res = sext <4 x i1> %cmp to <4 x i32>
724  %bc = bitcast <4 x i32> %res to <2 x i64>
725  ret <2 x i64> %bc
726}
727
728define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
729; SSE-LABEL: test_mm_cmpgt_pd:
730; SSE:       # %bb.0:
731; SSE-NEXT:    cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
732; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
733; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
734;
735; AVX1-LABEL: test_mm_cmpgt_pd:
736; AVX1:       # %bb.0:
737; AVX1-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
738; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
739;
740; AVX512-LABEL: test_mm_cmpgt_pd:
741; AVX512:       # %bb.0:
742; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
743; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
744; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
745  %fcmp = fcmp olt <2 x double> %a1, %a0
746  %sext = sext <2 x i1> %fcmp to <2 x i64>
747  %res = bitcast <2 x i64> %sext to <2 x double>
748  ret <2 x double> %res
749}
750
751define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
752; SSE-LABEL: test_mm_cmpgt_sd:
753; SSE:       # %bb.0:
754; SSE-NEXT:    cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
755; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
756; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
757; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
758;
759; AVX-LABEL: test_mm_cmpgt_sd:
760; AVX:       # %bb.0:
761; AVX-NEXT:    vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
762; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
763; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
764; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
765  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
766  %ext0 = extractelement <2 x double> %cmp, i32 0
767  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
768  %ext1 = extractelement <2 x double> %a0, i32 1
769  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
770  ret <2 x double> %ins1
771}
772
773define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
774; SSE-LABEL: test_mm_cmple_pd:
775; SSE:       # %bb.0:
776; SSE-NEXT:    cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
777; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
778;
779; AVX1-LABEL: test_mm_cmple_pd:
780; AVX1:       # %bb.0:
781; AVX1-NEXT:    vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
782; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
783;
784; AVX512-LABEL: test_mm_cmple_pd:
785; AVX512:       # %bb.0:
786; AVX512-NEXT:    vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
787; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
788; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
789  %fcmp = fcmp ole <2 x double> %a0, %a1
790  %sext = sext <2 x i1> %fcmp to <2 x i64>
791  %res = bitcast <2 x i64> %sext to <2 x double>
792  ret <2 x double> %res
793}
794
795define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
796; SSE-LABEL: test_mm_cmple_sd:
797; SSE:       # %bb.0:
798; SSE-NEXT:    cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
799; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
800;
801; AVX-LABEL: test_mm_cmple_sd:
802; AVX:       # %bb.0:
803; AVX-NEXT:    vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
804; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
805  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
806  ret <2 x double> %res
807}
808
809define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
810; SSE-LABEL: test_mm_cmplt_epi8:
811; SSE:       # %bb.0:
812; SSE-NEXT:    pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
813; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
814; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
815;
816; AVX1-LABEL: test_mm_cmplt_epi8:
817; AVX1:       # %bb.0:
818; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
819; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
820;
821; AVX512-LABEL: test_mm_cmplt_epi8:
822; AVX512:       # %bb.0:
823; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
824; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
825; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
826  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
827  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
828  %cmp = icmp sgt <16 x i8> %arg1, %arg0
829  %res = sext <16 x i1> %cmp to <16 x i8>
830  %bc = bitcast <16 x i8> %res to <2 x i64>
831  ret <2 x i64> %bc
832}
833
834define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
835; SSE-LABEL: test_mm_cmplt_epi16:
836; SSE:       # %bb.0:
837; SSE-NEXT:    pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
838; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
839; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
840;
841; AVX1-LABEL: test_mm_cmplt_epi16:
842; AVX1:       # %bb.0:
843; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
844; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
845;
846; AVX512-LABEL: test_mm_cmplt_epi16:
847; AVX512:       # %bb.0:
848; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
849; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
850; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
851  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
852  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
853  %cmp = icmp sgt <8 x i16> %arg1, %arg0
854  %res = sext <8 x i1> %cmp to <8 x i16>
855  %bc = bitcast <8 x i16> %res to <2 x i64>
856  ret <2 x i64> %bc
857}
858
859define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
860; SSE-LABEL: test_mm_cmplt_epi32:
861; SSE:       # %bb.0:
862; SSE-NEXT:    pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
863; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
864; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
865;
866; AVX1-LABEL: test_mm_cmplt_epi32:
867; AVX1:       # %bb.0:
868; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
869; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
870;
871; AVX512-LABEL: test_mm_cmplt_epi32:
872; AVX512:       # %bb.0:
873; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
874; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
875; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
876  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
877  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
878  %cmp = icmp sgt <4 x i32> %arg1, %arg0
879  %res = sext <4 x i1> %cmp to <4 x i32>
880  %bc = bitcast <4 x i32> %res to <2 x i64>
881  ret <2 x i64> %bc
882}
883
884define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
885; SSE-LABEL: test_mm_cmplt_pd:
886; SSE:       # %bb.0:
887; SSE-NEXT:    cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
888; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
889;
890; AVX1-LABEL: test_mm_cmplt_pd:
891; AVX1:       # %bb.0:
892; AVX1-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
893; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
894;
895; AVX512-LABEL: test_mm_cmplt_pd:
896; AVX512:       # %bb.0:
897; AVX512-NEXT:    vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
898; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
899; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
900  %fcmp = fcmp olt <2 x double> %a0, %a1
901  %sext = sext <2 x i1> %fcmp to <2 x i64>
902  %res = bitcast <2 x i64> %sext to <2 x double>
903  ret <2 x double> %res
904}
905
906define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
907; SSE-LABEL: test_mm_cmplt_sd:
908; SSE:       # %bb.0:
909; SSE-NEXT:    cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
910; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
911;
912; AVX-LABEL: test_mm_cmplt_sd:
913; AVX:       # %bb.0:
914; AVX-NEXT:    vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
915; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
916  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
917  ret <2 x double> %res
918}
919
920define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
921; SSE-LABEL: test_mm_cmpneq_pd:
922; SSE:       # %bb.0:
923; SSE-NEXT:    cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
924; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
925;
926; AVX1-LABEL: test_mm_cmpneq_pd:
927; AVX1:       # %bb.0:
928; AVX1-NEXT:    vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
929; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
930;
931; AVX512-LABEL: test_mm_cmpneq_pd:
932; AVX512:       # %bb.0:
933; AVX512-NEXT:    vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
934; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
935; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
936  %fcmp = fcmp une <2 x double> %a0, %a1
937  %sext = sext <2 x i1> %fcmp to <2 x i64>
938  %res = bitcast <2 x i64> %sext to <2 x double>
939  ret <2 x double> %res
940}
941
942define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
943; SSE-LABEL: test_mm_cmpneq_sd:
944; SSE:       # %bb.0:
945; SSE-NEXT:    cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
946; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
947;
948; AVX-LABEL: test_mm_cmpneq_sd:
949; AVX:       # %bb.0:
950; AVX-NEXT:    vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
951; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
952  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
953  ret <2 x double> %res
954}
955
956define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
957; SSE-LABEL: test_mm_cmpnge_pd:
958; SSE:       # %bb.0:
959; SSE-NEXT:    cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06]
960; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
961; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
962;
963; AVX1-LABEL: test_mm_cmpnge_pd:
964; AVX1:       # %bb.0:
965; AVX1-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06]
966; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
967;
968; AVX512-LABEL: test_mm_cmpnge_pd:
969; AVX512:       # %bb.0:
970; AVX512-NEXT:    vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06]
971; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
972; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
973  %fcmp = fcmp ugt <2 x double> %a1, %a0
974  %sext = sext <2 x i1> %fcmp to <2 x i64>
975  %res = bitcast <2 x i64> %sext to <2 x double>
976  ret <2 x double> %res
977}
978
979define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
980; SSE-LABEL: test_mm_cmpnge_sd:
981; SSE:       # %bb.0:
982; SSE-NEXT:    cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06]
983; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
984; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
985; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
986;
987; AVX-LABEL: test_mm_cmpnge_sd:
988; AVX:       # %bb.0:
989; AVX-NEXT:    vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
990; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
991; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
992; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
993  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
994  %ext0 = extractelement <2 x double> %cmp, i32 0
995  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
996  %ext1 = extractelement <2 x double> %a0, i32 1
997  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
998  ret <2 x double> %ins1
999}
1000
1001define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1002; SSE-LABEL: test_mm_cmpngt_pd:
1003; SSE:       # %bb.0:
1004; SSE-NEXT:    cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05]
1005; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
1006; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1007;
1008; AVX1-LABEL: test_mm_cmpngt_pd:
1009; AVX1:       # %bb.0:
1010; AVX1-NEXT:    vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05]
1011; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1012;
1013; AVX512-LABEL: test_mm_cmpngt_pd:
1014; AVX512:       # %bb.0:
1015; AVX512-NEXT:    vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05]
1016; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1017; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1018  %fcmp = fcmp uge <2 x double> %a1, %a0
1019  %sext = sext <2 x i1> %fcmp to <2 x i64>
1020  %res = bitcast <2 x i64> %sext to <2 x double>
1021  ret <2 x double> %res
1022}
1023
1024define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1025; SSE-LABEL: test_mm_cmpngt_sd:
1026; SSE:       # %bb.0:
1027; SSE-NEXT:    cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05]
1028; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
1029; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
1030; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1031;
1032; AVX-LABEL: test_mm_cmpngt_sd:
1033; AVX:       # %bb.0:
1034; AVX-NEXT:    vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
1035; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
1036; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
1037; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1038  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
1039  %ext0 = extractelement <2 x double> %cmp, i32 0
1040  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
1041  %ext1 = extractelement <2 x double> %a0, i32 1
1042  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
1043  ret <2 x double> %ins1
1044}
1045
1046define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1047; SSE-LABEL: test_mm_cmpnle_pd:
1048; SSE:       # %bb.0:
1049; SSE-NEXT:    cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06]
1050; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1051;
1052; AVX1-LABEL: test_mm_cmpnle_pd:
1053; AVX1:       # %bb.0:
1054; AVX1-NEXT:    vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06]
1055; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1056;
1057; AVX512-LABEL: test_mm_cmpnle_pd:
1058; AVX512:       # %bb.0:
1059; AVX512-NEXT:    vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06]
1060; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1061; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1062  %fcmp = fcmp ugt <2 x double> %a0, %a1
1063  %sext = sext <2 x i1> %fcmp to <2 x i64>
1064  %res = bitcast <2 x i64> %sext to <2 x double>
1065  ret <2 x double> %res
1066}
1067
1068define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1069; SSE-LABEL: test_mm_cmpnle_sd:
1070; SSE:       # %bb.0:
1071; SSE-NEXT:    cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06]
1072; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1073;
1074; AVX-LABEL: test_mm_cmpnle_sd:
1075; AVX:       # %bb.0:
1076; AVX-NEXT:    vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06]
1077; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1078  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
1079  ret <2 x double> %res
1080}
1081
1082define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1083; SSE-LABEL: test_mm_cmpnlt_pd:
1084; SSE:       # %bb.0:
1085; SSE-NEXT:    cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05]
1086; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1087;
1088; AVX1-LABEL: test_mm_cmpnlt_pd:
1089; AVX1:       # %bb.0:
1090; AVX1-NEXT:    vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05]
1091; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1092;
1093; AVX512-LABEL: test_mm_cmpnlt_pd:
1094; AVX512:       # %bb.0:
1095; AVX512-NEXT:    vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05]
1096; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1097; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1098  %fcmp = fcmp uge <2 x double> %a0, %a1
1099  %sext = sext <2 x i1> %fcmp to <2 x i64>
1100  %res = bitcast <2 x i64> %sext to <2 x double>
1101  ret <2 x double> %res
1102}
1103
1104define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1105; SSE-LABEL: test_mm_cmpnlt_sd:
1106; SSE:       # %bb.0:
1107; SSE-NEXT:    cmpnltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05]
1108; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1109;
1110; AVX-LABEL: test_mm_cmpnlt_sd:
1111; AVX:       # %bb.0:
1112; AVX-NEXT:    vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05]
1113; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1114  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
1115  ret <2 x double> %res
1116}
1117
1118define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1119; SSE-LABEL: test_mm_cmpord_pd:
1120; SSE:       # %bb.0:
1121; SSE-NEXT:    cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07]
1122; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1123;
1124; AVX1-LABEL: test_mm_cmpord_pd:
1125; AVX1:       # %bb.0:
1126; AVX1-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
1127; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1128;
1129; AVX512-LABEL: test_mm_cmpord_pd:
1130; AVX512:       # %bb.0:
1131; AVX512-NEXT:    vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07]
1132; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1133; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1134  %fcmp = fcmp ord <2 x double> %a0, %a1
1135  %sext = sext <2 x i1> %fcmp to <2 x i64>
1136  %res = bitcast <2 x i64> %sext to <2 x double>
1137  ret <2 x double> %res
1138}
1139
1140define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1141; SSE-LABEL: test_mm_cmpord_sd:
1142; SSE:       # %bb.0:
1143; SSE-NEXT:    cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
1144; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1145;
1146; AVX-LABEL: test_mm_cmpord_sd:
1147; AVX:       # %bb.0:
1148; AVX-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
1149; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1150  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
1151  ret <2 x double> %res
1152}
1153
1154define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1155; SSE-LABEL: test_mm_cmpunord_pd:
1156; SSE:       # %bb.0:
1157; SSE-NEXT:    cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03]
1158; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1159;
1160; AVX1-LABEL: test_mm_cmpunord_pd:
1161; AVX1:       # %bb.0:
1162; AVX1-NEXT:    vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03]
1163; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1164;
1165; AVX512-LABEL: test_mm_cmpunord_pd:
1166; AVX512:       # %bb.0:
1167; AVX512-NEXT:    vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03]
1168; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1169; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1170  %fcmp = fcmp uno <2 x double> %a0, %a1
1171  %sext = sext <2 x i1> %fcmp to <2 x i64>
1172  %res = bitcast <2 x i64> %sext to <2 x double>
1173  ret <2 x double> %res
1174}
1175
1176define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1177; SSE-LABEL: test_mm_cmpunord_sd:
1178; SSE:       # %bb.0:
1179; SSE-NEXT:    cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03]
1180; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1181;
1182; AVX-LABEL: test_mm_cmpunord_sd:
1183; AVX:       # %bb.0:
1184; AVX-NEXT:    vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03]
1185; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1186  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
1187  ret <2 x double> %res
1188}
1189
define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comige_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

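; Note: the "le"/"lt" variants below swap the comisd operands so the CF-based
; setae/seta predicates can be used; an unordered compare sets CF, so both
; correctly return 0 when either input is NaN.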
define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comile_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone

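; Note: cvtdq2pd only reads the low 64 bits of its source, so the
; extract-low-lanes shuffle plus sitofp below folds to the single instruction.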
define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
  %res = sitofp <2 x i32> %ext to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = sitofp <4 x i32> %arg0 to <4 x float>
  ret <4 x float> %res
}

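; Note: the cvtpd2dq/cvtps2dq tests call the intrinsics directly because these
; conversions round according to the current MXCSR rounding mode, which plain
; fptosi cannot express; the cvtt* tests further below truncate instead.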
define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
  %res = fpext <2 x float> %ext to <2 x double>
  ret <2 x double> %res
}

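; Note: the i386 ABI returns double in st(0), so the 32-bit variants of
; test_mm_cvtsd_f64 spill the low xmm lane to a realigned stack slot and
; reload it with fldl; on x86-64 the value is already in xmm0 and the body is
; empty.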
define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsd_f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-SSE-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-SSE-NEXT:    movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24]
; X86-SSE-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsd_f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp # encoding: [0x55]
; X86-AVX1-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX1-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX1-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX1-NEXT:    vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX1-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX1-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX1-NEXT:    popl %ebp # encoding: [0x5d]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_f64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    pushl %ebp # encoding: [0x55]
; X86-AVX512-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX512-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX512-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX512-NEXT:    vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX512-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX512-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX512-NEXT:    popl %ebp # encoding: [0x5d]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtsd_f64:
; X64:       # %bb.0:
; X64-NEXT:    retq # encoding: [0xc3]
  %res = extractelement <2 x double> %a0, i32 0
  ret double %res
}

define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsd_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsd_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_cvtsd_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cvtsd_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
; X86-SSE-LABEL: test_mm_cvtsd_ss_load:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %p1
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}

define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsi128_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsi128_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsi128_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = extractelement <4 x i32> %arg0, i32 0
  ret i32 %res
}

define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_sd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_sd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtsi2sdl %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_sd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_sd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %cvt = sitofp i32 %a1 to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

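; Note: on 32-bit targets the i32 argument lives on the stack, so a single
; movss load both inserts the value and zeros the upper lanes; on 64-bit it
; arrives in %edi and movd performs the zero-extending insert.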
define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
  %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
  %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cvtss_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext = extractelement <4 x float> %a1, i32 0
  %cvt = fpext float %ext to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttpd_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttpd_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttpd_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttps_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttps_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttps_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone

define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttsd_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttsd_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttsd_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fdiv <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fdiv = fdiv double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fdiv, i32 0
  ret <2 x double> %res
}

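; Note: pextrw already zero-extends into the full 32-bit register, so the
; movzwl below is redundant; fast-isel emits it anyway, presumably because the
; i16->i32 zext is selected separately from the extract.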
define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_extract_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01]
; SSE-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_extract_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX1-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_extract_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX512-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext = extractelement <8 x i16> %arg0, i32 1
  %res = zext i16 %ext to i32
  ret i32 %res
}

define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
; X86-SSE-LABEL: test_mm_insert_epi16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT:    pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_insert_epi16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_insert_epi16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_insert_epi16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_insert_epi16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_insert_epi16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = insertelement <8 x i16> %arg0, i16 %a1, i32 1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define void @test_mm_lfence() nounwind {
; CHECK-LABEL: test_mm_lfence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lfence # encoding: [0x0f,0xae,0xe8]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind readnone

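; Note: the aligned loads below use movaps/vmovaps regardless of element type;
; it is architecturally identical to movapd/movdqa for a load and its encoding
; is one byte shorter (no 0x66 prefix).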
define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %res = load <2 x double>, <2 x double>* %arg0, align 16
  ret <2 x double> %res
}

define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_sd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_sd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_sd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0],zero
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_sd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0],zero
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load double, double* %a0, align 1
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res = load <2 x i64>, <2 x i64>* %a0, align 16
  ret <2 x i64> %res
}

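; Note: the double splat-load is movsd+movlhps under SSE but folds to a single
; vmovddup broadcast load under AVX.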
define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load1_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0,0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load1_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovddup (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0,0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load1_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero
; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load1_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0,0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load1_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0,0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load double, double* %a0, align 8
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double %ld, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadh_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movhpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x16,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movhpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x16,0x07]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load double, double* %a1, align 8
  %res = insertelement <2 x double> %a0, double %ld, i32 1
  ret <2 x double> %res
}

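; Note: _mm_loadl_epi64 dereferences only the low 8 bytes of its operand; the
; zeroing of the upper half comes for free from the scalar movsd/vmovsd load.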
define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_epi64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_epi64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_epi64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_epi64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_epi64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0],zero
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_epi64:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0],zero
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %bc = bitcast <2 x i64>* %a1 to i64*
  %ld = load i64, i64* %bc, align 1
  %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x12,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],xmm0[1]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0],xmm0[1]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0],xmm0[1]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x12,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],xmm0[1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0],xmm0[1]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0],xmm0[1]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load double, double* %a1, align 8
  %res = insertelement <2 x double> %a0, double %ld, i32 0
  ret <2 x double> %res
}

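; Note: under AVX the reversed load folds into vpermilpd $1 with a memory
; operand, swapping the two doubles as they are loaded.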
define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT:    shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X86-SSE-NEXT:    # xmm0 = xmm0[1,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadr_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX1-NEXT:    # xmm0 = mem[1,0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadr_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX512-NEXT:    # xmm0 = mem[1,0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadr_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT:    shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X64-SSE-NEXT:    # xmm0 = xmm0[1,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadr_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX1-NEXT:    # xmm0 = mem[1,0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadr_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX512-NEXT:    # xmm0 = mem[1,0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %ld = load <2 x double>, <2 x double>* %arg0, align 16
  %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}

define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %res = load <2 x double>, <2 x double>* %arg0, align 1
  ret <2 x double> %res
}

define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res = load <2 x i64>, <2 x i64>* %a0, align 1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_madd_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_madd_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_madd_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

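; Note: maskmovdqu implicitly stores through %edi/%rdi, which is why the
; 32-bit variants must save %edi, load the pointer into it, and restore it
; afterwards.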
define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
; X86-SSE-LABEL: test_mm_maskmoveu_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %edi # encoding: [0x57]
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X86-SSE-NEXT:    popl %edi # encoding: [0x5f]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_maskmoveu_si128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %edi # encoding: [0x57]
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-AVX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X86-AVX-NEXT:    popl %edi # encoding: [0x5f]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_maskmoveu_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_maskmoveu_si128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
  ret void
}
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind

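; Note: the integer min/max tests below express the operation as icmp+select;
; the backend recognizes this idiom and matches it to a single pmin/pmax
; instruction.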
2361define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2362; SSE-LABEL: test_mm_max_epi16:
2363; SSE:       # %bb.0:
2364; SSE-NEXT:    pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1]
2365; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2366;
2367; AVX1-LABEL: test_mm_max_epi16:
2368; AVX1:       # %bb.0:
2369; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1]
2370; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2371;
2372; AVX512-LABEL: test_mm_max_epi16:
2373; AVX512:       # %bb.0:
2374; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
2375; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2376  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2377  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2378  %cmp = icmp sgt <8 x i16> %arg0, %arg1
2379  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
2380  %bc = bitcast <8 x i16> %sel to <2 x i64>
2381  ret <2 x i64> %bc
2382}
2383
2384define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2385; SSE-LABEL: test_mm_max_epu8:
2386; SSE:       # %bb.0:
2387; SSE-NEXT:    pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1]
2388; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2389;
2390; AVX1-LABEL: test_mm_max_epu8:
2391; AVX1:       # %bb.0:
2392; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1]
2393; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2394;
2395; AVX512-LABEL: test_mm_max_epu8:
2396; AVX512:       # %bb.0:
2397; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
2398; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2399  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2400  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2401  %cmp = icmp ugt <16 x i8> %arg0, %arg1
2402  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
2403  %bc = bitcast <16 x i8> %sel to <2 x i64>
2404  ret <2 x i64> %bc
2405}
2406
2407define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
2408; SSE-LABEL: test_mm_max_pd:
2409; SSE:       # %bb.0:
2410; SSE-NEXT:    maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1]
2411; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2412;
2413; AVX1-LABEL: test_mm_max_pd:
2414; AVX1:       # %bb.0:
2415; AVX1-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1]
2416; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2417;
2418; AVX512-LABEL: test_mm_max_pd:
2419; AVX512:       # %bb.0:
2420; AVX512-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
2421; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2422  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
2423  ret <2 x double> %res
2424}
2425declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
2426
2427define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2428; SSE-LABEL: test_mm_max_sd:
2429; SSE:       # %bb.0:
2430; SSE-NEXT:    maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1]
2431; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2432;
2433; AVX1-LABEL: test_mm_max_sd:
2434; AVX1:       # %bb.0:
2435; AVX1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1]
2436; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2437;
2438; AVX512-LABEL: test_mm_max_sd:
2439; AVX512:       # %bb.0:
2440; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
2441; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2442  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
2443  ret <2 x double> %res
2444}
2445declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
2446
2447define void @test_mm_mfence() nounwind {
2448; CHECK-LABEL: test_mm_mfence:
2449; CHECK:       # %bb.0:
2450; CHECK-NEXT:    mfence # encoding: [0x0f,0xae,0xf0]
2451; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2452  call void @llvm.x86.sse2.mfence()
2453  ret void
2454}
2455declare void @llvm.x86.sse2.mfence() nounwind readnone
2456
define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp slt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp ult <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_min_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5d,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_min_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    minsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5d,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5d,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

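; _mm_move_epi64 keeps the low 64-bit element and zeroes the upper one; the
; shufflevector with zeroinitializer below should match a register-to-register
; (v)movq.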
define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_move_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; SSE-NEXT:    # xmm0 = xmm0[0],zero
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_move_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; AVX1-NEXT:    # xmm0 = xmm0[0],zero
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_move_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; AVX512-NEXT:    # xmm0 = xmm0[0],zero
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

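; _mm_move_sd takes the low double from %a1 and the high double from %a0.
; With AVX the backend prefers an immediate blend over (v)movsd.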
define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_move_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_move_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; AVX-NEXT:    # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a1, i32 0
  %res0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
  ret <2 x double> %res1
}

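; The movemask tests gather the sign bit of each element into the low bits of
; a general purpose register.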
define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax # encoding: [0x66,0x0f,0xd7,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskpd %xmm0, %eax # encoding: [0x66,0x0f,0x50,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax # encoding: [0xc5,0xf9,0x50,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

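; _mm_mul_epu32 multiplies the low unsigned 32 bits of each 64-bit element.
; The IR masks both operands and multiplies as <2 x i64> (nuw is valid since a
; product of two 32-bit values cannot overflow 64 bits); the SSE paths match
; this directly to pmuludq, while the AVX paths first rematerialize the masked
; operands with a zero blend.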
define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X86-SSE-LABEL: test_mm_mul_epu32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X86-SSE-NEXT:    # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X86-SSE-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-SSE-NEXT:    pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X86-SSE-NEXT:    pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X86-SSE-NEXT:    pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_epu32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; AVX1-NEXT:    vpblendw $204, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc2,0xcc]
; AVX1-NEXT:    # xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpblendw $204, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xcc]
; AVX1-NEXT:    # xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_epu32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; AVX512-NEXT:    vpblendd $10, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc2,0x0a]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512-NEXT:    vpblendd $10, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x02,0xca,0x0a]
; AVX512-NEXT:    # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_mul_epu32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X64-SSE-NEXT:    # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X64-SSE-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X64-SSE-NEXT:    pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X64-SSE-NEXT:    pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
  %A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295>
  %B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295>
  %res = mul nuw <2 x i64> %A, %B
  ret <2 x i64> %res
}

define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_mul_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fmul <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fmul = fmul double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fmul, i32 0
  ret <2 x double> %res
}

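; The mulhi tests return the high 16 bits of the 16-bit products; clang emits
; the target intrinsics for these, so the tests call them directly.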
define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhi_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmulhw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mulhi_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mulhi_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhi_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmulhuw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mulhi_epu16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mulhi_epu16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mullo_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmullw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd5,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mullo_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd5,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mullo_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = mul <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

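; The bitwise tests on FP types perform the operation on bitcast integer
; vectors; the backend is free to pick an equivalent shorter encoding, hence
; orps rather than orpd/por in the checks below.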
define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_or_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_or_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_or_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = or <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_or_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_or_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_or_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = or <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

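; The pack tests narrow two source vectors with saturation: packsswb/packssdw
; use signed saturation, while packuswb clamps signed 16-bit inputs to the
; unsigned 8-bit range.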
define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x63,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packs_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x63,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packs_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packs_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packs_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x6b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packs_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packus_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    packuswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x67,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packus_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x67,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packus_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

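; _mm_pause is a spin-loop hint; its rep;nop encoding (0xf3,0x90) degrades to
; a plain nop on processors without SSE2.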
define void @test_mm_pause() nounwind {
; CHECK-LABEL: test_mm_pause:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pause # encoding: [0xf3,0x90]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind readnone

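; psadbw sums the absolute differences of the sixteen unsigned bytes,
; producing one 16-bit total in each 64-bit half of the result.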
define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sad_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    psadbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sad_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sad_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

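; The _mm_set_* tests build a vector from scalar arguments, which arrive in
; most-to-least-significant order (%a15 ends up in element 0). On 32-bit
; targets every i8 argument is loaded from the stack; on 64-bit targets the
; first six come in via %dil, %sil, %dl, %cl, %r8b and %r9b.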
define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
; X86-SSE-LABEL: test_mm_set_epi8:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X86-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X86-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_epi8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
; X86-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_epi8:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
; X86-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_epi8:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-SSE-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X64-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_epi8:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX1-NEXT:    vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX1-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX1-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX1-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX1-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX1-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi8:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX512-NEXT:    vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX512-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX512-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX512-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX512-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX512-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <16 x i8> undef,  i8 %a15, i32 0
  %res1  = insertelement <16 x i8> %res0,  i8 %a14, i32 1
  %res2  = insertelement <16 x i8> %res1,  i8 %a13, i32 2
  %res3  = insertelement <16 x i8> %res2,  i8 %a12, i32 3
  %res4  = insertelement <16 x i8> %res3,  i8 %a11, i32 4
  %res5  = insertelement <16 x i8> %res4,  i8 %a10, i32 5
  %res6  = insertelement <16 x i8> %res5,  i8 %a9 , i32 6
  %res7  = insertelement <16 x i8> %res6,  i8 %a8 , i32 7
  %res8  = insertelement <16 x i8> %res7,  i8 %a7 , i32 8
  %res9  = insertelement <16 x i8> %res8,  i8 %a6 , i32 9
  %res10 = insertelement <16 x i8> %res9,  i8 %a5 , i32 10
  %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
  %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
  %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
  %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
  %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
  %res = bitcast <16 x i8> %res15 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
; X86-SSE-LABEL: test_mm_set_epi16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-SSE-NEXT:    movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-SSE-NEXT:    movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-SSE-NEXT:    movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3]
; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; X86-SSE-NEXT:    punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2]
; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT:    punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
; X86-SSE-NEXT:    # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X86-SSE-NEXT:    punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
; X86-SSE-NEXT:    punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
; X86-SSE-NEXT:    punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_epi16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_epi16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-AVX512-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-AVX512-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-AVX512-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_epi16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT:    movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2]
; X64-SSE-NEXT:    movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1]
; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; X64-SSE-NEXT:    punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-SSE-NEXT:    movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9]
; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2]
; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; X64-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT:    punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_epi16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX1-NEXT:    vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
; X64-AVX1-NEXT:    vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
; X64-AVX1-NEXT:    vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
; X64-AVX1-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X64-AVX1-NEXT:    vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
; X64-AVX1-NEXT:    vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
; X64-AVX1-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
; X64-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX512-NEXT:    vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
; X64-AVX512-NEXT:    vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
; X64-AVX512-NEXT:    vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
; X64-AVX512-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X64-AVX512-NEXT:    vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
; X64-AVX512-NEXT:    vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
; X64-AVX512-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <8 x i16> undef, i16 %a7, i32 0
  %res1  = insertelement <8 x i16> %res0, i16 %a6, i32 1
  %res2  = insertelement <8 x i16> %res1, i16 %a5, i32 2
  %res3  = insertelement <8 x i16> %res2, i16 %a4, i32 3
  %res4  = insertelement <8 x i16> %res3, i16 %a3, i32 4
  %res5  = insertelement <8 x i16> %res4, i16 %a2, i32 5
  %res6  = insertelement <8 x i16> %res5, i16 %a1, i32 6
  %res7  = insertelement <8 x i16> %res6, i16 %a0, i32 7
  %res = bitcast <8 x i16> %res7 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; X86-SSE-LABEL: test_mm_set_epi32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x0c]
; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_epi32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_epi32:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_epi32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
; X64-SSE-NEXT:    punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT:    movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2]
; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
; X64-SSE-NEXT:    punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_epi32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-AVX1-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
; X64-AVX1-NEXT:    vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
; X64-AVX1-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi32:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-AVX512-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
; X64-AVX512-NEXT:    vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
; X64-AVX512-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <4 x i32> undef, i32 %a3, i32 0
  %res1  = insertelement <4 x i32> %res0, i32 %a2, i32 1
  %res2  = insertelement <4 x i32> %res1, i32 %a1, i32 2
  %res3  = insertelement <4 x i32> %res2, i32 %a0, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

; TODO test_mm_set_epi64

define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
; X86-SSE-LABEL: test_mm_set_epi64x:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x0c]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x10]
; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_epi64x:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_epi64x:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_epi64x:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf]
; X64-SSE-NEXT:    movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_epi64x:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT:    vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
; X64-AVX1-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi64x:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT:    vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
; X64-AVX512-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x i64> undef, i64 %a1, i32 0
  %res1  = insertelement <2 x i64> %res0, i64 %a0, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
; X86-SSE-LABEL: test_mm_set_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x04]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero
; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX1-NEXT:    # xmm1 = mem[0],zero
; X86-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT:    # xmm1 = mem[0],zero
; X86-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x double> undef, double %a1, i32 0
  %res1  = insertelement <2 x double> %res0, double %a0, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_pd1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_pd1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_pd1:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_pd1:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_pd1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_pd1:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x double> undef, double %a0, i32 0
  %res1  = insertelement <2 x double> %res0, double %a0, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_set_sd(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x7e,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],zero
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_sd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0],zero
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_sd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0],zero
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],zero
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_sd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0],zero
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_sd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0],zero
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x double> undef, double %a0, i32 0
  %res1  = insertelement <2 x double> %res0, double 0.0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi8:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE-NEXT:    pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X86-AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi8:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi8:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE-NEXT:    pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi8:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X64-AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi8:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <16 x i8> undef,  i8 %a0, i32 0
  %res1  = insertelement <16 x i8> %res0,  i8 %a0, i32 1
  %res2  = insertelement <16 x i8> %res1,  i8 %a0, i32 2
  %res3  = insertelement <16 x i8> %res2,  i8 %a0, i32 3
  %res4  = insertelement <16 x i8> %res3,  i8 %a0, i32 4
  %res5  = insertelement <16 x i8> %res4,  i8 %a0, i32 5
  %res6  = insertelement <16 x i8> %res5,  i8 %a0, i32 6
  %res7  = insertelement <16 x i8> %res6,  i8 %a0, i32 7
  %res8  = insertelement <16 x i8> %res7,  i8 %a0, i32 8
  %res9  = insertelement <16 x i8> %res8,  i8 %a0, i32 9
  %res10 = insertelement <16 x i8> %res9,  i8 %a0, i32 10
  %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
  %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
  %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
  %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
  %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
  %res = bitcast <16 x i8> %res15 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT:    vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X86-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpbroadcastw %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT:    vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X64-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <8 x i16> undef, i16 %a0, i32 0
  %res1  = insertelement <8 x i16> %res0, i16 %a0, i32 1
  %res2  = insertelement <8 x i16> %res1, i16 %a0, i32 2
  %res3  = insertelement <8 x i16> %res2, i16 %a0, i32 3
  %res4  = insertelement <8 x i16> %res3, i16 %a0, i32 4
  %res5  = insertelement <8 x i16> %res4, i16 %a0, i32 5
  %res6  = insertelement <8 x i16> %res5, i16 %a0, i32 6
  %res7  = insertelement <8 x i16> %res6, i16 %a0, i32 7
  %res = bitcast <8 x i16> %res7 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi32:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpbroadcastd %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT:    vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi32:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1  = insertelement <4 x i32> %res0, i32 %a0, i32 1
  %res2  = insertelement <4 x i32> %res1, i32 %a0, i32 2
  %res3  = insertelement <4 x i32> %res2, i32 %a0, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

; TODO test_mm_set1_epi64

define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi64x:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %xmm1 # encoding: [0x66,0x0f,0x6e,0x4c,0x24,0x08]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT:    pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,1,0,1]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi64x:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; X86-AVX1-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; X86-AVX1-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi64x:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT:    vpbroadcastq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi64x:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,1,0,1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi64x:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT:    vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,1,0,1]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi64x:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x i64> undef, i64 %a0, i32 0
  %res1  = insertelement <2 x i64> %res0, i64 %a0, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x double> undef, double %a0, i32 0
  %res1  = insertelement <2 x double> %res0, double %a0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi8:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X86-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X86-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-AVX1-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi8:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-AVX512-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi8:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X64-SSE-NEXT:    punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X64-SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT:    punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X64-SSE-NEXT:    movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT:    punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT:    punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-SSE-NEXT:    punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi8:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX1-NEXT:    movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
; X64-AVX1-NEXT:    vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX1-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX1-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX1-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX1-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX1-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi8:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX512-NEXT:    movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
; X64-AVX512-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-AVX512-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX512-NEXT:    movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX512-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX512-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX512-NEXT:    movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX512-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX512-NEXT:    movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX512-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX512-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX512-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX512-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX512-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX512-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX512-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX512-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX512-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <16 x i8> undef,  i8 %a0 , i32 0
  %res1  = insertelement <16 x i8> %res0,  i8 %a1 , i32 1
  %res2  = insertelement <16 x i8> %res1,  i8 %a2 , i32 2
  %res3  = insertelement <16 x i8> %res2,  i8 %a3 , i32 3
  %res4  = insertelement <16 x i8> %res3,  i8 %a4 , i32 4
  %res5  = insertelement <16 x i8> %res4,  i8 %a5 , i32 5
  %res6  = insertelement <16 x i8> %res5,  i8 %a6 , i32 6
  %res7  = insertelement <16 x i8> %res6,  i8 %a7 , i32 7
  %res8  = insertelement <16 x i8> %res7,  i8 %a8 , i32 8
  %res9  = insertelement <16 x i8> %res8,  i8 %a9 , i32 9
  %res10 = insertelement <16 x i8> %res9,  i8 %a10, i32 10
  %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
  %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
  %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
  %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
  %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
  %res = bitcast <16 x i8> %res15 to <2 x i64>
  ret <2 x i64> %res
}

4219define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
4220; X86-SSE-LABEL: test_mm_setr_epi16:
4221; X86-SSE:       # %bb.0:
4222; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
4223; X86-SSE-NEXT:    movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4224; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
4225; X86-SSE-NEXT:    movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4226; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
4227; X86-SSE-NEXT:    movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4228; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
4229; X86-SSE-NEXT:    movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
4230; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
4231; X86-SSE-NEXT:    movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
4232; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
4233; X86-SSE-NEXT:    movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
4234; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4235; X86-SSE-NEXT:    movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
4236; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4237; X86-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4238; X86-SSE-NEXT:    punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
4239; X86-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
4240; X86-SSE-NEXT:    punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3]
4241; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
4242; X86-SSE-NEXT:    punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2]
4243; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
4244; X86-SSE-NEXT:    punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
4245; X86-SSE-NEXT:    # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
4246; X86-SSE-NEXT:    punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
4247; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
4248; X86-SSE-NEXT:    punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
4249; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
4250; X86-SSE-NEXT:    punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4]
4251; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm4[0]
4252; X86-SSE-NEXT:    retl # encoding: [0xc3]
4253;
4254; X86-AVX1-LABEL: test_mm_setr_epi16:
4255; X86-AVX1:       # %bb.0:
4256; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4257; X86-AVX1-NEXT:    vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
4258; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4259; X86-AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
4260; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
4261; X86-AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
4262; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
4263; X86-AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
4264; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
4265; X86-AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
4266; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
4267; X86-AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
4268; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
4269; X86-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
4270; X86-AVX1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
4271; X86-AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
4272; X86-AVX1-NEXT:    retl # encoding: [0xc3]
4273;
4274; X86-AVX512-LABEL: test_mm_setr_epi16:
4275; X86-AVX512:       # %bb.0:
4276; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4277; X86-AVX512-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
4278; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4279; X86-AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
4280; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
4281; X86-AVX512-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
4282; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
4283; X86-AVX512-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
4284; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
4285; X86-AVX512-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
4286; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
4287; X86-AVX512-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
4288; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
4289; X86-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
4290; X86-AVX512-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
4291; X86-AVX512-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
4292; X86-AVX512-NEXT:    retl # encoding: [0xc3]
4293;
4294; X64-SSE-LABEL: test_mm_setr_epi16:
4295; X64-SSE:       # %bb.0:
4296; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
4297; X64-SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
4298; X64-SSE-NEXT:    movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4299; X64-SSE-NEXT:    movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca]
4300; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
4301; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
4302; X64-SSE-NEXT:    movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1]
4303; X64-SSE-NEXT:    movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0]
4304; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
4305; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
4306; X64-SSE-NEXT:    punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
4307; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4308; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
4309; X64-SSE-NEXT:    movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
4310; X64-SSE-NEXT:    punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
4311; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
4312; X64-SSE-NEXT:    movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde]
4313; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4314; X64-SSE-NEXT:    punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
4315; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
4316; X64-SSE-NEXT:    punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
4317; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4318; X64-SSE-NEXT:    punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
4319; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0]
4320; X64-SSE-NEXT:    retq # encoding: [0xc3]
4321;
4322; X64-AVX1-LABEL: test_mm_setr_epi16:
4323; X64-AVX1:       # %bb.0:
4324; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
4325; X64-AVX1-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4326; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4327; X64-AVX1-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
4328; X64-AVX1-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
4329; X64-AVX1-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
4330; X64-AVX1-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
4331; X64-AVX1-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
4332; X64-AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
4333; X64-AVX1-NEXT:    vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
4334; X64-AVX1-NEXT:    retq # encoding: [0xc3]
4335;
4336; X64-AVX512-LABEL: test_mm_setr_epi16:
4337; X64-AVX512:       # %bb.0:
4338; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
4339; X64-AVX512-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
4340; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
4341; X64-AVX512-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
4342; X64-AVX512-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
4343; X64-AVX512-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
4344; X64-AVX512-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
4345; X64-AVX512-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
4346; X64-AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
4347; X64-AVX512-NEXT:    vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
4348; X64-AVX512-NEXT:    retq # encoding: [0xc3]
4349  %res0  = insertelement <8 x i16> undef, i16 %a0, i32 0
4350  %res1  = insertelement <8 x i16> %res0, i16 %a1, i32 1
4351  %res2  = insertelement <8 x i16> %res1, i16 %a2, i32 2
4352  %res3  = insertelement <8 x i16> %res2, i16 %a3, i32 3
4353  %res4  = insertelement <8 x i16> %res3, i16 %a4, i32 4
4354  %res5  = insertelement <8 x i16> %res4, i16 %a5, i32 5
4355  %res6  = insertelement <8 x i16> %res5, i16 %a6, i32 6
4356  %res7  = insertelement <8 x i16> %res6, i16 %a7, i32 7
4357  %res = bitcast <8 x i16> %res7 to <2 x i64>
4358  ret <2 x i64> %res
4359}
4360
4361define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
4362; X86-SSE-LABEL: test_mm_setr_epi32:
4363; X86-SSE:       # %bb.0:
4364; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
4365; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
4366; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
4367; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
4368; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
4369; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4370; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
4371; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
4372; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
4373; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
4374; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
4375; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4376; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
4377; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4378; X86-SSE-NEXT:    retl # encoding: [0xc3]
4379;
4380; X86-AVX1-LABEL: test_mm_setr_epi32:
4381; X86-AVX1:       # %bb.0:
4382; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
4383; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
4384; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
4385; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
4386; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
4387; X86-AVX1-NEXT:    retl # encoding: [0xc3]
4388;
4389; X86-AVX512-LABEL: test_mm_setr_epi32:
4390; X86-AVX512:       # %bb.0:
4391; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
4392; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
4393; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
4394; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
4395; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
4396; X86-AVX512-NEXT:    retl # encoding: [0xc3]
4397;
4398; X64-SSE-LABEL: test_mm_setr_epi32:
4399; X64-SSE:       # %bb.0:
4400; X64-SSE-NEXT:    movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
4401; X64-SSE-NEXT:    movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
4402; X64-SSE-NEXT:    punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
4403; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4404; X64-SSE-NEXT:    movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6]
4405; X64-SSE-NEXT:    movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4406; X64-SSE-NEXT:    punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
4407; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4408; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4409; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
4410; X64-SSE-NEXT:    retq # encoding: [0xc3]
4411;
4412; X64-AVX1-LABEL: test_mm_setr_epi32:
4413; X64-AVX1:       # %bb.0:
4414; X64-AVX1-NEXT:    vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4415; X64-AVX1-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
4416; X64-AVX1-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
4417; X64-AVX1-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
4418; X64-AVX1-NEXT:    retq # encoding: [0xc3]
4419;
4420; X64-AVX512-LABEL: test_mm_setr_epi32:
4421; X64-AVX512:       # %bb.0:
4422; X64-AVX512-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
4423; X64-AVX512-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
4424; X64-AVX512-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
4425; X64-AVX512-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
4426; X64-AVX512-NEXT:    retq # encoding: [0xc3]
4427  %res0  = insertelement <4 x i32> undef, i32 %a0, i32 0
4428  %res1  = insertelement <4 x i32> %res0, i32 %a1, i32 1
4429  %res2  = insertelement <4 x i32> %res1, i32 %a2, i32 2
4430  %res3  = insertelement <4 x i32> %res2, i32 %a3, i32 3
4431  %res = bitcast <4 x i32> %res3 to <2 x i64>
4432  ret <2 x i64> %res
4433}
4434
4435; TODO test_mm_setr_epi64
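; (_mm_setr_epi64 takes two __m64 arguments in emmintrin.h, so covering it
; needs MMX-compatible IR; only the i64-based _mm_setr_epi64x variant is
; tested below.)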

define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi64x:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi64x:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX1-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX1-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi64x:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX512-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi64x:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce]
; X64-SSE-NEXT:    movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
; X64-SSE-NEXT:    punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi64x:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
; X64-AVX1-NEXT:    vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
; X64-AVX1-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi64x:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
; X64-AVX512-NEXT:    vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
; X64-AVX512-NEXT:    vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x i64> undef, i64 %a0, i32 0
  %res1  = insertelement <2 x i64> %res0, i64 %a1, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
; X86-SSE-LABEL: test_mm_setr_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT:    # xmm1 = mem[0],zero
; X86-SSE-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX1-NEXT:    # xmm1 = mem[0],zero
; X86-AVX1-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
; X86-AVX1-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT:    # xmm1 = mem[0],zero
; X86-AVX512-NEXT:    vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
; X86-AVX512-NEXT:    # xmm0 = xmm1[0],xmm0[0]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res0  = insertelement <2 x double> undef, double %a0, i32 0
  %res1  = insertelement <2 x double> %res0, double %a1, i32 1
  ret <2 x double> %res1
}

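; The two setzero tests below reduce to plain zeroinitializer in IR, which
; lowers to a register self-XOR (xorps/vxorps) on every configuration.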
define <2 x double> @test_mm_setzero_pd() {
; SSE-LABEL: test_mm_setzero_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_setzero_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_setzero_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x double> zeroinitializer
}

define <2 x i64> @test_mm_setzero_si128() {
; SSE-LABEL: test_mm_setzero_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_setzero_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_setzero_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x i64> zeroinitializer
}

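; A C-level sketch of what the next test models (assumed to mirror the
; emmintrin.h signature):
;   __m128i r = _mm_shuffle_epi32(a, 0); // broadcast element 0 to all lanes
; With an all-zeros immediate the shuffle is a splat of element 0, which is
; why the AVX512 configuration can select vbroadcastss instead of a shuffle.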
define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_shuffle_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shuffle_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_shuffle_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    shufpd $1, %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc1,0x01]
; SSE-NEXT:    # xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shuffle_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
  ret <2 x double> %res
}

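; pshufhw/pshuflw only permute one 64-bit half of the vector; the IR in the
; next two tests expresses that with shuffle masks that keep the other
; half's elements in place (indices 0-3 for shufflehi, 4-7 for shufflelo).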
define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_shufflehi_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufhw $0, %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x70,0xc0,0x00]
; SSE-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shufflehi_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufhw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x70,0xc0,0x00]
; AVX1-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shufflehi_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufhw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x00]
; AVX512-NEXT:    # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_shufflelo_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
; SSE-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shufflelo_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
; AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shufflelo_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshuflw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x00]
; AVX512-NEXT:    # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

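; For the psll/psra/psrl family the shift count comes from the low 64 bits
; of the second vector operand, so these tests pass %a1 straight through;
; the *i immediate forms further below take the count as an imm8 instead.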
define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sll_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psllw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sll_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sll_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sll_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sll_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sll_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sll_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sll_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sll_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone

define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone

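; _mm_slli_si128 is a whole-register byte shift; shifting left by 5 is the
; shufflevector below that selects indices 11-26 from (zero, %arg0), i.e.
; five zero bytes followed by %arg0 bytes 0-10, matching pslldq $5.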
define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_slli_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
; SSE-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX1-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX512-NEXT:    # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_sqrt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sqrt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vsqrtpd %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sqrt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vsqrtpd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone

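; _mm_sqrt_sd computes the square root of %a0's low element but takes the
; upper element from %a1, which is why the SSE lowering needs the extra
; movapd to move the merged result back into xmm0.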
define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_sqrt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0x51,0xc8]
; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sqrt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vsqrtsd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf3,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sqrt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vsqrtsd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf3,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext = extractelement <2 x double> %a0, i32 0
  %sqrt = call double @llvm.sqrt.f64(double %ext)
  %ins = insertelement <2 x double> %a1, double %sqrt, i32 0
  ret <2 x double> %ins
}
declare double @llvm.sqrt.f64(double) nounwind readnone

; This doesn't match a clang test, but helps with fast-isel coverage.
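; On X86 the double return value travels through the x87 stack (fldl), so
; the 32-bit configurations spill the result to an 8-byte-aligned slot first.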
define double @test_mm_sqrt_sd_scalar(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_sqrt_sd_scalar:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-SSE-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-SSE-NEXT:    movsd 8(%ebp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x45,0x08]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero
; X86-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
; X86-SSE-NEXT:    movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp # encoding: [0x55]
; X86-AVX1-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX1-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX1-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX1-NEXT:    vmovsd 8(%ebp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x45,0x08]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX1-NEXT:    vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24]
; X86-AVX1-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX1-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX1-NEXT:    popl %ebp # encoding: [0x5d]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    pushl %ebp # encoding: [0x55]
; X86-AVX512-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX512-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX512-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX512-NEXT:    vmovsd 8(%ebp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero
; X86-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX512-NEXT:    vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24]
; X86-AVX512-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX512-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX512-NEXT:    popl %ebp # encoding: [0x5d]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_sqrt_sd_scalar:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %sqrt = call double @llvm.sqrt.f64(double %a0)
  ret double %sqrt
}

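; psraw/psrad are arithmetic right shifts (sign-filling); the logical
; psrlw/psrld/psrlq counterparts follow further below. SSE2 has no psraq,
; so there is no test_mm_sra_epi64 here.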
define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sra_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sra_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sra_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sra_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sra_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sra_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_srai_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xe0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srai_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xe0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srai_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone

define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_srai_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xe0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srai_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xe0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srai_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_srl_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd1,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srl_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd1,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srl_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_srl_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd2,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srl_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd2,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srl_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_srl_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srl_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srl_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_srli_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xd0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srli_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xd0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srli_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone

define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_srli_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xd0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srli_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xd0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srli_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_srli_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xd0,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srli_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srli_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x01]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

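; The logical byte-shift mirror of test_mm_slli_si128 above: shifting right
; by 5 selects %arg0 bytes 5-15 followed by five zero bytes (psrldq $5).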
define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_srli_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
; SSE-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_srli_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX1-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_srli_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX512-NEXT:    # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

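; Aligned 16-byte stores use movaps even for double data; movaps and movapd
; store identical bits and the movaps encoding is one byte shorter.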
define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_store_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 16
  ret void
}

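; _mm_store_pd1 stores the low element to both lanes, so the lowering splats
; first (movlhps on SSE, vmovddup on AVX) and then does one aligned store.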
define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_store_pd1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_pd1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
; X86-AVX1-NEXT:    vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_pd1:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
; X86-AVX512-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_pd1:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_pd1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
; X64-AVX1-NEXT:    vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_pd1:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT:    vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double * %a0 to <2 x double>*
  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
  store <2 x double> %shuf, <2 x double>* %arg0, align 16
  ret void
}

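; _mm_store_sd: scalar store of the low double via movsd/vmovsd.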
define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_store_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_sd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_sd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_sd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_sd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ext = extractelement <2 x double> %a1, i32 0
  store double %ext, double* %a0, align 1
  ret void
}

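; _mm_store_si128: aligned 128-bit integer store; note that it is emitted in
; the FP domain as movaps/vmovaps.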
define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X86-SSE-LABEL: test_mm_store_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  store <2 x i64> %a1, <2 x i64>* %a0, align 16
  ret void
}

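; _mm_store1_pd lowers identically to _mm_store_pd1 above: splat the low
; double, then do one aligned 16-byte store.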
define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_store1_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store1_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0]
; X86-AVX1-NEXT:    vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store1_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0,0]
; X86-AVX512-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store1_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0]
; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store1_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0]
; X64-AVX1-NEXT:    vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store1_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT:    vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double * %a0 to <2 x double>*
  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
  store <2 x double> %shuf, <2 x double>* %arg0, align 16
  ret void
}

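; _mm_storeh_sd stores the high double (lane 1), so a movhlps/vpermilpd
; shuffle is needed before the scalar movsd store.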
define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_storeh_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
; X86-SSE-NEXT:    # xmm0 = xmm0[1,1]
; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storeh_sd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; X86-AVX1-NEXT:    # xmm0 = xmm0[1,0]
; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeh_sd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; X86-AVX512-NEXT:    # xmm0 = xmm0[1,0]
; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storeh_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
; X64-SSE-NEXT:    # xmm0 = xmm0[1,1]
; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storeh_sd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; X64-AVX1-NEXT:    # xmm0 = xmm0[1,0]
; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storeh_sd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; X64-AVX512-NEXT:    # xmm0 = xmm0[1,0]
; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ext = extractelement <2 x double> %a1, i32 1
  store double %ext, double* %a0, align 8
  ret void
}

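; _mm_storel_epi64 stores the low 64 bits: movlps on 32-bit targets, while
; 64-bit targets round-trip through a GPR with movq.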
define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
; X86-SSE-LABEL: test_mm_storel_epi64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storel_epi64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storel_epi64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storel_epi64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
; X64-SSE-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storel_epi64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; X64-AVX1-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storel_epi64:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; X64-AVX512-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ext = extractelement <2 x i64> %a1, i32 0
  %bc = bitcast <2 x i64> *%a0 to i64*
  store i64 %ext, i64* %bc, align 8
  ret void
}

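; _mm_storel_sd lowers the same way as _mm_store_sd: a scalar movsd of the
; low double.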
define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_storel_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storel_sd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storel_sd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storel_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storel_sd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storel_sd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ext = extractelement <2 x double> %a1, i32 0
  store double %ext, double* %a0, align 8
  ret void
}

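; _mm_storer_pd stores the doubles in reversed order: shufpd/vpermilpd with
; immediate 1 swaps the lanes before the aligned store.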
define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_storer_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X86-SSE-NEXT:    # xmm0 = xmm0[1,0]
; X86-SSE-NEXT:    movapd %xmm0, (%eax) # encoding: [0x66,0x0f,0x29,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storer_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; X86-AVX1-NEXT:    # xmm0 = xmm0[1,0]
; X86-AVX1-NEXT:    vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storer_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; X86-AVX512-NEXT:    # xmm0 = xmm0[1,0]
; X86-AVX512-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storer_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X64-SSE-NEXT:    # xmm0 = xmm0[1,0]
; X64-SSE-NEXT:    movapd %xmm0, (%rdi) # encoding: [0x66,0x0f,0x29,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storer_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; X64-AVX1-NEXT:    # xmm0 = xmm0[1,0]
; X64-AVX1-NEXT:    vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storer_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; X64-AVX512-NEXT:    # xmm0 = xmm0[1,0]
; X64-AVX512-NEXT:    vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  store <2 x double> %shuf, <2 x double>* %arg0, align 16
  ret void
}

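; Unaligned (align 1) stores: _mm_storeu_pd and _mm_storeu_si128 both select
; movups/vmovups.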
define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_storeu_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storeu_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeu_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storeu_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storeu_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storeu_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 1
  ret void
}

define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X86-SSE-LABEL: test_mm_storeu_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storeu_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeu_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storeu_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storeu_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storeu_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  store <2 x i64> %a1, <2 x i64>* %a0, align 1
  ret void
}

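; Non-temporal stores: the !nontemporal !0 metadata (defined at the end of
; this file) selects movntps for vectors and movnti for i32.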
define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_stream_pd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_stream_pd:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_stream_pd:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_stream_pd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_stream_pd:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_stream_pd:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
  ret void
}

define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
; X86-LABEL: test_mm_stream_si32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movntil %eax, (%ecx) # encoding: [0x0f,0xc3,0x01]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_stream_si32:
; X64:       # %bb.0:
; X64-NEXT:    movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37]
; X64-NEXT:    retq # encoding: [0xc3]
  store i32 %a1, i32* %a0, align 1, !nontemporal !0
  ret void
}

define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X86-SSE-LABEL: test_mm_stream_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_stream_si128:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_stream_si128:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_stream_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_stream_si128:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_stream_si128:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
  ret void
}

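; Vector subtraction: plain IR 'sub' on the bitcast element type selects
; psub{b,w,d,q}, and 'fsub' selects subpd/subsd.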
define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = sub <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = sub <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfa,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfa,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = sub <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    psubq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfb,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = sub <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_sub_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    subpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fsub <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_sub_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fsub = fsub double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fsub, i32 0
  ret <2 x double> %res
}

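; Saturating subtraction goes through the llvm.x86.sse2.psubs.* (signed) and
; llvm.x86.sse2.psubus.* (unsigned) intrinsics.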
define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    psubsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    psubsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    psubusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epu8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epu8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    psubusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epu16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epu16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

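; Unordered scalar comparisons: ucomisd followed by setcc. The EQ/NEQ cases
; also test PF because ucomisd sets ZF=PF=CF=1 for unordered (NaN) operands,
; so a bare sete/setne would give the wrong answer on NaNs.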
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomieq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomieq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomieq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomige_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomige_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomige_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomigt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomigt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomigt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomile_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomile_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomile_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomilt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomilt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomilt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomineq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomineq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomineq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

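; Returning undef requires no code beyond the ret.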
define <2 x double> @test_mm_undefined_pd() {
; CHECK-LABEL: test_mm_undefined_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x double> undef
}

define <2 x i64> @test_mm_undefined_si128() {
; CHECK-LABEL: test_mm_undefined_si128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x i64> undef
}

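; Interleave shuffles select punpck*/vpunpck* for i8/i16; the i32/i64 and pd
; cases use the equivalent FP-domain forms (unpckhps/unpckhpd, movlhps).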
define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    punpckhbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x68,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x68,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    punpckhwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x69,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x69,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpackhi_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x60,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x60,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x61,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpacklo_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}

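; XOR is domain-agnostic here: both the pd and si128 variants select
; xorps/vxorps.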
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_xor_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_xor_si128:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_si128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_si128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = xor <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

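; Metadata operand referenced by the !nontemporal stores above.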
!0 = !{i32 1}