; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

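; 256-bit integer add/sub/mul should each select a single AVX2 ymm instruction
; where the ISA provides one; the later tests cover multiplies that must be
; legalized and constant multiplies that fold to cheaper operations.
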
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-LABEL: test_vpaddq:
; X32:       # %bb.0:
; X32-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpaddq:
; X64:       # %bb.0:
; X64-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; X32-LABEL: test_vpaddd:
; X32:       # %bb.0:
; X32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpaddd:
; X64:       # %bb.0:
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; X32-LABEL: test_vpaddw:
; X32:       # %bb.0:
; X32-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpaddw:
; X64:       # %bb.0:
; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-LABEL: test_vpaddb:
; X32:       # %bb.0:
; X32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpaddb:
; X64:       # %bb.0:
; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-LABEL: test_vpsubq:
; X32:       # %bb.0:
; X32-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpsubq:
; X64:       # %bb.0:
; X64-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; X32-LABEL: test_vpsubd:
; X32:       # %bb.0:
; X32-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpsubd:
; X64:       # %bb.0:
; X64-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; X32-LABEL: test_vpsubw:
; X32:       # %bb.0:
; X32-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpsubw:
; X64:       # %bb.0:
; X64-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-LABEL: test_vpsubb:
; X32:       # %bb.0:
; X32-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpsubb:
; X64:       # %bb.0:
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; X32-LABEL: test_vpmulld:
; X32:       # %bb.0:
; X32-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpmulld:
; X64:       # %bb.0:
; X64-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; X32-LABEL: test_vpmullw:
; X32:       # %bb.0:
; X32-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_vpmullw:
; X64:       # %bb.0:
; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

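; AVX2 has no byte-element multiply, so v16i8/v32i8 multiplies are widened to
; i16 with vpmovsxbw, multiplied with vpmullw, and truncated back with vpshufb.
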
define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; X32-LABEL: mul_v16i8:
; X32:       # %bb.0:
; X32-NEXT:    vpmovsxbw %xmm1, %ymm1
; X32-NEXT:    vpmovsxbw %xmm0, %ymm0
; X32-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X32-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X32-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; X32-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: mul_v16i8:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbw %xmm1, %ymm1
; X64-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; X64-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %x = mul <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-LABEL: mul_v32i8:
; X32:       # %bb.0:
; X32-NEXT:    vextracti128 $1, %ymm1, %xmm2
; X32-NEXT:    vpmovsxbw %xmm2, %ymm2
; X32-NEXT:    vextracti128 $1, %ymm0, %xmm3
; X32-NEXT:    vpmovsxbw %xmm3, %ymm3
; X32-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
; X32-NEXT:    vextracti128 $1, %ymm2, %xmm3
; X32-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X32-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
; X32-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
; X32-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X32-NEXT:    vpmovsxbw %xmm1, %ymm1
; X32-NEXT:    vpmovsxbw %xmm0, %ymm0
; X32-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X32-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X32-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
; X32-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
; X32-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_v32i8:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm2
; X64-NEXT:    vpmovsxbw %xmm2, %ymm2
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm3
; X64-NEXT:    vpmovsxbw %xmm3, %ymm3
; X64-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm3
; X64-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
; X64-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X64-NEXT:    vpmovsxbw %xmm1, %ymm1
; X64-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
; X64-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = mul <32 x i8> %i, %j
  ret <32 x i8> %x
}

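; AVX2 has no 64-bit element multiply, so v4i64 is expanded into 32-bit
; partial products (vpmuludq) combined with shifts and adds.
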
define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-LABEL: mul_v4i64:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $32, %ymm0, %ymm2
; X32-NEXT:    vpmuludq %ymm1, %ymm2, %ymm2
; X32-NEXT:    vpsrlq $32, %ymm1, %ymm3
; X32-NEXT:    vpmuludq %ymm3, %ymm0, %ymm3
; X32-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; X32-NEXT:    vpsllq $32, %ymm2, %ymm2
; X32-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X32-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_v4i64:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $32, %ymm0, %ymm2
; X64-NEXT:    vpmuludq %ymm1, %ymm2, %ymm2
; X64-NEXT:    vpsrlq $32, %ymm1, %ymm3
; X64-NEXT:    vpmuludq %ymm3, %ymm0, %ymm3
; X64-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; X64-NEXT:    vpsllq $32, %ymm2, %ymm2
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

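; %x * 2 -> add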
define <8 x i32> @mul_const1(<8 x i32> %x) {
; X32-LABEL: mul_const1:
; X32:       # %bb.0:
; X32-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const1:
; X64:       # %bb.0:
; X64-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; X64-NEXT:    retq
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}

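; %x * 4 -> shift left by 2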
define <4 x i64> @mul_const2(<4 x i64> %x) {
; X32-LABEL: mul_const2:
; X32:       # %bb.0:
; X32-NEXT:    vpsllq $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const2:
; X64:       # %bb.0:
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}

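; %x * 8 -> shift left by 3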
define <16 x i16> @mul_const3(<16 x i16> %x) {
; X32-LABEL: mul_const3:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $3, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const3:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $3, %ymm0, %ymm0
; X64-NEXT:    retq
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}

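; %x * -1 -> 0 - %x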
define <4 x i64> @mul_const4(<4 x i64> %x) {
; X32-LABEL: mul_const4:
; X32:       # %bb.0:
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpsubq %ymm0, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const4:
; X64:       # %bb.0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpsubq %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}

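; %x * 0 -> 0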
define <8 x i32> @mul_const5(<8 x i32> %x) {
; X32-LABEL: mul_const5:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const5:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

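; Non-uniform constant with no common fold -> vpmulld against a constant-pool operand.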
define <8 x i32> @mul_const6(<8 x i32> %x) {
; X32-LABEL: mul_const6:
; X32:       # %bb.0:
; X32-NEXT:    vpmulld {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const6:
; X64:       # %bb.0:
; X64-NEXT:    vpmulld {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}

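; v8i64 is split across two ymm registers; each half of %x * 2 becomes an add.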
define <8 x i64> @mul_const7(<8 x i64> %x) {
; X32-LABEL: mul_const7:
; X32:       # %bb.0:
; X32-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; X32-NEXT:    vpaddq %ymm1, %ymm1, %ymm1
; X32-NEXT:    retl
;
; X64-LABEL: mul_const7:
; X64:       # %bb.0:
; X64-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm1, %ymm1, %ymm1
; X64-NEXT:    retq
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}

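; 128-bit operand: %x * 8 -> shift left by 3 on xmm.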
define <8 x i16> @mul_const8(<8 x i16> %x) {
; X32-LABEL: mul_const8:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $3, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const8:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $3, %xmm0, %xmm0
; X64-NEXT:    retq
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}

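; Constant with a single non-zero lane is materialized with vmovd before the multiply.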
define <8 x i32> @mul_const9(<8 x i32> %x) {
; X32-LABEL: mul_const9:
; X32:       # %bb.0:
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    vmovd %eax, %xmm1
; X32-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const9:
; X64:       # %bb.0:
; X64-NEXT:    movl $2, %eax
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; %x * 0x01010101
define <4 x i32> @mul_const10(<4 x i32> %x) {
; X32-LABEL: mul_const10:
; X32:       # %bb.0:
; X32-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16843009,16843009,16843009,16843009]
; X32-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const10:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16843009,16843009,16843009,16843009]
; X64-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}

; %x * 0x80808080
define <4 x i32> @mul_const11(<4 x i32> %x) {
; X32-LABEL: mul_const11:
; X32:       # %bb.0:
; X32-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2155905152,2155905152,2155905152,2155905152]
; X32-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: mul_const11:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2155905152,2155905152,2155905152,2155905152]
; X64-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}