; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s

; Verify that the backend correctly combines AVX2 builtin intrinsics.

; Three arithmetic right shifts on <8 x i32>: psrai by 3, psra with a count
; vector whose low 64 bits are 3, psrai by 2. The shift amounts accumulate
; (3 + 3 + 2 = 8), so the backend should emit a single vpsrad by 8.
define <8 x i32> @test_psra_1(<8 x i32> %A) {
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 3)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 2)
  ret <8 x i32> %3
}
; CHECK-LABEL: test_psra_1
; CHECK: vpsrad $8, %ymm0, %ymm0
; CHECK-NEXT: ret

; Same combine as test_psra_1, but for <16 x i16> word shifts: the counts
; (3 + 3 + 2 = 8) should fold into a single vpsraw by 8.
define <16 x i16> @test_psra_2(<16 x i16> %A) {
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 3)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 2)
  ret <16 x i16> %3
}
; CHECK-LABEL: test_psra_2
; CHECK: vpsraw $8, %ymm0, %ymm0
; CHECK-NEXT: ret

; All three shifts are by zero (the psra.w count vector's low 64 bits are 0),
; so every shift is a no-op and no vpsraw should be emitted at all.
define <16 x i16> @test_psra_3(<16 x i16> %A) {
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}
; CHECK-LABEL: test_psra_3
; CHECK-NOT: vpsraw
; CHECK: ret

; Dword variant of test_psra_3: every shift count is zero, so the whole chain
; folds away and no vpsrad should appear.
define <8 x i32> @test_psra_4(<8 x i32> %A) {
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
  ret <8 x i32> %3
}
; CHECK-LABEL: test_psra_4
; CHECK-NOT: vpsrad
; CHECK: ret


; Both blend operands are %a0, so the result is %a0 regardless of the mask;
; the vpblendvb should be folded away.
define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
; CHECK-LABEL: test_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret


; Both blend operands are %a0, so the immediate is irrelevant and the
; vpblendw should be folded away.
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
  ret <16 x i16> %res
}
; CHECK-LABEL: test_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret


; Both blend operands are %a0, so the 128-bit vpblendd should be folded away.
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
; CHECK-LABEL: test_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret


; Both blend operands are %a0, so the 256-bit vpblendd should be folded away.
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a0, i32 7)
  ret <8 x i32> %res
}
; CHECK-LABEL: test_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret


; An all-zero mask selects the first operand (%a0) for every byte, so the
; vpblendvb should be folded away.
define <32 x i8> @test2_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret


; A zero immediate selects %a0 for every word, so the vpblendw should be
; folded away.
define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
  ret <16 x i16> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret


; A zero immediate selects %a0 for every dword, so the 128-bit vpblendd
; should be folded away.
define <4 x i32> @test2_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 0)
  ret <4 x i32> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret


; A zero immediate selects %a0 for every dword, so the 256-bit vpblendd
; should be folded away.
define <8 x i32> @test2_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 0)
  ret <8 x i32> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret


; An all-ones mask selects the second operand (%a1) for every byte, so the
; blend degenerates to a move and no vpblendvb should be emitted.
define <32 x i8> @test3_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %1 = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %1)
  ret <32 x i8> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret


; An all-ones immediate selects %a1 for every word, so the vpblendw should
; be folded to a simple move.
define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
  ret <16 x i16> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret


; An all-ones immediate selects %a1 for every dword, so the 128-bit vpblendd
; should be folded to a simple move.
define <4 x i32> @test3_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 -1)
  ret <4 x i32> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret


; An all-ones immediate selects %a1 for every dword, so the 256-bit vpblendd
; should be folded to a simple move.
define <8 x i32> @test3_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 -1)
  ret <8 x i32> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret


; Declarations of the AVX2 intrinsics exercised above.
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>)
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32)
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>)
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32)