; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512

; Test ADDSUB ISel patterns.

; Functions below are obtained from the following source:
;
; typedef double double2 __attribute__((ext_vector_type(2)));
; typedef double double4 __attribute__((ext_vector_type(4)));
; typedef float float4 __attribute__((ext_vector_type(4)));
; typedef float float8 __attribute__((ext_vector_type(8)));
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A - B;
;   float4 Y = A + B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A - B;
;   float8 Y = A + B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A - B;
;   double4 Y = A + B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A - B;
;   double2 Y = A + B;
;   return (double2){X[0], Y[1]};
; }

; v4f32: fsub in even lanes, fadd in odd lanes (mask <0,5,2,7>) selects ADDSUBPS.
define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: test1:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

; v8f32: same sub/add interleave; SSE splits into two 128-bit ADDSUBPS, AVX uses one 256-bit op.
define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
; SSE-LABEL: test2:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm2, %xmm0
; SSE-NEXT:    addsubps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

; v4f64: sub/add interleave selects ADDSUBPD (two 128-bit ops on SSE, one 256-bit on AVX).
define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
; SSE-LABEL: test3:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm2, %xmm0
; SSE-NEXT:    addsubpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

; v2f64: lane 0 from the fsub, lane 1 from the fadd -> ADDSUBPD.
; NOTE(review): references attribute group #0, defined elsewhere in the file.
define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
; SSE-LABEL: test4:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

; v16f32: no 512-bit ADDSUB exists, so AVX512 lowers to vsubps + masked vaddps
; (mask 0xAAAA = odd lanes); SSE/AVX1 split into 128/256-bit ADDSUBPS ops.
define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
; SSE-LABEL: test5:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps %xmm4, %xmm0
; SSE-NEXT:    addsubps %xmm5, %xmm1
; SSE-NEXT:    addsubps %xmm6, %xmm2
; SSE-NEXT:    addsubps %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test5:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test5:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; AVX512-NEXT:    kmovw %eax, %k1
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1}
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %add = fadd <16 x float> %A, %B
  %sub = fsub <16 x float> %A, %B
  %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %vecinit2
}

; v8f64: AVX512 again has no 512-bit ADDSUB and falls back to vaddpd + vsubpd + vshufpd blend.
define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
; SSE-LABEL: test6:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd %xmm4, %xmm0
; SSE-NEXT:    addsubpd %xmm5, %xmm1
; SSE-NEXT:    addsubpd %xmm6, %xmm2
; SSE-NEXT:    addsubpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test6:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddsubpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test6:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
; AVX512-NEXT:    retq
  %add = fadd <8 x double> %A, %B
  %sub = fsub <8 x double> %A, %B
  %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %vecinit2
}

; Same as test1, but with B loaded from memory: the load folds into ADDSUBPS.
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

; Same as test2 with a memory operand; SSE folds both 16-byte halves of the load.
define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

; Same as test3 with a memory operand folded into ADDSUBPD.
define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

; Same as test4 with a memory operand folded into ADDSUBPD.
define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4b:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4b:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

; As test1b but with commuted shufflevector operands (add first, mask <4,1,6,3>);
; the same ADDSUBPS must still be selected.
define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x float> %vecinit6
}

; As test2b but with commuted shufflevector operands; still selects ADDSUBPS.
define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %vecinit14
}

; As test3b but with commuted shufflevector operands; still selects ADDSUBPD.
define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %vecinit6
}

; As test4b but with commuted shufflevector operands (mask <2,1>); still selects ADDSUBPD.
define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4c:
; SSE:       # %bb.0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4c:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %vecinit2
}
