; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
; so we need to edit it to remove the NAN constant comments
;

; copysign(x, c1) -> fabs(x) iff ispos(c1)
define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
  ret <4 x float> %1
}

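; copysign(x, fabs(y)) -> fabs(x)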
define <4 x float> @combine_vec_fcopysign_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
  ret <4 x float> %1
}

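; copysign(x, fneg(fabs(y))) -> fneg(fabs(x))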
define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %1
  %3 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %2)
  ret <4 x float> %3
}

; copysign(fabs(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(fneg(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(copysign(x,z), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(x, copysign(y,z)) -> copysign(x, z)
define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm2
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %y, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, fp_extend(y)) -> copysign(x, y)
define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fpext_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    cvtss2sd %xmm2, %xmm4
; SSE-NEXT:    movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT:    movaps %xmm2, %xmm6
; SSE-NEXT:    movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1]
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3]
; SSE-NEXT:    movaps {{.*#+}} xmm7
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    andps %xmm7, %xmm2
; SSE-NEXT:    movaps {{.*#+}} xmm8 = [-0.000000e+00,-0.000000e+00]
; SSE-NEXT:    andps %xmm8, %xmm4
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    andps %xmm7, %xmm0
; SSE-NEXT:    xorps %xmm4, %xmm4
; SSE-NEXT:    cvtss2sd %xmm5, %xmm4
; SSE-NEXT:    andps %xmm8, %xmm4
; SSE-NEXT:    orps %xmm0, %xmm4
; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT:    andps %xmm7, %xmm0
; SSE-NEXT:    cvtss2sd %xmm3, %xmm3
; SSE-NEXT:    andps %xmm8, %xmm3
; SSE-NEXT:    orps %xmm0, %xmm3
; SSE-NEXT:    andps %xmm7, %xmm1
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtss2sd %xmm6, %xmm0
; SSE-NEXT:    andps %xmm8, %xmm0
; SSE-NEXT:    orps %xmm0, %xmm1
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vcvtps2pd %xmm1, %ymm1
; AVX-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = fpext <4 x float> %y to <4 x double>
  %2 = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %1)
  ret <4 x double> %2
}

; copysign(x, fp_round(y)) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x double> %y) {
; SSE-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps {{.*#+}} xmm5
; SSE-NEXT:    andps %xmm5, %xmm0
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm6
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; SSE-NEXT:    andps %xmm4, %xmm6
; SSE-NEXT:    orps %xmm6, %xmm0
; SSE-NEXT:    movshdup {{.*#+}} xmm6 = xmm3[1,1,3,3]
; SSE-NEXT:    andps %xmm5, %xmm6
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm1
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    orps %xmm6, %xmm1
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE-NEXT:    andps %xmm5, %xmm1
; SSE-NEXT:    xorps %xmm6, %xmm6
; SSE-NEXT:    cvtsd2ss %xmm2, %xmm6
; SSE-NEXT:    andps %xmm4, %xmm6
; SSE-NEXT:    orps %xmm1, %xmm6
; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    andps %xmm5, %xmm3
; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cvtsd2ss %xmm2, %xmm1
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    orps %xmm3, %xmm1
; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vorpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = fptrunc <4 x double> %y to <4 x float>
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %Mag, <4 x double> %Sgn)