; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VLDQ

; v2f64 fabs lowers to a single vandps against a constant-pool sign mask on all
; AVX/AVX-512 subtargets (the common X86/X64 prefixes cover every RUN line).
define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X86-LABEL: fabs_v2f64:
; X86:       # %bb.0:
; X86-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: fabs_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

; v4f32 fabs: plain AVX uses a full-width vandps with a constant-pool mask;
; AVX512VL folds the mask as a {1to4} broadcast (vpandd); AVX512DQ+VL has a
; float-domain broadcast and (vandps {1to4}).
define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X86-AVX-LABEL: fabs_v4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v4f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v4f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

; v4f64 fabs on 256-bit vectors: AVX uses vandps with a full constant-pool
; mask; AVX512VL broadcasts the qword mask {1to4} (vpandq); AVX512DQ+VL uses
; the double-domain vandpd broadcast form.
define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X86-AVX-LABEL: fabs_v4f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v4f64:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f64:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v4f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v4f64:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f64:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

; v8f32 fabs on 256-bit vectors: same pattern as v4f32 but with a {1to8}
; dword-mask broadcast on the AVX-512 subtargets.
define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X86-AVX-LABEL: fabs_v8f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v8f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v8f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v8f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)

; v8f64 fabs on 512-bit vectors: plain AVX splits into two 256-bit vandps
; sharing one materialized mask register; AVX-512 subtargets use a single
; zmm op with a {1to8} broadcast mask.
define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X86-AVX-LABEL: fabs_v8f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X86-AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v8f64:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f64:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v8f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X64-AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v8f64:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f64:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

; v16f32 fabs on 512-bit vectors: plain AVX splits into two 256-bit vandps
; sharing one mask register; AVX-512 subtargets use a single zmm op with a
; {1to16} broadcast mask.
define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X86-AVX-LABEL: fabs_v16f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v16f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v16f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v16f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v16f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v16f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)

; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
; bitcasted integer constant; we should just return an integer constant
; that has the sign bits turned off.
;
; So instead of something like this:
;    movabsq (constant pool load of mask for sign bits)
;    vmovq   (move from integer register to vector/fp register)
;    vandps  (mask off sign bits)
;    vmovq   (move vector/fp register back to integer return register)
;
; We should generate:
;    mov     (put constant value in return register)

; Constant-folded fabs: the i64 constant 0xFFFFFFFF_00000000 bitcast to
; <2 x float> must fold to the sign-cleared constant 0x7FFFFFFF_00000000 —
; a bare immediate move, with no vector ops or constant-pool loads.
define i64 @fabs_v2f32_1() {
; X86-LABEL: fabs_v2f32_1:
; X86:       # %bb.0:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_1:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; X64-NEXT:    retq
 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

; Constant-folded fabs, low-lane variant: 0x00000000_FFFFFFFF bitcast to
; <2 x float> must fold to 0x00000000_7FFFFFFF as a plain immediate move.
define i64 @fabs_v2f32_2() {
; X86-LABEL: fabs_v2f32_2:
; X86:       # %bb.0:
; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_2:
; X64:       # %bb.0:
; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
