• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VL
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VLDQ
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VL
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VLDQ
8
9; FIXME: Drop the regex pattern matching of 'nan' once we drop support for MSVC
10; 2013.
11
; fabs of a <2 x double> via the llvm.fabs.v2f64 intrinsic.
; The X32/X64 prefixes are shared by every RUN line of the respective triple,
; so all three feature sets (avx, avx512vl, avx512dq+avx512vl) are expected to
; produce the same code here: a single vandps on %xmm0 with a memory operand
; (a .LCPI symbol on i686, a RIP-relative address on x86_64).
12define <2 x double> @fabs_v2f64(<2 x double> %p) {
13; X32-LABEL: fabs_v2f64:
14; X32:       # %bb.0:
15; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
16; X32-NEXT:    retl
17;
18; X64-LABEL: fabs_v2f64:
19; X64:       # %bb.0:
20; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
21; X64-NEXT:    retq
22  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
23  ret <2 x double> %t
24}
; Declaration of the intrinsic called by fabs_v2f64.
25declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
26
; fabs of a <4 x float> via the llvm.fabs.v4f32 intrinsic.
; Expected single-instruction lowering on %xmm0, per feature set:
;   avx               - vandps with a plain memory operand
;   avx512vl          - vpandd with a {1to4} broadcast memory operand
;   avx512dq+avx512vl - vandps with a {1to4} broadcast memory operand
; The memory operand is a .LCPI symbol on i686 and RIP-relative on x86_64.
27define <4 x float> @fabs_v4f32(<4 x float> %p) {
28; X32_AVX-LABEL: fabs_v4f32:
29; X32_AVX:       # %bb.0:
30; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
31; X32_AVX-NEXT:    retl
32;
33; X32_AVX512VL-LABEL: fabs_v4f32:
34; X32_AVX512VL:       # %bb.0:
35; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
36; X32_AVX512VL-NEXT:    retl
37;
38; X32_AVX512VLDQ-LABEL: fabs_v4f32:
39; X32_AVX512VLDQ:       # %bb.0:
40; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
41; X32_AVX512VLDQ-NEXT:    retl
42;
43; X64_AVX-LABEL: fabs_v4f32:
44; X64_AVX:       # %bb.0:
45; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
46; X64_AVX-NEXT:    retq
47;
48; X64_AVX512VL-LABEL: fabs_v4f32:
49; X64_AVX512VL:       # %bb.0:
50; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
51; X64_AVX512VL-NEXT:    retq
52;
53; X64_AVX512VLDQ-LABEL: fabs_v4f32:
54; X64_AVX512VLDQ:       # %bb.0:
55; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
56; X64_AVX512VLDQ-NEXT:    retq
57  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
58  ret <4 x float> %t
59}
; Declaration of the intrinsic called by fabs_v4f32.
60declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
61
; fabs of a <4 x double> via the llvm.fabs.v4f64 intrinsic.
; Expected single-instruction lowering on %ymm0, per feature set:
;   avx               - vandps with a plain memory operand
;   avx512vl          - vpandq with a {1to4} broadcast memory operand
;   avx512dq+avx512vl - vandpd with a {1to4} broadcast memory operand
; The memory operand is a .LCPI symbol on i686 and RIP-relative on x86_64.
62define <4 x double> @fabs_v4f64(<4 x double> %p) {
63; X32_AVX-LABEL: fabs_v4f64:
64; X32_AVX:       # %bb.0:
65; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
66; X32_AVX-NEXT:    retl
67;
68; X32_AVX512VL-LABEL: fabs_v4f64:
69; X32_AVX512VL:       # %bb.0:
70; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
71; X32_AVX512VL-NEXT:    retl
72;
73; X32_AVX512VLDQ-LABEL: fabs_v4f64:
74; X32_AVX512VLDQ:       # %bb.0:
75; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
76; X32_AVX512VLDQ-NEXT:    retl
77;
78; X64_AVX-LABEL: fabs_v4f64:
79; X64_AVX:       # %bb.0:
80; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
81; X64_AVX-NEXT:    retq
82;
83; X64_AVX512VL-LABEL: fabs_v4f64:
84; X64_AVX512VL:       # %bb.0:
85; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
86; X64_AVX512VL-NEXT:    retq
87;
88; X64_AVX512VLDQ-LABEL: fabs_v4f64:
89; X64_AVX512VLDQ:       # %bb.0:
90; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
91; X64_AVX512VLDQ-NEXT:    retq
92  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
93  ret <4 x double> %t
94}
; Declaration of the intrinsic called by fabs_v4f64.
95declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
96
; fabs of an <8 x float> via the llvm.fabs.v8f32 intrinsic.
; Expected single-instruction lowering on %ymm0, per feature set:
;   avx               - vandps with a plain memory operand
;   avx512vl          - vpandd with a {1to8} broadcast memory operand
;   avx512dq+avx512vl - vandps with a {1to8} broadcast memory operand
; The memory operand is a .LCPI symbol on i686 and RIP-relative on x86_64.
97define <8 x float> @fabs_v8f32(<8 x float> %p) {
98; X32_AVX-LABEL: fabs_v8f32:
99; X32_AVX:       # %bb.0:
100; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
101; X32_AVX-NEXT:    retl
102;
103; X32_AVX512VL-LABEL: fabs_v8f32:
104; X32_AVX512VL:       # %bb.0:
105; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
106; X32_AVX512VL-NEXT:    retl
107;
108; X32_AVX512VLDQ-LABEL: fabs_v8f32:
109; X32_AVX512VLDQ:       # %bb.0:
110; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
111; X32_AVX512VLDQ-NEXT:    retl
112;
113; X64_AVX-LABEL: fabs_v8f32:
114; X64_AVX:       # %bb.0:
115; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
116; X64_AVX-NEXT:    retq
117;
118; X64_AVX512VL-LABEL: fabs_v8f32:
119; X64_AVX512VL:       # %bb.0:
120; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
121; X64_AVX512VL-NEXT:    retq
122;
123; X64_AVX512VLDQ-LABEL: fabs_v8f32:
124; X64_AVX512VLDQ:       # %bb.0:
125; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
126; X64_AVX512VLDQ-NEXT:    retq
127  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
128  ret <8 x float> %t
129}
; Declaration of the intrinsic called by fabs_v8f32.
130declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
131
; fabs of an <8 x double> via the llvm.fabs.v8f64 intrinsic.
; Expected lowering, per feature set:
;   avx               - the 4-element mask is loaded once into %ymm2 with
;                       vmovaps, then applied with vandps to each of %ymm0 and
;                       %ymm1 (the value occupies two ymm registers)
;   avx512vl          - one vpandq on %zmm0 with a {1to8} broadcast operand
;   avx512dq+avx512vl - one vandpd on %zmm0 with a {1to8} broadcast operand
; In the avx checks each printed mask element is matched as either "nan" or
; "1.#QNAN0e+00"; see the MSVC 2013 FIXME near the top of this file.
132define <8 x double> @fabs_v8f64(<8 x double> %p) {
133; X32_AVX-LABEL: fabs_v8f64:
134; X32_AVX:       # %bb.0:
135; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
136; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
137; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
138; X32_AVX-NEXT:    retl
139;
140; X32_AVX512VL-LABEL: fabs_v8f64:
141; X32_AVX512VL:       # %bb.0:
142; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
143; X32_AVX512VL-NEXT:    retl
144;
145; X32_AVX512VLDQ-LABEL: fabs_v8f64:
146; X32_AVX512VLDQ:       # %bb.0:
147; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
148; X32_AVX512VLDQ-NEXT:    retl
149;
150; X64_AVX-LABEL: fabs_v8f64:
151; X64_AVX:       # %bb.0:
152; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
153; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
154; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
155; X64_AVX-NEXT:    retq
156;
157; X64_AVX512VL-LABEL: fabs_v8f64:
158; X64_AVX512VL:       # %bb.0:
159; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
160; X64_AVX512VL-NEXT:    retq
161;
162; X64_AVX512VLDQ-LABEL: fabs_v8f64:
163; X64_AVX512VLDQ:       # %bb.0:
164; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
165; X64_AVX512VLDQ-NEXT:    retq
166  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
167  ret <8 x double> %t
168}
; Declaration of the intrinsic called by fabs_v8f64.
169declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
170
; fabs of a <16 x float> via the llvm.fabs.v16f32 intrinsic.
; Expected lowering, per feature set:
;   avx               - the 8-element mask is loaded once into %ymm2 with
;                       vmovaps, then applied with vandps to each of %ymm0 and
;                       %ymm1 (the value occupies two ymm registers)
;   avx512vl          - one vpandd on %zmm0 with a {1to16} broadcast operand
;   avx512dq+avx512vl - one vandps on %zmm0 with a {1to16} broadcast operand
; In the avx checks each printed mask element is matched as either "nan" or
; "1.#QNAN0e+00"; see the MSVC 2013 FIXME near the top of this file.
171define <16 x float> @fabs_v16f32(<16 x float> %p) {
172; X32_AVX-LABEL: fabs_v16f32:
173; X32_AVX:       # %bb.0:
174; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
175; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
176; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
177; X32_AVX-NEXT:    retl
178;
179; X32_AVX512VL-LABEL: fabs_v16f32:
180; X32_AVX512VL:       # %bb.0:
181; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
182; X32_AVX512VL-NEXT:    retl
183;
184; X32_AVX512VLDQ-LABEL: fabs_v16f32:
185; X32_AVX512VLDQ:       # %bb.0:
186; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
187; X32_AVX512VLDQ-NEXT:    retl
188;
189; X64_AVX-LABEL: fabs_v16f32:
190; X64_AVX:       # %bb.0:
191; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
192; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
193; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
194; X64_AVX-NEXT:    retq
195;
196; X64_AVX512VL-LABEL: fabs_v16f32:
197; X64_AVX512VL:       # %bb.0:
198; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
199; X64_AVX512VL-NEXT:    retq
200;
201; X64_AVX512VLDQ-LABEL: fabs_v16f32:
202; X64_AVX512VLDQ:       # %bb.0:
203; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
204; X64_AVX512VLDQ-NEXT:    retq
205  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
206  ret <16 x float> %t
207}
; Declaration of the intrinsic called by fabs_v16f32.
208declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
209
210; PR20354: when generating code for a vector fabs op,
211; make sure that we're only turning off the sign bit of each float value.
212; No constant pool loads or vector ops are needed for the fabs of a
213; bitcasted integer constant; we should just return an integer constant
214; that has the sign bits turned off.
215;
216; So instead of something like this:
217;    movabsq (constant pool load of mask for sign bits)
218;    vmovq   (move from integer register to vector/fp register)
219;    vandps  (mask off sign bits)
220;    vmovq   (move vector/fp register back to integer return register)
221;
222; We should generate:
223;    mov     (put constant value in return register)
224
; Constant-folding case: the i64 constant 0xFFFFFFFF00000000 (upper 32 bits all
; ones, lower 32 bits zero) is bitcast to <2 x float>, passed through
; llvm.fabs.v2f32 and bitcast back to i64.
; Expected result is the constant 0x7FFFFFFF00000000 materialized directly:
;   x86_64 - one movabsq into %rax
;   i686   - %eax zeroed with xorl (low half) and 0x7FFFFFFF moved into %edx
;            (high half)
225define i64 @fabs_v2f32_1() {
226; X32-LABEL: fabs_v2f32_1:
227; X32:       # %bb.0:
228; X32-NEXT:    xorl %eax, %eax
229; X32-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
230; X32-NEXT:    retl
231;
232; X64-LABEL: fabs_v2f32_1:
233; X64:       # %bb.0:
234; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
235; X64-NEXT:    retq
236 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
237 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
238 %ret = bitcast <2 x float> %fabs to i64
239 ret i64 %ret
240}
241
; Constant-folding case: the i64 constant 0x00000000FFFFFFFF (lower 32 bits all
; ones, upper 32 bits zero) is bitcast to <2 x float>, passed through
; llvm.fabs.v2f32 and bitcast back to i64.
; Expected result is the constant 0x000000007FFFFFFF materialized directly:
;   x86_64 - one movl of 0x7FFFFFFF into %eax
;   i686   - 0x7FFFFFFF moved into %eax (low half) and %edx zeroed with xorl
;            (high half)
242define i64 @fabs_v2f32_2() {
243; X32-LABEL: fabs_v2f32_2:
244; X32:       # %bb.0:
245; X32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
246; X32-NEXT:    xorl %edx, %edx
247; X32-NEXT:    retl
248;
249; X64-LABEL: fabs_v2f32_2:
250; X64:       # %bb.0:
251; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
252; X64-NEXT:    retq
253 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
254 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
255 %ret = bitcast <2 x float> %fabs to i64
256 ret i64 %ret
257}
258
; Declaration of the <2 x float> fabs intrinsic called by both fabs_v2f32_1
; and fabs_v2f32_2.
259declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
260