; Codegen tests for AArch64 NEON floating-point width conversions (fcvtl/fcvtl2, fcvtn/fcvtn2, fcvtxn/fcvtxn2) and the scalar half<->float fcvt.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,GENERIC
; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,FAST
; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
; RUN:          -mtriple=arm64-eabi -aarch64-neon-syntax=apple \
; RUN:          | FileCheck %s --check-prefixes=GISEL,FALLBACK
; NOTE(review): the GlobalISel pipeline above feeds only stdout into FileCheck
; (there is no 2>&1). If the missed-isel remarks are printed on stderr, the
; negative fallback checks in this file cannot observe them -- confirm.

; Widen a whole <2 x float> vector to <2 x double> with fpext; every tested
; configuration is expected to emit a single fcvtl.
; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f64_f32)
; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f64_f32)
define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_f64_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_f64_f32:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl v0.2d, v0.2s
; GISEL-NEXT:    ret
  %vcvt1.i = fpext <2 x float> %x to <2 x double>
  ret <2 x double> %vcvt1.i
}

; Extract lanes 2 and 3 (the upper half) of a <4 x float> and fpext them to
; <2 x double>; the extract + extend is expected to fold into one fcvtl2.
; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f64_f32)
; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f64_f32)
define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_f64_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_f64_f32:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
  ret <2 x double> %vcvt1.i
}

; The upper half is taken through a different element type: the <4 x float>
; input is bitcast to <2 x double>, element 1 is extracted as <1 x double>, and
; that is bitcast back to <2 x float> before the fpext. The expected output is
; still a single fcvtl2.
define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %bc1 = bitcast <4 x float> %x to <2 x double>
  %ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> <i32 1>
  %bc2 = bitcast <1 x double> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Upper half extracted from a <2 x i64> (element 1), bitcast to <2 x float>,
; then extended to <2 x double>; expected to select fcvtl2.
define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
  %bc2 = bitcast <1 x i64> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Upper half extracted from a <4 x i32> (lanes 2 and 3), bitcast to
; <2 x float>, then extended to <2 x double>; expected to select fcvtl2.
define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %bc2 = bitcast <2 x i32> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Upper half extracted from an <8 x i16> (lanes 4 to 7), bitcast to
; <2 x float>, then extended to <2 x double>; expected to select fcvtl2.
define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %bc2 = bitcast <4 x i16> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Upper half extracted from a <16 x i8> (lanes 8 to 15), bitcast to
; <2 x float>, then extended to <2 x double>; expected to select fcvtl2.
define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bc2 = bitcast <8 x i8> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Half-precision variant: element 1 of a <2 x i64> is bitcast to <4 x half>
; and extended to <4 x float>; expected to select the 8h -> 4s form of fcvtl2.
define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
; GISEL-NEXT:    ret
  %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
  %bc2 = bitcast <1 x i64> %ext to <4 x half>
  %r = fpext <4 x half> %bc2 to <4 x float>
  ret <4 x float> %r
}

; Half-precision variant: lanes 2 and 3 of a <4 x i32> are bitcast to
; <4 x half> and extended to <4 x float>; expected to select fcvtl2 (8h -> 4s).
define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
; GISEL-NEXT:    ret
  %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %bc2 = bitcast <2 x i32> %ext to <4 x half>
  %r = fpext <4 x half> %bc2 to <4 x float>
  ret <4 x float> %r
}

; Half-precision variant: lanes 4 to 7 of an <8 x i16> are bitcast to
; <4 x half> and extended to <4 x float>; expected to select fcvtl2 (8h -> 4s).
define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
; GISEL-NEXT:    ret
  %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %bc2 = bitcast <4 x i16> %ext to <4 x half>
  %r = fpext <4 x half> %bc2 to <4 x float>
  ret <4 x float> %r
}

; Half-precision variant: lanes 8 to 15 of a <16 x i8> are bitcast to
; <4 x half> and extended to <4 x float>; expected to select fcvtl2 (8h -> 4s).
define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
; GISEL-NEXT:    ret
  %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bc2 = bitcast <8 x i8> %ext to <4 x half>
  %r = fpext <4 x half> %bc2 to <4 x float>
  ret <4 x float> %r
}

; Narrow a <2 x double> vector to <2 x float> with fptrunc; every tested
; configuration is expected to emit a single fcvtn.
; The negative fallback checks name the truncation opcode, since this function
; contains an fptrunc and no fpext.
; FALLBACK-NOT: remark{{.*}}G_FPTRUNC{{.*}}(in function: test_vcvt_f32_f64)
; FALLBACK-NOT: remark{{.*}}fptrunc{{.*}}(in function: test_vcvt_f32_f64)
define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_f32_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_f32_f64:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtn v0.2s, v0.2d
; GISEL-NEXT:    ret
  %vcvt1.i = fptrunc <2 x double> %v to <2 x float>
  ret <2 x float> %vcvt1.i
}

; Truncate %v to <2 x float> and place it in the upper two lanes of the result,
; with %x supplying the lower two lanes; expected to select fcvtn2 writing into
; the register that already holds %x. The -O0 fast-isel output additionally
; contains register copies before the fcvtn2.
; The negative fallback checks name the truncation opcode, since this function
; contains an fptrunc and no fpext.
; FALLBACK-NOT: remark{{.*}}G_FPTRUNC{{.*}}(in function: test_vcvt_high_f32_f64)
; FALLBACK-NOT: remark{{.*}}fptrunc{{.*}}(in function: test_vcvt_high_f32_f64)
define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_f32_f64:
; GENERIC:       // %bb.0:
; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
; GENERIC-NEXT:    fcvtn2 v0.4s, v1.2d
; GENERIC-NEXT:    ret
;
; FAST-LABEL: test_vcvt_high_f32_f64:
; FAST:       // %bb.0:
; FAST-NEXT:    mov.16b v2, v0
; FAST-NEXT:    // implicit-def: $q0
; FAST-NEXT:    mov.16b v0, v2
; FAST-NEXT:    fcvtn2 v0.4s, v1.2d
; FAST-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_f32_f64:
; GISEL:       // %bb.0:
; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT:    fcvtn2 v0.4s, v1.2d
; GISEL-NEXT:    ret
  %cvt = fptrunc <2 x double> %v to <2 x float>
  %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %vcvt2.i
}

; Call the llvm.aarch64.neon.fcvtxn intrinsic on a <2 x double>; expected to
; select a single fcvtxn producing <2 x float>.
define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
; CHECK-LABEL: test_vcvtx_f32_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtxn v0.2s, v0.2d
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvtx_f32_f64:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtxn v0.2s, v0.2d
; GISEL-NEXT:    ret
  %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
  ret <2 x float> %vcvtx1.i
}

; The fcvtxn intrinsic result becomes the upper two lanes of the return value,
; with %x as the lower two lanes; expected to select fcvtxn2 writing into the
; register that already holds %x. The -O0 fast-isel output additionally
; contains register copies before the fcvtxn2.
define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvtx_high_f32_f64:
; GENERIC:       // %bb.0:
; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
; GENERIC-NEXT:    fcvtxn2 v0.4s, v1.2d
; GENERIC-NEXT:    ret
;
; FAST-LABEL: test_vcvtx_high_f32_f64:
; FAST:       // %bb.0:
; FAST-NEXT:    mov.16b v2, v0
; FAST-NEXT:    // implicit-def: $q0
; FAST-NEXT:    mov.16b v0, v2
; FAST-NEXT:    fcvtxn2 v0.4s, v1.2d
; FAST-NEXT:    ret
;
; GISEL-LABEL: test_vcvtx_high_f32_f64:
; GISEL:       // %bb.0:
; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT:    fcvtxn2 v0.4s, v1.2d
; GISEL-NEXT:    ret
  %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
  %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}


; NOTE(review): the four vcvt* declarations below are not referenced by any
; function visible in this file -- presumably leftovers; confirm before removing.
declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone

declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone

; Called by test_vcvtx_f32_f64 and test_vcvtx_high_f32_f64.
declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone

; Convert a float to half via llvm.convert.to.fp16.f32 and return the bits as
; i16; expected to emit fcvt from s0 to an h register followed by an fmov into
; w0. The -O0 fast-isel output contains extra register moves around the fcvt.
define i16 @to_half(float %in) {
; GENERIC-LABEL: to_half:
; GENERIC:       // %bb.0:
; GENERIC-NEXT:    fcvt h0, s0
; GENERIC-NEXT:    fmov w0, s0
; GENERIC-NEXT:    ret
;
; FAST-LABEL: to_half:
; FAST:       // %bb.0:
; FAST-NEXT:    fcvt h1, s0
; FAST-NEXT:    // implicit-def: $w0
; FAST-NEXT:    fmov s0, w0
; FAST-NEXT:    mov.16b v0, v1
; FAST-NEXT:    fmov w0, s0
; FAST-NEXT:    // kill: def $w1 killed $w0
; FAST-NEXT:    ret
;
; GISEL-LABEL: to_half:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvt h0, s0
; GISEL-NEXT:    fmov w0, s0
; GISEL-NEXT:    ret
  %res = call i16 @llvm.convert.to.fp16.f32(float %in)
  ret i16 %res
}

; Convert half bits passed as i16 to float via llvm.convert.from.fp16.f32;
; expected to emit an fmov from w0 into s0 followed by fcvt s0, h0.
define float @from_half(i16 %in) {
; GENERIC-LABEL: from_half:
; GENERIC:       // %bb.0:
; GENERIC-NEXT:    fmov s0, w0
; GENERIC-NEXT:    fcvt s0, h0
; GENERIC-NEXT:    ret
;
; FAST-LABEL: from_half:
; FAST:       // %bb.0:
; FAST-NEXT:    fmov s0, w0
; FAST-NEXT:    // kill: def $h0 killed $h0 killed $s0
; FAST-NEXT:    fcvt s0, h0
; FAST-NEXT:    ret
;
; GISEL-LABEL: from_half:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fmov s0, w0
; GISEL-NEXT:    fcvt s0, h0
; GISEL-NEXT:    ret
  %res = call float @llvm.convert.from.fp16.f32(i16 %in)
  ret float %res
}

; Intrinsics called by from_half and to_half.
; NOTE(review): attribute group #1 is referenced here but is not defined in the
; visible part of this file -- confirm it is defined elsewhere or meant to be empty.
declare float @llvm.convert.from.fp16.f32(i16) #1
declare i16 @llvm.convert.to.fp16.f32(float) #1
