• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
2
; Signed <2 x float> -> <2 x i32> conversion of a value scaled by 16 (2^4).
; The vector multiply is expected to disappear, leaving a single fixed-point
; convert with 4 fractional bits.
; The label pattern ends in ':' so that it anchors on the function's own
; label and cannot also match test10 .. test14.
; CHECK-LABEL: test1:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #4
; CHECK: ret
define <2 x i32> @test1(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}
12
; Four-lane single-precision variant: the factor 8 (2^3) is expected to turn
; into 3 fractional bits on the signed convert, with no vector multiply left.
; CHECK-LABEL: test2
; CHECK-NOT: fmul.4s
; CHECK: fcvtzs.4s v0, v0, #3
; CHECK: ret
define <4 x i32> @test2(<4 x float> %f) {
  %scaled = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
  %fixed = fptosi <4 x float> %scaled to <4 x i32>
  ret <4 x i32> %fixed
}
22
; Double-precision variant: <2 x double> scaled by 32 (2^5) and converted to
; <2 x i64> is expected to become one signed convert with 5 fractional bits.
; CHECK-LABEL: test3
; CHECK-NOT: fmul.2d
; CHECK: fcvtzs.2d v0, v0, #5
; CHECK: ret
define <2 x i64> @test3(<2 x double> %d) {
  %scaled = fmul <2 x double> %d, <double 3.200000e+01, double 3.200000e+01>
  %fixed = fptosi <2 x double> %scaled to <2 x i64>
  ret <2 x i64> %fixed
}
32
; Truncate double to i32.
; The conversion is expected at the 64-bit lane width with the scale
; (16 = 2^4) folded in as 4 fractional bits; the result is then narrowed to
; 32-bit lanes. The negative pattern rejects any 2d vector multiply, and the
; positive pattern pins the fixed-point immediate.
; CHECK-LABEL: test4
; CHECK-NOT: fmul.2d
; CHECK: fcvtzs.2d v0, v0, #4
; CHECK: xtn.2s
; CHECK: ret
define <2 x i32> @test4(<2 x double> %d) {
  %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}
44
; Float source with an i16 result: the fold is still expected, producing a
; 2s signed convert with 4 fractional bits (scale 16 = 2^4) and no multiply.
; CHECK-LABEL: test5
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #4
; CHECK: ret
define <2 x i16> @test5(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
  %narrow = fptosi <2 x float> %scaled to <2 x i16>
  ret <2 x i16> %narrow
}
55
; Negative case: the integer result (i64) is wider than the float source, so
; no fixed-point convert is expected. The multiply by 16 must survive,
; followed by a widening to double and an ordinary signed convert.
; CHECK-LABEL: test6
; CHECK: fmov.2s v1, #16.00000000
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtl v0.2d, v0.2s
; CHECK: fcvtzs.2d v0, v0
; CHECK: ret
define <2 x i64> @test6(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
  %wide = fptosi <2 x float> %scaled to <2 x i64>
  ret <2 x i64> %wide
}
68
; Unsigned flavour of the fold: fptoui of a value scaled by 16 (2^4) is
; expected to select the unsigned fixed-point convert with 4 fractional bits.
; CHECK-LABEL: test7
; CHECK-NOT: fmul.2s
; CHECK: fcvtzu.2s v0, v0, #4
; CHECK: ret
define <2 x i32> @test7(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
  %fixed = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %fixed
}
79
; Negative case: 17 is not a power of two, so the multiply cannot be expressed
; as fractional bits. Both the multiply and a plain unsigned convert are
; expected in the output.
; CHECK-LABEL: test8
; CHECK: fmov.2s v1, #17.00000000
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzu.2s v0, v0
; CHECK: ret
define <2 x i32> @test8(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.700000e+01, float 1.700000e+01>
  %conv = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
91
; Negative case: the two lanes use different powers of two (16 and 8), so no
; single fractional-bit count fits and the multiply is expected to remain.
; CHECK-LABEL: test9
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzu.2s v0, v0
; CHECK: ret
define <2 x i32> @test9(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 8.000000e+00>
  %conv = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
102
; Negative case: every lane of the multiplier is undef, so there is no power
; of two to fold. A vector multiply and a plain unsigned convert are expected;
; register numbers are left unconstrained.
; CHECK-LABEL: test10
; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: ret
define <2 x i32> @test10(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float undef, float undef>
  %conv = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
113
; One undef lane next to a power-of-two lane (8 = 2^3): the fold is expected
; to go ahead, giving an unsigned convert with 3 fractional bits.
; CHECK-LABEL: test11
; CHECK: fcvtzu.2s v0, v0, #3
; CHECK: ret
define <2 x i32> @test11(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float undef, float 8.000000e+00>
  %fixed = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %fixed
}
123
; Negative case: a multiplier of 0.0 must not be folded. The multiply and a
; plain signed convert are both expected.
; CHECK-LABEL: test12
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzs.2s v0, v0
; CHECK: ret
define <2 x i32> @test12(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
  %conv = fptosi <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
134
; Negative case: the multiplier is 2^33 (hex literal below), which is outside
; the range that can be folded for 32-bit lanes. The multiply and a plain
; signed convert are expected.
; CHECK-LABEL: test13
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzs.2s v0, v0
; CHECK: ret
define <2 x i32> @test13(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
  %conv = fptosi <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
145
; Boundary case: the multiplier is 2^32 (hex literal below), the largest
; power of two that still folds for 32-bit lanes. A signed convert with 32
; fractional bits is expected.
; CHECK-LABEL: test14
; CHECK: fcvtzs.2s v0, v0, #32
; CHECK: ret
define <2 x i32> @test14(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
  %fixed = fptosi <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %fixed
}
155
; Three-lane vectors: <3 x float> scaled by 4 (2^2) and converted to
; <3 x i32>. The expected output is a four-lane signed convert with 2
; fractional bits.
; CHECK-LABEL: test_illegal_fp_to_int:
; CHECK: fcvtzs.4s v0, v0, #2
define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) {
  %mul = fmul <3 x float> %in, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %cvt = fptosi <3 x float> %mul to <3 x i32>
  ret <3 x i32> %cvt
}
163