; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
2
; Insert a scalar float into lane 0 of a <2 x float> and verify that the lane
; move is emitted as a pair of NEON vext.32 instructions (the expected output
; below contains no VFP s-register move).
;   %arg - scalar to insert; the expected output reads it from d0
;   %in  - vector receiving the scalar; the expected output keeps it in d1
; Returns %vec + %vec. Presumably the fadd is only there to give the inserted
; vector a NEON consumer.
define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
; The label directive confines the checks below to this function's output.
; CHECK-LABEL: test_vmovs_via_vext_lane0to0:
  %vec = insertelement <2 x float> %in, float %arg, i32 0
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d0, #1
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}
14
; Insert a scalar float into lane 1 of a <2 x float> and verify that the lane
; move is emitted as a pair of NEON vext.32 instructions (the expected output
; below contains no VFP s-register move).
;   %arg - scalar to insert; the expected output reads it from d0
;   %in  - vector receiving the scalar; the expected output keeps it in d1
; Returns %vec + %vec. Presumably the fadd is only there to give the inserted
; vector a NEON consumer.
define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
; The label directive confines the checks below to this function's output.
; CHECK-LABEL: test_vmovs_via_vext_lane0to1:
  %vec = insertelement <2 x float> %in, float %arg, i32 1
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vext.32 d1, d1, d0, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}
26
; Insert the second scalar float argument into lane 0 of a <2 x float> and
; verify that the lane move is emitted as a pair of NEON vext.32 instructions
; (the expected output below contains no VFP s-register move).
;   (unnamed) - leading float argument, unused; presumably a placeholder so
;               that %arg lands in the odd lane of d0
;   %arg      - scalar to insert; the expected output reads it from d0
;   %in       - vector receiving the scalar; the expected output keeps it in d1
; Returns %vec + %vec. Presumably the fadd is only there to give the inserted
; vector a NEON consumer.
define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
; The label directive confines the checks below to this function's output.
; CHECK-LABEL: test_vmovs_via_vext_lane1to0:
  %vec = insertelement <2 x float> %in, float %arg, i32 0
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vext.32 d1, d0, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}
38
; Insert the second scalar float argument into lane 1 of a <2 x float> and
; verify that the lane move is emitted as a pair of NEON vext.32 instructions
; (the expected output below contains no VFP s-register move).
;   (unnamed) - leading float argument, unused; presumably a placeholder so
;               that %arg lands in the odd lane of d0
;   %arg      - scalar to insert; the expected output reads it from d0
;   %in       - vector receiving the scalar; the expected output keeps it in d1
; Returns %vec + %vec. Presumably the fadd is only there to give the inserted
; vector a NEON consumer.
define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
; The label directive confines the checks below to this function's output.
; CHECK-LABEL: test_vmovs_via_vext_lane1to1:
  %vec = insertelement <2 x float> %in, float %arg, i32 1
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d0, d1, #1
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}
50
51
; Verify that a scalar move from the odd lane to the even lane of d0 (what
; would otherwise be "vmov s0, s1") is emitted as a NEON vdup.32.
;   (unnamed)  - leading float argument, unused
;   %ret       - value that is doubled and returned
;   %lhs, %rhs - not referenced by the body
; Returns %ret + %ret.
define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
; The label directive confines the checks below to this function's output.
; CHECK-LABEL: test_vmovs_via_vdup:

  ; Do an operation (which will end up NEON because of +neonfp) to convince the
  ; execution-domain pass that NEON is a good thing to use.
  %res = fadd float %ret, %ret
  ; It makes sense for LLVM to do the addition in d0 here, because it's going
  ; to be returned. This means it will want a "vmov s0, s1":
; CHECK: vdup.32 d0, d0[1]

  ret float %res
}
64
65declare float @llvm.sqrt.f32(float)
66
67declare void @bar()
68
; This is primarily a compile-only regression test: llc used to hit an
; internal fault on this input (see the comment before the call to @bar).
;   (unnamed) - leading float argument, unused
;   %in       - input whose square root is doubled and returned
; Returns sqrt(%in) + sqrt(%in).
define float @test_ineligible(float, float %in) {
; The label directive confines the checks below to this function's output.
; CHECK-LABEL: test_ineligible:

  %sqrt = call float @llvm.sqrt.f32(float %in)
  %val = fadd float %sqrt, %sqrt

  ; This call forces a move from a callee-saved register to the return-reg. That
  ; move is not eligible for conversion to a d-register instruction because the
  ; use-def chains would be messed up. Primarily a compile-test (we used to
  ; internal fault).
  call void @bar()
  ; The s-register move must still be present, and it must come after the call.
; CHECK: bl bar
; CHECK: vmov.f32 {{s[0-9]+}}, {{s[0-9]+}}
  ret float %val
}