; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT

; Test: vector add feeding scalar i64 arithmetic whose results are re-packed
; into a vector.
;
; The IR adds %a and %b as <2 x i64>, takes lane 0 of that sum and lane 0 of
; %b, forms their scalar sum and difference, and returns a vector holding the
; sum in lane 0 and the difference in lane 1.
;
; The expectations below are written twice, once per NEON assembly syntax
; (apple and generic), and each is further split by whether the advanced copy
; optimization is disabled or enabled on the llc command line.
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: add.2d	v[[REG:[0-9]+]], v0, v1
; CHECK: add	d[[REG3:[0-9]+]], d[[REG]], d1
; CHECK: sub	d[[REG2:[0-9]+]], d[[REG]], d1
; Without advanced copy optimization, we end up with cross register
; banks copies that cannot be coalesced.
; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; With advanced copy optimization, we end up with just one copy
; to insert the computed high part into the V register.
; CHECK-OPT-NOT: fmov
; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
; CHECK-OPT-NOT: fmov
; CHECK: ins.d v0[1], [[COPY_REG2]]
; CHECK-NEXT: ret
;
; Same expectations, spelled in the generic NEON syntax.
; GENERIC-LABEL: bar:
; GENERIC: add	v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add	d[[REG3:[0-9]+]], d[[REG]], d1
; GENERIC: sub	d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: ins v0.d[1], [[COPY_REG2]]
; GENERIC-NEXT: ret
  %add = add <2 x i64> %a, %b
  %vgetq_lane = extractelement <2 x i64> %add, i32 0
  %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
  %add3 = add i64 %vgetq_lane, %vgetq_lane2
  %sub = sub i64 %vgetq_lane, %vgetq_lane2
  %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
  %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
  ret <2 x i64> %vecinit8
}

; Test: scalar i64 subtract of two vector lanes, returned as a double.
;
; Computes (lane 0 of %b) - (lane 0 of %a) as i64 and bitcasts the result to
; double. In both assembly syntaxes the expected code is a subtract performed
; directly on the D registers, immediately followed by the return.
define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: subdd_su64:
; CHECK: sub d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: subdd_su64:
; GENERIC: sub d0, d1, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %sub.i = sub nsw i64 %vecext1, %vecext
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}

; Test: scalar i64 add of two vector lanes, returned as a double.
;
; Computes (lane 0 of %b) + (lane 0 of %a) as i64 and bitcasts the result to
; double. In both assembly syntaxes the expected code is an add performed
; directly on the D registers, immediately followed by the return.
define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vaddd_su64:
; CHECK: add d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: vaddd_su64:
; GENERIC: add d0, d1, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add nsw i64 %vecext1, %vecext
  %retval = bitcast i64 %add.i to double
  ret double %retval
}

; Test: scalar i64 add followed by a negation (0 - sum), returned as a double.
;
; sub MI doesn't access dsub register.
;
; Computes 0 - ((lane 0 of %b) + (lane 0 of %a)) as i64 and bitcasts the
; result to double. The expected code is a D-register add followed by a
; D-register sub whose first source register is not pinned (it is matched by
; a regex; presumably it is whichever register holds the zero operand), then
; the return.
define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: add_sub_su64:
; CHECK: add d0, d1, d0
; CHECK: sub d0, {{d[0-9]+}}, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: add_sub_su64:
; GENERIC: add d0, d1, d0
; GENERIC: sub d0, {{d[0-9]+}}, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add i64 %vecext1, %vecext
  %sub.i = sub i64 0, %add.i
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}

; Test: scalar i64 bitwise AND of two vector lanes, returned as a double.
;
; Computes (lane 0 of %b) & (lane 0 of %a) as i64 and bitcasts the result to
; double. The expected code is the 8-byte vector form of the AND operating on
; v1 and v0, immediately followed by the return.
define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: and_su64:
; CHECK: and.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: and_su64:
; GENERIC: and v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %and.i = and i64 %vecext1, %vecext
  %retval = bitcast i64 %and.i to double
  ret double %retval
}

; Test: scalar i64 bitwise OR of two vector lanes, returned as a double.
;
; Computes (lane 0 of %b) | (lane 0 of %a) as i64 and bitcasts the result to
; double. The expected code is the 8-byte vector form of the ORR operating on
; v1 and v0, immediately followed by the return.
define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: orr_su64:
; CHECK: orr.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: orr_su64:
; GENERIC: orr v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %or.i = or i64 %vecext1, %vecext
  %retval = bitcast i64 %or.i to double
  ret double %retval
}

; Test: scalar i64 bitwise XOR of two vector lanes, returned as a double.
;
; Computes (lane 0 of %b) ^ (lane 0 of %a) as i64 and bitcasts the result to
; double. The expected code is the 8-byte vector form of the EOR operating on
; v1 and v0, immediately followed by the return.
define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: xorr_su64:
; CHECK: eor.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: xorr_su64:
; GENERIC: eor v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %xor.i = xor i64 %vecext1, %vecext
  %retval = bitcast i64 %xor.i to double
  ret double %retval
}