; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Make sure that the ARM backend with NEON handles vselect.

; Element-wise signed maximum of two <4 x i32> vectors, expressed as
; icmp sgt + select; the result is stored through %m.
; The patterns below expect a vcgt.s32 whose destination register is then
; used as the first operand of a vbsl over the same two source registers.
define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
; CHECK: vmax_v4i32:
; CHECK: vcgt.s32 [[QR:q[0-9]+]], [[Q1:q[0-9]+]], [[Q2:q[0-9]+]]
; CHECK: vbsl [[QR]], [[Q1]], [[Q2]]
    %cmpres = icmp sgt <4 x i32> %a, %b
    %maxres = select <4 x i1> %cmpres, <4 x i32> %a,  <4 x i32> %b
    store <4 x i32> %maxres, <4 x i32>* %m
    ret void
}
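
; The cost model was adjusted for the selects in the functions below. When
; their code lowering is improved, the expected costs must be adjusted too.
; NOTE(review): the cost expectations use a separate FileCheck prefix; the
; llc run line at the top of this file uses the default prefix only, so they
; are not exercised unless a run line passing --check-prefix is added.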
; Select between two <16 x i16> vectors loaded from memory, using a signed
; less-than compare of those same vectors as the mask; the result is stored
; to %storeaddr. The %blend argument is not referenced in the body.
%T0_10 = type <16 x i16>
%T1_10 = type <16 x i1>
; CHECK: func_blend10:
define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
                           %T1_10* %blend, %T0_10* %storeaddr) {
  %v0 = load %T0_10* %loadaddr
  %v1 = load %T0_10* %loadaddr2
  %c = icmp slt %T0_10 %v0, %v1
; Expected in the llc output, in order: four vst1 matches, then a vld.
; CHECK: vst1
; CHECK: vst1
; CHECK: vst1
; CHECK: vst1
; CHECK: vld
; Expected cost-model line for the select below.
; COST: func_blend10
; COST: cost of 40 {{.*}} select
  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
  store %T0_10 %r, %T0_10* %storeaddr
  ret void
}
; Select between two <8 x i32> vectors loaded from memory, using a signed
; less-than compare of those same vectors as the mask; the result is stored
; to %storeaddr. The %blend argument is not referenced in the body.
%T0_14 = type <8 x i32>
%T1_14 = type <8 x i1>
; CHECK: func_blend14:
define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
                           %T1_14* %blend, %T0_14* %storeaddr) {
  %v0 = load %T0_14* %loadaddr
  %v1 = load %T0_14* %loadaddr2
  %c = icmp slt %T0_14 %v0, %v1
; Expected in the llc output, in order: four strb matches.
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; Expected cost-model line for the select below.
; COST: func_blend14
; COST: cost of 41 {{.*}} select
  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
  store %T0_14 %r, %T0_14* %storeaddr
  ret void
}
; Select between two <16 x i32> vectors loaded from memory, using a signed
; less-than compare of those same vectors as the mask; the result is stored
; to %storeaddr. The %blend argument is not referenced in the body.
%T0_15 = type <16 x i32>
%T1_15 = type <16 x i1>
; CHECK: func_blend15:
define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
                           %T1_15* %blend, %T0_15* %storeaddr) {
  %v0 = load %T0_15* %loadaddr
  %v1 = load %T0_15* %loadaddr2
  %c = icmp slt %T0_15 %v0, %v1
; Expected in the llc output, in order: four strb matches.
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; Expected cost-model line for the select below.
; COST: func_blend15
; COST: cost of 82 {{.*}} select
  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
  store %T0_15 %r, %T0_15* %storeaddr
  ret void
}
; Select between two <4 x i64> vectors loaded from memory, using a signed
; less-than compare of those same vectors as the mask; the result is stored
; to %storeaddr. The %blend argument is not referenced in the body.
%T0_18 = type <4 x i64>
%T1_18 = type <4 x i1>
; CHECK: func_blend18:
define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
                           %T1_18* %blend, %T0_18* %storeaddr) {
  %v0 = load %T0_18* %loadaddr
  %v1 = load %T0_18* %loadaddr2
  %c = icmp slt %T0_18 %v0, %v1
; Expected in the llc output, in order: four strh matches.
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; Expected cost-model line for the select below.
; COST: func_blend18
; COST: cost of 19 {{.*}} select
  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
  store %T0_18 %r, %T0_18* %storeaddr
  ret void
}
; Select between two <8 x i64> vectors loaded from memory, using a signed
; less-than compare of those same vectors as the mask; the result is stored
; to %storeaddr. The %blend argument is not referenced in the body.
%T0_19 = type <8 x i64>
%T1_19 = type <8 x i1>
; CHECK: func_blend19:
define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
                           %T1_19* %blend, %T0_19* %storeaddr) {
  %v0 = load %T0_19* %loadaddr
  %v1 = load %T0_19* %loadaddr2
  %c = icmp slt %T0_19 %v0, %v1
; Expected in the llc output, in order: four strb matches.
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; Expected cost-model line for the select below.
; COST: func_blend19
; COST: cost of 50 {{.*}} select
  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
  store %T0_19 %r, %T0_19* %storeaddr
  ret void
}
; Select between two <16 x i64> vectors loaded from memory, using a signed
; less-than compare of those same vectors as the mask; the result is stored
; to %storeaddr. The %blend argument is not referenced in the body.
%T0_20 = type <16 x i64>
%T1_20 = type <16 x i1>
; CHECK: func_blend20:
define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
                           %T1_20* %blend, %T0_20* %storeaddr) {
  %v0 = load %T0_20* %loadaddr
  %v1 = load %T0_20* %loadaddr2
  %c = icmp slt %T0_20 %v0, %v1
; Expected in the llc output, in order: four strb matches.
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; Expected cost-model line for the select below.
; COST: func_blend20
; COST: cost of 100 {{.*}} select
  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
  store %T0_20 %r, %T0_20* %storeaddr
  ret void
}