// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
// RUN:   -target-feature +v8.1a -S -emit-llvm -o - %s | FileCheck %s

#include <arm_neon.h>

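// Test IR generation for the ARMv8.1-A rounding doubling multiply-accumulate
// intrinsics (vqrdmlah*/vqrdmlsh*). Clang currently lowers them to an
// @llvm.aarch64.neon.sqrdmulh call followed by a saturating add or subtract,
// and the FileCheck patterns below match that two-step expansion rather than
// a fused intrinsic.
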
// CHECK-LABEL: test_vqrdmlah_laneq_s16
int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
  return vqrdmlah_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlah_laneq_s32
int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
  return vqrdmlah_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlahq_laneq_s16
int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
  return vqrdmlahq_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlahq_laneq_s32
int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
  return vqrdmlahq_laneq_s32(a, b, v, 3);
}

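// There is no scalar i16 sqrdmulh/sqadd intrinsic, so for the 16-bit scalar
// forms the patterns below expect the operands to be inserted into lane 0 of
// <4 x i16> vectors, the v4i16 intrinsics to be called, and lane 0 of the
// result to be extracted. The 32-bit scalar forms use the i32 intrinsics
// directly.
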
// CHECK-LABEL: test_vqrdmlahh_s16
int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_s16(a, b, c);
}

// CHECK-LABEL: test_vqrdmlahs_s32
int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_s32(a, b, c);
}

// CHECK-LABEL: test_vqrdmlahh_lane_s16
int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: test_vqrdmlahs_lane_s32
int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: test_vqrdmlahh_laneq_s16
int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_laneq_s16(a, b, c, 7);
}

// CHECK-LABEL: test_vqrdmlahs_laneq_s32
int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_laneq_s32(a, b, c, 3);
}

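// The vqrdmlsh* (rounding doubling multiply-subtract) tests below mirror the
// vqrdmlah* tests above; the accumulate step is expected to lower to an
// @llvm.aarch64.neon.sqsub call instead of sqadd.
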
// CHECK-LABEL: test_vqrdmlsh_laneq_s16
int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
  return vqrdmlsh_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlsh_laneq_s32
int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
  return vqrdmlsh_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlshq_laneq_s16
int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
  return vqrdmlshq_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlshq_laneq_s32
int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
  return vqrdmlshq_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlshh_s16
int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_s16(a, b, c);
}

// CHECK-LABEL: test_vqrdmlshs_s32
int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_s32(a, b, c);
}

// CHECK-LABEL: test_vqrdmlshh_lane_s16
int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: test_vqrdmlshs_lane_s32
int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: test_vqrdmlshh_laneq_s16
int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_laneq_s16(a, b, c, 7);
}

// CHECK-LABEL: test_vqrdmlshs_laneq_s32
int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_laneq_s32(a, b, c, 3);
}