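// Clang CodeGen test: checks the LLVM IR emitted for the vqrdmlah*/vqrdmlsh*
// NEON intrinsics when compiling for AArch64 with the v8.1a target feature.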
1 // REQUIRES: aarch64-registered-target
2 
3 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
4 // RUN:  -target-feature +v8.1a -S -emit-llvm -o - %s | FileCheck %s
5 
6  #include <arm_neon.h>
7 
8 // CHECK-LABEL: test_vqrdmlah_laneq_s16
test_vqrdmlah_laneq_s16(int16x4_t a,int16x4_t b,int16x8_t v)9 int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
10 // CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
11 // CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
12 // CHECK: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
13   return vqrdmlah_laneq_s16(a, b, v, 7);
14 }
15 
16 // CHECK-LABEL: test_vqrdmlah_laneq_s32
test_vqrdmlah_laneq_s32(int32x2_t a,int32x2_t b,int32x4_t v)17 int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
18 // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
19 // CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
20 // CHECK: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
21   return vqrdmlah_laneq_s32(a, b, v, 3);
22 }
23 
24 // CHECK-LABEL: test_vqrdmlahq_laneq_s16
test_vqrdmlahq_laneq_s16(int16x8_t a,int16x8_t b,int16x8_t v)25 int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
26 // CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
27 // CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
28 // CHECK: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
29   return vqrdmlahq_laneq_s16(a, b, v, 7);
30 }
31 
32 // CHECK-LABEL: test_vqrdmlahq_laneq_s32
test_vqrdmlahq_laneq_s32(int32x4_t a,int32x4_t b,int32x4_t v)33 int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
34 // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
35 // CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
36 // CHECK: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
37   return vqrdmlahq_laneq_s32(a, b, v, 3);
38 }
39 
40 // CHECK-LABEL: test_vqrdmlahh_s16
test_vqrdmlahh_s16(int16_t a,int16_t b,int16_t c)41 int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
42 // CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
43 // CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
44 // CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
45 // CHECK: extractelement <4 x i16> [[mul]], i64 0
46 // CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
47 // CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
48 // CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
49 // CHECK: extractelement <4 x i16> [[add]], i64 0
50   return vqrdmlahh_s16(a, b, c);
51 }
52 
53 // CHECK-LABEL: test_vqrdmlahs_s32
test_vqrdmlahs_s32(int32_t a,int32_t b,int32_t c)54 int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
55 // CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
56 // CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
57   return vqrdmlahs_s32(a, b, c);
58 }
59 
60 // CHECK-LABEL: test_vqrdmlahh_lane_s16
test_vqrdmlahh_lane_s16(int16_t a,int16_t b,int16x4_t c)61 int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
62 // CHECK: extractelement <4 x i16> {{%.*}}, i32 3
63 // CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
64 // CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
65 // CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
66 // CHECK: extractelement <4 x i16> [[mul]], i64 0
67 // CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
68 // CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
69 // CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
70 // CHECK: extractelement <4 x i16> [[add]], i64 0
71   return vqrdmlahh_lane_s16(a, b, c, 3);
72 }
73 
74 // CHECK-LABEL: test_vqrdmlahs_lane_s32
test_vqrdmlahs_lane_s32(int32_t a,int32_t b,int32x2_t c)75 int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
76 // CHECK: extractelement <2 x i32> {{%.*}}, i32 1
77 // CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
78 // CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
79   return vqrdmlahs_lane_s32(a, b, c, 1);
80 }
81 
82 // CHECK-LABEL: test_vqrdmlahh_laneq_s16
test_vqrdmlahh_laneq_s16(int16_t a,int16_t b,int16x8_t c)83 int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
84 // CHECK: extractelement <8 x i16> {{%.*}}, i32 7
85 // CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
86 // CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
87 // CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
88 // CHECK: extractelement <4 x i16> [[mul]], i64 0
89 // CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
90 // CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
91 // CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
92 // CHECK: extractelement <4 x i16> [[add]], i64 0
93   return vqrdmlahh_laneq_s16(a, b, c, 7);
94 }
95 
96 // CHECK-LABEL: test_vqrdmlahs_laneq_s32
test_vqrdmlahs_laneq_s32(int32_t a,int32_t b,int32x4_t c)97 int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
98 // CHECK: extractelement <4 x i32> {{%.*}}, i32 3
99 // CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
100 // CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
101   return vqrdmlahs_laneq_s32(a, b, c, 3);
102 }
103 
104 // CHECK-LABEL: test_vqrdmlsh_laneq_s16
test_vqrdmlsh_laneq_s16(int16x4_t a,int16x4_t b,int16x8_t v)105 int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
106 // CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
107 // CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
108 // CHECK: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
109   return vqrdmlsh_laneq_s16(a, b, v, 7);
110 }
111 
112 // CHECK-LABEL: test_vqrdmlsh_laneq_s32
test_vqrdmlsh_laneq_s32(int32x2_t a,int32x2_t b,int32x4_t v)113 int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
114 // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
115 // CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
116 // CHECK: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
117   return vqrdmlsh_laneq_s32(a, b, v, 3);
118 }
119 
120 // CHECK-LABEL: test_vqrdmlshq_laneq_s16
test_vqrdmlshq_laneq_s16(int16x8_t a,int16x8_t b,int16x8_t v)121 int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
122 // CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
123 // CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
124 // CHECK: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
125   return vqrdmlshq_laneq_s16(a, b, v, 7);
126 }
127 
128 // CHECK-LABEL: test_vqrdmlshq_laneq_s32
test_vqrdmlshq_laneq_s32(int32x4_t a,int32x4_t b,int32x4_t v)129 int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
130 // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
131 // CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
132 // CHECK: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
133   return vqrdmlshq_laneq_s32(a, b, v, 3);
134 }
135 
136 // CHECK-LABEL: test_vqrdmlshh_s16
test_vqrdmlshh_s16(int16_t a,int16_t b,int16_t c)137 int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
138 // CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
139 // CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
140 // CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
141 // CHECK: extractelement <4 x i16> [[mul]], i64 0
142 // CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
143 // CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
144 // CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
145 // CHECK: extractelement <4 x i16> [[sub]], i64 0
146   return vqrdmlshh_s16(a, b, c);
147 }
148 
149 // CHECK-LABEL: test_vqrdmlshs_s32
test_vqrdmlshs_s32(int32_t a,int32_t b,int32_t c)150 int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
151 // CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
152 // CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
153   return vqrdmlshs_s32(a, b, c);
154 }
155 
156 // CHECK-LABEL: test_vqrdmlshh_lane_s16
test_vqrdmlshh_lane_s16(int16_t a,int16_t b,int16x4_t c)157 int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
158 // CHECK: extractelement <4 x i16> {{%.*}}, i32 3
159 // CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
160 // CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
161 // CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
162 // CHECK: extractelement <4 x i16> [[mul]], i64 0
163 // CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
164 // CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
165 // CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
166 // CHECK: extractelement <4 x i16> [[sub]], i64 0
167   return vqrdmlshh_lane_s16(a, b, c, 3);
168 }
169 
170 // CHECK-LABEL: test_vqrdmlshs_lane_s32
test_vqrdmlshs_lane_s32(int32_t a,int32_t b,int32x2_t c)171 int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
172 // CHECK: extractelement <2 x i32> {{%.*}}, i32 1
173 // CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
174 // CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
175   return vqrdmlshs_lane_s32(a, b, c, 1);
176 }
177 
178 // CHECK-LABEL: test_vqrdmlshh_laneq_s16
test_vqrdmlshh_laneq_s16(int16_t a,int16_t b,int16x8_t c)179 int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
180 // CHECK: extractelement <8 x i16> {{%.*}}, i32 7
181 // CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
182 // CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
183 // CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
184 // CHECK: extractelement <4 x i16> [[mul]], i64 0
185 // CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
186 // CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
187 // CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
188 // CHECK: extractelement <4 x i16> [[sub]], i64 0
189   return vqrdmlshh_laneq_s16(a, b, c, 7);
190 }
191 
192 // CHECK-LABEL: test_vqrdmlshs_laneq_s32
test_vqrdmlshs_laneq_s32(int32_t a,int32_t b,int32x4_t c)193 int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
194 // CHECK: extractelement <4 x i32> {{%.*}}, i32 3
195 // CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
196 // CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
197   return vqrdmlshs_laneq_s32(a, b, c, 3);
198 }
199