; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,GENERIC
; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check.
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m1 | FileCheck %s --check-prefixes=CHECK,EXYNOSM1
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m3 | FileCheck %s --check-prefixes=CHECK,EXYNOSM3

; AArch64 NEON intrinsic declarations.

declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)

declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)

declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)

declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)

declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)

declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)

declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)

declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)

declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

; Integer multiply-accumulate by element: a single lane of %v is replicated
; with shufflevector, multiplied by %b and added to %a. Each test expects one
; indexed mla.

define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

; Integer multiply-subtract by element: same shape as the mla tests, but the
; product is subtracted from %a and an indexed mls is expected.

define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}

define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}

define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

; Integer multiply by element: a replicated lane of %v times %a, expecting an
; indexed mul. The _s and _u variants contain identical IR.

define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

; Vector fused multiply-add by element via llvm.fma. The GENERIC and EXYNOSM3
; prefixes expect the indexed fmla form; the EXYNOSM1 prefix expects the lane
; to be broadcast with dup first, followed by a vector-by-vector fmla.

define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32:
; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1]
; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s
; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32:
; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1]
; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s
; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)

define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32:
; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3]
; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s
; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32:
; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3]
; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s
; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; Vector fused multiply-subtract by element: %v is negated (fsub from -0.0)
; before the lane is replicated and passed to llvm.fma; fmls is expected.

define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32:
; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1]
; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s
; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32:
; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1]
; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s
; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32:
; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3]
; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s
; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32:
; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3]
; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s
; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; Double-precision variants of the fused multiply-add/subtract by element tests.

define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmaq_lane_f64:
; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d
; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
entry:
  %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)

define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64:
; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1]
; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d
; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
entry:
  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsq_lane_f64:
; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d
; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
entry:
  %sub = fsub <1 x double> <double -0.000000e+00>, %v
  %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64:
; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1]
; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d
; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

; Scalar fused multiply-add/subtract where one operand is extracted from a
; vector lane. The negation is applied to the extracted scalar here.
; Label directives end with ':' so that a function name which is a prefix of a
; later one (e.g. the "_0" variants below) cannot match the wrong symbol.

define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmas_laneq_f32:
; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
entry:
  %extract = extractelement <4 x float> %v, i32 3
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

declare float @llvm.fma.f32(float, float, float)

define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64:
; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
entry:
  %extract.rhs = extractelement <1 x double> %v, i32 0
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}

declare double @llvm.fma.f64(double, double, double)

define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmss_lane_f32:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
entry:
  %extract.rhs = extractelement <2 x float> %v, i32 1
  %extract = fsub float -0.000000e+00, %extract.rhs
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
entry:
  %extract.rhs = extractelement <4 x float> %v, i32 3
  %extract = fsub float -0.000000e+00, %extract.rhs
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64:
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
entry:
  %extract.rhs = extractelement <2 x double> %v, i32 1
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}

; "_0" variants: the whole vector is negated first and the lane is extracted
; from the negated vector afterwards.

; The directives in this function were previously written with a misspelled
; prefix that none of the FileCheck invocations enable, so they were silently
; skipped; they now use the common prefix and are enforced.
define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64_0:
; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v
  %tmp1 = extractelement <1 x double> %tmp0, i32 0
  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %0
}

define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmss_lane_f32_0:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
entry:
  %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %tmp1 = extractelement <2 x float> %tmp0, i32 1
  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %0
}

define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32_0:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
entry:
  %tmp0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %tmp1 = extractelement <4 x float> %tmp0, i32 3
  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %0
}

define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64_0:
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
entry:
  %tmp0 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %tmp1 = extractelement <2 x double> %tmp0, i32 1
  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %0
}

; Widening multiply-accumulate by element: the smull intrinsic on %b and a
; replicated lane of %v, added to %a, expecting an indexed mlal.

define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; "high" variants: %shuffle.i selects the upper half of %b before the widening
; multiply, and the second-half form mlal2 is expected.

define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Widening multiply-subtract by element: the smull product is subtracted from
; %a and an indexed mlsl is expected.

define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; Signed widening multiply-subtract, lane 3 of a 4 x i32 vector: the replicated
; lane is multiplied with %b via smull and the product is subtracted from %a.
define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
entry:
  %lane = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %lane)
  %res = sub <2 x i64> %a, %prod
  ret <2 x i64> %res
}

; "high" forms: %hi takes the upper half of %b before the widening multiply,
; and the second-half instruction mlsl2 is expected.

define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
entry:
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %lane = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %lane)
  %res = sub <4 x i32> %a, %prod
  ret <4 x i32> %res
}

define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
entry:
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %lane = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %hi, <2 x i32> %lane)
  %res = sub <2 x i64> %a, %prod
  ret <2 x i64> %res
}

define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
entry:
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %lane = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %lane)
  %res = sub <4 x i32> %a, %prod
  ret <4 x i32> %res
}

define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
entry:
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %lane = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %hi, <2 x i32> %lane)
  %res = sub <2 x i64> %a, %prod
  ret <2 x i64> %res
}

; Unsigned widening multiply-accumulate by element: the umull intrinsic on %b
; and a replicated lane of %v, with the product added to %a.

define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
entry:
  %lane = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %lane)
  %res = add <4 x i32> %prod, %a
  ret <4 x i32> %res
}

define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
entry:
  %lane = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %lane)
  %res = add <2 x i64> %prod, %a
  ret <2 x i64> %res
}

define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32
7> 795 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 796 %add = add <4 x i32> %vmull2.i, %a 797 ret <4 x i32> %add 798} 799 800define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 801; CHECK-LABEL: test_vmlal_laneq_u32: 802; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 803entry: 804 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 805 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 806 %add = add <2 x i64> %vmull2.i, %a 807 ret <2 x i64> %add 808} 809 810define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 811; CHECK-LABEL: test_vmlal_high_lane_u16: 812; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 813entry: 814 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 815 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 816 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 817 %add = add <4 x i32> %vmull2.i, %a 818 ret <4 x i32> %add 819} 820 821define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 822; CHECK-LABEL: test_vmlal_high_lane_u32: 823; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 824entry: 825 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 826 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 827 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 828 %add = add <2 x i64> %vmull2.i, %a 829 ret <2 x i64> %add 830} 831 832define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 833; CHECK-LABEL: test_vmlal_high_laneq_u16: 834; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 
835entry: 836 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 837 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 838 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 839 %add = add <4 x i32> %vmull2.i, %a 840 ret <4 x i32> %add 841} 842 843define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 844; CHECK-LABEL: test_vmlal_high_laneq_u32: 845; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 846entry: 847 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 848 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 849 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 850 %add = add <2 x i64> %vmull2.i, %a 851 ret <2 x i64> %add 852} 853 854define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 855; CHECK-LABEL: test_vmlsl_lane_u16: 856; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 857entry: 858 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 859 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 860 %sub = sub <4 x i32> %a, %vmull2.i 861 ret <4 x i32> %sub 862} 863 864define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 865; CHECK-LABEL: test_vmlsl_lane_u32: 866; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 867entry: 868 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 869 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 870 %sub = sub <2 x i64> %a, %vmull2.i 871 ret <2 x i64> %sub 872} 873 874define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 875; CHECK-LABEL: 
test_vmlsl_laneq_u16: 876; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 877entry: 878 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 879 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 880 %sub = sub <4 x i32> %a, %vmull2.i 881 ret <4 x i32> %sub 882} 883 884define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 885; CHECK-LABEL: test_vmlsl_laneq_u32: 886; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 887entry: 888 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 889 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 890 %sub = sub <2 x i64> %a, %vmull2.i 891 ret <2 x i64> %sub 892} 893 894define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 895; CHECK-LABEL: test_vmlsl_high_lane_u16: 896; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 897entry: 898 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 899 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 900 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 901 %sub = sub <4 x i32> %a, %vmull2.i 902 ret <4 x i32> %sub 903} 904 905define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 906; CHECK-LABEL: test_vmlsl_high_lane_u32: 907; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 908entry: 909 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 910 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 911 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 912 %sub = sub <2 x i64> %a, %vmull2.i 913 ret <2 x i64> %sub 914} 915 916define <4 x 
i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 917; CHECK-LABEL: test_vmlsl_high_laneq_u16: 918; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 919entry: 920 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 921 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 922 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 923 %sub = sub <4 x i32> %a, %vmull2.i 924 ret <4 x i32> %sub 925} 926 927define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 928; CHECK-LABEL: test_vmlsl_high_laneq_u32: 929; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 930entry: 931 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 932 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 933 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 934 %sub = sub <2 x i64> %a, %vmull2.i 935 ret <2 x i64> %sub 936} 937 938define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { 939; CHECK-LABEL: test_vmull_lane_s16: 940; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 941entry: 942 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 943 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 944 ret <4 x i32> %vmull2.i 945} 946 947define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { 948; CHECK-LABEL: test_vmull_lane_s32: 949; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 950entry: 951 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 952 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 953 ret <2 x i64> %vmull2.i 954} 955 956define <4 x i32> 
@test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { 957; CHECK-LABEL: test_vmull_lane_u16: 958; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 959entry: 960 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 961 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 962 ret <4 x i32> %vmull2.i 963} 964 965define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { 966; CHECK-LABEL: test_vmull_lane_u32: 967; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 968entry: 969 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 970 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 971 ret <2 x i64> %vmull2.i 972} 973 974define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { 975; CHECK-LABEL: test_vmull_high_lane_s16: 976; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 977entry: 978 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 979 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 980 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 981 ret <4 x i32> %vmull2.i 982} 983 984define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { 985; CHECK-LABEL: test_vmull_high_lane_s32: 986; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 987entry: 988 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 989 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 990 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 991 ret <2 x i64> %vmull2.i 992} 993 994define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { 995; CHECK-LABEL: test_vmull_high_lane_u16: 996; 
CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 997entry: 998 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 999 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1000 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1001 ret <4 x i32> %vmull2.i 1002} 1003 1004define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { 1005; CHECK-LABEL: test_vmull_high_lane_u32: 1006; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1007entry: 1008 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1009 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1010 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1011 ret <2 x i64> %vmull2.i 1012} 1013 1014define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 1015; CHECK-LABEL: test_vmull_laneq_s16: 1016; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 1017entry: 1018 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1019 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1020 ret <4 x i32> %vmull2.i 1021} 1022 1023define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 1024; CHECK-LABEL: test_vmull_laneq_s32: 1025; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1026entry: 1027 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1028 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1029 ret <2 x i64> %vmull2.i 1030} 1031 1032define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { 1033; CHECK-LABEL: test_vmull_laneq_u16: 1034; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 1035entry: 1036 
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1037 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1038 ret <4 x i32> %vmull2.i 1039} 1040 1041define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { 1042; CHECK-LABEL: test_vmull_laneq_u32: 1043; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1044entry: 1045 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1046 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1047 ret <2 x i64> %vmull2.i 1048} 1049 1050define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 1051; CHECK-LABEL: test_vmull_high_laneq_s16: 1052; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1053entry: 1054 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1055 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1056 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1057 ret <4 x i32> %vmull2.i 1058} 1059 1060define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 1061; CHECK-LABEL: test_vmull_high_laneq_s32: 1062; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1063entry: 1064 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1065 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1066 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1067 ret <2 x i64> %vmull2.i 1068} 1069 1070define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { 1071; CHECK-LABEL: test_vmull_high_laneq_u16: 1072; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1073entry: 1074 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> 
undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1075 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1076 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1077 ret <4 x i32> %vmull2.i 1078} 1079 1080define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { 1081; CHECK-LABEL: test_vmull_high_laneq_u32: 1082; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1083entry: 1084 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1085 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1086 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1087 ret <2 x i64> %vmull2.i 1088} 1089 1090define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1091; CHECK-LABEL: test_vqdmlal_lane_s16: 1092; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1093entry: 1094 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1095 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1096 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 1097 ret <4 x i32> %vqdmlal4.i 1098} 1099 1100define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1101; CHECK-LABEL: test_vqdmlal_lane_s32: 1102; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1103entry: 1104 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1105 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1106 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 1107 ret <2 x i64> %vqdmlal4.i 1108} 1109 1110define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> 
%a, <8 x i16> %b, <4 x i16> %v) { 1111; CHECK-LABEL: test_vqdmlal_high_lane_s16: 1112; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1113entry: 1114 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1115 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1116 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1117 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 1118 ret <4 x i32> %vqdmlal4.i 1119} 1120 1121define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1122; CHECK-LABEL: test_vqdmlal_high_lane_s32: 1123; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1124entry: 1125 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1126 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1127 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1128 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 1129 ret <2 x i64> %vqdmlal4.i 1130} 1131 1132define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1133; CHECK-LABEL: test_vqdmlsl_lane_s16: 1134; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1135entry: 1136 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1137 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1138 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 1139 ret <4 x i32> %vqdmlsl4.i 1140} 1141 1142define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1143; CHECK-LABEL: test_vqdmlsl_lane_s32: 1144; CHECK: qdmlsl 
{{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1145entry: 1146 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1147 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1148 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 1149 ret <2 x i64> %vqdmlsl4.i 1150} 1151 1152define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1153; CHECK-LABEL: test_vqdmlsl_high_lane_s16: 1154; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1155entry: 1156 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1157 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1158 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1159 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 1160 ret <4 x i32> %vqdmlsl4.i 1161} 1162 1163define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1164; CHECK-LABEL: test_vqdmlsl_high_lane_s32: 1165; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1166entry: 1167 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1168 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1169 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1170 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 1171 ret <2 x i64> %vqdmlsl4.i 1172} 1173 1174define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1175; CHECK-LABEL: test_vqdmull_lane_s16: 1176; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1177entry: 1178 %shuffle = shufflevector <4 x i16> %v, <4 x i16> 
undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1179 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1180 ret <4 x i32> %vqdmull2.i 1181} 1182 1183define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1184; CHECK-LABEL: test_vqdmull_lane_s32: 1185; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1186entry: 1187 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1188 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1189 ret <2 x i64> %vqdmull2.i 1190} 1191 1192define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 1193; CHECK-LABEL: test_vqdmull_laneq_s16: 1194; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1195entry: 1196 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1197 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1198 ret <4 x i32> %vqdmull2.i 1199} 1200 1201define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 1202; CHECK-LABEL: test_vqdmull_laneq_s32: 1203; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1204entry: 1205 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1206 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1207 ret <2 x i64> %vqdmull2.i 1208} 1209 1210define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { 1211; CHECK-LABEL: test_vqdmull_high_lane_s16: 1212; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1213entry: 1214 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1215 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1216 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> 
%shuffle.i, <4 x i16> %shuffle) 1217 ret <4 x i32> %vqdmull2.i 1218} 1219 1220define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { 1221; CHECK-LABEL: test_vqdmull_high_lane_s32: 1222; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1223entry: 1224 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1225 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1226 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1227 ret <2 x i64> %vqdmull2.i 1228} 1229 1230define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 1231; CHECK-LABEL: test_vqdmull_high_laneq_s16: 1232; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1233entry: 1234 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1235 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1236 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1237 ret <4 x i32> %vqdmull2.i 1238} 1239 1240define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 1241; CHECK-LABEL: test_vqdmull_high_laneq_s32: 1242; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1243entry: 1244 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1245 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1246 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1247 ret <2 x i64> %vqdmull2.i 1248} 1249 1250define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1251; CHECK-LABEL: test_vqdmulh_lane_s16: 1252; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1253entry: 1254 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, 
i32 3, i32 3> 1255 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) 1256 ret <4 x i16> %vqdmulh2.i 1257} 1258 1259define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { 1260; CHECK-LABEL: test_vqdmulhq_lane_s16: 1261; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1262entry: 1263 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1264 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) 1265 ret <8 x i16> %vqdmulh2.i 1266} 1267 1268define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1269; CHECK-LABEL: test_vqdmulh_lane_s32: 1270; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1271entry: 1272 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1273 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) 1274 ret <2 x i32> %vqdmulh2.i 1275} 1276 1277define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { 1278; CHECK-LABEL: test_vqdmulhq_lane_s32: 1279; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1280entry: 1281 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1282 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) 1283 ret <4 x i32> %vqdmulh2.i 1284} 1285 1286define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1287; CHECK-LABEL: test_vqrdmulh_lane_s16: 1288; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1289entry: 1290 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1291 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) 1292 ret <4 x i16> %vqrdmulh2.i 1293} 1294 1295define <8 x i16> 
@test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { 1296; CHECK-LABEL: test_vqrdmulhq_lane_s16: 1297; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1298entry: 1299 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1300 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) 1301 ret <8 x i16> %vqrdmulh2.i 1302} 1303 1304define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1305; CHECK-LABEL: test_vqrdmulh_lane_s32: 1306; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1307entry: 1308 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1309 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) 1310 ret <2 x i32> %vqrdmulh2.i 1311} 1312 1313define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { 1314; CHECK-LABEL: test_vqrdmulhq_lane_s32: 1315; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1316entry: 1317 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1318 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) 1319 ret <4 x i32> %vqrdmulh2.i 1320} 1321 1322define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { 1323; CHECK-LABEL: test_vmul_lane_f32: 1324; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1325; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] 1326; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 1327; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1328entry: 1329 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 1330 %mul = fmul <2 x float> %shuffle, %a 1331 ret <2 x float> %mul 1332} 1333 1334define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { 1335; CHECK-LABEL: 
test_vmul_lane_f64: 1336; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 1337entry: 1338 %0 = bitcast <1 x double> %a to <8 x i8> 1339 %1 = bitcast <8 x i8> %0 to double 1340 %extract = extractelement <1 x double> %v, i32 0 1341 %2 = fmul double %1, %extract 1342 %3 = insertelement <1 x double> undef, double %2, i32 0 1343 ret <1 x double> %3 1344} 1345 1346define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { 1347; CHECK-LABEL: test_vmulq_lane_f32: 1348; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1349; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] 1350; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 1351; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1352entry: 1353 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1354 %mul = fmul <4 x float> %shuffle, %a 1355 ret <4 x float> %mul 1356} 1357 1358define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { 1359; CHECK-LABEL: test_vmulq_lane_f64: 1360; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1361; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] 1362; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d 1363; EXYNOSM3: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1364entry: 1365 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 1366 %mul = fmul <2 x double> %shuffle, %a 1367 ret <2 x double> %mul 1368} 1369 1370define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { 1371; CHECK-LABEL: test_vmul_laneq_f32: 1372; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1373; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] 1374; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 1375; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1376entry: 1377 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> 1378 %mul = fmul <2 x float> 
%shuffle, %a 1379 ret <2 x float> %mul 1380} 1381 1382define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { 1383; CHECK-LABEL: test_vmul_laneq_f64: 1384; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 1385entry: 1386 %0 = bitcast <1 x double> %a to <8 x i8> 1387 %1 = bitcast <8 x i8> %0 to double 1388 %extract = extractelement <2 x double> %v, i32 1 1389 %2 = fmul double %1, %extract 1390 %3 = insertelement <1 x double> undef, double %2, i32 0 1391 ret <1 x double> %3 1392} 1393 1394define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { 1395; CHECK-LABEL: test_vmulq_laneq_f32: 1396; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1397; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] 1398; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 1399; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1400entry: 1401 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1402 %mul = fmul <4 x float> %shuffle, %a 1403 ret <4 x float> %mul 1404} 1405 1406define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { 1407; CHECK-LABEL: test_vmulq_laneq_f64: 1408; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 1409; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] 1410; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d 1411; EXYNOSM3: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 1412entry: 1413 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> 1414 %mul = fmul <2 x double> %shuffle, %a 1415 ret <2 x double> %mul 1416} 1417 1418define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { 1419; CHECK-LABEL: test_vmulx_lane_f32: 1420; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1421; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] 1422; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 1423; EXYNOSM3: mulx {{v[0-9]+}}.2s, 
{{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1424entry: 1425 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 1426 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 1427 ret <2 x float> %vmulx2.i 1428} 1429 1430define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { 1431; CHECK-LABEL: test_vmulxq_lane_f32: 1432; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1433; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] 1434; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 1435; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1436entry: 1437 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1438 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 1439 ret <4 x float> %vmulx2.i 1440} 1441 1442define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { 1443; CHECK-LABEL: test_vmulxq_lane_f64: 1444; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1445; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] 1446; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d 1447; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1448entry: 1449 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 1450 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 1451 ret <2 x double> %vmulx2.i 1452} 1453 1454define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { 1455; CHECK-LABEL: test_vmulx_laneq_f32: 1456; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1457; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] 1458; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 1459; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1460entry: 1461 %shuffle = shufflevector <4 x float> %v, <4 x 
float> undef, <2 x i32> <i32 3, i32 3> 1462 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 1463 ret <2 x float> %vmulx2.i 1464} 1465 1466define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { 1467; CHECK-LABEL: test_vmulxq_laneq_f32: 1468; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1469; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] 1470; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 1471; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1472entry: 1473 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1474 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 1475 ret <4 x float> %vmulx2.i 1476} 1477 1478define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { 1479; CHECK-LABEL: test_vmulxq_laneq_f64: 1480; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 1481; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] 1482; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d 1483; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 1484entry: 1485 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> 1486 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 1487 ret <2 x double> %vmulx2.i 1488} 1489 1490define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 1491; CHECK-LABEL: test_vmla_lane_s16_0: 1492; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1493entry: 1494 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1495 %mul = mul <4 x i16> %shuffle, %b 1496 %add = add <4 x i16> %mul, %a 1497 ret <4 x i16> %add 1498} 1499 1500define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 1501; CHECK-LABEL: 
test_vmlaq_lane_s16_0: 1502; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1503entry: 1504 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1505 %mul = mul <8 x i16> %shuffle, %b 1506 %add = add <8 x i16> %mul, %a 1507 ret <8 x i16> %add 1508} 1509 1510define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 1511; CHECK-LABEL: test_vmla_lane_s32_0: 1512; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1513entry: 1514 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1515 %mul = mul <2 x i32> %shuffle, %b 1516 %add = add <2 x i32> %mul, %a 1517 ret <2 x i32> %add 1518} 1519 1520define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 1521; CHECK-LABEL: test_vmlaq_lane_s32_0: 1522; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1523entry: 1524 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1525 %mul = mul <4 x i32> %shuffle, %b 1526 %add = add <4 x i32> %mul, %a 1527 ret <4 x i32> %add 1528} 1529 1530define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1531; CHECK-LABEL: test_vmla_laneq_s16_0: 1532; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1533entry: 1534 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1535 %mul = mul <4 x i16> %shuffle, %b 1536 %add = add <4 x i16> %mul, %a 1537 ret <4 x i16> %add 1538} 1539 1540define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 1541; CHECK-LABEL: test_vmlaq_laneq_s16_0: 1542; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1543entry: 1544 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1545 %mul = mul <8 x i16> %shuffle, %b 1546 %add = add <8 x i16> %mul, %a 1547 ret <8 x i16> %add 1548} 1549 1550define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 
1551; CHECK-LABEL: test_vmla_laneq_s32_0: 1552; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1553entry: 1554 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1555 %mul = mul <2 x i32> %shuffle, %b 1556 %add = add <2 x i32> %mul, %a 1557 ret <2 x i32> %add 1558} 1559 1560define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 1561; CHECK-LABEL: test_vmlaq_laneq_s32_0: 1562; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1563entry: 1564 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1565 %mul = mul <4 x i32> %shuffle, %b 1566 %add = add <4 x i32> %mul, %a 1567 ret <4 x i32> %add 1568} 1569 1570define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 1571; CHECK-LABEL: test_vmls_lane_s16_0: 1572; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1573entry: 1574 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1575 %mul = mul <4 x i16> %shuffle, %b 1576 %sub = sub <4 x i16> %a, %mul 1577 ret <4 x i16> %sub 1578} 1579 1580define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 1581; CHECK-LABEL: test_vmlsq_lane_s16_0: 1582; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1583entry: 1584 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1585 %mul = mul <8 x i16> %shuffle, %b 1586 %sub = sub <8 x i16> %a, %mul 1587 ret <8 x i16> %sub 1588} 1589 1590define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 1591; CHECK-LABEL: test_vmls_lane_s32_0: 1592; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1593entry: 1594 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1595 %mul = mul <2 x i32> %shuffle, %b 1596 %sub = sub <2 x i32> %a, %mul 1597 ret <2 x i32> %sub 1598} 1599 1600define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 
x i32> %v) { 1601; CHECK-LABEL: test_vmlsq_lane_s32_0: 1602; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1603entry: 1604 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1605 %mul = mul <4 x i32> %shuffle, %b 1606 %sub = sub <4 x i32> %a, %mul 1607 ret <4 x i32> %sub 1608} 1609 1610define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1611; CHECK-LABEL: test_vmls_laneq_s16_0: 1612; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1613entry: 1614 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1615 %mul = mul <4 x i16> %shuffle, %b 1616 %sub = sub <4 x i16> %a, %mul 1617 ret <4 x i16> %sub 1618} 1619 1620define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 1621; CHECK-LABEL: test_vmlsq_laneq_s16_0: 1622; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1623entry: 1624 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1625 %mul = mul <8 x i16> %shuffle, %b 1626 %sub = sub <8 x i16> %a, %mul 1627 ret <8 x i16> %sub 1628} 1629 1630define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 1631; CHECK-LABEL: test_vmls_laneq_s32_0: 1632; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1633entry: 1634 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1635 %mul = mul <2 x i32> %shuffle, %b 1636 %sub = sub <2 x i32> %a, %mul 1637 ret <2 x i32> %sub 1638} 1639 1640define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 1641; CHECK-LABEL: test_vmlsq_laneq_s32_0: 1642; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1643entry: 1644 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1645 %mul = mul <4 x i32> %shuffle, %b 1646 %sub = sub <4 x i32> %a, %mul 1647 ret <4 x i32> %sub 1648} 1649 1650define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> 
%a, <4 x i16> %v) { 1651; CHECK-LABEL: test_vmul_lane_s16_0: 1652; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1653entry: 1654 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1655 %mul = mul <4 x i16> %shuffle, %a 1656 ret <4 x i16> %mul 1657} 1658 1659define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 1660; CHECK-LABEL: test_vmulq_lane_s16_0: 1661; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1662entry: 1663 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1664 %mul = mul <8 x i16> %shuffle, %a 1665 ret <8 x i16> %mul 1666} 1667 1668define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 1669; CHECK-LABEL: test_vmul_lane_s32_0: 1670; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1671entry: 1672 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1673 %mul = mul <2 x i32> %shuffle, %a 1674 ret <2 x i32> %mul 1675} 1676 1677define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 1678; CHECK-LABEL: test_vmulq_lane_s32_0: 1679; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1680entry: 1681 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1682 %mul = mul <4 x i32> %shuffle, %a 1683 ret <4 x i32> %mul 1684} 1685 1686define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { 1687; CHECK-LABEL: test_vmul_lane_u16_0: 1688; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1689entry: 1690 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1691 %mul = mul <4 x i16> %shuffle, %a 1692 ret <4 x i16> %mul 1693} 1694 1695define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { 1696; CHECK-LABEL: test_vmulq_lane_u16_0: 1697; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1698entry: 1699 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1700 
%mul = mul <8 x i16> %shuffle, %a 1701 ret <8 x i16> %mul 1702} 1703 1704define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { 1705; CHECK-LABEL: test_vmul_lane_u32_0: 1706; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1707entry: 1708 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1709 %mul = mul <2 x i32> %shuffle, %a 1710 ret <2 x i32> %mul 1711} 1712 1713define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { 1714; CHECK-LABEL: test_vmulq_lane_u32_0: 1715; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1716entry: 1717 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1718 %mul = mul <4 x i32> %shuffle, %a 1719 ret <4 x i32> %mul 1720} 1721 1722define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { 1723; CHECK-LABEL: test_vmul_laneq_s16_0: 1724; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1725entry: 1726 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1727 %mul = mul <4 x i16> %shuffle, %a 1728 ret <4 x i16> %mul 1729} 1730 1731define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { 1732; CHECK-LABEL: test_vmulq_laneq_s16_0: 1733; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1734entry: 1735 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1736 %mul = mul <8 x i16> %shuffle, %a 1737 ret <8 x i16> %mul 1738} 1739 1740define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { 1741; CHECK-LABEL: test_vmul_laneq_s32_0: 1742; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1743entry: 1744 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1745 %mul = mul <2 x i32> %shuffle, %a 1746 ret <2 x i32> %mul 1747} 1748 1749define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { 1750; CHECK-LABEL: test_vmulq_laneq_s32_0: 1751; CHECK: mul {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1752entry: 1753 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1754 %mul = mul <4 x i32> %shuffle, %a 1755 ret <4 x i32> %mul 1756} 1757 1758define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { 1759; CHECK-LABEL: test_vmul_laneq_u16_0: 1760; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1761entry: 1762 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1763 %mul = mul <4 x i16> %shuffle, %a 1764 ret <4 x i16> %mul 1765} 1766 1767define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { 1768; CHECK-LABEL: test_vmulq_laneq_u16_0: 1769; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1770entry: 1771 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1772 %mul = mul <8 x i16> %shuffle, %a 1773 ret <8 x i16> %mul 1774} 1775 1776define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { 1777; CHECK-LABEL: test_vmul_laneq_u32_0: 1778; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1779entry: 1780 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1781 %mul = mul <2 x i32> %shuffle, %a 1782 ret <2 x i32> %mul 1783} 1784 1785define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { 1786; CHECK-LABEL: test_vmulq_laneq_u32_0: 1787; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1788entry: 1789 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1790 %mul = mul <4 x i32> %shuffle, %a 1791 ret <4 x i32> %mul 1792} 1793 1794define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 1795; CHECK-LABEL: test_vfma_lane_f32_0: 1796; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1797; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] 1798; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 1799; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, 
{{v[0-9]+}}.s[0] 1800entry: 1801 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer 1802 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1803 ret <2 x float> %0 1804} 1805 1806define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 1807; CHECK-LABEL: test_vfmaq_lane_f32_0: 1808; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1809; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] 1810; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 1811; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1812entry: 1813 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer 1814 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1815 ret <4 x float> %0 1816} 1817 1818define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { 1819; CHECK-LABEL: test_vfma_laneq_f32_0: 1820; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1821; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] 1822; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 1823; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1824entry: 1825 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer 1826 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1827 ret <2 x float> %0 1828} 1829 1830define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 1831; CHECK-LABEL: test_vfmaq_laneq_f32_0: 1832; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1833; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] 1834; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 1835; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1836entry: 1837 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer 
1838 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1839 ret <4 x float> %0 1840} 1841 1842define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 1843; CHECK-LABEL: test_vfms_lane_f32_0: 1844; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1845; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] 1846; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 1847; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1848entry: 1849 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 1850 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer 1851 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1852 ret <2 x float> %0 1853} 1854 1855define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 1856; CHECK-LABEL: test_vfmsq_lane_f32_0: 1857; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1858; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] 1859; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 1860; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1861entry: 1862 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 1863 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer 1864 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1865 ret <4 x float> %0 1866} 1867 1868define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { 1869; CHECK-LABEL: test_vfms_laneq_f32_0: 1870; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1871; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] 1872; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 1873; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1874entry: 1875 %sub = fsub <4 x float> <float -0.000000e+00, 
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 1876 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer 1877 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1878 ret <2 x float> %0 1879} 1880 1881define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 1882; CHECK-LABEL: test_vfmsq_laneq_f32_0: 1883; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1884; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] 1885; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 1886; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1887entry: 1888 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 1889 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer 1890 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1891 ret <4 x float> %0 1892} 1893 1894define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 1895; CHECK-LABEL: test_vfmaq_laneq_f64_0: 1896; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1897; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] 1898; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d 1899; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1900entry: 1901 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer 1902 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 1903 ret <2 x double> %0 1904} 1905 1906define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 1907; CHECK-LABEL: test_vfmsq_laneq_f64_0: 1908; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1909; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] 1910; EXYNOSM1: fmls {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d, [[V]].2d 1911; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1912entry: 1913 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v 1914 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer 1915 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 1916 ret <2 x double> %0 1917} 1918 1919define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1920; CHECK-LABEL: test_vmlal_lane_s16_0: 1921; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1922entry: 1923 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1924 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1925 %add = add <4 x i32> %vmull2.i, %a 1926 ret <4 x i32> %add 1927} 1928 1929define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1930; CHECK-LABEL: test_vmlal_lane_s32_0: 1931; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1932entry: 1933 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1934 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1935 %add = add <2 x i64> %vmull2.i, %a 1936 ret <2 x i64> %add 1937} 1938 1939define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 1940; CHECK-LABEL: test_vmlal_laneq_s16_0: 1941; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1942entry: 1943 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1944 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1945 %add = add <4 x i32> %vmull2.i, %a 1946 ret <4 x i32> %add 1947} 1948 1949define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 1950; CHECK-LABEL: test_vmlal_laneq_s32_0: 1951; CHECK: mlal {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1952entry: 1953 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1954 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1955 %add = add <2 x i64> %vmull2.i, %a 1956 ret <2 x i64> %add 1957} 1958 1959define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1960; CHECK-LABEL: test_vmlal_high_lane_s16_0: 1961; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1962entry: 1963 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1964 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1965 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1966 %add = add <4 x i32> %vmull2.i, %a 1967 ret <4 x i32> %add 1968} 1969 1970define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1971; CHECK-LABEL: test_vmlal_high_lane_s32_0: 1972; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1973entry: 1974 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1975 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1976 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1977 %add = add <2 x i64> %vmull2.i, %a 1978 ret <2 x i64> %add 1979} 1980 1981define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 1982; CHECK-LABEL: test_vmlal_high_laneq_s16_0: 1983; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1984entry: 1985 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1986 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1987 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 
1988 %add = add <4 x i32> %vmull2.i, %a 1989 ret <4 x i32> %add 1990} 1991 1992define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 1993; CHECK-LABEL: test_vmlal_high_laneq_s32_0: 1994; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1995entry: 1996 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1997 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1998 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1999 %add = add <2 x i64> %vmull2.i, %a 2000 ret <2 x i64> %add 2001} 2002 2003define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2004; CHECK-LABEL: test_vmlsl_lane_s16_0: 2005; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2006entry: 2007 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2008 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2009 %sub = sub <4 x i32> %a, %vmull2.i 2010 ret <4 x i32> %sub 2011} 2012 2013define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2014; CHECK-LABEL: test_vmlsl_lane_s32_0: 2015; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2016entry: 2017 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2018 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2019 %sub = sub <2 x i64> %a, %vmull2.i 2020 ret <2 x i64> %sub 2021} 2022 2023define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 2024; CHECK-LABEL: test_vmlsl_laneq_s16_0: 2025; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2026entry: 2027 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2028 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 
%sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; Signed multiply-subtract long, lane 0 taken from a 128-bit vector:
; %a - smull(%b, splat of %v[0]).
; The full mnemonic is matched so an unsigned umlsl cannot satisfy the test.
define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; High-half variant: %a - smull(elements 4..7 of %b, splat of %v[0]).
define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; High-half variant: %a - smull(elements 2..3 of %b, splat of %v[0]).
define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; High-half variant with the lane taken from a 128-bit vector.
define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16>
undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; Signed multiply-subtract long, high half, lane 0:
; %a - smull(elements 2..3 of %b, splat of %v[0]).
; Full mnemonics are matched in this group so that the signed and unsigned
; forms cannot satisfy each other's tests.
define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Unsigned multiply-accumulate long, lane 0: %a + umull(%b, splat of %v[0]).
define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Unsigned multiply-accumulate long, lane 0 taken from a 128-bit vector.
define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; High-half variant: %a + umull(elements 4..7 of %b, splat of %v[0]).
define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; High-half variant: %a + umull(elements 2..3 of %b, splat of %v[0]).
define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32>
@test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; Unsigned multiply-accumulate long, high half, lane 0 of a 128-bit vector:
; %a + umull(elements 4..7 of %b, splat of %v[0]).
; The full mnemonic is matched so a signed smlal2 cannot satisfy the test.
; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; Same as above for 32-bit source elements (elements 2..3 of %b).
define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Unsigned multiply-subtract long, lane 0: %a - umull(%b, splat of %v[0]).
define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>
%b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Unsigned multiply-subtract long, lane 0 taken from a 128-bit vector:
; %a - umull(%b, splat of %v[0]).
; The full mnemonic is matched so a signed smlsl cannot satisfy the test.
define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; High-half variant: %a - umull(elements 4..7 of %b, splat of %v[0]).
define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; High-half variant: %a - umull(elements 2..3 of %b, splat of %v[0]).
define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x
i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Unsigned multiply-subtract long, high half, lane 0 of a 128-bit vector:
; %a - umull(elements 4..7 of %b, splat of %v[0]).
; Full mnemonics are matched in this group so that the signed and unsigned
; forms cannot satisfy each other's tests.
define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; Same as above for 32-bit source elements (elements 2..3 of %b).
define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Signed multiply long, lane 0: smull(%a, splat of %v[0]).
define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Unsigned multiply long, lane 0: umull(%a, splat of %v[0]).
define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32_0:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Signed multiply long, high half: smull(elements 4..7 of %a, splat of %v[0]).
define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16_0:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Signed multiply long, high half: smull(elements 2..3 of %a, splat of %v[0]).
define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32_0:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32>
undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Unsigned multiply long, high half: umull(elements 4..7 of %a, splat of %v[0]).
; The full mnemonic is matched so a signed smull2 cannot satisfy the test.
define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16_0:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Unsigned multiply long, high half: umull(elements 2..3 of %a, splat of %v[0]).
define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32_0:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Signed multiply long, lane 0 taken from a 128-bit vector.
define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x
i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Unsigned multiply long, lane 0 taken from a 128-bit vector:
; umull(%a, splat of %v[0]).
; The full mnemonic is matched so a signed smull cannot satisfy the test.
define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32_0:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Signed multiply long, high half, lane 0 of a 128-bit vector.
define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16_0:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Same as above for 32-bit source elements (elements 2..3 of %a).
define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32_0:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
ret <2 x i64> %vmull2.i
}

; Unsigned multiply long, high half, lane 0 of a 128-bit vector:
; umull(elements 4..7 of %a, splat of %v[0]).
; The full mnemonic is matched so a signed smull2 cannot satisfy the test.
define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16_0:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Same as above for 32-bit source elements (elements 2..3 of %a).
define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32_0:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Saturating doubling multiply-accumulate long, lane 0:
; sqadd(%a, sqdmull(%b, splat of %v[0])).
define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16_0:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32_0:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail
call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

; Saturating doubling multiply-accumulate long, high half, lane 0:
; sqadd(%a, sqdmull(elements 4..7 of %b, splat of %v[0])).
; The full sqdmlal2 mnemonic is matched rather than a suffix of it.
define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

; Same as above for 32-bit source elements (elements 2..3 of %b).
define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

; Saturating doubling multiply-subtract long, lane 0:
; sqsub(%a, sqdmull(%b, splat of %v[0])).
define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i =
tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

; Saturating doubling multiply-subtract long, lane 0:
; sqsub(%a, sqdmull(%b, splat of %v[0])).
; The full sqdmlsl mnemonic is matched rather than a suffix of it.
define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; High-half variant: sqsub(%a, sqdmull(elements 4..7 of %b, splat of %v[0])).
define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

; High-half variant: sqsub(%a, sqdmull(elements 2..3 of %b, splat of %v[0])).
define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x
i64> %vqdmlsl4.i
}

; Saturating doubling multiply long, lane 0: sqdmull(%a, splat of %v[0]).
; The full sqdmull mnemonic is matched rather than a suffix of it.
define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16_0:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32_0:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Saturating doubling multiply long, lane 0 taken from a 128-bit vector.
define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16_0:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; Same as above for 32-bit source elements widening to 64 bits.
define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32_0:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; High-half variant: sqdmull(elements 4..7 of %a, splat of %v[0]).
define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i
= shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; Saturating doubling multiply long, high half, lane 0:
; sqdmull(elements 2..3 of %a, splat of %v[0]).
; The full sqdmull2 mnemonic is matched rather than a suffix of it.
define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; High-half variant with the lane taken from a 128-bit vector (16-bit elements).
define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; High-half variant with the lane taken from a 128-bit vector (32-bit elements).
define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i16>
@test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; Saturating doubling multiply returning high half, lane 0:
; sqdmulh(%a, splat of %v[0]).
; The full sqdmulh mnemonic is matched rather than a suffix of it.
; CHECK-LABEL: test_vqdmulh_lane_s16_0:
; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}

; 128-bit variant: lane 0 of a 64-bit %v is splatted to eight elements.
define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}

; 64-bit variant with 32-bit elements.
define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32_0:
; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}

; 128-bit variant with 32-bit elements.
define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}

; Saturating rounding doubling multiply returning high half, lane 0:
; sqrdmulh(%a, splat of %v[0]).
define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32>
zeroinitializer
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}

; Saturating rounding doubling multiply returning high half, lane 0,
; 128-bit result: sqrdmulh(%a, splat of %v[0]).
; The full sqrdmulh mnemonic is matched rather than a suffix of it.
define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}

; 64-bit variant with 32-bit elements.
define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}

; 128-bit variant with 32-bit elements.
define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}

; Floating-point multiply by lane 0. The default and exynos-m3 runs expect the
; by-element fmul; the exynos-m1 run expects a dup of the lane followed by a
; vector-by-vector fmul.
define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32_0:
; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s
; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x
float> %shuffle, %a
  ret <2 x float> %mul
}

; Floating-point multiply by lane 0, 128-bit result. The default and exynos-m3
; runs expect the by-element fmul; the exynos-m1 run expects dup + vector fmul.
define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32_0:
; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s
; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

; 64-bit result with the lane taken from a 128-bit vector.
define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32_0:
; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s
; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; Scalar double multiply by element 0 of %v; the same by-element form is
; expected for every run line.
define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64_0:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <2 x double> %v, i32 0
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

; 128-bit result with the lane taken from a 128-bit vector.
define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32_0:
; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s
; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

; Double-precision vector multiply by lane 0.
define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64_0:
; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d
; EXYNOSM3: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

; Floating-point multiply extended by lane 0: fmulx(%a, splat of %v[0]).
; The full fmulx mnemonic is matched rather than a suffix of it.
define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32_0:
; GENERIC: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
; EXYNOSM1: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s
; EXYNOSM3: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

; 128-bit variant: lane 0 of a 64-bit %v is splatted to four elements.
define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32_0:
; GENERIC: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
; EXYNOSM1: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s
; EXYNOSM3: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float>
%shuffle) 2732 ret <4 x float> %vmulx2.i 2733} 2734 2735define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { 2736; CHECK-LABEL: test_vmulxq_lane_f64_0: 2737; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 2738; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] 2739; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d 2740; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 2741entry: 2742 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 2743 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 2744 ret <2 x double> %vmulx2.i 2745} 2746 2747define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { 2748; CHECK-LABEL: test_vmulx_laneq_f32_0: 2749; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2750; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] 2751; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 2752; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2753entry: 2754 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer 2755 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 2756 ret <2 x float> %vmulx2.i 2757} 2758 2759define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { 2760; CHECK-LABEL: test_vmulxq_laneq_f32_0: 2761; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2762; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] 2763; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 2764; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2765entry: 2766 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer 2767 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 2768 ret <4 x float> %vmulx2.i 2769} 2770 2771define <2 x double> 
@test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { 2772; CHECK-LABEL: test_vmulxq_laneq_f64_0: 2773; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 2774; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] 2775; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d 2776; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 2777entry: 2778 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer 2779 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 2780 ret <2 x double> %vmulx2.i 2781} 2782 2783define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) { 2784; CHECK-LABEL: optimize_dup: 2785; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 2786; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 2787; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] 2788; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 2789; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 2790; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 2791; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 2792entry: 2793 %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 2794 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) 2795 %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 2796 %1 = fmul <4 x float> %lane2, %c 2797 %s = fsub <4 x float> %0, %1 2798 ret <4 x float> %s 2799} 2800 2801define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) { 2802; CHECK-LABEL: no_optimize_dup: 2803; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 2804; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 2805; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] 2806; EXYNOSM1: fmla 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s 2807; EXYNOSM1: dup [[W:v[0-9]+]].4s, {{v[0-9]+}}.s[1] 2808; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[W]].4s 2809; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 2810; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 2811entry: 2812 %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 2813 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) 2814 %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 2815 %1 = fmul <4 x float> %lane2, %c 2816 %s = fsub <4 x float> %0, %1 2817 ret <4 x float> %s 2818} 2819 2820define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_a57(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="cortex-a57" { 2821; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57: 2822; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 2823entry: 2824 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 2825 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 2826 ret <2 x float> %0 2827} 2828 2829define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m1(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m1" { 2830; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m1: 2831; GENERIC: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] 2832; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s 2833entry: 2834 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 2835 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 2836 ret <2 x float> %0 2837} 2838 2839define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m3(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m3" { 2840; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3: 
2841; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 2842entry: 2843 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 2844 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 2845 ret <2 x float> %0 2846} 2847