1; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s 2 3declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) 4 5declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) 6 7declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) 8 9declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) 10 11declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) 12 13declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) 14 15declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) 16 17declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) 18 19declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) 20 21declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) 22 23declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) 24 25declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) 26 27declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) 28 29declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) 30 31declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) 32 33declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) 34 35declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) 36 37declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) 38 39declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) 40 41declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) 42 43declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) 44 45define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 46; CHECK-LABEL: test_vmla_lane_s16: 47; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 48; CHECK-NEXT: ret 49entry: 50 %shuffle = shufflevector <4 x 
i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 51 %mul = mul <4 x i16> %shuffle, %b 52 %add = add <4 x i16> %mul, %a 53 ret <4 x i16> %add 54} 55 56define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 57; CHECK-LABEL: test_vmlaq_lane_s16: 58; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 59; CHECK-NEXT: ret 60entry: 61 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 62 %mul = mul <8 x i16> %shuffle, %b 63 %add = add <8 x i16> %mul, %a 64 ret <8 x i16> %add 65} 66 67define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 68; CHECK-LABEL: test_vmla_lane_s32: 69; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 70; CHECK-NEXT: ret 71entry: 72 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 73 %mul = mul <2 x i32> %shuffle, %b 74 %add = add <2 x i32> %mul, %a 75 ret <2 x i32> %add 76} 77 78define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 79; CHECK-LABEL: test_vmlaq_lane_s32: 80; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 81; CHECK-NEXT: ret 82entry: 83 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 84 %mul = mul <4 x i32> %shuffle, %b 85 %add = add <4 x i32> %mul, %a 86 ret <4 x i32> %add 87} 88 89define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 90; CHECK-LABEL: test_vmla_laneq_s16: 91; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 92; CHECK-NEXT: ret 93entry: 94 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 95 %mul = mul <4 x i16> %shuffle, %b 96 %add = add <4 x i16> %mul, %a 97 ret <4 x i16> %add 98} 99 100define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 101; CHECK-LABEL: test_vmlaq_laneq_s16: 102; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, 
{{v[0-9]+}}.h[7] 103; CHECK-NEXT: ret 104entry: 105 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 106 %mul = mul <8 x i16> %shuffle, %b 107 %add = add <8 x i16> %mul, %a 108 ret <8 x i16> %add 109} 110 111define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 112; CHECK-LABEL: test_vmla_laneq_s32: 113; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 114; CHECK-NEXT: ret 115entry: 116 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 117 %mul = mul <2 x i32> %shuffle, %b 118 %add = add <2 x i32> %mul, %a 119 ret <2 x i32> %add 120} 121 122define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 123; CHECK-LABEL: test_vmlaq_laneq_s32: 124; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 125; CHECK-NEXT: ret 126entry: 127 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 128 %mul = mul <4 x i32> %shuffle, %b 129 %add = add <4 x i32> %mul, %a 130 ret <4 x i32> %add 131} 132 133define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 134; CHECK-LABEL: test_vmls_lane_s16: 135; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 136; CHECK-NEXT: ret 137entry: 138 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 139 %mul = mul <4 x i16> %shuffle, %b 140 %sub = sub <4 x i16> %a, %mul 141 ret <4 x i16> %sub 142} 143 144define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 145; CHECK-LABEL: test_vmlsq_lane_s16: 146; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 147; CHECK-NEXT: ret 148entry: 149 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 150 %mul = mul <8 x i16> %shuffle, %b 151 %sub = sub <8 x i16> %a, %mul 152 ret <8 x i16> %sub 153} 154 155define <2 x 
i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 156; CHECK-LABEL: test_vmls_lane_s32: 157; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 158; CHECK-NEXT: ret 159entry: 160 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 161 %mul = mul <2 x i32> %shuffle, %b 162 %sub = sub <2 x i32> %a, %mul 163 ret <2 x i32> %sub 164} 165 166define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 167; CHECK-LABEL: test_vmlsq_lane_s32: 168; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 169; CHECK-NEXT: ret 170entry: 171 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 172 %mul = mul <4 x i32> %shuffle, %b 173 %sub = sub <4 x i32> %a, %mul 174 ret <4 x i32> %sub 175} 176 177define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 178; CHECK-LABEL: test_vmls_laneq_s16: 179; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 180; CHECK-NEXT: ret 181entry: 182 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 183 %mul = mul <4 x i16> %shuffle, %b 184 %sub = sub <4 x i16> %a, %mul 185 ret <4 x i16> %sub 186} 187 188define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 189; CHECK-LABEL: test_vmlsq_laneq_s16: 190; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 191; CHECK-NEXT: ret 192entry: 193 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 194 %mul = mul <8 x i16> %shuffle, %b 195 %sub = sub <8 x i16> %a, %mul 196 ret <8 x i16> %sub 197} 198 199define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 200; CHECK-LABEL: test_vmls_laneq_s32: 201; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 202; CHECK-NEXT: ret 203entry: 204 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 205 
%mul = mul <2 x i32> %shuffle, %b 206 %sub = sub <2 x i32> %a, %mul 207 ret <2 x i32> %sub 208} 209 210define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 211; CHECK-LABEL: test_vmlsq_laneq_s32: 212; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 213; CHECK-NEXT: ret 214entry: 215 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 216 %mul = mul <4 x i32> %shuffle, %b 217 %sub = sub <4 x i32> %a, %mul 218 ret <4 x i32> %sub 219} 220 221define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { 222; CHECK-LABEL: test_vmul_lane_s16: 223; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 224; CHECK-NEXT: ret 225entry: 226 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 227 %mul = mul <4 x i16> %shuffle, %a 228 ret <4 x i16> %mul 229} 230 231define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { 232; CHECK-LABEL: test_vmulq_lane_s16: 233; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 234; CHECK-NEXT: ret 235entry: 236 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 237 %mul = mul <8 x i16> %shuffle, %a 238 ret <8 x i16> %mul 239} 240 241define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { 242; CHECK-LABEL: test_vmul_lane_s32: 243; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 244; CHECK-NEXT: ret 245entry: 246 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 247 %mul = mul <2 x i32> %shuffle, %a 248 ret <2 x i32> %mul 249} 250 251define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { 252; CHECK-LABEL: test_vmulq_lane_s32: 253; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 254; CHECK-NEXT: ret 255entry: 256 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 257 %mul = mul <4 x i32> %shuffle, 
%a 258 ret <4 x i32> %mul 259} 260 261define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { 262; CHECK-LABEL: test_vmul_lane_u16: 263; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 264; CHECK-NEXT: ret 265entry: 266 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 267 %mul = mul <4 x i16> %shuffle, %a 268 ret <4 x i16> %mul 269} 270 271define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { 272; CHECK-LABEL: test_vmulq_lane_u16: 273; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 274; CHECK-NEXT: ret 275entry: 276 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 277 %mul = mul <8 x i16> %shuffle, %a 278 ret <8 x i16> %mul 279} 280 281define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { 282; CHECK-LABEL: test_vmul_lane_u32: 283; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 284; CHECK-NEXT: ret 285entry: 286 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 287 %mul = mul <2 x i32> %shuffle, %a 288 ret <2 x i32> %mul 289} 290 291define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { 292; CHECK-LABEL: test_vmulq_lane_u32: 293; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 294; CHECK-NEXT: ret 295entry: 296 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 297 %mul = mul <4 x i32> %shuffle, %a 298 ret <4 x i32> %mul 299} 300 301define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 302; CHECK-LABEL: test_vmul_laneq_s16: 303; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 304; CHECK-NEXT: ret 305entry: 306 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 307 %mul = mul <4 x i16> %shuffle, %a 308 ret <4 x i16> %mul 309} 310 311define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 312; 
CHECK-LABEL: test_vmulq_laneq_s16: 313; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 314; CHECK-NEXT: ret 315entry: 316 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 317 %mul = mul <8 x i16> %shuffle, %a 318 ret <8 x i16> %mul 319} 320 321define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 322; CHECK-LABEL: test_vmul_laneq_s32: 323; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 324; CHECK-NEXT: ret 325entry: 326 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 327 %mul = mul <2 x i32> %shuffle, %a 328 ret <2 x i32> %mul 329} 330 331define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 332; CHECK-LABEL: test_vmulq_laneq_s32: 333; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 334; CHECK-NEXT: ret 335entry: 336 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 337 %mul = mul <4 x i32> %shuffle, %a 338 ret <4 x i32> %mul 339} 340 341define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { 342; CHECK-LABEL: test_vmul_laneq_u16: 343; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 344; CHECK-NEXT: ret 345entry: 346 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 347 %mul = mul <4 x i16> %shuffle, %a 348 ret <4 x i16> %mul 349} 350 351define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { 352; CHECK-LABEL: test_vmulq_laneq_u16: 353; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 354; CHECK-NEXT: ret 355entry: 356 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 357 %mul = mul <8 x i16> %shuffle, %a 358 ret <8 x i16> %mul 359} 360 361define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) { 362; CHECK-LABEL: test_vmul_laneq_u32: 363; CHECK: mul {{v[0-9]+}}.2s, 
{{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 364; CHECK-NEXT: ret 365entry: 366 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 367 %mul = mul <2 x i32> %shuffle, %a 368 ret <2 x i32> %mul 369} 370 371define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) { 372; CHECK-LABEL: test_vmulq_laneq_u32: 373; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 374; CHECK-NEXT: ret 375entry: 376 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 377 %mul = mul <4 x i32> %shuffle, %a 378 ret <4 x i32> %mul 379} 380 381define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 382; CHECK-LABEL: test_vfma_lane_f32: 383; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 384; CHECK-NEXT: ret 385entry: 386 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 387 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 388 ret <2 x float> %0 389} 390 391declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) 392 393define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 394; CHECK-LABEL: test_vfmaq_lane_f32: 395; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 396; CHECK-NEXT: ret 397entry: 398 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 399 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 400 ret <4 x float> %0 401} 402 403declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) 404 405define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { 406; CHECK-LABEL: test_vfma_laneq_f32: 407; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 408; CHECK-NEXT: ret 409entry: 410 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> 411 %0 = tail call <2 
x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 412 ret <2 x float> %0 413} 414 415define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 416; CHECK-LABEL: test_vfmaq_laneq_f32: 417; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 418; CHECK-NEXT: ret 419entry: 420 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 421 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 422 ret <4 x float> %0 423} 424 425define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 426; CHECK-LABEL: test_vfms_lane_f32: 427; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 428; CHECK-NEXT: ret 429entry: 430 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 431 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1> 432 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 433 ret <2 x float> %0 434} 435 436define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 437; CHECK-LABEL: test_vfmsq_lane_f32: 438; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 439; CHECK-NEXT: ret 440entry: 441 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 442 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 443 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 444 ret <4 x float> %0 445} 446 447define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { 448; CHECK-LABEL: test_vfms_laneq_f32: 449; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 450; CHECK-NEXT: ret 451entry: 452 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 453 %lane = shufflevector <4 
x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3> 454 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 455 ret <2 x float> %0 456} 457 458define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 459; CHECK-LABEL: test_vfmsq_laneq_f32: 460; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 461; CHECK-NEXT: ret 462entry: 463 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 464 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 465 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 466 ret <4 x float> %0 467} 468 469define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { 470; CHECK-LABEL: test_vfmaq_lane_f64: 471; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 472; CHECK-NEXT: ret 473entry: 474 %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 475 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 476 ret <2 x double> %0 477} 478 479declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) 480 481define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 482; CHECK-LABEL: test_vfmaq_laneq_f64: 483; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 484; CHECK-NEXT: ret 485entry: 486 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> 487 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 488 ret <2 x double> %0 489} 490 491define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { 492; CHECK-LABEL: test_vfmsq_lane_f64: 493; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 494; CHECK-NEXT: ret 
495entry: 496 %sub = fsub <1 x double> <double -0.000000e+00>, %v 497 %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer 498 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 499 ret <2 x double> %0 500} 501 502define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 503; CHECK-LABEL: test_vfmsq_laneq_f64: 504; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 505; CHECK-NEXT: ret 506entry: 507 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v 508 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1> 509 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 510 ret <2 x double> %0 511} 512; Scalar llvm.fma tests: one multiplicand is extracted from a vector lane; the fms variants negate that value first. 513define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) { 514; CHECK-LABEL: test_vfmas_laneq_f32: 515; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 516; CHECK-NEXT: ret 517entry: 518 %extract = extractelement <4 x float> %v, i32 3 519 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) 520 ret float %0 521} 522 523declare float @llvm.fma.f32(float, float, float) 524 525define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) { 526; CHECK-LABEL: test_vfmsd_lane_f64: 527; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 528; CHECK-NEXT: ret 529entry: 530 %extract.rhs = extractelement <1 x double> %v, i32 0 531 %extract = fsub double -0.000000e+00, %extract.rhs 532 %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) 533 ret double %0 534} 535 536declare double @llvm.fma.f64(double, double, double) 537 538define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) { 539; CHECK-LABEL: test_vfmss_lane_f32: 540; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 541; CHECK-NEXT: ret 542entry: 543 %extract.rhs = extractelement <2 x float> %v, i32 1 
544 %extract = fsub float -0.000000e+00, %extract.rhs 545 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) 546 ret float %0 547} 548 549define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { 550; CHECK-LABEL: test_vfmss_laneq_f32: 551; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 552; CHECK-NEXT: ret 553entry: 554 %extract.rhs = extractelement <4 x float> %v, i32 3 555 %extract = fsub float -0.000000e+00, %extract.rhs 556 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) 557 ret float %0 558} 559 560define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) { 561; CHECK-LABEL: test_vfmsd_laneq_f64: 562; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 563; CHECK-NEXT: ret 564entry: 565 %extract.rhs = extractelement <2 x double> %v, i32 1 566 %extract = fsub double -0.000000e+00, %extract.rhs 567 %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) 568 ret double %0 569} 570; The _0 variants negate the whole vector before extracting the lane, instead of negating the extracted scalar. 571define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) { 572; CHECK-LABEL: test_vfmsd_lane_f64_0: 573; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 574; CHECK-NEXT: ret 575entry: 576 %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v 577 %tmp1 = extractelement <1 x double> %tmp0, i32 0 578 %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a) 579 ret double %0 580} 581 582define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) { 583; CHECK-LABEL: test_vfmss_lane_f32_0: 584; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 585; CHECK-NEXT: ret 586entry: 587 %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 588 %tmp1 = extractelement <2 x float> %tmp0, i32 1 589 %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a) 590 ret float %0 591} 592 593define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) { 594; CHECK-LABEL: test_vfmss_laneq_f32_0: 595; CHECK: 
fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 596; CHECK-NEXT: ret 597entry: 598 %tmp0 = fsub <4 x float><float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 599 %tmp1 = extractelement <4 x float> %tmp0, i32 3 600 %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a) 601 ret float %0 602} 603 604define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) { 605; CHECK-LABEL: test_vfmsd_laneq_f64_0: 606; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 607; CHECK-NEXT: ret 608entry: 609 %tmp0 = fsub <2 x double><double -0.000000e+00, double -0.000000e+00>, %v 610 %tmp1 = extractelement <2 x double> %tmp0, i32 1 611 %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a) 612 ret double %0 613} 614; Widening multiply-accumulate tests: the smull intrinsic on a lane splat followed by add; a by-element mlal form is expected. 615define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 616; CHECK-LABEL: test_vmlal_lane_s16: 617; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 618; CHECK-NEXT: ret 619entry: 620 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 621 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 622 %add = add <4 x i32> %vmull2.i, %a 623 ret <4 x i32> %add 624} 625 626define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 627; CHECK-LABEL: test_vmlal_lane_s32: 628; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 629; CHECK-NEXT: ret 630entry: 631 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 632 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 633 %add = add <2 x i64> %vmull2.i, %a 634 ret <2 x i64> %add 635} 636 637define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 638; CHECK-LABEL: test_vmlal_laneq_s16: 639; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 640; CHECK-NEXT: ret 641entry: 642 %shuffle 
= shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; Signed widening multiply-accumulate by element.
; Each function broadcasts one lane of %v, multiplies it with %b through the
; smull intrinsic and adds the product to %a.  The checks name the complete
; signed mnemonic so that an unsigned or saturating instruction (whose name
; also contains "mlal") cannot satisfy them.
define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; The *_high_* variants first extract the upper half of %b (%shuffle.i) and
; expect the "2" form of the instruction operating on the full-width register.
define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Signed widening multiply-subtract by element: the smull product is
; subtracted from %a, so the expected instruction is smlsl / smlsl2.
define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Unsigned widening multiply-accumulate by element: same shape as the signed
; tests but built on the umull intrinsic, so the expected mnemonic is umlal.
define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; Unsigned widening multiply-accumulate by element.
; Each function broadcasts one lane of %v, multiplies it with %b through the
; umull intrinsic and adds the product to %a.  The checks name the complete
; unsigned mnemonic so that a signed or saturating instruction (whose name
; also contains "mlal") cannot satisfy them.
define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; The *_high_* variants first extract the upper half of %b (%shuffle.i) and
; expect the "2" form of the instruction operating on the full-width register.
define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Unsigned widening multiply-subtract by element: the umull product is
; subtracted from %a, so the expected instruction is umlsl / umlsl2.
define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Widening multiply by element with no accumulator.  The signed tests call the
; smull intrinsic and the unsigned tests call umull; the checks name the
; matching mnemonic so the two cannot be confused with each other.
define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Saturating doubling multiply-accumulate by element: an sqdmull product is
; combined with %a through the saturating sqadd intrinsic; the pair is expected
; to select a single sqdmlal / sqdmlal2.
define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

; Saturating doubling multiply-subtract by element: same shape, but the
; product is combined with %a through sqsub, so sqdmlsl is expected.
define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; Saturating doubling multiply-subtract by element, high-half forms: the upper
; half of %b is multiplied (sqdmull) by a broadcast lane of %v and the product
; is combined with %a through sqsub.  The checks name the complete mnemonic so
; that no other instruction containing the same substring can satisfy them.
define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; Saturating doubling widening multiply by element (sqdmull / sqdmull2).
define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Note: unlike the other 16-bit laneq tests, this one broadcasts lane 3 (not
; lane 7) of the 8-element %v; the expected operand index matches the shuffle.
define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Saturating doubling multiply returning the high half, by element (sqdmulh).
define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}

define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}

define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}

define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}

; Rounding variant of the above (sqrdmulh).
define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}

define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}

define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}

define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}

; Floating-point multiply by element, written as a plain fmul against a
; broadcast lane of %v.
define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; The <1 x double> forms go through a scalar double: %a is bitcast to a scalar,
; multiplied by the extracted element, and re-inserted into a 1-element vector.
define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmul_lane_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <1 x double> %v, i32 0
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulq_lane_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <2 x double> %v, i32 1
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

; Floating-point multiply-extended by element, via the fmulx intrinsic.
define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

; NOTE(review): the definition below continues past the end of this span; its
; tokens (including the expected-output pattern) are reproduced unchanged.
define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64:
; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double>
@llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 1520 ret <2 x double> %vmulx2.i 1521} 1522 1523define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { 1524; CHECK-LABEL: test_vmulx_laneq_f32: 1525; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1526; CHECK-NEXT: ret 1527entry: 1528 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> 1529 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 1530 ret <2 x float> %vmulx2.i 1531} 1532 1533define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { 1534; CHECK-LABEL: test_vmulxq_laneq_f32: 1535; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1536; CHECK-NEXT: ret 1537entry: 1538 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1539 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 1540 ret <4 x float> %vmulx2.i 1541} 1542 1543define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { 1544; CHECK-LABEL: test_vmulxq_laneq_f64: 1545; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 1546; CHECK-NEXT: ret 1547entry: 1548 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> 1549 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 1550 ret <2 x double> %vmulx2.i 1551} 1552 1553define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 1554; CHECK-LABEL: test_vmla_lane_s16_0: 1555; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1556; CHECK-NEXT: ret 1557entry: 1558 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1559 %mul = mul <4 x i16> %shuffle, %b 1560 %add = add <4 x i16> %mul, %a 1561 ret <4 x i16> %add 1562} 1563 1564define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> 
%a, <8 x i16> %b, <4 x i16> %v) { 1565; CHECK-LABEL: test_vmlaq_lane_s16_0: 1566; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1567; CHECK-NEXT: ret 1568entry: 1569 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1570 %mul = mul <8 x i16> %shuffle, %b 1571 %add = add <8 x i16> %mul, %a 1572 ret <8 x i16> %add 1573} 1574 1575define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 1576; CHECK-LABEL: test_vmla_lane_s32_0: 1577; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1578; CHECK-NEXT: ret 1579entry: 1580 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1581 %mul = mul <2 x i32> %shuffle, %b 1582 %add = add <2 x i32> %mul, %a 1583 ret <2 x i32> %add 1584} 1585 1586define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 1587; CHECK-LABEL: test_vmlaq_lane_s32_0: 1588; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1589; CHECK-NEXT: ret 1590entry: 1591 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1592 %mul = mul <4 x i32> %shuffle, %b 1593 %add = add <4 x i32> %mul, %a 1594 ret <4 x i32> %add 1595} 1596 1597define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1598; CHECK-LABEL: test_vmla_laneq_s16_0: 1599; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1600; CHECK-NEXT: ret 1601entry: 1602 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1603 %mul = mul <4 x i16> %shuffle, %b 1604 %add = add <4 x i16> %mul, %a 1605 ret <4 x i16> %add 1606} 1607 1608define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 1609; CHECK-LABEL: test_vmlaq_laneq_s16_0: 1610; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1611; CHECK-NEXT: ret 1612entry: 1613 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1614 %mul = mul <8 x i16> %shuffle, 
%b 1615 %add = add <8 x i16> %mul, %a 1616 ret <8 x i16> %add 1617} 1618 1619define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 1620; CHECK-LABEL: test_vmla_laneq_s32_0: 1621; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1622; CHECK-NEXT: ret 1623entry: 1624 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1625 %mul = mul <2 x i32> %shuffle, %b 1626 %add = add <2 x i32> %mul, %a 1627 ret <2 x i32> %add 1628} 1629 1630define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 1631; CHECK-LABEL: test_vmlaq_laneq_s32_0: 1632; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1633; CHECK-NEXT: ret 1634entry: 1635 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1636 %mul = mul <4 x i32> %shuffle, %b 1637 %add = add <4 x i32> %mul, %a 1638 ret <4 x i32> %add 1639} 1640 1641define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 1642; CHECK-LABEL: test_vmls_lane_s16_0: 1643; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1644; CHECK-NEXT: ret 1645entry: 1646 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1647 %mul = mul <4 x i16> %shuffle, %b 1648 %sub = sub <4 x i16> %a, %mul 1649 ret <4 x i16> %sub 1650} 1651 1652define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 1653; CHECK-LABEL: test_vmlsq_lane_s16_0: 1654; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1655; CHECK-NEXT: ret 1656entry: 1657 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1658 %mul = mul <8 x i16> %shuffle, %b 1659 %sub = sub <8 x i16> %a, %mul 1660 ret <8 x i16> %sub 1661} 1662 1663define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 1664; CHECK-LABEL: test_vmls_lane_s32_0: 1665; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1666; CHECK-NEXT: ret 1667entry: 
1668 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1669 %mul = mul <2 x i32> %shuffle, %b 1670 %sub = sub <2 x i32> %a, %mul 1671 ret <2 x i32> %sub 1672} 1673 1674define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 1675; CHECK-LABEL: test_vmlsq_lane_s32_0: 1676; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1677; CHECK-NEXT: ret 1678entry: 1679 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1680 %mul = mul <4 x i32> %shuffle, %b 1681 %sub = sub <4 x i32> %a, %mul 1682 ret <4 x i32> %sub 1683} 1684 1685define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1686; CHECK-LABEL: test_vmls_laneq_s16_0: 1687; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1688; CHECK-NEXT: ret 1689entry: 1690 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1691 %mul = mul <4 x i16> %shuffle, %b 1692 %sub = sub <4 x i16> %a, %mul 1693 ret <4 x i16> %sub 1694} 1695 1696define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 1697; CHECK-LABEL: test_vmlsq_laneq_s16_0: 1698; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1699; CHECK-NEXT: ret 1700entry: 1701 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1702 %mul = mul <8 x i16> %shuffle, %b 1703 %sub = sub <8 x i16> %a, %mul 1704 ret <8 x i16> %sub 1705} 1706 1707define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 1708; CHECK-LABEL: test_vmls_laneq_s32_0: 1709; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1710; CHECK-NEXT: ret 1711entry: 1712 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1713 %mul = mul <2 x i32> %shuffle, %b 1714 %sub = sub <2 x i32> %a, %mul 1715 ret <2 x i32> %sub 1716} 1717 1718define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 1719; 
CHECK-LABEL: test_vmlsq_laneq_s32_0: 1720; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1721; CHECK-NEXT: ret 1722entry: 1723 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1724 %mul = mul <4 x i32> %shuffle, %b 1725 %sub = sub <4 x i32> %a, %mul 1726 ret <4 x i32> %sub 1727} 1728 1729define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 1730; CHECK-LABEL: test_vmul_lane_s16_0: 1731; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1732; CHECK-NEXT: ret 1733entry: 1734 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1735 %mul = mul <4 x i16> %shuffle, %a 1736 ret <4 x i16> %mul 1737} 1738 1739define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 1740; CHECK-LABEL: test_vmulq_lane_s16_0: 1741; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1742; CHECK-NEXT: ret 1743entry: 1744 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1745 %mul = mul <8 x i16> %shuffle, %a 1746 ret <8 x i16> %mul 1747} 1748 1749define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 1750; CHECK-LABEL: test_vmul_lane_s32_0: 1751; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1752; CHECK-NEXT: ret 1753entry: 1754 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1755 %mul = mul <2 x i32> %shuffle, %a 1756 ret <2 x i32> %mul 1757} 1758 1759define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 1760; CHECK-LABEL: test_vmulq_lane_s32_0: 1761; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1762; CHECK-NEXT: ret 1763entry: 1764 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1765 %mul = mul <4 x i32> %shuffle, %a 1766 ret <4 x i32> %mul 1767} 1768 1769define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { 1770; CHECK-LABEL: test_vmul_lane_u16_0: 1771; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, 
{{v[0-9]+}}.h[0] 1772; CHECK-NEXT: ret 1773entry: 1774 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1775 %mul = mul <4 x i16> %shuffle, %a 1776 ret <4 x i16> %mul 1777} 1778 1779define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { 1780; CHECK-LABEL: test_vmulq_lane_u16_0: 1781; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1782; CHECK-NEXT: ret 1783entry: 1784 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1785 %mul = mul <8 x i16> %shuffle, %a 1786 ret <8 x i16> %mul 1787} 1788 1789define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { 1790; CHECK-LABEL: test_vmul_lane_u32_0: 1791; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1792; CHECK-NEXT: ret 1793entry: 1794 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1795 %mul = mul <2 x i32> %shuffle, %a 1796 ret <2 x i32> %mul 1797} 1798 1799define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { 1800; CHECK-LABEL: test_vmulq_lane_u32_0: 1801; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1802; CHECK-NEXT: ret 1803entry: 1804 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1805 %mul = mul <4 x i32> %shuffle, %a 1806 ret <4 x i32> %mul 1807} 1808 1809define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { 1810; CHECK-LABEL: test_vmul_laneq_s16_0: 1811; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1812; CHECK-NEXT: ret 1813entry: 1814 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1815 %mul = mul <4 x i16> %shuffle, %a 1816 ret <4 x i16> %mul 1817} 1818 1819define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { 1820; CHECK-LABEL: test_vmulq_laneq_s16_0: 1821; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1822; CHECK-NEXT: ret 1823entry: 1824 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x 
i32> zeroinitializer 1825 %mul = mul <8 x i16> %shuffle, %a 1826 ret <8 x i16> %mul 1827} 1828 1829define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { 1830; CHECK-LABEL: test_vmul_laneq_s32_0: 1831; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1832; CHECK-NEXT: ret 1833entry: 1834 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1835 %mul = mul <2 x i32> %shuffle, %a 1836 ret <2 x i32> %mul 1837} 1838 1839define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { 1840; CHECK-LABEL: test_vmulq_laneq_s32_0: 1841; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1842; CHECK-NEXT: ret 1843entry: 1844 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1845 %mul = mul <4 x i32> %shuffle, %a 1846 ret <4 x i32> %mul 1847} 1848 1849define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { 1850; CHECK-LABEL: test_vmul_laneq_u16_0: 1851; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1852; CHECK-NEXT: ret 1853entry: 1854 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1855 %mul = mul <4 x i16> %shuffle, %a 1856 ret <4 x i16> %mul 1857} 1858 1859define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { 1860; CHECK-LABEL: test_vmulq_laneq_u16_0: 1861; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1862; CHECK-NEXT: ret 1863entry: 1864 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1865 %mul = mul <8 x i16> %shuffle, %a 1866 ret <8 x i16> %mul 1867} 1868 1869define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { 1870; CHECK-LABEL: test_vmul_laneq_u32_0: 1871; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1872; CHECK-NEXT: ret 1873entry: 1874 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1875 %mul = mul <2 x i32> %shuffle, %a 1876 ret <2 x i32> %mul 1877} 1878 1879define <4 x i32> 
@test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { 1880; CHECK-LABEL: test_vmulq_laneq_u32_0: 1881; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1882; CHECK-NEXT: ret 1883entry: 1884 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1885 %mul = mul <4 x i32> %shuffle, %a 1886 ret <4 x i32> %mul 1887} 1888 1889define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 1890; CHECK-LABEL: test_vfma_lane_f32_0: 1891; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1892; CHECK-NEXT: ret 1893entry: 1894 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer 1895 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1896 ret <2 x float> %0 1897} 1898 1899define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 1900; CHECK-LABEL: test_vfmaq_lane_f32_0: 1901; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1902; CHECK-NEXT: ret 1903entry: 1904 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer 1905 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1906 ret <4 x float> %0 1907} 1908 1909define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { 1910; CHECK-LABEL: test_vfma_laneq_f32_0: 1911; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1912; CHECK-NEXT: ret 1913entry: 1914 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer 1915 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1916 ret <2 x float> %0 1917} 1918 1919define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 1920; CHECK-LABEL: test_vfmaq_laneq_f32_0: 1921; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1922; CHECK-NEXT: ret 1923entry: 1924 %lane = 
shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer 1925 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1926 ret <4 x float> %0 1927} 1928 1929define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 1930; CHECK-LABEL: test_vfms_lane_f32_0: 1931; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1932; CHECK-NEXT: ret 1933entry: 1934 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 1935 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer 1936 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1937 ret <2 x float> %0 1938} 1939 1940define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 1941; CHECK-LABEL: test_vfmsq_lane_f32_0: 1942; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1943; CHECK-NEXT: ret 1944entry: 1945 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 1946 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer 1947 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1948 ret <4 x float> %0 1949} 1950 1951define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { 1952; CHECK-LABEL: test_vfms_laneq_f32_0: 1953; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1954; CHECK-NEXT: ret 1955entry: 1956 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 1957 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer 1958 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1959 ret <2 x float> %0 1960} 1961 1962define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 1963; CHECK-LABEL: 
test_vfmsq_laneq_f32_0: 1964; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1965; CHECK-NEXT: ret 1966entry: 1967 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 1968 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer 1969 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1970 ret <4 x float> %0 1971} 1972 1973define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 1974; CHECK-LABEL: test_vfmaq_laneq_f64_0: 1975; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1976; CHECK-NEXT: ret 1977entry: 1978 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer 1979 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 1980 ret <2 x double> %0 1981} 1982 1983define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 1984; CHECK-LABEL: test_vfmsq_laneq_f64_0: 1985; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1986; CHECK-NEXT: ret 1987entry: 1988 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v 1989 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer 1990 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 1991 ret <2 x double> %0 1992} 1993 1994define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1995; CHECK-LABEL: test_vmlal_lane_s16_0: 1996; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1997; CHECK-NEXT: ret 1998entry: 1999 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2000 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2001 %add = add <4 x i32> %vmull2.i, %a 2002 ret <4 x i32> %add 2003} 2004 2005define 
<2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2006; CHECK-LABEL: test_vmlal_lane_s32_0: 2007; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2008; CHECK-NEXT: ret 2009entry: 2010 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2011 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2012 %add = add <2 x i64> %vmull2.i, %a 2013 ret <2 x i64> %add 2014} 2015 2016define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 2017; CHECK-LABEL: test_vmlal_laneq_s16_0: 2018; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2019; CHECK-NEXT: ret 2020entry: 2021 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2022 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2023 %add = add <4 x i32> %vmull2.i, %a 2024 ret <4 x i32> %add 2025} 2026 2027define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 2028; CHECK-LABEL: test_vmlal_laneq_s32_0: 2029; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2030; CHECK-NEXT: ret 2031entry: 2032 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2033 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2034 %add = add <2 x i64> %vmull2.i, %a 2035 ret <2 x i64> %add 2036} 2037 2038define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2039; CHECK-LABEL: test_vmlal_high_lane_s16_0: 2040; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2041; CHECK-NEXT: ret 2042entry: 2043 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2044 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2045 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x 
i16> %shuffle) 2046 %add = add <4 x i32> %vmull2.i, %a 2047 ret <4 x i32> %add 2048} 2049 2050define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2051; CHECK-LABEL: test_vmlal_high_lane_s32_0: 2052; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2053; CHECK-NEXT: ret 2054entry: 2055 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2056 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2057 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2058 %add = add <2 x i64> %vmull2.i, %a 2059 ret <2 x i64> %add 2060} 2061 2062define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 2063; CHECK-LABEL: test_vmlal_high_laneq_s16_0: 2064; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2065; CHECK-NEXT: ret 2066entry: 2067 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2068 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2069 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2070 %add = add <4 x i32> %vmull2.i, %a 2071 ret <4 x i32> %add 2072} 2073 2074define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 2075; CHECK-LABEL: test_vmlal_high_laneq_s32_0: 2076; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2077; CHECK-NEXT: ret 2078entry: 2079 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2080 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2081 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2082 %add = add <2 x i64> %vmull2.i, %a 2083 ret <2 x i64> %add 2084} 2085 2086define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 
2087; CHECK-LABEL: test_vmlsl_lane_s16_0: 2088; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2089; CHECK-NEXT: ret 2090entry: 2091 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2092 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2093 %sub = sub <4 x i32> %a, %vmull2.i 2094 ret <4 x i32> %sub 2095} 2096 2097define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2098; CHECK-LABEL: test_vmlsl_lane_s32_0: 2099; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2100; CHECK-NEXT: ret 2101entry: 2102 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2103 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2104 %sub = sub <2 x i64> %a, %vmull2.i 2105 ret <2 x i64> %sub 2106} 2107 2108define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 2109; CHECK-LABEL: test_vmlsl_laneq_s16_0: 2110; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2111; CHECK-NEXT: ret 2112entry: 2113 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2114 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2115 %sub = sub <4 x i32> %a, %vmull2.i 2116 ret <4 x i32> %sub 2117} 2118 2119define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 2120; CHECK-LABEL: test_vmlsl_laneq_s32_0: 2121; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2122; CHECK-NEXT: ret 2123entry: 2124 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2125 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2126 %sub = sub <2 x i64> %a, %vmull2.i 2127 ret <2 x i64> %sub 2128} 2129 2130define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2131; 
; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; Signed widening multiply-subtract, high half, lane 0.
; Each test splats element 0 of %v, multiplies it with the upper half of %b via
; the smull intrinsic and subtracts the product from %a. The full mnemonic is
; spelled out so that an unsigned (umlsl2) selection cannot satisfy the pattern.

define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Unsigned widening multiply-accumulate, lane 0.
; umull by a splat of element 0 of %v, followed by an add of %a, is expected to
; fold into a single by-element umlal.

define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Unsigned widening multiply-accumulate, high half of %b, lane 0 (umlal2).

define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Unsigned widening multiply-subtract, lane 0 (umlsl).

define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Unsigned widening multiply-subtract, high half of %b, lane 0 (umlsl2).

define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Widening multiply by lane 0 of a 64-bit vector.
; The signed and unsigned variants share operand shapes, so the pattern names
; smull or umull explicitly to tell them apart.

define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32_0:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Widening multiply of the high half of %a by lane 0 of a 64-bit vector
; (smull2 / umull2).

define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16_0:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32_0:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16_0:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32_0:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Widening multiply by lane 0 of a 128-bit vector (laneq forms).

define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32_0:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Widening multiply of the high half of %a by lane 0 of a 128-bit vector.

define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16_0:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32_0:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16_0:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32_0:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Saturating doubling multiply-accumulate, lane 0.
; sqdmull followed by sqadd with %a is expected to fold into sqdmlal/sqdmlal2.

define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16_0:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32_0:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

; Saturating doubling multiply-subtract, lane 0.
; sqdmull followed by sqsub from %a is expected to fold into sqdmlsl/sqdmlsl2.

define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; Saturating doubling widening multiply by lane 0 (sqdmull / sqdmull2).

define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16_0:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32_0:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16_0:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32_0:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Saturating doubling multiply returning high half, lane 0 (sqdmulh).
; Naming the full mnemonic keeps the rounding form (sqrdmulh) from matching.

define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16_0:
; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}

define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}

define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32_0:
; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}

define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}

; Saturating rounding doubling multiply returning high half, lane 0 (sqrdmulh).

define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}

define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}

define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}

define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}

; Floating-point multiply by lane 0.
; These patterns already name the full fmul mnemonic; the trailing space keeps
; them from matching fmulx.

define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; Scalar-by-element form: %a is reinterpreted as a double through an <8 x i8>
; bitcast and multiplied by element 0 extracted from %v.
define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64_0:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <2 x double> %v, i32 0
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64_0:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

; Floating-point multiply-extended by lane 0 (fmulx intrinsic).

define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32_0:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32_0:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64_0:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}

define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32_0:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32_0:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64_0:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}