; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

; AArch64 NEON intrinsic declarations. None of them is called by the
; add/sub long and wide tests that immediately follow; the raddhn ones are
; called by the test_vraddhn_* functions further down in this file.

declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)

declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)

declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)

declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)

declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)

declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)

declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)

declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)

declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)

declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)

declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)

declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>)

declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>)

declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)

declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)

declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)

declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)

; ---------------------------------------------------------------------------
; Add long: both 64-bit operands are extended to double-width lanes and then
; added. The sext forms are expected to select saddl, the zext forms uaddl.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_s8:
; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %a.ext = sext <8 x i8> %a to <8 x i16>
  %b.ext = sext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %a.ext, %b.ext
  ret <8 x i16> %sum
}

define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_s16:
; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %a.ext = sext <4 x i16> %a to <4 x i32>
  %b.ext = sext <4 x i16> %b to <4 x i32>
  %sum = add <4 x i32> %a.ext, %b.ext
  ret <4 x i32> %sum
}

define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_s32:
; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %a.ext = sext <2 x i32> %a to <2 x i64>
  %b.ext = sext <2 x i32> %b to <2 x i64>
  %sum = add <2 x i64> %a.ext, %b.ext
  ret <2 x i64> %sum
}

define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_u8:
; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %a.ext = zext <8 x i8> %a to <8 x i16>
  %b.ext = zext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %a.ext, %b.ext
  ret <8 x i16> %sum
}

define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_u16:
; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %a.ext = zext <4 x i16> %a to <4 x i32>
  %b.ext = zext <4 x i16> %b to <4 x i32>
  %sum = add <4 x i32> %a.ext, %b.ext
  ret <4 x i32> %sum
}

define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_u32:
; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %a.ext = zext <2 x i32> %a to <2 x i64>
  %b.ext = zext <2 x i32> %b to <2 x i64>
  %sum = add <2 x i64> %a.ext, %b.ext
  ret <2 x i64> %sum
}

; ---------------------------------------------------------------------------
; Add long, high half: a shufflevector picks the upper half of the lanes of
; each 128-bit operand before the extend + add. Expected to select the "2"
; forms (saddl2 / uaddl2) that read the high half directly.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_s8:
; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %a.hi.ext = sext <8 x i8> %a.hi to <8 x i16>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.ext = sext <8 x i8> %b.hi to <8 x i16>
  %sum = add <8 x i16> %a.hi.ext, %b.hi.ext
  ret <8 x i16> %sum
}

define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_s16:
; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %a.hi.ext = sext <4 x i16> %a.hi to <4 x i32>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.ext = sext <4 x i16> %b.hi to <4 x i32>
  %sum = add <4 x i32> %a.hi.ext, %b.hi.ext
  ret <4 x i32> %sum
}

define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_s32:
; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %a.hi.ext = sext <2 x i32> %a.hi to <2 x i64>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.ext = sext <2 x i32> %b.hi to <2 x i64>
  %sum = add <2 x i64> %a.hi.ext, %b.hi.ext
  ret <2 x i64> %sum
}

define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_u8:
; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %a.hi.ext = zext <8 x i8> %a.hi to <8 x i16>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.ext = zext <8 x i8> %b.hi to <8 x i16>
  %sum = add <8 x i16> %a.hi.ext, %b.hi.ext
  ret <8 x i16> %sum
}

define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_u16:
; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %a.hi.ext = zext <4 x i16> %a.hi to <4 x i32>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.ext = zext <4 x i16> %b.hi to <4 x i32>
  %sum = add <4 x i32> %a.hi.ext, %b.hi.ext
  ret <4 x i32> %sum
}

define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_u32:
; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %a.hi.ext = zext <2 x i32> %a.hi to <2 x i64>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.ext = zext <2 x i32> %b.hi to <2 x i64>
  %sum = add <2 x i64> %a.hi.ext, %b.hi.ext
  ret <2 x i64> %sum
}

; ---------------------------------------------------------------------------
; Add wide: only %b is narrow; it is extended and added to the already-wide
; %a. The extended value is the first operand of the IR add. Expected to
; select saddw / uaddw.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_s8:
; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %b.ext = sext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %b.ext, %a
  ret <8 x i16> %sum
}

define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_s16:
; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %b.ext = sext <4 x i16> %b to <4 x i32>
  %sum = add <4 x i32> %b.ext, %a
  ret <4 x i32> %sum
}

define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_s32:
; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %b.ext = sext <2 x i32> %b to <2 x i64>
  %sum = add <2 x i64> %b.ext, %a
  ret <2 x i64> %sum
}

define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_u8:
; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %b.ext = zext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %b.ext, %a
  ret <8 x i16> %sum
}

define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_u16:
; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %b.ext = zext <4 x i16> %b to <4 x i32>
  %sum = add <4 x i32> %b.ext, %a
  ret <4 x i32> %sum
}

define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_u32:
; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %b.ext = zext <2 x i32> %b to <2 x i64>
  %sum = add <2 x i64> %b.ext, %a
  ret <2 x i64> %sum
}

; ---------------------------------------------------------------------------
; Add wide, high half: the upper half of the lanes of the 128-bit %b is
; extracted, extended and added to the wide %a. Expected to select
; saddw2 / uaddw2.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_s8:
; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.ext = sext <8 x i8> %b.hi to <8 x i16>
  %sum = add <8 x i16> %b.hi.ext, %a
  ret <8 x i16> %sum
}

define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_s16:
; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.ext = sext <4 x i16> %b.hi to <4 x i32>
  %sum = add <4 x i32> %b.hi.ext, %a
  ret <4 x i32> %sum
}

define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_s32:
; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.ext = sext <2 x i32> %b.hi to <2 x i64>
  %sum = add <2 x i64> %b.hi.ext, %a
  ret <2 x i64> %sum
}

define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_u8:
; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.ext = zext <8 x i8> %b.hi to <8 x i16>
  %sum = add <8 x i16> %b.hi.ext, %a
  ret <8 x i16> %sum
}

define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_u16:
; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi.ext = zext <4 x i16> %b.hi to <4 x i32>
  %sum = add <4 x i32> %b.hi.ext, %a
  ret <4 x i32> %sum
}

define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_u32:
; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi.ext = zext <2 x i32> %b.hi to <2 x i64>
  %sum = add <2 x i64> %b.hi.ext, %a
  ret <2 x i64> %sum
}

; ---------------------------------------------------------------------------
; Subtract long: both 64-bit operands are extended to double-width lanes and
; %b is subtracted from %a. Expected to select ssubl / usubl.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_s8:
; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %a.ext = sext <8 x i8> %a to <8 x i16>
  %b.ext = sext <8 x i8> %b to <8 x i16>
  %diff = sub <8 x i16> %a.ext, %b.ext
  ret <8 x i16> %diff
}

define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_s16:
; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %a.ext = sext <4 x i16> %a to <4 x i32>
  %b.ext = sext <4 x i16> %b to <4 x i32>
  %diff = sub <4 x i32> %a.ext, %b.ext
  ret <4 x i32> %diff
}

define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_s32:
; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %a.ext = sext <2 x i32> %a to <2 x i64>
  %b.ext = sext <2 x i32> %b to <2 x i64>
  %diff = sub <2 x i64> %a.ext, %b.ext
  ret <2 x i64> %diff
}

define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_u8:
; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %a.ext = zext <8 x i8> %a to <8 x i16>
  %b.ext = zext <8 x i8> %b to <8 x i16>
  %diff = sub <8 x i16> %a.ext, %b.ext
  ret <8 x i16> %diff
}

define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_u16:
; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %a.ext = zext <4 x i16> %a to <4 x i32>
  %b.ext = zext <4 x i16> %b to <4 x i32>
  %diff = sub <4 x i32> %a.ext, %b.ext
  ret <4 x i32> %diff
}

define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_u32:
; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %a.ext = zext <2 x i32> %a to <2 x i64>
  %b.ext = zext <2 x i32> %b to <2 x i64>
  %diff = sub <2 x i64> %a.ext, %b.ext
  ret <2 x i64> %diff
}

359define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) { 360; CHECK-LABEL: test_vsubl_high_s8: 361; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 362entry: 363 %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 364 %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> 365 %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 366 %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16> 367 %sub.i = sub <8 x i16> %0, %1 368 ret <8 x i16> %sub.i 369} 370 371define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) { 372; CHECK-LABEL: test_vsubl_high_s16: 373; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 374entry: 375 %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 376 %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> 377 %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 378 %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32> 379 %sub.i = sub <4 x i32> %0, %1 380 ret <4 x i32> %sub.i 381} 382 383define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) { 384; CHECK-LABEL: test_vsubl_high_s32: 385; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 386entry: 387 %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 388 %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> 389 %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 390 %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64> 391 %sub.i = sub <2 x i64> %0, %1 392 ret <2 x i64> %sub.i 393} 394 395define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) { 396; CHECK-LABEL: test_vsubl_high_u8: 397; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 398entry: 399 %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, 
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 400 %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> 401 %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 402 %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16> 403 %sub.i = sub <8 x i16> %0, %1 404 ret <8 x i16> %sub.i 405} 406 407define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) { 408; CHECK-LABEL: test_vsubl_high_u16: 409; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 410entry: 411 %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 412 %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> 413 %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 414 %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32> 415 %sub.i = sub <4 x i32> %0, %1 416 ret <4 x i32> %sub.i 417} 418 419define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) { 420; CHECK-LABEL: test_vsubl_high_u32: 421; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 422entry: 423 %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 424 %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> 425 %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 426 %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64> 427 %sub.i = sub <2 x i64> %0, %1 428 ret <2 x i64> %sub.i 429} 430 431define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) { 432; CHECK-LABEL: test_vsubw_s8: 433; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b 434entry: 435 %vmovl.i.i = sext <8 x i8> %b to <8 x i16> 436 %sub.i = sub <8 x i16> %a, %vmovl.i.i 437 ret <8 x i16> %sub.i 438} 439 440define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) { 441; CHECK-LABEL: test_vsubw_s16: 442; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h 443entry: 444 %vmovl.i.i = sext <4 x i16> 
%b to <4 x i32> 445 %sub.i = sub <4 x i32> %a, %vmovl.i.i 446 ret <4 x i32> %sub.i 447} 448 449define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) { 450; CHECK-LABEL: test_vsubw_s32: 451; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s 452entry: 453 %vmovl.i.i = sext <2 x i32> %b to <2 x i64> 454 %sub.i = sub <2 x i64> %a, %vmovl.i.i 455 ret <2 x i64> %sub.i 456} 457 458define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) { 459; CHECK-LABEL: test_vsubw_u8: 460; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b 461entry: 462 %vmovl.i.i = zext <8 x i8> %b to <8 x i16> 463 %sub.i = sub <8 x i16> %a, %vmovl.i.i 464 ret <8 x i16> %sub.i 465} 466 467define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) { 468; CHECK-LABEL: test_vsubw_u16: 469; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h 470entry: 471 %vmovl.i.i = zext <4 x i16> %b to <4 x i32> 472 %sub.i = sub <4 x i32> %a, %vmovl.i.i 473 ret <4 x i32> %sub.i 474} 475 476define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) { 477; CHECK-LABEL: test_vsubw_u32: 478; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s 479entry: 480 %vmovl.i.i = zext <2 x i32> %b to <2 x i64> 481 %sub.i = sub <2 x i64> %a, %vmovl.i.i 482 ret <2 x i64> %sub.i 483} 484 485define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) { 486; CHECK-LABEL: test_vsubw_high_s8: 487; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b 488entry: 489 %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 490 %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> 491 %sub.i = sub <8 x i16> %a, %0 492 ret <8 x i16> %sub.i 493} 494 495define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) { 496; CHECK-LABEL: test_vsubw_high_s16: 497; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h 498entry: 499 %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, 
i32 5, i32 6, i32 7> 500 %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> 501 %sub.i = sub <4 x i32> %a, %0 502 ret <4 x i32> %sub.i 503} 504 505define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) { 506; CHECK-LABEL: test_vsubw_high_s32: 507; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s 508entry: 509 %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 510 %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> 511 %sub.i = sub <2 x i64> %a, %0 512 ret <2 x i64> %sub.i 513} 514 515define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) { 516; CHECK-LABEL: test_vsubw_high_u8: 517; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b 518entry: 519 %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 520 %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> 521 %sub.i = sub <8 x i16> %a, %0 522 ret <8 x i16> %sub.i 523} 524 525define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) { 526; CHECK-LABEL: test_vsubw_high_u16: 527; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h 528entry: 529 %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 530 %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> 531 %sub.i = sub <4 x i32> %a, %0 532 ret <4 x i32> %sub.i 533} 534 535define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) { 536; CHECK-LABEL: test_vsubw_high_u32: 537; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s 538entry: 539 %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 540 %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> 541 %sub.i = sub <2 x i64> %a, %0 542 ret <2 x i64> %sub.i 543} 544 545define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) { 546; CHECK-LABEL: test_vaddhn_s16: 547; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 548entry: 549 %vaddhn.i = add <8 x i16> %a, 
%b 550 %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 551 %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> 552 ret <8 x i8> %vaddhn2.i 553} 554 555define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) { 556; CHECK-LABEL: test_vaddhn_s32: 557; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 558entry: 559 %vaddhn.i = add <4 x i32> %a, %b 560 %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> 561 %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> 562 ret <4 x i16> %vaddhn2.i 563} 564 565define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) { 566; CHECK-LABEL: test_vaddhn_s64: 567; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 568entry: 569 %vaddhn.i = add <2 x i64> %a, %b 570 %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> 571 %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> 572 ret <2 x i32> %vaddhn2.i 573} 574 575define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) { 576; CHECK-LABEL: test_vaddhn_u16: 577; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 578entry: 579 %vaddhn.i = add <8 x i16> %a, %b 580 %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 581 %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> 582 ret <8 x i8> %vaddhn2.i 583} 584 585define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) { 586; CHECK-LABEL: test_vaddhn_u32: 587; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 588entry: 589 %vaddhn.i = add <4 x i32> %a, %b 590 %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> 591 %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> 592 ret <4 x i16> %vaddhn2.i 593} 594 595define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) { 596; CHECK-LABEL: test_vaddhn_u64: 597; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 598entry: 599 %vaddhn.i = add <2 x i64> %a, %b 600 %vaddhn1.i = lshr <2 x i64> %vaddhn.i, 
<i64 32, i64 32> 601 %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> 602 ret <2 x i32> %vaddhn2.i 603} 604 605define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 606; CHECK-LABEL: test_vaddhn_high_s16: 607; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 608entry: 609 %vaddhn.i.i = add <8 x i16> %a, %b 610 %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 611 %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> 612 %0 = bitcast <8 x i8> %r to <1 x i64> 613 %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> 614 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 615 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 616 ret <16 x i8> %2 617} 618 619define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 620; CHECK-LABEL: test_vaddhn_high_s32: 621; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 622entry: 623 %vaddhn.i.i = add <4 x i32> %a, %b 624 %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16> 625 %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> 626 %0 = bitcast <4 x i16> %r to <1 x i64> 627 %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> 628 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 629 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 630 ret <8 x i16> %2 631} 632 633define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 634; CHECK-LABEL: test_vaddhn_high_s64: 635; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 636entry: 637 %vaddhn.i.i = add <2 x i64> %a, %b 638 %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32> 639 %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> 640 %0 = bitcast <2 x i32> %r to <1 x i64> 641 %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> 642 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 643 %2 = bitcast <2 x 
i64> %shuffle.i.i to <4 x i32> 644 ret <4 x i32> %2 645} 646 647define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 648; CHECK-LABEL: test_vaddhn_high_u16: 649; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 650entry: 651 %vaddhn.i.i = add <8 x i16> %a, %b 652 %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 653 %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> 654 %0 = bitcast <8 x i8> %r to <1 x i64> 655 %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> 656 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 657 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 658 ret <16 x i8> %2 659} 660 661define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 662; CHECK-LABEL: test_vaddhn_high_u32: 663; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 664entry: 665 %vaddhn.i.i = add <4 x i32> %a, %b 666 %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16> 667 %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> 668 %0 = bitcast <4 x i16> %r to <1 x i64> 669 %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> 670 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 671 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 672 ret <8 x i16> %2 673} 674 675define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 676; CHECK-LABEL: test_vaddhn_high_u64: 677; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 678entry: 679 %vaddhn.i.i = add <2 x i64> %a, %b 680 %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32> 681 %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> 682 %0 = bitcast <2 x i32> %r to <1 x i64> 683 %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> 684 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 685 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 686 ret <4 x i32> %2 
687} 688 689define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) { 690; CHECK-LABEL: test_vraddhn_s16: 691; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 692entry: 693 %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 694 ret <8 x i8> %vraddhn2.i 695} 696 697define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) { 698; CHECK-LABEL: test_vraddhn_s32: 699; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 700entry: 701 %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 702 ret <4 x i16> %vraddhn2.i 703} 704 705define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) { 706; CHECK-LABEL: test_vraddhn_s64: 707; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 708entry: 709 %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 710 ret <2 x i32> %vraddhn2.i 711} 712 713define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) { 714; CHECK-LABEL: test_vraddhn_u16: 715; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 716entry: 717 %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 718 ret <8 x i8> %vraddhn2.i 719} 720 721define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) { 722; CHECK-LABEL: test_vraddhn_u32: 723; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 724entry: 725 %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 726 ret <4 x i16> %vraddhn2.i 727} 728 729define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) { 730; CHECK-LABEL: test_vraddhn_u64: 731; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 732entry: 733 %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 734 ret <2 x i32> %vraddhn2.i 735} 736 737define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 738; 
CHECK-LABEL: test_vraddhn_high_s16: 739; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 740entry: 741 %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 742 %0 = bitcast <8 x i8> %r to <1 x i64> 743 %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> 744 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 745 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 746 ret <16 x i8> %2 747} 748 749define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 750; CHECK-LABEL: test_vraddhn_high_s32: 751; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 752entry: 753 %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 754 %0 = bitcast <4 x i16> %r to <1 x i64> 755 %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> 756 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 757 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 758 ret <8 x i16> %2 759} 760 761define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 762; CHECK-LABEL: test_vraddhn_high_s64: 763; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 764entry: 765 %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 766 %0 = bitcast <2 x i32> %r to <1 x i64> 767 %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> 768 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 769 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 770 ret <4 x i32> %2 771} 772 773define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 774; CHECK-LABEL: test_vraddhn_high_u16: 775; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 776entry: 777 %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 778 %0 = bitcast <8 x i8> %r to <1 x i64> 779 %1 = bitcast <8 x i8> 
%vraddhn2.i.i to <1 x i64> 780 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 781 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 782 ret <16 x i8> %2 783} 784 785define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 786; CHECK-LABEL: test_vraddhn_high_u32: 787; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 788entry: 789 %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 790 %0 = bitcast <4 x i16> %r to <1 x i64> 791 %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> 792 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 793 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 794 ret <8 x i16> %2 795} 796 797define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 798; CHECK-LABEL: test_vraddhn_high_u64: 799; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 800entry: 801 %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 802 %0 = bitcast <2 x i32> %r to <1 x i64> 803 %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> 804 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 805 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 806 ret <4 x i32> %2 807} 808 809define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) { 810; CHECK-LABEL: test_vsubhn_s16: 811; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 812entry: 813 %vsubhn.i = sub <8 x i16> %a, %b 814 %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 815 %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> 816 ret <8 x i8> %vsubhn2.i 817} 818 819define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { 820; CHECK-LABEL: test_vsubhn_s32: 821; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 822entry: 823 %vsubhn.i = sub <4 x i32> %a, %b 824 %vsubhn1.i = lshr <4 x i32> 
%vsubhn.i, <i32 16, i32 16, i32 16, i32 16> 825 %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> 826 ret <4 x i16> %vsubhn2.i 827} 828 829define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { 830; CHECK-LABEL: test_vsubhn_s64: 831; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 832entry: 833 %vsubhn.i = sub <2 x i64> %a, %b 834 %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> 835 %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> 836 ret <2 x i32> %vsubhn2.i 837} 838 839define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { 840; CHECK-LABEL: test_vsubhn_u16: 841; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 842entry: 843 %vsubhn.i = sub <8 x i16> %a, %b 844 %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 845 %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> 846 ret <8 x i8> %vsubhn2.i 847} 848 849define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { 850; CHECK-LABEL: test_vsubhn_u32: 851; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 852entry: 853 %vsubhn.i = sub <4 x i32> %a, %b 854 %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> 855 %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> 856 ret <4 x i16> %vsubhn2.i 857} 858 859define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { 860; CHECK-LABEL: test_vsubhn_u64: 861; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 862entry: 863 %vsubhn.i = sub <2 x i64> %a, %b 864 %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> 865 %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> 866 ret <2 x i32> %vsubhn2.i 867} 868 869define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 870; CHECK-LABEL: test_vsubhn_high_s16: 871; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 872entry: 873 %vsubhn.i.i = sub <8 x i16> %a, %b 874 %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, 
i16 8, i16 8> 875 %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> 876 %0 = bitcast <8 x i8> %r to <1 x i64> 877 %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> 878 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 879 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 880 ret <16 x i8> %2 881} 882 883define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 884; CHECK-LABEL: test_vsubhn_high_s32: 885; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 886entry: 887 %vsubhn.i.i = sub <4 x i32> %a, %b 888 %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> 889 %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> 890 %0 = bitcast <4 x i16> %r to <1 x i64> 891 %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> 892 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 893 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 894 ret <8 x i16> %2 895} 896 897define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 898; CHECK-LABEL: test_vsubhn_high_s64: 899; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 900entry: 901 %vsubhn.i.i = sub <2 x i64> %a, %b 902 %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> 903 %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> 904 %0 = bitcast <2 x i32> %r to <1 x i64> 905 %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> 906 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 907 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 908 ret <4 x i32> %2 909} 910 911define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 912; CHECK-LABEL: test_vsubhn_high_u16: 913; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 914entry: 915 %vsubhn.i.i = sub <8 x i16> %a, %b 916 %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 917 %vsubhn2.i.i = trunc <8 x i16> 
%vsubhn1.i.i to <8 x i8> 918 %0 = bitcast <8 x i8> %r to <1 x i64> 919 %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> 920 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 921 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 922 ret <16 x i8> %2 923} 924 925define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 926; CHECK-LABEL: test_vsubhn_high_u32: 927; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 928entry: 929 %vsubhn.i.i = sub <4 x i32> %a, %b 930 %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> 931 %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> 932 %0 = bitcast <4 x i16> %r to <1 x i64> 933 %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> 934 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 935 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 936 ret <8 x i16> %2 937} 938 939define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 940; CHECK-LABEL: test_vsubhn_high_u64: 941; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 942entry: 943 %vsubhn.i.i = sub <2 x i64> %a, %b 944 %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> 945 %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> 946 %0 = bitcast <2 x i32> %r to <1 x i64> 947 %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> 948 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 949 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 950 ret <4 x i32> %2 951} 952 953define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) { 954; CHECK-LABEL: test_vrsubhn_s16: 955; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 956entry: 957 %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 958 ret <8 x i8> %vrsubhn2.i 959} 960 961define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) { 962; CHECK-LABEL: test_vrsubhn_s32: 963; CHECK: 
rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 964entry: 965 %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 966 ret <4 x i16> %vrsubhn2.i 967} 968 969define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) { 970; CHECK-LABEL: test_vrsubhn_s64: 971; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 972entry: 973 %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 974 ret <2 x i32> %vrsubhn2.i 975} 976 977define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) { 978; CHECK-LABEL: test_vrsubhn_u16: 979; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 980entry: 981 %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 982 ret <8 x i8> %vrsubhn2.i 983} 984 985define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) { 986; CHECK-LABEL: test_vrsubhn_u32: 987; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 988entry: 989 %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 990 ret <4 x i16> %vrsubhn2.i 991} 992 993define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) { 994; CHECK-LABEL: test_vrsubhn_u64: 995; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 996entry: 997 %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 998 ret <2 x i32> %vrsubhn2.i 999} 1000 1001define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1002; CHECK-LABEL: test_vrsubhn_high_s16: 1003; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1004entry: 1005 %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1006 %0 = bitcast <8 x i8> %r to <1 x i64> 1007 %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> 1008 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1009 %2 = bitcast <2 x i64> %shuffle.i.i 
to <16 x i8> 1010 ret <16 x i8> %2 1011} 1012 1013define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1014; CHECK-LABEL: test_vrsubhn_high_s32: 1015; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1016entry: 1017 %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1018 %0 = bitcast <4 x i16> %r to <1 x i64> 1019 %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> 1020 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1021 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1022 ret <8 x i16> %2 1023} 1024 1025define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1026; CHECK-LABEL: test_vrsubhn_high_s64: 1027; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1028entry: 1029 %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1030 %0 = bitcast <2 x i32> %r to <1 x i64> 1031 %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> 1032 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1033 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1034 ret <4 x i32> %2 1035} 1036 1037define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1038; CHECK-LABEL: test_vrsubhn_high_u16: 1039; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1040entry: 1041 %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1042 %0 = bitcast <8 x i8> %r to <1 x i64> 1043 %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> 1044 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1045 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1046 ret <16 x i8> %2 1047} 1048 1049define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1050; CHECK-LABEL: test_vrsubhn_high_u32: 1051; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1052entry: 
1053 %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1054 %0 = bitcast <4 x i16> %r to <1 x i64> 1055 %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> 1056 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1057 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1058 ret <8 x i16> %2 1059} 1060 1061define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1062; CHECK-LABEL: test_vrsubhn_high_u64: 1063; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1064entry: 1065 %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1066 %0 = bitcast <2 x i32> %r to <1 x i64> 1067 %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> 1068 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1069 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1070 ret <4 x i32> %2 1071} 1072 1073define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) { 1074; CHECK-LABEL: test_vabdl_s8: 1075; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1076entry: 1077 %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) 1078 %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> 1079 ret <8 x i16> %vmovl.i.i 1080} 1081 1082define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) { 1083; CHECK-LABEL: test_vabdl_s16: 1084; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1085entry: 1086 %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) 1087 %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> 1088 ret <4 x i32> %vmovl.i.i 1089} 1090 1091define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) { 1092; CHECK-LABEL: test_vabdl_s32: 1093; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1094entry: 1095 %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) 1096 %vmovl.i.i = zext <2 x i32> 
%vabd2.i.i to <2 x i64> 1097 ret <2 x i64> %vmovl.i.i 1098} 1099 1100define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) { 1101; CHECK-LABEL: test_vabdl_u8: 1102; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1103entry: 1104 %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) 1105 %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> 1106 ret <8 x i16> %vmovl.i.i 1107} 1108 1109define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) { 1110; CHECK-LABEL: test_vabdl_u16: 1111; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1112entry: 1113 %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) 1114 %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> 1115 ret <4 x i32> %vmovl.i.i 1116} 1117 1118define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) { 1119; CHECK-LABEL: test_vabdl_u32: 1120; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1121entry: 1122 %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) 1123 %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> 1124 ret <2 x i64> %vmovl.i.i 1125} 1126 1127define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { 1128; CHECK-LABEL: test_vabal_s8: 1129; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1130entry: 1131 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) 1132 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> 1133 %add.i = add <8 x i16> %vmovl.i.i.i, %a 1134 ret <8 x i16> %add.i 1135} 1136 1137define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { 1138; CHECK-LABEL: test_vabal_s16: 1139; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1140entry: 1141 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) 1142 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> 1143 %add.i = add <4 x i32> %vmovl.i.i.i, %a 1144 
ret <4 x i32> %add.i 1145} 1146 1147define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { 1148; CHECK-LABEL: test_vabal_s32: 1149; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1150entry: 1151 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) 1152 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> 1153 %add.i = add <2 x i64> %vmovl.i.i.i, %a 1154 ret <2 x i64> %add.i 1155} 1156 1157define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { 1158; CHECK-LABEL: test_vabal_u8: 1159; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1160entry: 1161 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) 1162 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> 1163 %add.i = add <8 x i16> %vmovl.i.i.i, %a 1164 ret <8 x i16> %add.i 1165} 1166 1167define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { 1168; CHECK-LABEL: test_vabal_u16: 1169; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1170entry: 1171 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) 1172 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> 1173 %add.i = add <4 x i32> %vmovl.i.i.i, %a 1174 ret <4 x i32> %add.i 1175} 1176 1177define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { 1178; CHECK-LABEL: test_vabal_u32: 1179; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1180entry: 1181 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) 1182 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> 1183 %add.i = add <2 x i64> %vmovl.i.i.i, %a 1184 ret <2 x i64> %add.i 1185} 1186 1187define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) { 1188; CHECK-LABEL: test_vabdl_high_s8: 1189; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1190entry: 1191 %shuffle.i.i = shufflevector <16 x i8> %a, <16 
x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1192 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1193 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1194 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> 1195 ret <8 x i16> %vmovl.i.i.i 1196} 1197 1198define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) { 1199; CHECK-LABEL: test_vabdl_high_s16: 1200; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1201entry: 1202 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1203 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1204 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1205 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> 1206 ret <4 x i32> %vmovl.i.i.i 1207} 1208 1209define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) { 1210; CHECK-LABEL: test_vabdl_high_s32: 1211; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1212entry: 1213 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1214 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1215 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1216 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> 1217 ret <2 x i64> %vmovl.i.i.i 1218} 1219 1220define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) { 1221; CHECK-LABEL: test_vabdl_high_u8: 1222; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1223entry: 1224 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1225 
%shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1226 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1227 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> 1228 ret <8 x i16> %vmovl.i.i.i 1229} 1230 1231define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) { 1232; CHECK-LABEL: test_vabdl_high_u16: 1233; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1234entry: 1235 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1236 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1237 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1238 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> 1239 ret <4 x i32> %vmovl.i.i.i 1240} 1241 1242define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) { 1243; CHECK-LABEL: test_vabdl_high_u32: 1244; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1245entry: 1246 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1247 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1248 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1249 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> 1250 ret <2 x i64> %vmovl.i.i.i 1251} 1252 1253define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { 1254; CHECK-LABEL: test_vabal_high_s8: 1255; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1256entry: 1257 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1258 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1259 %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1260 %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> 1261 %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a 1262 ret <8 x i16> %add.i.i 1263} 1264 1265define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { 1266; CHECK-LABEL: test_vabal_high_s16: 1267; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1268entry: 1269 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1270 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1271 %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1272 %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> 1273 %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a 1274 ret <4 x i32> %add.i.i 1275} 1276 1277define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { 1278; CHECK-LABEL: test_vabal_high_s32: 1279; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1280entry: 1281 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1282 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1283 %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1284 %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> 1285 %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a 1286 ret <2 x i64> %add.i.i 1287} 1288 1289define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { 1290; CHECK-LABEL: test_vabal_high_u8: 1291; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1292entry: 1293 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15> 1294 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1295 %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1296 %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> 1297 %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a 1298 ret <8 x i16> %add.i.i 1299} 1300 1301define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { 1302; CHECK-LABEL: test_vabal_high_u16: 1303; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1304entry: 1305 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1306 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1307 %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1308 %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> 1309 %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a 1310 ret <4 x i32> %add.i.i 1311} 1312 1313define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { 1314; CHECK-LABEL: test_vabal_high_u32: 1315; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1316entry: 1317 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1318 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1319 %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1320 %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> 1321 %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a 1322 ret <2 x i64> %add.i.i 1323} 1324 1325define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) { 1326; CHECK-LABEL: test_vmull_s8: 1327; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1328entry: 1329 %vmull.i = tail call <8 x i16> 
@llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) 1330 ret <8 x i16> %vmull.i 1331} 1332 1333define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) { 1334; CHECK-LABEL: test_vmull_s16: 1335; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1336entry: 1337 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) 1338 ret <4 x i32> %vmull2.i 1339} 1340 1341define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) { 1342; CHECK-LABEL: test_vmull_s32: 1343; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1344entry: 1345 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) 1346 ret <2 x i64> %vmull2.i 1347} 1348 1349define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) { 1350; CHECK-LABEL: test_vmull_u8: 1351; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1352entry: 1353 %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) 1354 ret <8 x i16> %vmull.i 1355} 1356 1357define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) { 1358; CHECK-LABEL: test_vmull_u16: 1359; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1360entry: 1361 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) 1362 ret <4 x i32> %vmull2.i 1363} 1364 1365define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) { 1366; CHECK-LABEL: test_vmull_u32: 1367; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1368entry: 1369 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) 1370 ret <2 x i64> %vmull2.i 1371} 1372 1373define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) { 1374; CHECK-LABEL: test_vmull_high_s8: 1375; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1376entry: 1377 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1378 
%shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1379 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1380 ret <8 x i16> %vmull.i.i 1381} 1382 1383define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) { 1384; CHECK-LABEL: test_vmull_high_s16: 1385; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1386entry: 1387 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1388 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1389 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1390 ret <4 x i32> %vmull2.i.i 1391} 1392 1393define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) { 1394; CHECK-LABEL: test_vmull_high_s32: 1395; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1396entry: 1397 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1398 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1399 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1400 ret <2 x i64> %vmull2.i.i 1401} 1402 1403define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) { 1404; CHECK-LABEL: test_vmull_high_u8: 1405; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1406entry: 1407 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1408 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1409 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1410 ret <8 x i16> %vmull.i.i 
1411} 1412 1413define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) { 1414; CHECK-LABEL: test_vmull_high_u16: 1415; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1416entry: 1417 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1418 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1419 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1420 ret <4 x i32> %vmull2.i.i 1421} 1422 1423define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) { 1424; CHECK-LABEL: test_vmull_high_u32: 1425; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1426entry: 1427 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1428 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1429 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1430 ret <2 x i64> %vmull2.i.i 1431} 1432 1433define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { 1434; CHECK-LABEL: test_vmlal_s8: 1435; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1436entry: 1437 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) 1438 %add.i = add <8 x i16> %vmull.i.i, %a 1439 ret <8 x i16> %add.i 1440} 1441 1442define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { 1443; CHECK-LABEL: test_vmlal_s16: 1444; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1445entry: 1446 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) 1447 %add.i = add <4 x i32> %vmull2.i.i, %a 1448 ret <4 x i32> %add.i 1449} 1450 1451define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { 1452; CHECK-LABEL: test_vmlal_s32: 1453; CHECK: smlal {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1454entry: 1455 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) 1456 %add.i = add <2 x i64> %vmull2.i.i, %a 1457 ret <2 x i64> %add.i 1458} 1459 1460define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { 1461; CHECK-LABEL: test_vmlal_u8: 1462; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1463entry: 1464 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) 1465 %add.i = add <8 x i16> %vmull.i.i, %a 1466 ret <8 x i16> %add.i 1467} 1468 1469define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { 1470; CHECK-LABEL: test_vmlal_u16: 1471; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1472entry: 1473 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) 1474 %add.i = add <4 x i32> %vmull2.i.i, %a 1475 ret <4 x i32> %add.i 1476} 1477 1478define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { 1479; CHECK-LABEL: test_vmlal_u32: 1480; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1481entry: 1482 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) 1483 %add.i = add <2 x i64> %vmull2.i.i, %a 1484 ret <2 x i64> %add.i 1485} 1486 1487define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { 1488; CHECK-LABEL: test_vmlal_high_s8: 1489; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1490entry: 1491 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1492 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1493 %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1494 %add.i.i = add <8 x i16> %vmull.i.i.i, %a 1495 ret <8 x i16> 
%add.i.i
}

; ---------------------------------------------------------------------------
; Widening multiply-accumulate, high half (signed / unsigned).
; Each test extracts the upper half of %b and %c with shufflevector, multiplies
; the halves with the smull/umull intrinsic and adds the accumulator %a.
; llc is expected to select a single smlal2/umlal2 instruction.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
  ret <4 x i32> %add.i.i
}

define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
  ret <2 x i64> %add.i.i
}

define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_u8:
; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %add.i.i = add <8 x i16> %vmull.i.i.i, %a
  ret <8 x i16> %add.i.i
}

define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
  ret <4 x i32> %add.i.i
}

define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
  ret <2 x i64> %add.i.i
}

; ---------------------------------------------------------------------------
; Widening multiply-subtract (signed / unsigned).
; The smull/umull intrinsic result is subtracted from the accumulator %a;
; llc is expected to fold the pair into smlsl/umlsl.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_s8:
; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %sub.i = sub <8 x i16> %a, %vmull.i.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %sub.i = sub <4 x i32> %a, %vmull2.i.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %sub.i = sub <2 x i64> %a, %vmull2.i.i
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_u8:
; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %sub.i = sub <8 x i16> %a, %vmull.i.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %sub.i = sub <4 x i32> %a, %vmull2.i.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %sub.i = sub <2 x i64> %a, %vmull2.i.i
  ret <2 x i64> %sub.i
}

; ---------------------------------------------------------------------------
; Widening multiply-subtract, high half (signed / unsigned).
; Same as above, but the multiplicands are the upper halves of %b and %c
; (selected with shufflevector); expected selection is smlsl2/umlsl2.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_s8:
; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
  ret <8 x i16> %sub.i.i
}

define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
  ret <4 x i32> %sub.i.i
}

define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
  ret <2 x i64> %sub.i.i
}

define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_u8:
; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
  ret <8 x i16> %sub.i.i
}

define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
  ret <4 x i32> %sub.i.i
}

define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
  ret <2 x i64> %sub.i.i
}

; ---------------------------------------------------------------------------
; Saturating doubling widening multiply (sqdmull) and its accumulate /
; subtract forms. The accumulate and subtract tests feed the sqdmull intrinsic
; result into the sqadd / sqsub intrinsic together with %a; llc is expected to
; combine each pair into sqdmlal / sqdmlsl.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vqdmull_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vqdmull_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_s16:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_s32:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlsl_s16:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlsl_s32:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; ---------------------------------------------------------------------------
; High-half variants of the sqdmull family: operands are the upper halves of
; the 128-bit inputs (selected with shufflevector); the expected instructions
; are the "2" forms sqdmull2 / sqdmlal2 / sqdmlsl2.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmull_high_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vqdmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  ret <4 x i32> %vqdmull2.i.i
}

define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmull_high_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vqdmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  ret <2 x i64> %vqdmull2.i.i
}

define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_high_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vqdmlal2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %vqdmlal4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
  ret <4 x i32> %vqdmlal4.i.i
}

define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_high_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vqdmlal2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %vqdmlal4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
  ret <2 x i64> %vqdmlal4.i.i
}

define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqdmlsl_high_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vqdmlsl2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %vqdmlsl4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
  ret <4 x i32> %vqdmlsl4.i.i
}

define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqdmlsl_high_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vqdmlsl2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %vqdmlsl4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
  ret <2 x i64> %vqdmlsl4.i.i
}

; ---------------------------------------------------------------------------
; Polynomial widening multiply (pmull / pmull2), 8-bit and 64-bit element
; forms. The 64-bit tests call the pmull64 intrinsic on scalar i64 operands
; and bitcast the <16 x i8> result to i128.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_p8:
; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i16> %vmull.i
}

define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_p8:
; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  ret <8 x i16> %vmull.i.i
}

; NOTE(review): the attribute groups #1, #4 and #5 referenced below have no
; visible definition in this part of the file -- confirm they are defined
; elsewhere, or drop the references.

; The label check carries a trailing ':' (as in every other test here) so it
; anchors on the function's label line rather than on any mention of the symbol.
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
; CHECK-LABEL: test_vmull_p64:
; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
entry:
  %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
  %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
  ret i128 %vmull3.i
}

; Element 1 of each <2 x i64> input is extracted and passed to pmull64; llc is
; expected to select pmull2 operating directly on the full .2d registers.
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
; CHECK-LABEL: test_vmull_high_p64:
; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %0 = extractelement <2 x i64> %a, i32 1
  %1 = extractelement <2 x i64> %b, i32 1
  %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %0, i64 %1) #1
  %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
  ret i128 %vmull3.i.i
}

declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5