; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)

declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)

declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)

define i16 @test_vaddlv_s8(<8 x i8> %a) {
; CHECK: test_vaddlv_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_s16(<4 x i16> %a) {
; CHECK: test_vaddlv_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %saddlvv.i
}

define i16 @test_vaddlv_u8(<8 x i8> %a) {
; CHECK: test_vaddlv_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_u16(<4 x i16> %a) {
; CHECK: test_vaddlv_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %uaddlvv.i
}

define i16 @test_vaddlvq_s8(<16 x i8> %a) {
; CHECK: test_vaddlvq_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_s16(<8 x i16> %a) {
; CHECK: test_vaddlvq_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %saddlvv.i
}

define i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK: test_vaddlvq_s32:
; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %saddlvv.i
}

define i16 @test_vaddlvq_u8(<16 x i8> %a) {
; CHECK: test_vaddlvq_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_u16(<8 x i16> %a) {
; CHECK: test_vaddlvq_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %uaddlvv.i
}

define i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK: test_vaddlvq_u32:
; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %uaddlvv.i
}

define i8 @test_vmaxv_s8(<8 x i8> %a) {
; CHECK: test_vmaxv_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_s16(<4 x i16> %a) {
; CHECK: test_vmaxv_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxv_u8(<8 x i8> %a) {
; CHECK: test_vmaxv_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_u16(<4 x i16> %a) {
; CHECK: test_vmaxv_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxvq_s8(<16 x i8> %a) {
; CHECK: test_vmaxvq_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_s16(<8 x i16> %a) {
; CHECK: test_vmaxvq_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_s32(<4 x i32> %a) {
; CHECK: test_vmaxvq_s32:
; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %smaxv.i
}

define i8 @test_vmaxvq_u8(<16 x i8> %a) {
; CHECK: test_vmaxvq_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_u16(<8 x i16> %a) {
; CHECK: test_vmaxvq_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_u32(<4 x i32> %a) {
; CHECK: test_vmaxvq_u32:
; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %umaxv.i
}

define i8 @test_vminv_s8(<8 x i8> %a) {
; CHECK: test_vminv_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_s16(<4 x i16> %a) {
; CHECK: test_vminv_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i8 @test_vminv_u8(<8 x i8> %a) {
; CHECK: test_vminv_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_u16(<4 x i16> %a) {
; CHECK: test_vminv_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i8 @test_vminvq_s8(<16 x i8> %a) {
; CHECK: test_vminvq_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_s16(<8 x i16> %a) {
; CHECK: test_vminvq_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_s32(<4 x i32> %a) {
; CHECK: test_vminvq_s32:
; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a)
  ret i32 %sminv.i
}

define i8 @test_vminvq_u8(<16 x i8> %a) {
; CHECK: test_vminvq_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_u16(<8 x i16> %a) {
; CHECK: test_vminvq_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_u32(<4 x i32> %a) {
; CHECK: test_vminvq_u32:
; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a)
  ret i32 %uminv.i
}

define i8 @test_vaddv_s8(<8 x i8> %a) {
; CHECK: test_vaddv_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_s16(<4 x i16> %a) {
; CHECK: test_vaddv_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddv_u8(<8 x i8> %a) {
; CHECK: test_vaddv_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_u16(<4 x i16> %a) {
; CHECK: test_vaddv_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK: test_vaddvq_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK: test_vaddvq_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK: test_vaddvq_s32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

define i8 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK: test_vaddvq_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK: test_vaddvq_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK: test_vaddvq_u32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

define float @test_vmaxvq_f32(<4 x float> %a) {
; CHECK: test_vmaxvq_f32:
; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminvq_f32(<4 x float> %a) {
; CHECK: test_vminvq_f32:
; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vmaxnmvq_f32(<4 x float> %a) {
; CHECK: test_vmaxnmvq_f32:
; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminnmvq_f32(<4 x float> %a) {
; CHECK: test_vminnmvq_f32:
; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}