; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
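; A vector min/max reduction combined with a scalar min/max of the same
; flavour (written as icmp + select) should fold into a single VMINV/VMAXV,
; which carries the running minimum/maximum in its GPR operand. The
; commute_* variants swap the icmp/select operands and should fold the same
; way.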

define arm_aapcs_vfpcc zeroext i8 @uminv16i8(<16 x i8> %vec, i8 zeroext %min) {
; CHECK-LABEL: uminv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u8 r0, q0
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %vec)
  %cmp = icmp ult i8 %x, %min
  %1 = select i1 %cmp, i8 %x, i8 %min
  ret i8 %1
}

define arm_aapcs_vfpcc zeroext i16 @uminv8i16(<8 x i16> %vec, i16 zeroext %min) {
; CHECK-LABEL: uminv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %vec)
  %cmp = icmp ult i16 %x, %min
  %1 = select i1 %cmp, i16 %x, i16 %min
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @uminv4i32(<4 x i32> %vec, i32 %min) {
; CHECK-LABEL: uminv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec)
  %cmp = icmp ult i32 %x, %min
  %1 = select i1 %cmp, i32 %x, i32 %min
  ret i32 %1
}

define arm_aapcs_vfpcc signext i8 @sminv16i8(<16 x i8> %vec, i8 signext %min) {
; CHECK-LABEL: sminv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s8 r0, q0
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %vec)
  %cmp = icmp slt i8 %x, %min
  %1 = select i1 %cmp, i8 %x, i8 %min
  ret i8 %1
}

define arm_aapcs_vfpcc signext i16 @sminv8i16(<8 x i16> %vec, i16 signext %min) {
; CHECK-LABEL: sminv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s16 r0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %vec)
  %cmp = icmp slt i16 %x, %min
  %1 = select i1 %cmp, i16 %x, i16 %min
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @sminv4i32(<4 x i32> %vec, i32 %min) {
; CHECK-LABEL: sminv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
  %cmp = icmp slt i32 %x, %min
  %1 = select i1 %cmp, i32 %x, i32 %min
  ret i32 %1
}

define arm_aapcs_vfpcc zeroext i8 @umaxv16i8(<16 x i8> %vec, i8 zeroext %max) {
; CHECK-LABEL: umaxv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u8 r0, q0
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %vec)
  %cmp = icmp ugt i8 %x, %max
  %1 = select i1 %cmp, i8 %x, i8 %max
  ret i8 %1
}

define arm_aapcs_vfpcc zeroext i16 @umaxv8i16(<8 x i16> %vec, i16 zeroext %max) {
; CHECK-LABEL: umaxv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %vec)
  %cmp = icmp ugt i16 %x, %max
  %1 = select i1 %cmp, i16 %x, i16 %max
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @umaxv4i32(<4 x i32> %vec, i32 %max) {
; CHECK-LABEL: umaxv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec)
  %cmp = icmp ugt i32 %x, %max
  %1 = select i1 %cmp, i32 %x, i32 %max
  ret i32 %1
}

define arm_aapcs_vfpcc signext i8 @smaxv16i8(<16 x i8> %vec, i8 signext %max) {
; CHECK-LABEL: smaxv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s8 r0, q0
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec)
  %cmp = icmp sgt i8 %x, %max
  %1 = select i1 %cmp, i8 %x, i8 %max
  ret i8 %1
}

define arm_aapcs_vfpcc signext i16 @smaxv8i16(<8 x i16> %vec, i16 signext %max) {
; CHECK-LABEL: smaxv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s16 r0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %vec)
  %cmp = icmp sgt i16 %x, %max
  %1 = select i1 %cmp, i16 %x, i16 %max
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @smaxv4i32(<4 x i32> %vec, i32 %max) {
; CHECK-LABEL: smaxv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
  %cmp = icmp sgt i32 %x, %max
  %1 = select i1 %cmp, i32 %x, i32 %max
  ret i32 %1
}

define arm_aapcs_vfpcc zeroext i8 @commute_uminv16i8(<16 x i8> %vec, i8 zeroext %min) {
; CHECK-LABEL: commute_uminv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u8 r0, q0
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %vec)
  %cmp = icmp ult i8 %min, %x
  %1 = select i1 %cmp, i8 %min, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc zeroext i16 @commute_uminv8i16(<8 x i16> %vec, i16 zeroext %min) {
; CHECK-LABEL: commute_uminv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %vec)
  %cmp = icmp ult i16 %min, %x
  %1 = select i1 %cmp, i16 %min, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @commute_uminv4i32(<4 x i32> %vec, i32 %min) {
; CHECK-LABEL: commute_uminv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec)
  %cmp = icmp ult i32 %min, %x
  %1 = select i1 %cmp, i32 %min, i32 %x
  ret i32 %1
}

define arm_aapcs_vfpcc signext i8 @commute_sminv16i8(<16 x i8> %vec, i8 signext %min) {
; CHECK-LABEL: commute_sminv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s8 r0, q0
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %vec)
  %cmp = icmp slt i8 %min, %x
  %1 = select i1 %cmp, i8 %min, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc signext i16 @commute_sminv8i16(<8 x i16> %vec, i16 signext %min) {
; CHECK-LABEL: commute_sminv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s16 r0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %vec)
  %cmp = icmp slt i16 %min, %x
  %1 = select i1 %cmp, i16 %min, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @commute_sminv4i32(<4 x i32> %vec, i32 %min) {
; CHECK-LABEL: commute_sminv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
  %cmp = icmp slt i32 %min, %x
  %1 = select i1 %cmp, i32 %min, i32 %x
  ret i32 %1
}

define arm_aapcs_vfpcc zeroext i8 @commute_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) {
; CHECK-LABEL: commute_umaxv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u8 r0, q0
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %vec)
  %cmp = icmp ugt i8 %max, %x
  %1 = select i1 %cmp, i8 %max, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc zeroext i16 @commute_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) {
; CHECK-LABEL: commute_umaxv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %vec)
  %cmp = icmp ugt i16 %max, %x
  %1 = select i1 %cmp, i16 %max, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @commute_umaxv4i32(<4 x i32> %vec, i32 %max) {
; CHECK-LABEL: commute_umaxv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec)
  %cmp = icmp ugt i32 %max, %x
  %1 = select i1 %cmp, i32 %max, i32 %x
  ret i32 %1
}

define arm_aapcs_vfpcc signext i8 @commute_smaxv16i8(<16 x i8> %vec, i8 signext %max) {
; CHECK-LABEL: commute_smaxv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s8 r0, q0
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec)
  %cmp = icmp sgt i8 %max, %x
  %1 = select i1 %cmp, i8 %max, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc signext i16 @commute_smaxv8i16(<8 x i16> %vec, i16 signext %max) {
; CHECK-LABEL: commute_smaxv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s16 r0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %vec)
  %cmp = icmp sgt i16 %max, %x
  %1 = select i1 %cmp, i16 %max, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @commute_smaxv4i32(<4 x i32> %vec, i32 %max) {
; CHECK-LABEL: commute_smaxv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
  %cmp = icmp sgt i32 %max, %x
  %1 = select i1 %cmp, i32 %max, i32 %x
  ret i32 %1
}

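; The mismatch cases compute a signed *minimum* of the smax reduction and the
; scalar (the select picks the opposite operand), so the accumulating VMAXV
; cannot be used: the reduction is seeded with -128 (mvn r1, #127), and the
; scalar clamp stays as a separate cmp + csel.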
define arm_aapcs_vfpcc signext i8 @mismatch_smaxv16i8(<16 x i8> %vec, i8 signext %max) {
; CHECK-LABEL: mismatch_smaxv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: mvn r1, #127
; CHECK-NEXT: vmaxv.s8 r1, q0
; CHECK-NEXT: sxtb r2, r1
; CHECK-NEXT: cmp r2, r0
; CHECK-NEXT: csel r0, r0, r1, gt
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec)
  %cmp = icmp sgt i8 %x, %max
  %1 = select i1 %cmp, i8 %max, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc signext i8 @mismatch2_smaxv16i8(<16 x i8> %vec, i8 signext %max) {
; CHECK-LABEL: mismatch2_smaxv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: mvn r1, #127
; CHECK-NEXT: vmaxv.s8 r1, q0
; CHECK-NEXT: sxtb r2, r1
; CHECK-NEXT: cmp r0, r2
; CHECK-NEXT: csel r0, r1, r0, gt
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec)
  %cmp = icmp sgt i8 %max, %x
  %1 = select i1 %cmp, i8 %x, i8 %max
  ret i8 %1
}

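; The inverted_* cases express the same reductions with the opposite
; predicate (e.g. umin written as "x > min ? min : x") and should fold to
; VMINV/VMAXV just the same.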
define arm_aapcs_vfpcc zeroext i8 @inverted_uminv16i8(<16 x i8> %vec, i8 zeroext %min) {
; CHECK-LABEL: inverted_uminv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u8 r0, q0
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %vec)
  %cmp = icmp ugt i8 %x, %min
  %1 = select i1 %cmp, i8 %min, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc zeroext i16 @inverted_uminv8i16(<8 x i16> %vec, i16 zeroext %min) {
; CHECK-LABEL: inverted_uminv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %vec)
  %cmp = icmp ugt i16 %x, %min
  %1 = select i1 %cmp, i16 %min, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @inverted_uminv4i32(<4 x i32> %vec, i32 %min) {
; CHECK-LABEL: inverted_uminv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.u32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec)
  %cmp = icmp ugt i32 %x, %min
  %1 = select i1 %cmp, i32 %min, i32 %x
  ret i32 %1
}

define arm_aapcs_vfpcc signext i8 @inverted_sminv16i8(<16 x i8> %vec, i8 signext %min) {
; CHECK-LABEL: inverted_sminv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s8 r0, q0
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %vec)
  %cmp = icmp sgt i8 %x, %min
  %1 = select i1 %cmp, i8 %min, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc signext i16 @inverted_sminv8i16(<8 x i16> %vec, i16 signext %min) {
; CHECK-LABEL: inverted_sminv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s16 r0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %vec)
  %cmp = icmp sgt i16 %x, %min
  %1 = select i1 %cmp, i16 %min, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @inverted_sminv4i32(<4 x i32> %vec, i32 %min) {
; CHECK-LABEL: inverted_sminv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vminv.s32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
  %cmp = icmp sgt i32 %x, %min
  %1 = select i1 %cmp, i32 %min, i32 %x
  ret i32 %1
}

define arm_aapcs_vfpcc zeroext i8 @inverted_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) {
; CHECK-LABEL: inverted_umaxv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u8 r0, q0
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %vec)
  %cmp = icmp ult i8 %x, %max
  %1 = select i1 %cmp, i8 %max, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc zeroext i16 @inverted_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) {
; CHECK-LABEL: inverted_umaxv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %vec)
  %cmp = icmp ult i16 %x, %max
  %1 = select i1 %cmp, i16 %max, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @inverted_umaxv4i32(<4 x i32> %vec, i32 %max) {
; CHECK-LABEL: inverted_umaxv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.u32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec)
  %cmp = icmp ult i32 %x, %max
  %1 = select i1 %cmp, i32 %max, i32 %x
  ret i32 %1
}

define arm_aapcs_vfpcc signext i8 @inverted_smaxv16i8(<16 x i8> %vec, i8 signext %max) {
; CHECK-LABEL: inverted_smaxv16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s8 r0, q0
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: bx lr
  %x = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %vec)
  %cmp = icmp slt i8 %x, %max
  %1 = select i1 %cmp, i8 %max, i8 %x
  ret i8 %1
}

define arm_aapcs_vfpcc signext i16 @inverted_smaxv8i16(<8 x i16> %vec, i16 signext %max) {
; CHECK-LABEL: inverted_smaxv8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s16 r0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %vec)
  %cmp = icmp slt i16 %x, %max
  %1 = select i1 %cmp, i16 %max, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc i32 @inverted_smaxv4i32(<4 x i32> %vec, i32 %max) {
; CHECK-LABEL: inverted_smaxv4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmaxv.s32 r0, q0
; CHECK-NEXT: bx lr
  %x = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
  %cmp = icmp slt i32 %x, %max
  %1 = select i1 %cmp, i32 %max, i32 %x
  ret i32 %1
}

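; Here the scalar compare is performed at i32 on the extended reduction
; result, so the combined form is not used: the reduction is lowered on its
; own (VMAXV seeded with its identity value) and the select becomes a
; separate cmp + csel.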
define arm_aapcs_vfpcc signext i16 @trunc_and_sext(<8 x i16> %vec, i32 %max) {
; CHECK-LABEL: trunc_and_sext:
; CHECK: @ %bb.0:
; CHECK-NEXT: movw r1, #32768
; CHECK-NEXT: movt r1, #65535
; CHECK-NEXT: vmaxv.s16 r1, q0
; CHECK-NEXT: sxth r2, r1
; CHECK-NEXT: cmp r0, r2
; CHECK-NEXT: csel r0, r0, r1, gt
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %vec)
  %xs = sext i16 %x to i32
  %cmp = icmp sgt i32 %max, %xs
  %mt = trunc i32 %max to i16
  %1 = select i1 %cmp, i16 %mt, i16 %x
  ret i16 %1
}

define arm_aapcs_vfpcc signext i16 @trunc_and_zext(<8 x i16> %vec, i32 %max) {
; CHECK-LABEL: trunc_and_zext:
; CHECK: @ %bb.0:
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmaxv.u16 r1, q0
; CHECK-NEXT: uxth r2, r1
; CHECK-NEXT: cmp r0, r2
; CHECK-NEXT: csel r0, r0, r1, gt
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
  %x = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %vec)
  %xs = zext i16 %x to i32
  %cmp = icmp sgt i32 %max, %xs
  %mt = trunc i32 %max to i16
  %1 = select i1 %cmp, i16 %mt, i16 %x
  ret i16 %1
}

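; MVE has no 64-bit VMINV/VMAXV, so the v2i64 reductions are expanded into
; scalar compares and csels across the two lanes.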
define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) {
; CHECK-LABEL: uminv2i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r12, s3
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: cmp lr, r12
; CHECK-NEXT: csel r4, r2, r3, lo
; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: csel r2, r2, r3, lo
; CHECK-NEXT: cmp lr, r12
; CHECK-NEXT: csel r5, r2, r4, eq
; CHECK-NEXT: csel r3, lr, r12, lo
; CHECK-NEXT: subs r2, r5, r0
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: sbcs.w r2, r3, r1
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csel r0, r5, r0, ne
; CHECK-NEXT: csel r1, r3, r1, ne
; CHECK-NEXT: pop {r4, r5, r7, pc}
  %x = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
  %cmp = icmp ult i64 %x, %min
  %1 = select i1 %cmp, i64 %x, i64 %min
  ret i64 %1
}

define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) {
; CHECK-LABEL: sminv2i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r12, s3
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: cmp lr, r12
; CHECK-NEXT: csel r4, r2, r3, lt
; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: csel r2, r2, r3, lo
; CHECK-NEXT: cmp lr, r12
; CHECK-NEXT: csel r5, r2, r4, eq
; CHECK-NEXT: csel r3, lr, r12, lt
; CHECK-NEXT: subs r2, r5, r0
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: sbcs.w r2, r3, r1
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csel r0, r5, r0, ne
; CHECK-NEXT: csel r1, r3, r1, ne
; CHECK-NEXT: pop {r4, r5, r7, pc}
  %x = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
  %cmp = icmp slt i64 %x, %min
  %1 = select i1 %cmp, i64 %x, i64 %min
  ret i64 %1
}

define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) {
; CHECK-LABEL: umaxv2i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r12, s3
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: cmp lr, r12
; CHECK-NEXT: csel r4, r2, r3, hi
; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: csel r2, r2, r3, hi
; CHECK-NEXT: cmp lr, r12
; CHECK-NEXT: csel r5, r2, r4, eq
; CHECK-NEXT: csel r3, lr, r12, hi
; CHECK-NEXT: subs r2, r0, r5
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: sbcs.w r2, r1, r3
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csel r0, r5, r0, ne
; CHECK-NEXT: csel r1, r3, r1, ne
; CHECK-NEXT: pop {r4, r5, r7, pc}
  %x = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
  %cmp = icmp ugt i64 %x, %max
  %1 = select i1 %cmp, i64 %x, i64 %max
  ret i64 %1
}

define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) {
; CHECK-LABEL: smaxv2i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r12, s3
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: cmp lr, r12
; CHECK-NEXT: csel r4, r2, r3, gt
; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: csel r2, r2, r3, hi
; CHECK-NEXT: cmp lr, r12
; CHECK-NEXT: csel r5, r2, r4, eq
; CHECK-NEXT: csel r3, lr, r12, gt
; CHECK-NEXT: subs r2, r0, r5
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: sbcs.w r2, r1, r3
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csel r0, r5, r0, ne
; CHECK-NEXT: csel r1, r3, r1, ne
; CHECK-NEXT: pop {r4, r5, r7, pc}
  %x = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
  %cmp = icmp sgt i64 %x, %max
  %1 = select i1 %cmp, i64 %x, i64 %max
  ret i64 %1
}

declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)

declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)

declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)

declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)

declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)

declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)

declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)

declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)

declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)

declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)

declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)

declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)

declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)

declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)

declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)