; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple armv7-linux-gnueabihf -mattr=+neon | FileCheck %s

; This test checks the @llvm.cttz.* intrinsics for vectors.
;
; Every function below loads a vector from %p, applies the cttz intrinsic to
; it, and stores the result back to %p. Element widths i8, i16, i32 and i64
; are covered, with element counts from 1 up to 16 (i8), 8 (i16), 4 (i32) and
; 2 (i64).

declare <1 x i8> @llvm.cttz.v1i8(<1 x i8>, i1)
declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1)
declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1)
declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)

declare <1 x i16> @llvm.cttz.v1i16(<1 x i16>, i1)
declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1)
declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)

declare <1 x i32> @llvm.cttz.v1i32(<1 x i32>, i1)
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)

declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1)
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)

;------------------------------------------------------------------------------
; cttz called with the second argument set to 'i1 false'.

define void @test_v1i8(<1 x i8>* %p) {
; CHECK-LABEL: test_v1i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    orr r1, r1, #256
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i8>, <1 x i8>* %p
  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 false)
  store <1 x i8> %tmp, <1 x i8>* %p
  ret void
}

define void @test_v2i8(<2 x i8>* %p) {
; CHECK-LABEL: test_v2i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.16 {d16[0]}, [r0:16]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vorr.i32 d16, #0x100
; CHECK-NEXT:    vneg.s32 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vmov.32 r1, d16[1]
; CHECK-NEXT:    vmov.32 r2, d16[0]
; CHECK-NEXT:    strb r1, [r0, #1]
; CHECK-NEXT:    strb r2, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i8>, <2 x i8>* %p
  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 false)
  store <2 x i8> %tmp, <2 x i8>* %p
  ret void
}

define void @test_v4i8(<4 x i8>* %p) {
; CHECK-LABEL: test_v4i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmov.i16 d19, #0x1
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vorr.i16 d16, #0x100
; CHECK-NEXT:    vneg.s16 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vsub.i16 d16, d16, d19
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vuzp.8 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    bx lr
  %a = load <4 x i8>, <4 x i8>* %p
  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 false)
  store <4 x i8> %tmp, <4 x i8>* %p
  ret void
}

define void @test_v8i8(<8 x i8>* %p) {
; CHECK-LABEL: test_v8i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.i8 d18, #0x1
; CHECK-NEXT:    vneg.s8 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vsub.i8 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <8 x i8>, <8 x i8>* %p
  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 false)
  store <8 x i8> %tmp, <8 x i8>* %p
  ret void
}

define void @test_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: test_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.i8 q10, #0x1
; CHECK-NEXT:    vneg.s8 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vsub.i8 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <16 x i8>, <16 x i8>* %p
  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %tmp, <16 x i8>* %p
  ret void
}

define void @test_v1i16(<1 x i16>* %p) {
; CHECK-LABEL: test_v1i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrh r1, [r0]
; CHECK-NEXT:    orr r1, r1, #65536
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i16>, <1 x i16>* %p
  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 false)
  store <1 x i16> %tmp, <1 x i16>* %p
  ret void
}

define void @test_v2i16(<2 x i16>* %p) {
; CHECK-LABEL: test_v2i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vorr.i32 d16, #0x10000
; CHECK-NEXT:    vneg.s32 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vuzp.16 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    bx lr
  %a = load <2 x i16>, <2 x i16>* %p
  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 false)
  store <2 x i16> %tmp, <2 x i16>* %p
  ret void
}

define void @test_v4i16(<4 x i16>* %p) {
; CHECK-LABEL: test_v4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.i16 d18, #0x1
; CHECK-NEXT:    vneg.s16 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vsub.i16 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <4 x i16>, <4 x i16>* %p
  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 false)
  store <4 x i16> %tmp, <4 x i16>* %p
  ret void
}

define void @test_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: test_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.i16 q10, #0x1
; CHECK-NEXT:    vneg.s16 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vsub.i16 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vpaddl.u8 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <8 x i16>, <8 x i16>* %p
  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %tmp, <8 x i16>* %p
  ret void
}

define void @test_v1i32(<1 x i32>* %p) {
; CHECK-LABEL: test_v1i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i32>, <1 x i32>* %p
  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 false)
  store <1 x i32> %tmp, <1 x i32>* %p
  ret void
}

define void @test_v2i32(<2 x i32>* %p) {
; CHECK-LABEL: test_v2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.i32 d18, #0x1
; CHECK-NEXT:    vneg.s32 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vsub.i32 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vpaddl.u16 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i32>, <2 x i32>* %p
  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
  store <2 x i32> %tmp, <2 x i32>* %p
  ret void
}

define void @test_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: test_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.i32 q10, #0x1
; CHECK-NEXT:    vneg.s32 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vsub.i32 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vpaddl.u8 q8, q8
; CHECK-NEXT:    vpaddl.u16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <4 x i32>, <4 x i32>* %p
  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %tmp, <4 x i32>* %p
  ret void
}

define void @test_v1i64(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x0
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vmov.i64 d18, #0xffffffffffffffff
; CHECK-NEXT:    vsub.i64 d16, d16, d17
; CHECK-NEXT:    vand d16, d17, d16
; CHECK-NEXT:    vadd.i64 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vpaddl.u16 d16, d16
; CHECK-NEXT:    vpaddl.u32 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i64>, <1 x i64>* %p
  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)
  store <1 x i64> %tmp, <1 x i64>* %p
  ret void
}

define void @test_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0x0
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vmov.i64 q10, #0xffffffffffffffff
; CHECK-NEXT:    vsub.i64 q8, q8, q9
; CHECK-NEXT:    vand q8, q9, q8
; CHECK-NEXT:    vadd.i64 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vpaddl.u8 q8, q8
; CHECK-NEXT:    vpaddl.u16 q8, q8
; CHECK-NEXT:    vpaddl.u32 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i64>, <2 x i64>* %p
  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %tmp, <2 x i64>* %p
  ret void
}

;------------------------------------------------------------------------------
; cttz called with the second argument set to 'i1 true' (the *_zero_undef
; variants).

define void @test_v1i8_zero_undef(<1 x i8>* %p) {
; CHECK-LABEL: test_v1i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i8>, <1 x i8>* %p
  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 true)
  store <1 x i8> %tmp, <1 x i8>* %p
  ret void
}

define void @test_v2i8_zero_undef(<2 x i8>* %p) {
; CHECK-LABEL: test_v2i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.16 {d16[0]}, [r0:16]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vneg.s32 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vmov.32 r1, d16[1]
; CHECK-NEXT:    vmov.32 r2, d16[0]
; CHECK-NEXT:    strb r1, [r0, #1]
; CHECK-NEXT:    strb r2, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i8>, <2 x i8>* %p
  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
  store <2 x i8> %tmp, <2 x i8>* %p
  ret void
}

define void @test_v4i8_zero_undef(<4 x i8>* %p) {
; CHECK-LABEL: test_v4i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vneg.s16 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i16 d17, #0xf
; CHECK-NEXT:    vclz.i16 d16, d16
; CHECK-NEXT:    vsub.i16 d16, d17, d16
; CHECK-NEXT:    vuzp.8 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    bx lr
  %a = load <4 x i8>, <4 x i8>* %p
  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
  store <4 x i8> %tmp, <4 x i8>* %p
  ret void
}

define void @test_v8i8_zero_undef(<8 x i8>* %p) {
; CHECK-LABEL: test_v8i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.i8 d18, #0x1
; CHECK-NEXT:    vneg.s8 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vsub.i8 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <8 x i8>, <8 x i8>* %p
  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
  store <8 x i8> %tmp, <8 x i8>* %p
  ret void
}

define void @test_v16i8_zero_undef(<16 x i8>* %p) {
; CHECK-LABEL: test_v16i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.i8 q10, #0x1
; CHECK-NEXT:    vneg.s8 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vsub.i8 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <16 x i8>, <16 x i8>* %p
  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %tmp, <16 x i8>* %p
  ret void
}

define void @test_v1i16_zero_undef(<1 x i16>* %p) {
; CHECK-LABEL: test_v1i16_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrh r1, [r0]
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i16>, <1 x i16>* %p
  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 true)
  store <1 x i16> %tmp, <1 x i16>* %p
  ret void
}

define void @test_v2i16_zero_undef(<2 x i16>* %p) {
; CHECK-LABEL: test_v2i16_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vneg.s32 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vuzp.16 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    bx lr
  %a = load <2 x i16>, <2 x i16>* %p
  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
  store <2 x i16> %tmp, <2 x i16>* %p
  ret void
}

define void @test_v4i16_zero_undef(<4 x i16>* %p) {
; CHECK-LABEL: test_v4i16_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vneg.s16 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vmov.i16 d17, #0xf
; CHECK-NEXT:    vclz.i16 d16, d16
; CHECK-NEXT:    vsub.i16 d16, d17, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <4 x i16>, <4 x i16>* %p
  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
  store <4 x i16> %tmp, <4 x i16>* %p
  ret void
}

define void @test_v8i16_zero_undef(<8 x i16>* %p) {
; CHECK-LABEL: test_v8i16_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vneg.s16 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vmov.i16 q9, #0xf
; CHECK-NEXT:    vclz.i16 q8, q8
; CHECK-NEXT:    vsub.i16 q8, q9, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <8 x i16>, <8 x i16>* %p
  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %tmp, <8 x i16>* %p
  ret void
}

define void @test_v1i32_zero_undef(<1 x i32>* %p) {
; CHECK-LABEL: test_v1i32_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i32>, <1 x i32>* %p
  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 true)
  store <1 x i32> %tmp, <1 x i32>* %p
  ret void
}

define void @test_v2i32_zero_undef(<2 x i32>* %p) {
; CHECK-LABEL: test_v2i32_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vneg.s32 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i32>, <2 x i32>* %p
  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
  store <2 x i32> %tmp, <2 x i32>* %p
  ret void
}

define void @test_v4i32_zero_undef(<4 x i32>* %p) {
; CHECK-LABEL: test_v4i32_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vneg.s32 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vmov.i32 q9, #0x1f
; CHECK-NEXT:    vclz.i32 q8, q8
; CHECK-NEXT:    vsub.i32 q8, q9, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <4 x i32>, <4 x i32>* %p
  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
  store <4 x i32> %tmp, <4 x i32>* %p
  ret void
}

define void @test_v1i64_zero_undef(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x0
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vmov.i64 d18, #0xffffffffffffffff
; CHECK-NEXT:    vsub.i64 d16, d16, d17
; CHECK-NEXT:    vand d16, d17, d16
; CHECK-NEXT:    vadd.i64 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vpaddl.u16 d16, d16
; CHECK-NEXT:    vpaddl.u32 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i64>, <1 x i64>* %p
  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
  store <1 x i64> %tmp, <1 x i64>* %p
  ret void
}

define void @test_v2i64_zero_undef(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0x0
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vmov.i64 q10, #0xffffffffffffffff
; CHECK-NEXT:    vsub.i64 q8, q8, q9
; CHECK-NEXT:    vand q8, q9, q8
; CHECK-NEXT:    vadd.i64 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vpaddl.u8 q8, q8
; CHECK-NEXT:    vpaddl.u16 q8, q8
; CHECK-NEXT:    vpaddl.u32 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i64>, <2 x i64>* %p
  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %tmp, <2 x i64>* %p
  ret void
}