; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Codegen tests for shufflevector on i1 (predicate) vectors with MVE enabled.
; Every function compares its source vector(s) against zero, shuffles the
; resulting <N x i1> value, and uses the shuffled value as the condition of a
; vector select between %a and %b.

; shuffle1_*: the compare result is lane-reversed before the select.

define <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle1_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle1_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle1_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle2_*: identity shuffle mask (lanes are kept in their original order).

define <4 x i32> @shuffle2_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle2_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle2_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle2_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle2_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle2_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle3_*: lane 0 of the compare result is splatted to every lane.

define <4 x i32> @shuffle3_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle3_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle3_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle3_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle3_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle3_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vdup.8 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i8 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle4_*: lane 0 in every position except the last, which takes lane 1.

define <4 x i32> @shuffle4_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle4_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.f32 s4, s0
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle4_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle4_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i16 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle4_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle4_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vdup.8 q1, r0
; CHECK-NEXT:    vmov.u8 r0, q0[1]
; CHECK-NEXT:    vmov.8 q1[15], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i8 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle5_b_* / shuffle5_t_*: extract the bottom / top half of a compare
; result that has twice as many lanes as the selected vectors.

define <4 x i32> @shuffle5_b_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle5_b_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vmov.32 q1[0], r0
; CHECK-NEXT:    vmov.u16 r0, q0[1]
; CHECK-NEXT:    vmov.32 q1[1], r0
; CHECK-NEXT:    vmov.u16 r0, q0[2]
; CHECK-NEXT:    vmov.32 q1[2], r0
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    vmov.32 q1[3], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <4 x i32> @shuffle5_t_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle5_t_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[4]
; CHECK-NEXT:    vmov.32 q1[0], r0
; CHECK-NEXT:    vmov.u16 r0, q0[5]
; CHECK-NEXT:    vmov.32 q1[1], r0
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.32 q1[2], r0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.32 q1[3], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle5_b_v8i16(<16 x i8> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle5_b_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q1, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <8 x i16> @shuffle5_t_v8i16(<16 x i8> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle5_t_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q1, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

; shuffle6_*: concatenate two compare results into a single value with twice
; as many lanes, then select on it.

define <8 x i16> @shuffle6_v4i32(<4 x i32> %src1, <4 x i32> %src2, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle6_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0x0
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vmov.i8 q2, #0xff
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vpsel q3, q2, q1
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q3, [r0]
; CHECK-NEXT:    vcmp.i32 eq, q3, zr
; CHECK-NEXT:    vpsel q1, q2, q1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %src1, zeroinitializer
  %c2 = icmp eq <4 x i32> %src2, zeroinitializer
  %sh = shufflevector <4 x i1> %c1, <4 x i1> %c2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle6_v8i16(<8 x i16> %src1, <8 x i16> %src2, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle6_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0x0
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vmov.i8 q2, #0xff
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vpsel q3, q2, q1
; CHECK-NEXT:    vmov.u16 r0, q3[0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u16 r0, q3[1]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u16 r0, q3[2]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u16 r0, q3[3]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u16 r0, q3[4]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u16 r0, q3[5]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u16 r0, q3[6]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u16 r0, q3[7]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q3, [r0]
; CHECK-NEXT:    vcmp.i16 eq, q3, zr
; CHECK-NEXT:    vpsel q1, q2, q1
; CHECK-NEXT:    vmov.u16 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u16 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u16 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u16 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u16 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u16 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u16 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u16 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vcmp.i8 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <8 x i16> %src1, zeroinitializer
  %c2 = icmp eq <8 x i16> %src2, zeroinitializer
  %sh = shufflevector <8 x i1> %c1, <8 x i1> %c2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}