; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s

; Every function here applies one shufflevector with a constant mask to
; (%s1, %s2) and returns the result. The autogenerated assertions pin the
; assembly expected from both llc invocations above.
;
; Naming: the suffix spells the mask one character per result lane, using
; 0-9 then a=10, b=11, ...; 'u' marks an undef lane. For these 8-lane
; vectors, indices 8-15 select lanes 0-7 of %s2.

; i16

; Mask <4..7, 0..3>: swaps the low and high four-lane halves of %s1.
; Expected output: four vmov.f32 moves into s4-s7, then vmov q0, q1.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_45670123(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %out
}

; Mask <6,7, 4,5, 2,3, 0,1>: adjacent lane pairs stay together, but the four
; pairs are placed in reverse order.
; Expected output: four vmov.f32 moves (s3..s0 into s4..s7), then vmov q0, q1.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_67452301(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_67452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %out
}

; Mask <7..0>: reverses all eight lanes of %s1.
; Expected output: the input is copied to q1, then each lane is moved one at
; a time through r0 (vmov.u16 extract followed by vmov.16 insert).
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_76543210(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_76543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u16 r0, q1[6]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u16 r0, q1[5]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u16 r0, q1[4]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u16 r0, q1[3]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u16 r0, q1[2]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u16 r0, q1[1]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u16 r0, q1[0]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %out
}

; Identity mask <0..7>: the only expected instruction is the return.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_01234567(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_01234567:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %out
}

; Mask <0..3, 12..15>: lanes 0-3 come from %s1 and lanes 4-7 come from lanes
; 4-7 of %s2. This is the only i16 test here that reads %s2.
; Expected output: two vmov.f32 moves (s6 -> s2, s7 -> s3), result left in q0.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0123cdef(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_0123cdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %out
}

; Only the odd result lanes are defined (indices 7, 5, 3, 1); even lanes are
; undef. The expected output is the same four-move sequence as
; shuffle_i16_67452301.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_u7u5u3u1(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_u7u5u3u1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
  ret <8 x i16> %out
}

; Only the even result lanes are defined (indices 6, 4, 2, 0); odd lanes are
; undef. The expected output is the same four-move sequence as
; shuffle_i16_67452301.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_6u4u2u0u(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_6u4u2u0u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
  ret <8 x i16> %out
}

; Only result lane 0 is defined, and it selects index 0 of %s1; all other
; lanes are undef. The only expected instruction is the return.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0uuuuuuu(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_0uuuuuuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %out
}

; Only result lane 4 is defined, and it selects index 0 of %s1.
; Expected output: lane 0 is extracted into r0 and duplicated with vdup.16.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_uuuu0uuu(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_uuuu0uuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %out
}


; i8

; Mask <12..15, 8..11, 4..7, 0..3>: the four 4-byte groups of %s1 are placed
; in reverse order, with byte order inside each group unchanged.
; Expected output: four vmov.f32 moves into s4-s7, then vmov q0, q1.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45670123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdef89ab45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %out
}

; Mask <14,15, 12,13, ..., 0,1>: the eight 2-byte pairs of %s1 are placed in
; reverse order, with byte order inside each pair unchanged.
; Expected output: the input is copied to q1, then every byte is moved
; individually through r0 (vmov.u8 extract followed by vmov.8 insert).
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_efcdab8967452301(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_efcdab8967452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[14]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <16 x i8> %out
}

; Mask <15..0>: reverses all sixteen bytes of %s1.
; Expected output: the input is copied to q1, then every byte is moved
; individually through r0.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_fedcba9876543210(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_fedcba9876543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[15]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %out
}

; Identity mask <0..15>: the only expected instruction is the return.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123456789abcdef(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_0123456789abcdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %out
}

; Mask <0..3, 16..19, 4..7, 20..23>: interleaves 4-byte groups from both
; operands (indices 16 and up select bytes of %s2): %s1 bytes 0-3, %s2 bytes
; 0-3, %s1 bytes 4-7, %s2 bytes 4-7.
; Expected output: four vmov.f32 moves into s8-s11, then vmov q0, q2.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123ghij4567klmn(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_0123ghij4567klmn:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s8, s0
; CHECK-NEXT:    vmov.f32 s9, s4
; CHECK-NEXT:    vmov.f32 s10, s1
; CHECK-NEXT:    vmov.f32 s11, s5
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %out
}

; Same mask as shuffle_i8_cdef89ab45670123, but with one index in each 4-byte
; group replaced by undef. The expected output is the same four-move sequence.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdeu89ub4u67u123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdeu89ub4u67u123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 undef, i32 8, i32 9, i32 undef, i32 11, i32 4, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 3>
  ret <16 x i8> %out
}

; As above, but with two indices in each 4-byte group replaced by undef.
; The expected output is still the four-move group reversal.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cduu8uubuu67u12u(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cduu8uubuu67u12u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 undef>
  ret <16 x i8> %out
}

; Only one index in each 4-byte group is defined (12, 11, 6, 2); each sits at
; the position it would have in the full group-reversal mask.
; The expected output is still the four-move group reversal.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cuuuuuubuu6uuu2u(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cuuuuuubuu6uuu2u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 undef, i32 undef, i32 undef, i32 2, i32 undef>
  ret <16 x i8> %out
}

; Group reversal where the third group is <4, 5, undef, 0> instead of <4..7>,
; so that group is not a whole 4-byte unit of %s1.
; Expected output: result bytes 8, 9 and 11 are inserted individually through
; r0, and the other three groups use vmov.f32 moves.
; NOTE(review): the function name has 17 characters for a 16-index mask and
; reads "7" where the mask has index 0; it does not appear to spell the mask
; exactly. Left unchanged because the assertions key on the name.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45u700123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdef89ab45u700123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[4]
; CHECK-NEXT:    vmov.8 q1[8], r0
; CHECK-NEXT:    vmov.u8 r0, q0[5]
; CHECK-NEXT:    vmov.8 q1[9], r0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vmov.8 q1[11], r0
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 undef, i32 0, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %out
}



; f16

; Mask <4..7, 0..3> on half elements: swaps the low and high four-lane halves
; of %s1.
; Expected output: four vmov.f32 moves into s4-s7, then vmov q0, q1.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_45670123(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x half> %out
}

; Mask <6,7, 4,5, 2,3, 0,1>: adjacent half pairs stay together, but the four
; pairs are placed in reverse order.
; Expected output: four vmov.f32 moves (s3..s0 into s4..s7), then vmov q0, q1.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_67452301(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_67452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x half> %out
}

; Mask <7..0>: reverses all eight half lanes of %s1.
; Expected output: the result is rebuilt lane by lane in q1 using vmovx.f16,
; vmov to r0/r1 and vmov.16 inserts, then copied back to q0.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_76543210(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_76543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s3
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmovx.f16 s8, s2
; CHECK-NEXT:    vmov.16 q1[0], r1
; CHECK-NEXT:    vmov.16 q1[1], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[2], r0
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmovx.f16 s8, s1
; CHECK-NEXT:    vmov.16 q1[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmovx.f16 s8, s0
; CHECK-NEXT:    vmov.16 q1[4], r0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov.16 q1[5], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[6], r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov.16 q1[7], r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %out
}

; Identity mask <0..7>: the only expected instruction is the return.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_01234567(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_01234567:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %out
}

; Mask <0..3, 12..15>: lanes 0-3 come from %s1 and lanes 4-7 come from lanes
; 4-7 of %s2 (indices 8-15 select %s2).
; Expected output: two vmov.f32 moves (s6 -> s2, s7 -> s3), result left in q0.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0123cdef(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_0123cdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x half> %out
}

; Only the odd result lanes are defined (indices 7, 5, 3, 1); even lanes are
; undef. The expected output is the same four-move sequence as
; shuffle_f16_67452301.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_u7u5u3u1(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_u7u5u3u1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
  ret <8 x half> %out
}

; Only the even result lanes are defined (indices 6, 4, 2, 0); odd lanes are
; undef. The expected output is the same four-move sequence as
; shuffle_f16_67452301.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_6u4u2u0u(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_6u4u2u0u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
  ret <8 x half> %out
}

; Only result lane 0 is defined, and it selects index 0 of %s1.
; The only expected instruction is the return.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0uuuuuuu(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_0uuuuuuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x half> %out
}

; Only result lane 4 is defined, and it selects index 0 of %s1.
; Expected output: lane 0 is extracted into r0 and duplicated with vdup.16.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_uuuu0uuu(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_uuuu0uuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
  ret <8 x half> %out
}