; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Codegen tests for shufflevector, insertelement and extractelement on
; 128-bit vectors, run with integer-only MVE and with MVE floating point.
; Both invocations share one set of expected-output lines, so the two
; configurations are expected to produce identical code.

; Reverse all four i32 lanes (mask 3,2,1,0); expected as per-lane moves into q1.
define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle1_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %out
}

; Identity mask; expected to need no instructions beyond the return.
define arm_aapcs_vfpcc <4 x i32> @shuffle2_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle2_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %out
}

; Swap lanes 0 and 3, keep lanes 1 and 2 (mask 3,1,2,0); per-lane moves.
define arm_aapcs_vfpcc <4 x i32> @shuffle3_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle3_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s1
; CHECK-NEXT:    vmov.f32 s6, s2
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
  ret <4 x i32> %out
}

; Swap the two i32 lanes inside each 64-bit half (mask 1,0,3,2); expected as vrev64.32.
define arm_aapcs_vfpcc <4 x i32> @shuffle5_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle5_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i32> %out
}

; Only lane 3 is defined and it stays in place; expected to need no instructions.
define arm_aapcs_vfpcc <4 x i32> @shuffle6_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle6_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 3>
  ret <4 x i32> %out
}

; Reverse all eight i16 lanes; expected as element-by-element moves through r0.
define arm_aapcs_vfpcc <8 x i16> @shuffle1_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle1_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u16 r0, q1[6]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u16 r0, q1[5]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u16 r0, q1[4]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u16 r0, q1[3]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u16 r0, q1[2]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u16 r0, q1[1]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u16 r0, q1[0]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %out
}

; Identity mask on i16 lanes; expected to need no instructions.
define arm_aapcs_vfpcc <8 x i16> @shuffle2_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle2_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %out
}

; Irregular mask 4,5,7,6,3,1,2,0. Result lanes 0-1 are the aligned source pair
; 4-5, which the expected code copies with one 32-bit move (s0 <- s6); the
; remaining lanes go element by element.
define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle3_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u16 r0, q1[6]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u16 r0, q1[3]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u16 r0, q1[1]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u16 r0, q1[2]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u16 r0, q1[0]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    vmov.f32 s0, s6
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
  ret <8 x i16> %out
}

; Reverse the i16 lanes inside each 64-bit half; expected as vrev64.16.
define arm_aapcs_vfpcc <8 x i16> @shuffle5_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle5_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i16> %out
}

; Swap the i16 lanes inside each 32-bit word; expected as an in-place vrev32.16.
define arm_aapcs_vfpcc <8 x i16> @shuffle6_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle6_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i16> %out
}

; Reverse all sixteen i8 lanes; expected as element-by-element moves through r0.
define arm_aapcs_vfpcc <16 x i8> @shuffle1_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle1_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[15]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %out
}

; Identity mask on i8 lanes; expected to need no instructions.
define arm_aapcs_vfpcc <16 x i8> @shuffle2_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle2_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %out
}

; Irregular i8 permutation; expected as sixteen element moves through r0.
define arm_aapcs_vfpcc <16 x i8> @shuffle3_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle3_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[4]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 15, i32 7, i32 14, i32 9, i32 6, i32 3, i32 10, i32 12, i32 1, i32 13, i32 2, i32 8, i32 0, i32 11>
  ret <16 x i8> %out
}

; Reverse the i8 lanes inside each 64-bit half; expected as vrev64.8.
define arm_aapcs_vfpcc <16 x i8> @shuffle5_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle5_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.8 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  ret <16 x i8> %out
}

; Reverse the i8 lanes inside each 32-bit word; expected as an in-place vrev32.8.
define arm_aapcs_vfpcc <16 x i8> @shuffle6_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle6_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  ret <16 x i8> %out
}

; Swap the i8 lanes inside each 16-bit halfword; expected as an in-place vrev16.8.
define arm_aapcs_vfpcc <16 x i8> @shuffle7_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle7_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  ret <16 x i8> %out
}

; Identity mask on i64 lanes; expected to need no instructions.
define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) {
; CHECK-LABEL: shuffle1_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %out
}

; Swap the two i64 lanes; expected as four 32-bit register moves into q1.
define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
; CHECK-LABEL: shuffle2_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %out
}

; Lane 0 undefined, lane 1 kept in place; expected to need no instructions.
define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) {
; CHECK-LABEL: shuffle3_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1>
  ret <2 x i64> %out
}

; Float counterpart of shuffle1_i32: reverse all four lanes.
define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle1_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %out
}

; Identity mask on float lanes; expected to need no instructions.
define arm_aapcs_vfpcc <4 x float> @shuffle2_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle2_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %out
}

; Float counterpart of shuffle3_i32: swap lanes 0 and 3 (mask 3,1,2,0).
define arm_aapcs_vfpcc <4 x float> @shuffle3_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle3_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s1
; CHECK-NEXT:    vmov.f32 s6, s2
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
  ret <4 x float> %out
}

; Float counterpart of shuffle5_i32; expected as vrev64.32.
define arm_aapcs_vfpcc <4 x float> @shuffle5_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle5_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x float> %out
}

; Reverse all eight half lanes. The expected code uses vmovx.f16 to reach the
; top half of each s register and rebuilds the vector in q1 lane by lane.
define arm_aapcs_vfpcc <8 x half> @shuffle1_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle1_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s3
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmovx.f16 s8, s2
; CHECK-NEXT:    vmov.16 q1[0], r1
; CHECK-NEXT:    vmov.16 q1[1], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[2], r0
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmovx.f16 s8, s1
; CHECK-NEXT:    vmov.16 q1[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmovx.f16 s8, s0
; CHECK-NEXT:    vmov.16 q1[4], r0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov.16 q1[5], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[6], r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov.16 q1[7], r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %out
}

; Identity mask on half lanes; expected to need no instructions.
define arm_aapcs_vfpcc <8 x half> @shuffle2_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle2_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %out
}

; Half counterpart of shuffle3_i16 (mask 4,5,7,6,3,1,2,0). The aligned source
; pair 4-5 is expected to be copied with one 32-bit move (s4 <- s2).
define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle3_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s3
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmovx.f16 s8, s1
; CHECK-NEXT:    vmov.16 q1[2], r1
; CHECK-NEXT:    vmov.16 q1[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmovx.f16 s8, s0
; CHECK-NEXT:    vmov.16 q1[4], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[5], r0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov.16 q1[6], r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov.16 q1[7], r0
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
  ret <8 x half> %out
}

; Half counterpart of shuffle5_i16; expected as vrev64.16.
define arm_aapcs_vfpcc <8 x half> @shuffle5_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle5_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x half> %out
}

; Half counterpart of shuffle6_i16; expected as an in-place vrev32.16.
define arm_aapcs_vfpcc <8 x half> @shuffle6_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle6_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x half> %out
}

; Identity mask on double lanes; expected to need no instructions.
define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) {
; CHECK-LABEL: shuffle1_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %out
}

; Swap the two double lanes; expected as four 32-bit register moves into q1.
define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
; CHECK-LABEL: shuffle2_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %out
}

; Lane 0 undefined, lane 1 kept in place; expected to need no instructions.
define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) {
; CHECK-LABEL: shuffle3_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1>
  ret <2 x double> %out
}


; Insert an i32 scalar into lane 0 of an undef vector; expected as one lane write.
define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {
; CHECK-LABEL: insert_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %res
}

; Insert an i16 scalar into lane 0 of an undef vector; expected as one lane write.
define arm_aapcs_vfpcc <8 x i16> @insert_i16(i16 %a) {
; CHECK-LABEL: insert_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %res
}

; Insert an i8 scalar into lane 0 of an undef vector; expected as one lane write.
define arm_aapcs_vfpcc <16 x i8> @insert_i8(i8 %a) {
; CHECK-LABEL: insert_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %res
}

; Insert an i64 scalar (held in r0/r1) into lane 0; expected as two 32-bit lane writes.
define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) {
; CHECK-LABEL: insert_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %res
}

; Insert a float into lane 0. The argument already sits in s0, so only a
; register-liveness annotation is expected, with no move.
define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {
; CHECK-LABEL: insert_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <4 x float> undef, float %a, i32 0
  ret <4 x float> %res
}

; TODO: Calling convention needs fixing to pass half types directly to functions
; Until then the half value is loaded through a pointer before being inserted.
define arm_aapcs_vfpcc <8 x half> @insert_f16(half *%aa) {
; CHECK-LABEL: insert_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr.16 s0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %a = load half, half* %aa
  %res = insertelement <8 x half> undef, half %a, i32 0
  ret <8 x half> %res
}

; Insert a double into lane 0. The expected code goes through a realigned
; 16-byte stack slot: d0 is stored, then the slot is reloaded as q0.
define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
; CHECK-LABEL: insert_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r6, r7, lr}
; CHECK-NEXT:    push {r4, r6, r7, lr}
; CHECK-NEXT:    .setfp r7, sp, #8
; CHECK-NEXT:    add r7, sp, #8
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r4, sp
; CHECK-NEXT:    bfc r4, #0, #4
; CHECK-NEXT:    mov sp, r4
; CHECK-NEXT:    sub.w r4, r7, #8
; CHECK-NEXT:    vstr d0, [sp]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov sp, r4
; CHECK-NEXT:    pop {r4, r6, r7, pc}
entry:
  %res = insertelement <2 x double> undef, double %a, i32 0
  ret <2 x double> %res
}

; Two-input shuffle narrowing to <4 x i16>: lane 0 comes from %v, lanes 1-3
; from the constant operand (7, 1, 9). The result is bitcast to i64. The
; expected code loads the constants from a literal pool and returns the value
; in r0/r1 via the stack.
define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
; CHECK-LABEL: scalar_to_vector_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    adr r1, .LCPI38_0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    mov r2, sp
; CHECK-NEXT:    vmov.f32 s1, s5
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    vstrh.32 q0, [r2]
; CHECK-NEXT:    ldrd r0, r1, [sp], #8
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI38_0:
; CHECK-NEXT:    .zero 4
; CHECK-NEXT:    .long 7 @ 0x7
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 9 @ 0x9
entry:
  %f = shufflevector <8 x i16> %v, <8 x i16> <i16 undef, i16 7, i16 1, i16 9, i16 undef, i16 undef, i16 undef, i16 undef>, <4 x i32> <i32 0, i32 9, i32 10, i32 11>
  %0 = bitcast <4 x i16> %f to i64
  ret i64 %0
}


; Extract i32 lane 0; expected as one move from s0 to r0.
define arm_aapcs_vfpcc i32 @extract_i32_0(<4 x i32> %a) {
; CHECK-LABEL: extract_i32_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <4 x i32> %a, i32 0
  ret i32 %res
}
; Extract i32 lane 3; expected as one move from s3 to r0.
define arm_aapcs_vfpcc i32 @extract_i32_3(<4 x i32> %a) {
; CHECK-LABEL: extract_i32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <4 x i32> %a, i32 3
  ret i32 %res
}

; Extract i16 lane 0; expected as one unsigned lane read into r0.
define arm_aapcs_vfpcc i16 @extract_i16_0(<8 x i16> %a) {
; CHECK-LABEL: extract_i16_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <8 x i16> %a, i32 0
  ret i16 %res
}

; Extract i16 lane 3; expected as one unsigned lane read into r0.
define arm_aapcs_vfpcc i16 @extract_i16_3(<8 x i16> %a) {
; CHECK-LABEL: extract_i16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <8 x i16> %a, i32 3
  ret i16 %res
}

; Extract i8 lane 0; expected as one unsigned lane read into r0.
define arm_aapcs_vfpcc i8 @extract_i8_0(<16 x i8> %a) {
; CHECK-LABEL: extract_i8_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <16 x i8> %a, i32 0
  ret i8 %res
}

; Extract i8 lane 3; expected as one unsigned lane read into r0.
define arm_aapcs_vfpcc i8 @extract_i8_3(<16 x i8> %a) {
; CHECK-LABEL: extract_i8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[3]
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <16 x i8> %a, i32 3
  ret i8 %res
}

; Extract i64 lane 0; expected as two 32-bit moves (s0, s1) into r0/r1.
define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
; CHECK-LABEL: extract_i64_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <2 x i64> %a, i32 0
  ret i64 %res
}

; Extract i64 lane 1; expected as two 32-bit moves (s2, s3) into r0/r1.
define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
; CHECK-LABEL: extract_i64_1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <2 x i64> %a, i32 1
  ret i64 %res
}

; Extract float lane 0. The value is already in s0, so only a
; register-liveness annotation is expected.
define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {
; CHECK-LABEL: extract_f32_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <4 x float> %a, i32 0
  ret float %res
}

; Extract float lane 3; expected as one register move from s3 to s0.
define arm_aapcs_vfpcc float @extract_f32_3(<4 x float> %a) {
; CHECK-LABEL: extract_f32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s3
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <4 x float> %a, i32 3
  ret float %res
}

; Extract half lane 0. The value is already in the low half of s0, so only a
; register-liveness annotation is expected.
define arm_aapcs_vfpcc half @extract_f16_0(<8 x half> %a) {
; CHECK-LABEL: extract_f16_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <8 x half> %a, i32 0
  ret half %res
}

; Extract half lane 3, the top half of s1; expected as one vmovx.f16 into s0.
define arm_aapcs_vfpcc half @extract_f16_3(<8 x half> %a) {
; CHECK-LABEL: extract_f16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s0, s1
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <8 x half> %a, i32 3
  ret half %res
}

; Extract double lane 0. The value is already in d0, so only a
; register-liveness annotation is expected.
define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) {
; CHECK-LABEL: extract_f64_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <2 x double> %a, i32 0
  ret double %res
}

; Extract double lane 1; expected as two 32-bit moves (s2, s3) into s0/s1.
define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
; CHECK-LABEL: extract_f64_1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s2
; CHECK-NEXT:    vmov.f32 s1, s3
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <2 x double> %a, i32 1
  ret double %res
}
