; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F

; Widened shuffle broadcast loads
;
; Every function below loads a vector and feeds it to a shufflevector whose
; mask repeats a short run of elements (for example 0,1,0,1,...). The repeated
; run is 16, 32 or 64 bits wide, so the load + shuffle pair is equivalent to
; splatting one wider scalar read from memory. The assertions record the
; instruction sequence llc emits for each feature level in the run lines above.
;
; Function names follow load_splat_<result type>_<loaded type>_<index pattern>;
; e.g. load_splat_8f32_4f32_01010101 loads <4 x float> and returns <8 x float>
; built from elements 0,1 repeated.
;
; The functions that read through %ptr carry the readonly attribute: they load
; from memory, so readnone would contradict their bodies.

; --- float elements: the leading pair of floats (64 bits) is repeated ---

define <4 x float> @load_splat_4f32_4f32_0101(<4 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_4f32_4f32_0101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_4f32_4f32_0101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_4f32_4f32_0101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %ret
}

define <8 x float> @load_splat_8f32_4f32_01010101(<4 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_4f32_01010101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_4f32_01010101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: load_splat_8f32_4f32_01010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8f32_4f32_01010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8f32_4f32_01010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %ret
}

define <8 x float> @load_splat_8f32_8f32_01010101(<8 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_8f32_01010101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_8f32_01010101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_8f32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %ret
}

; --- i32 elements: the leading pair of i32 (64 bits) is repeated ---

define <4 x i32> @load_splat_4i32_4i32_0101(<4 x i32>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_4i32_4i32_0101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_4i32_4i32_0101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_4i32_4i32_0101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_4i32_4i32_0101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %ret
}

define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i32_4i32_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_8i32_4i32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %ret
}

define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i32_8i32_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i32_8i32_01010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i32_8i32_01010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i32_8i32_01010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %ret
}

; --- i16 elements: 0,1 repeats 32 bits; 0,1,2,3 repeats 64 bits ---

define <8 x i16> @load_splat_8i16_8i16_01010101(<8 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i16_8i16_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i16_8i16_01010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i16_8i16_01010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i16> %ret
}

define <8 x i16> @load_splat_8i16_8i16_01230123(<8 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_8i16_8i16_01230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i16_8i16_01230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i16_8i16_01230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %ret
}

define <16 x i16> @load_splat_16i16_8i16_0101010101010101(<8 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_8i16_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i16> %ret
}

define <16 x i16> @load_splat_16i16_8i16_0123012301230123(<8 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_8i16_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_8i16_0123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %ret
}

define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_16i16_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i16> %ret
}

define <16 x i16> @load_splat_16i16_16i16_0123012301230123(<16 x i16>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i16_16i16_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %ret
}

; --- i8 elements: 0,1 repeats 16 bits; 0..3 repeats 32 bits; 0..7 repeats 64 bits ---

define <16 x i8> @load_splat_16i8_16i8_0101010101010101(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %ret
}

define <16 x i8> @load_splat_16i8_16i8_0123012301230123(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %ret
}

define <16 x i8> @load_splat_16i8_16i8_0123456701234567(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_16i8_16i8_0123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %ret
}

define <32 x i8> @load_splat_32i8_16i8_01010101010101010101010101010101(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_16i8_01230123012301230123012301230123(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(<16 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_32i8_01010101010101010101010101010101(<32 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(<32 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_32i8_01234567012345670123456701234567(<32 x i8>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <32 x i8> %ret
}

; --- result narrower than the loaded vector ---
; The all-zero mask splats element 0 of a <8 x float> load into <4 x float>.

define <4 x float> @load_splat_4f32_8f32_0000(<8 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE-LABEL: load_splat_4f32_8f32_0000:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_4f32_8f32_0000:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %ret
}

; The repeated pair starts at element 8 of a <16 x float> load, i.e. at byte
; offset 32 from %ptr (8 elements * 4 bytes), which is the displacement that
; appears in the expected memory operands below.

define <8 x float> @load_splat_8f32_16f32_89898989(<16 x float>* %ptr) nounwind uwtable readonly ssp {
; SSE2-LABEL: load_splat_8f32_16f32_89898989:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps 32(%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_16f32_89898989:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_16f32_89898989:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd 32(%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x float>, <16 x float>* %ptr
  %ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
  ret <8 x float> %ret
}

; PR34394
; The loaded vector is only <2 x i32> (64 bits) and is widened to 128-, 256-
; and 512-bit results by repeating both of its elements.

define <4 x i32> @load_splat_4i32_2i32_0101(<2 x i32>* %vp) {
; SSE-LABEL: load_splat_4i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_4i32_2i32_0101:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_4i32_2i32_0101:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_4i32_2i32_0101:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0
; AVX512-NEXT:    retq
  %vec = load <2 x i32>, <2 x i32>* %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %res
}

define <8 x i32> @load_splat_8i32_2i32_0101(<2 x i32>* %vp) {
; SSE-LABEL: load_splat_8i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i32_2i32_0101:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i32_2i32_0101:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i32_2i32_0101:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX512-NEXT:    retq
  %vec = load <2 x i32>, <2 x i32>* %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %res
}

; For the 512-bit result the avx512f run currently records a zero-extend plus
; permute sequence instead of a single broadcast load.
; NOTE(review) - this looks like a missed optimization; confirm before relying
; on it when regenerating the assertions.

define <16 x i32> @load_splat_16i32_2i32_0101(<2 x i32>* %vp) {
; SSE-LABEL: load_splat_16i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i32_2i32_0101:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i32_2i32_0101:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    vmovaps %ymm0, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i32_2i32_0101:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; AVX512-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %vec = load <2 x i32>, <2 x i32>* %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i32> %res
}