; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64

; Splat patterns compiled with AVX enabled for a 32-bit and a 64-bit Darwin
; triple. Each function builds a vector whose defined lanes all hold the same
; value; the assertions record the instruction sequence llc emits for it.

;;;; 256-bit versions

; Splat of a loaded i64 into all four lanes of a <4 x i64>.
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: A:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: A:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Loaded i32 inserted into lanes 0-3 of a <8 x i32>; lanes 4-7 are left undef.
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: B:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: B:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  ret <8 x i32> %vecinit6.i
}

; Splat of a loaded double into all four lanes of a <4 x double>.
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: C:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: C:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 8
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Loaded float inserted into lanes 0-3 of a <8 x float>; lanes 4-7 are left undef.
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: D:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  ret <8 x float> %vecinit6.i
}

;;;; 128-bit versions

; Splat of a loaded float into all four lanes of a <4 x float>.
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: e:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: e:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ret <4 x float> %vecinit6.i
}

; Don't broadcast constants on pre-AVX2 hardware.
; The splat value here is a float constant rather than a load; %ptr is unused.
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
; X32-NEXT:    retl
;
; X64-LABEL: _e2:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}

; Splat of a loaded i32 into all four lanes of a <4 x i32>.
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: F:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: F:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
  ret <4 x i32> %vecinit6.i
}

; FIXME: Pointer adjusted broadcasts
; The load_splat_* functions load a whole vector and splat one of its elements
; with a shufflevector. The name encodes result type, loaded type and the
; shuffle mask.

; Splat of element 1 of a loaded <4 x i32>.
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

; Splat of element 3 of a loaded <4 x i32> into a <8 x i32>.
define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

; Splat of element 5 of a loaded <8 x i32>.
define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

; Splat of element 1 of a loaded <4 x float>.
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f32_4f32_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f32_4f32_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

; Splat of element 3 of a loaded <4 x float> into a <8 x float>.
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_4f32_33333333:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_4f32_33333333:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

; Splat of element 5 of a loaded <8 x float>.
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_8f32_55555555:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_8f32_55555555:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

; Splat of element 1 of a loaded <2 x i64>.
define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

; Splat of element 1 of a loaded <2 x i64> into a <4 x i64>.
define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

; Splat of element 2 of a loaded <4 x i64>.
define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

; Splat of element 1 of a loaded <2 x double>.
define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2f64_2f64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2f64_2f64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

; Splat of element 1 of a loaded <2 x double> into a <4 x double>.
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_2f64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_2f64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

; Splat of element 2 of a loaded <4 x double>.
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_4f64_2222:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_4f64_2222:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; Unsupported vbroadcasts

; Splat of a loaded i64 into both lanes of a <2 x i64>.
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: G:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: G:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}

; Register-to-register shuffle whose mask defines only lane 0 (element 1 of
; %a); the remaining mask entries are undef.
define <4 x i32> @H(<4 x i32> %a) {
; X32-LABEL: H:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: H:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64-NEXT:    retq
entry:
  %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i32> %x
}

; Splat of a loaded double (load aligned to 4) into both lanes of a <2 x double>.
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: I:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: I:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

; Float splat followed by an additional i32 load from %k and a store of that
; value through an undef pointer.
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
; X32-LABEL: _RR:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vbroadcastss (%ecx), %xmm0
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    movl %eax, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: _RR:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    movl %eax, (%rax)
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; force a chain
  %j = load i32, i32* %k, align 4
  store i32 %j, i32* undef
  ret <4 x float> %vecinit6.i
}

; Splat written as an insert into lane 0 followed by a shufflevector with a
; zeroinitializer mask; %k is unused.
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
; X32-LABEL: _RR2:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _RR2:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %v = insertelement <4 x float> undef, float %q, i32 0
  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %t
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).

; One <4 x float> splat concatenated with itself.
define <8 x float> @splat_concat1(float* %p) {
; X32-LABEL: splat_concat1:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat1:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %6
}

; Two separately built <4 x float> splats of the same value, concatenated.
define <8 x float> @splat_concat2(float* %p) {
; X32-LABEL: splat_concat2:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = insertelement <4 x float> undef, float %1, i32 0
  %7 = insertelement <4 x float> %6, float %1, i32 1
  %8 = insertelement <4 x float> %7, float %1, i32 2
  %9 = insertelement <4 x float> %8, float %1, i32 3
  %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %10
}

; One <2 x double> splat concatenated with itself.
define <4 x double> @splat_concat3(double* %p) {
; X32-LABEL: splat_concat3:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat3:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %4
}

; Two separately built <2 x double> splats of the same value, concatenated.
define <4 x double> @splat_concat4(double* %p) {
; X32-LABEL: splat_concat4:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat4:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = insertelement <2 x double> undef, double %1, i32 0
  ; The second half is chained off %4 so that it is a vector built
  ; independently of %3.
  %5 = insertelement <2 x double> %4, double %1, i32 1
  %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %6
}