; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-32 --check-prefix=SSE2-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE-64 --check-prefix=SSE2-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE-32 --check-prefix=SSE41-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE-64 --check-prefix=SSE41-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-32 --check-prefix=AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64 --check-prefix=AVX1-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX-32 --check-prefix=AVX2-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX-64 --check-prefix=AVX2-64

; Lowering of 128-bit build vectors (chains of insertelement into undef)
; from scalar arguments, across SSE2/SSE4.1/AVX/AVX2 on i686 and x86_64.

; Build <2 x double> from two scalar double arguments.
define <2 x double> @test_buildvector_v2f64(double %a0, double %a1) {
; SSE-32-LABEL: test_buildvector_v2f64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: test_buildvector_v2f64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v2f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v2f64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT:    retq
  %ins0 = insertelement <2 x double> undef, double %a0, i32 0
  %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1
  ret <2 x double> %ins1
}

; Build <4 x float> from four scalar float arguments.
define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, float %a3) {
; SSE-32-LABEL: test_buildvector_v4f32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; SSE-32-NEXT:    retl
;
; SSE2-64-LABEL: test_buildvector_v4f32:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT:    retq
;
; SSE41-64-LABEL: test_buildvector_v4f32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v4f32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v4f32:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; AVX-64-NEXT:    retq
  %ins0 = insertelement <4 x float> undef, float %a0, i32 0
  %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1
  %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2
  %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3
  ret <4 x float> %ins3
}

; Build <2 x i64> from two scalar i64 arguments.
define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
; SSE-32-LABEL: test_buildvector_v2i64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: test_buildvector_v2i64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq %rsi, %xmm1
; SSE-64-NEXT:    movq %rdi, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v2i64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v2i64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovq %rsi, %xmm0
; AVX-64-NEXT:    vmovq %rdi, %xmm1
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT:    retq
  %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0
  %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1
  ret <2 x i64> %ins1
}

; Build <4 x i32> from four scalar i32 arguments.
define <4 x i32> @test_buildvector_v4i32(i32 %f0, i32 %f1, i32 %f2, i32 %f3) {
; SSE-32-LABEL: test_buildvector_v4i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
; SSE-32-NEXT:    retl
;
; SSE2-64-LABEL: test_buildvector_v4i32:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    movd %ecx, %xmm0
; SSE2-64-NEXT:    movd %edx, %xmm1
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-64-NEXT:    movd %esi, %xmm2
; SSE2-64-NEXT:    movd %edi, %xmm0
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-64-NEXT:    retq
;
; SSE41-64-LABEL: test_buildvector_v4i32:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movd %edi, %xmm0
; SSE41-64-NEXT:    pinsrd $1, %esi, %xmm0
; SSE41-64-NEXT:    pinsrd $2, %edx, %xmm0
; SSE41-64-NEXT:    pinsrd $3, %ecx, %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v4i32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v4i32:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovd %edi, %xmm0
; AVX-64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX-64-NEXT:    retq
  %ins0 = insertelement <4 x i32> undef, i32 %f0, i32 0
  %ins1 = insertelement <4 x i32> %ins0, i32 %f1, i32 1
  %ins2 = insertelement <4 x i32> %ins1, i32 %f2, i32 2
  %ins3 = insertelement <4 x i32> %ins2, i32 %f3, i32 3
  ret <4 x i32> %ins3
}

; Build <8 x i16> from eight scalar i16 arguments.
define <8 x i16> @test_buildvector_v8i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
; SSE2-32-LABEL: test_buildvector_v8i16:
; SSE2-32:       # %bb.0:
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-32-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-32-NEXT:    retl
;
; SSE2-64-LABEL: test_buildvector_v8i16:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-64-NEXT:    movd %r9d, %xmm0
; SSE2-64-NEXT:    movd %r8d, %xmm2
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-64-NEXT:    movd %ecx, %xmm0
; SSE2-64-NEXT:    movd %edx, %xmm1
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-64-NEXT:    movd %esi, %xmm3
; SSE2-64-NEXT:    movd %edi, %xmm0
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT:    retq
;
; SSE41-32-LABEL: test_buildvector_v8i16:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    pinsrw $1, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $2, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $3, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $4, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $5, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $6, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrw $7, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: test_buildvector_v8i16:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movd %edi, %xmm0
; SSE41-64-NEXT:    pinsrw $1, %esi, %xmm0
; SSE41-64-NEXT:    pinsrw $2, %edx, %xmm0
; SSE41-64-NEXT:    pinsrw $3, %ecx, %xmm0
; SSE41-64-NEXT:    pinsrw $4, %r8d, %xmm0
; SSE41-64-NEXT:    pinsrw $5, %r9d, %xmm0
; SSE41-64-NEXT:    pinsrw $6, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrw $7, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v8i16:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrw $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $6, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrw $7, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v8i16:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovd %edi, %xmm0
; AVX-64-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrw $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    retq
  %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
  %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
  %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2
  %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
  %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
  %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
  %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6
  %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7
  ret <8 x i16> %ins7
}

; Build <16 x i8> from sixteen scalar i8 arguments.
define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) {
; SSE2-32-LABEL: test_buildvector_v16i8:
; SSE2-32:       # %bb.0:
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-32-NEXT:    retl
;
; SSE2-64-LABEL: test_buildvector_v16i8:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-64-NEXT:    movd %r9d, %xmm0
; SSE2-64-NEXT:    movd %r8d, %xmm2
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-64-NEXT:    movd %ecx, %xmm0
; SSE2-64-NEXT:    movd %edx, %xmm1
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-64-NEXT:    movd %esi, %xmm4
; SSE2-64-NEXT:    movd %edi, %xmm0
; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-64-NEXT:    retq
;
; SSE41-32-LABEL: test_buildvector_v16i8:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT:    pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $3, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $5, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $6, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $7, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $9, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $10, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $11, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $12, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $13, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $14, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    pinsrb $15, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: test_buildvector_v16i8:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    movd %edi, %xmm0
; SSE41-64-NEXT:    pinsrb $1, %esi, %xmm0
; SSE41-64-NEXT:    pinsrb $2, %edx, %xmm0
; SSE41-64-NEXT:    pinsrb $3, %ecx, %xmm0
; SSE41-64-NEXT:    pinsrb $4, %r8d, %xmm0
; SSE41-64-NEXT:    pinsrb $5, %r9d, %xmm0
; SSE41-64-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: test_buildvector_v16i8:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $6, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $7, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $9, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $10, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $11, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $12, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $13, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $14, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrb $15, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: test_buildvector_v16i8:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovd %edi, %xmm0
; AVX-64-NEXT:    vpinsrb $1, %esi, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $4, %r8d, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX-64-NEXT:    retq
  %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0
  %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1
  %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2
  %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3
  %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4
  %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5
  %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6
  %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7
  %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8
  %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9
  %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10
  %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
  %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
  %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13
  %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14
  %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
  ret <16 x i8> %ins15
}

; PR30780

; Splat of a sign-extended i8 scalar into <4 x i32>.
define <4 x i32> @test_buildvector_v4i32_splat_sext_i8(i8 %in) {
; SSE-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm0
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movsbl %dil, %eax
; SSE-64-NEXT:    movd %eax, %xmm0
; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-64-NEXT:    retq
;
; AVX1-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; AVX1-32-NEXT:    vmovd %eax, %xmm0
; AVX1-32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    movsbl %dil, %eax
; AVX1-64-NEXT:    vmovd %eax, %xmm0
; AVX1-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-64-NEXT:    retq
;
; AVX2-32-LABEL: test_buildvector_v4i32_splat_sext_i8:
; AVX2-32:       # %bb.0:
; AVX2-32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; AVX2-32-NEXT:    vmovd %eax, %xmm0
; AVX2-32-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-32-NEXT:    retl
;
; AVX2-64-LABEL: test_buildvector_v4i32_splat_sext_i8:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    movsbl %dil, %eax
; AVX2-64-NEXT:    vmovd %eax, %xmm0
; AVX2-64-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-64-NEXT:    retq
  %ext = sext i8 %in to i32
  %insert = insertelement <4 x i32> undef, i32 %ext, i32 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

; Splat of a zero-extended i8 scalar into <4 x i32>.
define <4 x i32> @test_buildvector_v4i32_splat_zext_i8(i8 %in) {
; SSE-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm0
; SSE-32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movzbl %dil, %eax
; SSE-64-NEXT:    movd %eax, %xmm0
; SSE-64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-64-NEXT:    retq
;
; AVX1-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; AVX1-32-NEXT:    vmovd %eax, %xmm0
; AVX1-32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    movzbl %dil, %eax
; AVX1-64-NEXT:    vmovd %eax, %xmm0
; AVX1-64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-64-NEXT:    retq
;
; AVX2-32-LABEL: test_buildvector_v4i32_splat_zext_i8:
; AVX2-32:       # %bb.0:
; AVX2-32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; AVX2-32-NEXT:    vmovd %eax, %xmm0
; AVX2-32-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-32-NEXT:    retl
;
; AVX2-64-LABEL: test_buildvector_v4i32_splat_zext_i8:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    movzbl %dil, %eax
; AVX2-64-NEXT:    vmovd %eax, %xmm0
; AVX2-64-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-64-NEXT:    retq
  %ext = zext i8 %in to i32
  %insert = insertelement <4 x i32> undef, i32 %ext, i32 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

; PR37502 - https://bugs.llvm.org/show_bug.cgi?id=37502
; Don't use a series of insertps when movddup will do.

define <4 x float> @PR37502(float %x, float %y) {
; SSE2-32-LABEL: PR37502:
; SSE2-32:       # %bb.0:
; SSE2-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-32-NEXT:    retl
;
; SSE2-64-LABEL: PR37502:
; SSE2-64:       # %bb.0:
; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-64-NEXT:    retq
;
; SSE41-32-LABEL: PR37502:
; SSE41-32:       # %bb.0:
; SSE41-32-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-32-NEXT:    retl
;
; SSE41-64-LABEL: PR37502:
; SSE41-64:       # %bb.0:
; SSE41-64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-64-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-64-NEXT:    retq
;
; AVX-32-LABEL: PR37502:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: PR37502:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-64-NEXT:    retq
  %i0 = insertelement <4 x float> undef, float %x, i32 0
  %i1 = insertelement <4 x float> %i0, float %y, i32 1
  %i2 = insertelement <4 x float> %i1, float %x, i32 2
  %i3 = insertelement <4 x float> %i2, float %y, i32 3
  ret <4 x float> %i3
}