; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST

define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2f64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2f64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = insertelement <2 x double> %a, double 0.0, i32 0
  ret <2 x double> %1
}

define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
; SSE2-LABEL: insert_v4f64_0zz3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f64_0zz3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f64_0zz3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x double> %a, double 0.0, i32 1
  %2 = insertelement <4 x double> %1, double 0.0, i32 2
  ret <4 x double> %2
}

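; The integer variants below should use the same zero-blend lowerings as the
; floating-point tests above.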
define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2i64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 0, i32 0
  ret <2 x i64> %1
}

define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4i64_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 0, i32 2
  ret <4 x i64> %1
}

define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: insert_v4f32_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f32_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f32_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f32_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f32_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x float> %a, float 0.0, i32 2
  ret <4 x float> %1
}

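; Multiple zero insertions into a 256-bit vector should merge into a single
; ymm blend on AVX.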
define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
; SSE2-LABEL: insert_v8f32_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8f32_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8f32_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8f32_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x float> %a, float 0.0, i32 0
  %2 = insertelement <8 x float> %1, float 0.0, i32 6
  ret <8 x float> %2
}

define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4i32_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 0, i32 2
  ret <4 x i32> %1
}

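; Same layout as insert_v8f32_z12345z7 - note that the pre-SSE4.1 targets
; rematerialize the zero vector between the two insertions here.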
define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i32_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 6
  ret <8 x i32> %2
}

define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 0, i32 0
  %2 = insertelement <8 x i16> %1, i16 0, i32 6
  ret <8 x i16> %2
}

define <16 x i16> @insert_v16i16_z12345z789ABCDEz(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i16_z12345z789ABCDEz:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 0, i32 0
  %2 = insertelement <16 x i16> %1, i16 0, i32 6
  %3 = insertelement <16 x i16> %2, i16 0, i32 15
  ret <16 x i16> %3
}

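; For byte vectors the lowering choice varies by subtarget: pre-SSE4.1 uses a
; constant and-mask, SSE4.1/AVX prefer xor+pinsrb, and +fast-variable-shuffle
; switches back to the and-mask.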
define <16 x i8> @insert_v16i8_z123456789ABCDEz(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_z123456789ABCDEz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_z123456789ABCDEz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_z123456789ABCDEz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_z123456789ABCDEz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i8_z123456789ABCDEz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: insert_v16i8_z123456789ABCDEz:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    xorl %eax, %eax
; AVX2-SLOW-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX2-SLOW-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: insert_v16i8_z123456789ABCDEz:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 0, i32 0
  %2 = insertelement <16 x i8> %1, i8 0, i32 15
  ret <16 x i8> %2
}

define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    xorl %eax, %eax
; AVX2-SLOW-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-SLOW-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-SLOW-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-SLOW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm1
; AVX2-FAST-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-FAST-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 0, i32 0
  %2 = insertelement <32 x i8> %1, i8 0, i32 15
  %3 = insertelement <32 x i8> %2, i8 0, i32 30
  %4 = insertelement <32 x i8> %3, i8 0, i32 31
  ret <32 x i8> %4
}

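; PR41512: inserting scalars into the low element of constant vectors and
; concatenating the results should lower to simple scalar moves.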
define <4 x i32> @PR41512(i32 %x, i32 %y) {
; SSE-LABEL: PR41512:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    movd %esi, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: PR41512:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %edi, %xmm0
; AVX-NEXT:    vmovd %esi, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %ins1 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %x, i32 0
  %ins2 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %y, i32 0
  %r = shufflevector <4 x i32> %ins1, <4 x i32> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %r
}

define <4 x i64> @PR41512_v4i64(i64 %x, i64 %y) {
; SSE-LABEL: PR41512_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    movq %rsi, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR41512_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vmovq %rsi, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR41512_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vmovq %rsi, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %ins1 = insertelement <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, i64 %x, i32 0
  %ins2 = insertelement <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, i64 %y, i32 0
  %r = shufflevector <4 x i64> %ins1, <4 x i64> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %r
}

define <8 x float> @PR41512_v8f32(float %x, float %y) {
; SSE2-LABEL: PR41512_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    xorps %xmm3, %xmm3
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT:    movaps %xmm3, %xmm0
; SSE2-NEXT:    movaps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: PR41512_v8f32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    xorps %xmm3, %xmm3
; SSE3-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSE3-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE3-NEXT:    movaps %xmm3, %xmm0
; SSE3-NEXT:    movaps %xmm2, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: PR41512_v8f32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    xorps %xmm3, %xmm3
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSSE3-NEXT:    movaps %xmm3, %xmm0
; SSSE3-NEXT:    movaps %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: PR41512_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: PR41512_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %ins1 = insertelement <8 x float> zeroinitializer, float %x, i32 0
  %ins2 = insertelement <8 x float> zeroinitializer, float %y, i32 0
  %r = shufflevector <8 x float> %ins1, <8 x float> %ins2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %r
}

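; The same pattern with loaded scalars should fold the loads into
; zero-extending movss instructions.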
define <4 x i32> @PR41512_loads(i32* %p1, i32* %p2) {
; SSE-LABEL: PR41512_loads:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: PR41512_loads:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %x = load i32, i32* %p1
  %y = load i32, i32* %p2
  %ins1 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %x, i32 0
  %ins2 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %y, i32 0
  %r = shufflevector <4 x i32> %ins1, <4 x i32> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %r
}