; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2f64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2f64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %1 = insertelement <2 x double> %a, double 0.0, i32 0
  ret <2 x double> %1
}

define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
; SSE2-LABEL: insert_v4f64_0zz3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f64_0zz3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f64_0zz3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41:       # BB#0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    xorpd %xmm2, %xmm2
; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x double> %a, double 0.0, i32 1
  %2 = insertelement <4 x double> %1, double 0.0, i32 2
  ret <4 x double> %2
}

define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v2i64_z1:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v2i64_z1:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 0, i32 0
  ret <2 x i64> %1
}

define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i64_01z3:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i64_01z3:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 0, i32 2
  ret <4 x i64> %1
}

define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: insert_v4f32_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f32_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f32_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f32_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f32_01z3:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x float> %a, float 0.0, i32 2
  ret <4 x float> %1
}

define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
; SSE2-LABEL: insert_v8f32_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8f32_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8f32_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8f32_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x float> %a, float 0.0, i32 0
  %2 = insertelement <8 x float> %1, float 0.0, i32 6
  ret <8 x float> %2
}

define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i32_01z3:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i32_01z3:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 0, i32 2
  ret <4 x i32> %1
}

define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v8i32_z12345z7:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v8i32_z12345z7:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 6
  ret <8 x i32> %2
}

define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_z12345z7:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 0, i32 0
  %2 = insertelement <8 x i16> %1, i16 0, i32 6
  ret <8 x i16> %2
}

define <16 x i16> @insert_v16i16_z12345z789ABZDEz(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i16_z12345z789ABZDEz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v16i16_z12345z789ABZDEz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 0, i32 0
  %2 = insertelement <16 x i16> %1, i16 0, i32 6
  %3 = insertelement <16 x i16> %2, i16 0, i32 15
  ret <16 x i16> %3
}

define <16 x i8> @insert_v16i8_z123456789ABZDEz(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE3:       # BB#0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_z123456789ABZDEz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_z123456789ABZDEz:
; AVX:       # BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 0, i32 0
  %2 = insertelement <16 x i8> %1, i8 0, i32 15
  ret <16 x i8> %2
}

define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT:    pandn %xmm3, %xmm5
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE3:       # BB#0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm3
; SSE3-NEXT:    pandn %xmm3, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm3, %xmm4
; SSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE3-NEXT:    pand %xmm5, %xmm1
; SSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT:    pandn %xmm3, %xmm5
; SSE3-NEXT:    por %xmm5, %xmm1
; SSE3-NEXT:    pand %xmm2, %xmm1
; SSE3-NEXT:    pandn %xmm4, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    por %xmm2, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm3, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm3, %xmm0
; SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSSE3-NEXT:    pshufb {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSSE3-NEXT:    por %xmm4, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0],zero
; SSSE3-NEXT:    por %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm3, %xmm1
; SSSE3-NEXT:    por %xmm4, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX1:       # BB#0:
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX2:       # BB#0:
; AVX2-NEXT:    xorl %eax, %eax
; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 0, i32 0
  %2 = insertelement <32 x i8> %1, i8 0, i32 15
  %3 = insertelement <32 x i8> %2, i8 0, i32 30
  %4 = insertelement <32 x i8> %3, i8 0, i32 31
  ret <32 x i8> %4
}