; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512

; Each function bitcasts a scalar iN to <N x i1> and sign-extends it to a
; vector of wider elements. Pre-AVX512 targets are expected to lower this to
; a broadcast of the scalar, an AND with per-lane power-of-two bit masks and
; an equality compare; AVX512 targets move the bits into a mask register
; (kmov) and expand it (vpmovm2*/masked move).

;
; 128-bit vectors
;

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovd %edi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovd %edi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i64_64i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovq %rdi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i64_64i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovq %rdi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm3 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq %rdi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}