; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

;
; 128-bit vectors
;

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $7, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    psrlw $7, %xmm0
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %ymm0
; AVX512VLBW-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovd %edi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovd %edi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrld $31, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrld $31, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpsrlw $15, %xmm4, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $7, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $7, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i64_64i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovq %rdi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-SLOW-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i64_64i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovq %rdi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 =
[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 936; AVX2-FAST-NEXT: vpand %ymm3, %ymm0, %ymm0 937; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11] 938; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15] 939; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1 940; AVX2-FAST-NEXT: vpand %ymm2, %ymm1, %ymm1 941; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 942; AVX2-FAST-NEXT: vpsrlw $7, %ymm1, %ymm1 943; AVX2-FAST-NEXT: vpand %ymm3, %ymm1, %ymm1 944; AVX2-FAST-NEXT: retq 945; 946; AVX512F-LABEL: ext_i64_64i8: 947; AVX512F: # %bb.0: 948; AVX512F-NEXT: movq %rdi, %rax 949; AVX512F-NEXT: movq %rdi, %rcx 950; AVX512F-NEXT: kmovw %edi, %k1 951; AVX512F-NEXT: movl %edi, %edx 952; AVX512F-NEXT: shrl $16, %edx 953; AVX512F-NEXT: shrq $32, %rax 954; AVX512F-NEXT: shrq $48, %rcx 955; AVX512F-NEXT: kmovw %ecx, %k2 956; AVX512F-NEXT: kmovw %eax, %k3 957; AVX512F-NEXT: kmovw %edx, %k4 958; AVX512F-NEXT: movl {{.*}}(%rip), %eax 959; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z} 960; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 961; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k4} {z} 962; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 963; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 964; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k3} {z} 965; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 966; AVX512F-NEXT: vpbroadcastd %eax, %zmm2 {%k2} {z} 967; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 968; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 969; AVX512F-NEXT: retq 970; 971; AVX512VLBW-LABEL: ext_i64_64i8: 972; AVX512VLBW: # %bb.0: 973; AVX512VLBW-NEXT: kmovq %rdi, %k1 974; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} 975; AVX512VLBW-NEXT: retq 976 %1 = bitcast i64 %a0 to <64 x i1> 977 %2 = zext <64 x i1> %1 to <64 x i8> 978 ret <64 x i8> %2 979} 980