1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=SKX 4 5define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 6; KNL-LABEL: zext_8x8mem_to_8x16: 7; KNL: # %bb.0: 8; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 9; KNL-NEXT: vpsllw $15, %xmm0, %xmm0 10; KNL-NEXT: vpsraw $15, %xmm0, %xmm0 11; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 12; KNL-NEXT: retq 13; 14; SKX-LABEL: zext_8x8mem_to_8x16: 15; SKX: # %bb.0: 16; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 17; SKX-NEXT: vpmovw2m %xmm0, %k1 18; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 19; SKX-NEXT: retq 20 %a = load <8 x i8>,<8 x i8> *%i,align 1 21 %x = zext <8 x i8> %a to <8 x i16> 22 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer 23 ret <8 x i16> %ret 24} 25 26define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 27; KNL-LABEL: sext_8x8mem_to_8x16: 28; KNL: # %bb.0: 29; KNL-NEXT: vpmovsxbw (%rdi), %xmm1 30; KNL-NEXT: vpsllw $15, %xmm0, %xmm0 31; KNL-NEXT: vpsraw $15, %xmm0, %xmm0 32; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 33; KNL-NEXT: retq 34; 35; SKX-LABEL: sext_8x8mem_to_8x16: 36; SKX: # %bb.0: 37; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 38; SKX-NEXT: vpmovw2m %xmm0, %k1 39; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} 40; SKX-NEXT: retq 41 %a = load <8 x i8>,<8 x i8> *%i,align 1 42 %x = sext <8 x i8> %a to <8 x i16> 43 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer 44 ret <8 x i16> %ret 45} 46 47 48define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i 
, <16 x i1> %mask) nounwind readnone { 49; KNL-LABEL: zext_16x8mem_to_16x16: 50; KNL: # %bb.0: 51; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 52; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 53; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 54; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 55; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 56; KNL-NEXT: retq 57; 58; SKX-LABEL: zext_16x8mem_to_16x16: 59; SKX: # %bb.0: 60; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 61; SKX-NEXT: vpmovb2m %xmm0, %k1 62; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 63; SKX-NEXT: retq 64 %a = load <16 x i8>,<16 x i8> *%i,align 1 65 %x = zext <16 x i8> %a to <16 x i16> 66 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 67 ret <16 x i16> %ret 68} 69 70define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 71; KNL-LABEL: sext_16x8mem_to_16x16: 72; KNL: # %bb.0: 73; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 74; KNL-NEXT: vpmovsxbw (%rdi), %ymm1 75; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 76; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 77; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 78; KNL-NEXT: retq 79; 80; SKX-LABEL: sext_16x8mem_to_16x16: 81; SKX: # %bb.0: 82; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 83; 
SKX-NEXT: vpmovb2m %xmm0, %k1 84; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} 85; SKX-NEXT: retq 86 %a = load <16 x i8>,<16 x i8> *%i,align 1 87 %x = sext <16 x i8> %a to <16 x i16> 88 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 89 ret <16 x i16> %ret 90} 91 92define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { 93; ALL-LABEL: zext_16x8_to_16x16: 94; ALL: # %bb.0: 95; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 96; ALL-NEXT: retq 97 %x = zext <16 x i8> %a to <16 x i16> 98 ret <16 x i16> %x 99} 100 101define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { 102; KNL-LABEL: zext_16x8_to_16x16_mask: 103; KNL: # %bb.0: 104; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 105; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 106; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 107; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 108; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 109; KNL-NEXT: retq 110; 111; SKX-LABEL: zext_16x8_to_16x16_mask: 112; SKX: # %bb.0: 113; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 114; SKX-NEXT: vpmovb2m %xmm1, %k1 115; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 116; SKX-NEXT: retq 117 %x = 
zext <16 x i8> %a to <16 x i16> 118 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 119 ret <16 x i16> %ret 120} 121 122define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { 123; ALL-LABEL: sext_16x8_to_16x16: 124; ALL: # %bb.0: 125; ALL-NEXT: vpmovsxbw %xmm0, %ymm0 126; ALL-NEXT: retq 127 %x = sext <16 x i8> %a to <16 x i16> 128 ret <16 x i16> %x 129} 130 131define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { 132; KNL-LABEL: sext_16x8_to_16x16_mask: 133; KNL: # %bb.0: 134; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 135; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 136; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 137; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 138; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 139; KNL-NEXT: retq 140; 141; SKX-LABEL: sext_16x8_to_16x16_mask: 142; SKX: # %bb.0: 143; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 144; SKX-NEXT: vpmovb2m %xmm1, %k1 145; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} 146; SKX-NEXT: retq 147 %x = sext <16 x i8> %a to <16 x i16> 148 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 149 ret <16 x i16> %ret 150} 151 152define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { 153; KNL-LABEL: zext_32x8mem_to_32x16: 154; KNL: # %bb.0: 155; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 156; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 157; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 158; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 159; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 160; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 161; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 162; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0 163; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 164; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 165; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 166; KNL-NEXT: retq 167; 168; SKX-LABEL: zext_32x8mem_to_32x16: 169; SKX: # %bb.0: 170; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 171; SKX-NEXT: vpmovb2m %ymm0, %k1 172; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero 173; SKX-NEXT: retq 174 %a = load <32 x i8>,<32 x i8> *%i,align 1 175 %x = zext <32 x i8> %a to <32 x i16> 176 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 177 ret <32 x i16> %ret 178} 179 180define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { 181; KNL-LABEL: sext_32x8mem_to_32x16: 182; KNL: # %bb.0: 183; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 184; KNL-NEXT: vpmovzxbw {{.*#+}} 
ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 185; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 186; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2 187; KNL-NEXT: vpmovsxbw (%rdi), %ymm3 188; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 189; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 190; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0 191; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 192; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 193; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 194; KNL-NEXT: retq 195; 196; SKX-LABEL: sext_32x8mem_to_32x16: 197; SKX: # %bb.0: 198; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 199; SKX-NEXT: vpmovb2m %ymm0, %k1 200; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} 201; SKX-NEXT: retq 202 %a = load <32 x i8>,<32 x i8> *%i,align 1 203 %x = sext <32 x i8> %a to <32 x i16> 204 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 205 ret <32 x i16> %ret 206} 207 208define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { 209; KNL-LABEL: zext_32x8_to_32x16: 210; KNL: # %bb.0: 211; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 212; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 213; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 214; KNL-NEXT: vmovdqa %ymm2, %ymm0 215; KNL-NEXT: retq 216; 217; SKX-LABEL: 
zext_32x8_to_32x16: 218; SKX: # %bb.0: 219; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 220; SKX-NEXT: retq 221 %x = zext <32 x i8> %a to <32 x i16> 222 ret <32 x i16> %x 223} 224 225define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { 226; KNL-LABEL: zext_32x8_to_32x16_mask: 227; KNL: # %bb.0: 228; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 229; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 230; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 231; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3 232; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero 233; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 234; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 235; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 236; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 
237; KNL-NEXT: vpsllw $15, %ymm2, %ymm1 238; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 239; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1 240; KNL-NEXT: retq 241; 242; SKX-LABEL: zext_32x8_to_32x16_mask: 243; SKX: # %bb.0: 244; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 245; SKX-NEXT: vpmovb2m %ymm1, %k1 246; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 247; SKX-NEXT: retq 248 %x = zext <32 x i8> %a to <32 x i16> 249 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 250 ret <32 x i16> %ret 251} 252 253define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { 254; KNL-LABEL: sext_32x8_to_32x16: 255; KNL: # %bb.0: 256; KNL-NEXT: vpmovsxbw %xmm0, %ymm2 257; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 258; KNL-NEXT: vpmovsxbw %xmm0, %ymm1 259; KNL-NEXT: vmovdqa %ymm2, %ymm0 260; KNL-NEXT: retq 261; 262; SKX-LABEL: sext_32x8_to_32x16: 263; SKX: # %bb.0: 264; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 265; SKX-NEXT: retq 266 %x = sext <32 x i8> %a to <32 x i16> 267 ret <32 x i16> %x 268} 269 270define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { 271; KNL-LABEL: sext_32x8_to_32x16_mask: 272; KNL: # %bb.0: 273; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 274; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 275; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 276; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3 277; KNL-NEXT: vpmovsxbw %xmm3, %ymm3 278; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 279; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 280; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 281; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 282; KNL-NEXT: vpsllw $15, %ymm2, %ymm1 283; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 284; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1 285; KNL-NEXT: retq 286; 287; SKX-LABEL: sext_32x8_to_32x16_mask: 288; SKX: # %bb.0: 289; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 290; SKX-NEXT: vpmovb2m %ymm1, %k1 291; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} 292; SKX-NEXT: retq 293 %x = sext <32 x i8> %a to <32 x i16> 294 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 295 ret <32 x i16> %ret 296} 297 298define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 299; KNL-LABEL: zext_4x8mem_to_4x32: 300; KNL: # %bb.0: 301; KNL-NEXT: vpslld $31, %xmm0, %xmm0 302; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 303; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 304; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 305; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 306; KNL-NEXT: vzeroupper 307; KNL-NEXT: retq 308; 309; SKX-LABEL: zext_4x8mem_to_4x32: 310; SKX: # %bb.0: 311; SKX-NEXT: vpslld $31, %xmm0, %xmm0 312; SKX-NEXT: vpmovd2m %xmm0, %k1 313; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 314; SKX-NEXT: retq 315 %a = load <4 x i8>,<4 x i8> *%i,align 1 316 %x = zext <4 x i8> %a to <4 x i32> 317 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 318 ret <4 x i32> %ret 319} 320 321define <4 x i32> @sext_4x8mem_to_4x32(<4 x 
i8> *%i , <4 x i1> %mask) nounwind readnone { 322; KNL-LABEL: sext_4x8mem_to_4x32: 323; KNL: # %bb.0: 324; KNL-NEXT: vpslld $31, %xmm0, %xmm0 325; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 326; KNL-NEXT: vpmovsxbd (%rdi), %xmm0 327; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 328; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 329; KNL-NEXT: vzeroupper 330; KNL-NEXT: retq 331; 332; SKX-LABEL: sext_4x8mem_to_4x32: 333; SKX: # %bb.0: 334; SKX-NEXT: vpslld $31, %xmm0, %xmm0 335; SKX-NEXT: vpmovd2m %xmm0, %k1 336; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} 337; SKX-NEXT: retq 338 %a = load <4 x i8>,<4 x i8> *%i,align 1 339 %x = sext <4 x i8> %a to <4 x i32> 340 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 341 ret <4 x i32> %ret 342} 343 344define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 345; KNL-LABEL: zext_8x8mem_to_8x32: 346; KNL: # %bb.0: 347; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 348; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 349; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 350; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 351; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 352; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 353; KNL-NEXT: retq 354; 355; SKX-LABEL: zext_8x8mem_to_8x32: 356; SKX: # %bb.0: 357; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 358; SKX-NEXT: vpmovw2m %xmm0, %k1 359; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 360; SKX-NEXT: retq 361 %a = load <8 x i8>,<8 x i8> *%i,align 1 362 %x = zext <8 x i8> %a to <8 x i32> 363 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 364 ret <8 x i32> %ret 365} 366 367define <8 x i32> @sext_8x8mem_to_8x32(<8 x 
i8> *%i , <8 x i1> %mask) nounwind readnone { 368; KNL-LABEL: sext_8x8mem_to_8x32: 369; KNL: # %bb.0: 370; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 371; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 372; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 373; KNL-NEXT: vpmovsxbd (%rdi), %ymm0 374; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 375; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 376; KNL-NEXT: retq 377; 378; SKX-LABEL: sext_8x8mem_to_8x32: 379; SKX: # %bb.0: 380; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 381; SKX-NEXT: vpmovw2m %xmm0, %k1 382; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} 383; SKX-NEXT: retq 384 %a = load <8 x i8>,<8 x i8> *%i,align 1 385 %x = sext <8 x i8> %a to <8 x i32> 386 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 387 ret <8 x i32> %ret 388} 389 390define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 391; KNL-LABEL: zext_16x8mem_to_16x32: 392; KNL: # %bb.0: 393; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 394; KNL-NEXT: vpslld $31, %zmm0, %zmm0 395; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 396; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero 397; KNL-NEXT: retq 398; 399; SKX-LABEL: zext_16x8mem_to_16x32: 400; SKX: # %bb.0: 401; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 402; SKX-NEXT: vpmovb2m %xmm0, %k1 403; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero 404; SKX-NEXT: retq 405 %a = load <16 x i8>,<16 x i8> *%i,align 1 406 %x = zext <16 x i8> %a to <16 x i32> 407 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 408 ret <16 x i32> %ret 409} 410 411define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 412; KNL-LABEL: sext_16x8mem_to_16x32: 413; KNL: # %bb.0: 414; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 415; KNL-NEXT: vpslld $31, %zmm0, %zmm0 416; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 417; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} 418; KNL-NEXT: retq 419; 420; SKX-LABEL: sext_16x8mem_to_16x32: 421; SKX: # %bb.0: 422; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 423; SKX-NEXT: vpmovb2m %xmm0, %k1 424; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} 425; SKX-NEXT: retq 426 %a = load <16 x i8>,<16 x i8> *%i,align 1 427 %x = sext <16 x i8> %a to <16 x i32> 428 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 429 ret <16 x i32> %ret 430} 431 432define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 433; KNL-LABEL: zext_16x8_to_16x32_mask: 434; KNL: # %bb.0: 435; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 436; KNL-NEXT: vpslld $31, %zmm1, %zmm1 437; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 438; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 439; KNL-NEXT: retq 440; 441; SKX-LABEL: zext_16x8_to_16x32_mask: 442; SKX: # %bb.0: 443; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 444; SKX-NEXT: vpmovb2m %xmm1, %k1 445; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 446; SKX-NEXT: retq 447 %x = zext <16 x i8> %a to <16 x i32> 448 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 449 ret <16 x i32> %ret 450} 451 452define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 453; KNL-LABEL: sext_16x8_to_16x32_mask: 454; KNL: # %bb.0: 455; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 456; KNL-NEXT: vpslld $31, %zmm1, %zmm1 457; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 458; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} 459; KNL-NEXT: retq 460; 461; SKX-LABEL: sext_16x8_to_16x32_mask: 462; SKX: # %bb.0: 463; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 464; SKX-NEXT: vpmovb2m %xmm1, %k1 465; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} 466; SKX-NEXT: retq 467 %x = sext <16 x i8> %a to <16 x i32> 468 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 469 ret <16 x i32> %ret 470} 471 472define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 473; ALL-LABEL: zext_16x8_to_16x32: 474; ALL: # %bb.0: 475; ALL-NEXT: vpmovzxbd {{.*#+}} 
zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 476; ALL-NEXT: retq 477 %x = zext <16 x i8> %i to <16 x i32> 478 ret <16 x i32> %x 479} 480 481define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 482; ALL-LABEL: sext_16x8_to_16x32: 483; ALL: # %bb.0: 484; ALL-NEXT: vpmovsxbd %xmm0, %zmm0 485; ALL-NEXT: retq 486 %x = sext <16 x i8> %i to <16 x i32> 487 ret <16 x i32> %x 488} 489 490define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { 491; KNL-LABEL: zext_2x8mem_to_2x64: 492; KNL: # %bb.0: 493; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 494; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 495; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 496; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 497; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 498; KNL-NEXT: vzeroupper 499; KNL-NEXT: retq 500; 501; SKX-LABEL: zext_2x8mem_to_2x64: 502; SKX: # %bb.0: 503; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 504; SKX-NEXT: vpmovq2m %xmm0, %k1 505; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 506; SKX-NEXT: retq 507 %a = load <2 x i8>,<2 x i8> *%i,align 1 508 %x = zext <2 x i8> %a to <2 x i64> 509 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 510 ret <2 x i64> %ret 511} 512define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { 513; KNL-LABEL: sext_2x8mem_to_2x64mask: 514; KNL: # %bb.0: 515; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 516; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 517; KNL-NEXT: vpmovsxbq (%rdi), %xmm0 518; 
KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 519; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 520; KNL-NEXT: vzeroupper 521; KNL-NEXT: retq 522; 523; SKX-LABEL: sext_2x8mem_to_2x64mask: 524; SKX: # %bb.0: 525; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 526; SKX-NEXT: vpmovq2m %xmm0, %k1 527; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} 528; SKX-NEXT: retq 529 %a = load <2 x i8>,<2 x i8> *%i,align 1 530 %x = sext <2 x i8> %a to <2 x i64> 531 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 532 ret <2 x i64> %ret 533} 534define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { 535; ALL-LABEL: sext_2x8mem_to_2x64: 536; ALL: # %bb.0: 537; ALL-NEXT: vpmovsxbq (%rdi), %xmm0 538; ALL-NEXT: retq 539 %a = load <2 x i8>,<2 x i8> *%i,align 1 540 %x = sext <2 x i8> %a to <2 x i64> 541 ret <2 x i64> %x 542} 543 544define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 545; KNL-LABEL: zext_4x8mem_to_4x64: 546; KNL: # %bb.0: 547; KNL-NEXT: vpslld $31, %xmm0, %xmm0 548; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 549; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 550; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 551; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 552; KNL-NEXT: retq 553; 554; SKX-LABEL: zext_4x8mem_to_4x64: 555; SKX: # %bb.0: 556; SKX-NEXT: vpslld $31, %xmm0, %xmm0 557; SKX-NEXT: vpmovd2m %xmm0, %k1 558; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 559; SKX-NEXT: retq 560 %a = load <4 x i8>,<4 x i8> *%i,align 1 561 %x = zext <4 x i8> %a to <4 x i64> 562 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 563 ret <4 x i64> %ret 564} 565 566define <4 x i64> 
@sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x8mem_to_4x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbq (%rdi), %ymm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_4x8mem_to_4x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = sext <4 x i8> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}

; Loads <4 x i8> from %i and sign-extends to <4 x i64> (no mask).
; NOTE(review): this and the other *mem* tests load through %i yet are declared
; `readnone` - confirm the attribute is intended.
define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
; ALL-LABEL: sext_4x8mem_to_4x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxbq (%rdi), %ymm0
; ALL-NEXT: retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = sext <4 x i8> %a to <4 x i64>
  ret <4 x i64> %x
}

; Loads <8 x i8> from %i, zero-extends to <8 x i64>, and zeroes lanes whose %mask bit is clear.
define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x64:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x8mem_to_8x64:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; SKX-NEXT: retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

; Loads <8 x i8> from %i, sign-extends to <8 x i64>, and zeroes lanes whose %mask bit is clear.
define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x8mem_to_8x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

; Loads <8 x i8> from %i and sign-extends to <8 x i64> (no mask).
define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
; ALL-LABEL: sext_8x8mem_to_8x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
; ALL-NEXT: retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i64>
  ret <8 x i64> %x
}

; Loads <4 x i16> from %i, zero-extends to <4 x i32>, and zeroes lanes whose %mask bit is clear.
define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x16mem_to_4x32:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_4x16mem_to_4x32:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SKX-NEXT: retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = zext <4 x i16> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Loads <4 x i16> from %i, sign-extends to <4 x i32>, and zeroes lanes whose %mask bit is clear.
define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x16mem_to_4x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %xmm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: sext_4x16mem_to_4x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = sext <4 x i16> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Loads <4 x i16> from %i and sign-extends to <4 x i32> (no mask).
define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_4x16mem_to_4x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwd (%rdi), %xmm0
; ALL-NEXT: retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = sext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %x
}


; Loads <8 x i16> from %i, zero-extends to <8 x i32>, and zeroes lanes whose %mask bit is clear.
define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16mem_to_8x32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x16mem_to_8x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT: retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = zext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

; Loads <8 x i16> from %i, sign-extends to <8 x i32>, and zeroes lanes whose %mask bit is clear.
define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x16mem_to_8x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x16mem_to_8x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = sext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

; Loads <8 x i16> from %i and sign-extends to <8 x i32> (no mask).
define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_8x16mem_to_8x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwd (%rdi), %ymm0
; ALL-NEXT: retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = sext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %x
}

; Zero-extends register operand %a to <8 x i32> and zeroes lanes whose %mask bit is clear.
define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16_to_8x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x16_to_8x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX-NEXT: retq
  %x = zext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

; Zero-extends register operand %a from <8 x i16> to <8 x i32> (no mask).
define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
; ALL-LABEL: zext_8x16_to_8x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT: retq
  %x = zext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %x
}

; Loads <16 x i16> from %i, zero-extends to <16 x i32>, and zeroes lanes whose %mask bit is clear.
define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x16mem_to_16x32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16x16mem_to_16x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; SKX-NEXT: retq
  %a = load <16 x i16>,<16 x i16> *%i,align 1
  %x = zext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}

; Loads <16 x i16> from %i, sign-extends to <16 x i32>, and zeroes lanes whose %mask bit is clear.
define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x16mem_to_16x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16x16mem_to_16x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <16 x i16>,<16 x i16> *%i,align 1
  %x = sext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}

; Loads <16 x i16> from %i and sign-extends to <16 x i32> (no mask).
define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_16x16mem_to_16x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwd (%rdi), %zmm0
; ALL-NEXT: retq
  %a = load <16 x i16>,<16 x i16> *%i,align 1
  %x = sext <16 x i16> %a to <16 x i32>
  ret <16 x i32> %x
}

; Zero-extends register operand %a to <16 x i32> and zeroes lanes whose %mask bit is clear.
define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x16_to_16x32mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16x16_to_16x32mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
; SKX-NEXT: vpmovb2m %xmm1, %k1
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; SKX-NEXT: retq
  %x = zext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}

; Zero-extends register operand %a from <16 x i16> to <16 x i32> (no mask).
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
; ALL-LABEL: zext_16x16_to_16x32:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT: retq
  %x = zext <16 x i16> %a to <16 x i32>
  ret <16 x i32> %x
}

; Loads <2 x i16> from %i, zero-extends to <2 x i64>, and zeroes lanes whose %mask bit is clear.
define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_2x16mem_to_2x64:
; KNL: # %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_2x16mem_to_2x64:
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vpmovq2m %xmm0, %k1
; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SKX-NEXT: retq
  %a = load <2 x i16>,<2 x i16> *%i,align 1
  %x = zext <2 x i16> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}

; Loads <2 x i16> from %i, sign-extends to <2 x i64>, and zeroes lanes whose %mask bit is clear.
; NOTE(review): this and the other *mem* tests load through %i yet are declared
; `readnone` - confirm the attribute is intended.
define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_2x16mem_to_2x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwq (%rdi), %xmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: sext_2x16mem_to_2x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vpmovq2m %xmm0, %k1
; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <2 x i16>,<2 x i16> *%i,align 1
  %x = sext <2 x i16> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}

; Loads <2 x i16> from %i and sign-extends to <2 x i64> (no mask).
define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_2x16mem_to_2x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwq (%rdi), %xmm0
; ALL-NEXT: retq
  %a = load <2 x i16>,<2 x i16> *%i,align 1
  %x = sext <2 x i16> %a to <2 x i64>
  ret <2 x i64> %x
}

; Loads <4 x i16> from %i, zero-extends to <4 x i64>, and zeroes lanes whose %mask bit is clear.
define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x16mem_to_4x64:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_4x16mem_to_4x64:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SKX-NEXT: retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = zext <4 x i16> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}

; Loads <4 x i16> from %i, sign-extends to <4 x i64>, and zeroes lanes whose %mask bit is clear.
define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x16mem_to_4x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwq (%rdi), %ymm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_4x16mem_to_4x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = sext <4 x i16> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}

; Loads <4 x i16> from %i and sign-extends to <4 x i64> (no mask).
define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_4x16mem_to_4x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwq (%rdi), %ymm0
; ALL-NEXT: retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = sext <4 x i16> %a to <4 x i64>
  ret <4 x i64> %x
}

; Loads <8 x i16> from %i, zero-extends to <8 x i64>, and zeroes lanes whose %mask bit is clear.
define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16mem_to_8x64:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x16mem_to_8x64:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; SKX-NEXT: retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = zext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

; Loads <8 x i16> from %i, sign-extends to <8 x i64>, and zeroes lanes whose %mask bit is clear.
define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x16mem_to_8x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x16mem_to_8x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = sext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

; Loads <8 x i16> from %i and sign-extends to <8 x i64> (no mask).
define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_8x16mem_to_8x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxwq (%rdi), %zmm0
; ALL-NEXT: retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = sext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %x
}

; Zero-extends register operand %a to <8 x i64> and zeroes lanes whose %mask bit is clear.
define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16_to_8x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x16_to_8x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; SKX-NEXT: retq
  %x = zext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

; Zero-extends register operand %a from <8 x i16> to <8 x i64> (no mask).
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
; ALL-LABEL: zext_8x16_to_8x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; ALL-NEXT: retq
  %ret = zext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %ret
}

; Loads <2 x i32> from %i, zero-extends to <2 x i64>, and zeroes lanes whose %mask bit is clear.
define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_2x32mem_to_2x64:
; KNL: # %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_2x32mem_to_2x64:
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vpmovq2m %xmm0, %k1
; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
; SKX-NEXT: retq
  %a = load <2 x i32>,<2 x i32> *%i,align 1
  %x = zext <2 x i32> %a to <2 x i64>
%ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}

; Loads <2 x i32> from %i, sign-extends to <2 x i64>, and zeroes lanes whose %mask bit is clear.
; NOTE(review): this and the other *mem* tests load through %i yet are declared
; `readnone` - confirm the attribute is intended.
define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_2x32mem_to_2x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxdq (%rdi), %xmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: sext_2x32mem_to_2x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vpmovq2m %xmm0, %k1
; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <2 x i32>,<2 x i32> *%i,align 1
  %x = sext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}

; Loads <2 x i32> from %i and sign-extends to <2 x i64> (no mask).
define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
; ALL-LABEL: sext_2x32mem_to_2x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxdq (%rdi), %xmm0
; ALL-NEXT: retq
  %a = load <2 x i32>,<2 x i32> *%i,align 1
  %x = sext <2 x i32> %a to <2 x i64>
  ret <2 x i64> %x
}

; Loads <4 x i32> from %i, zero-extends to <4 x i64>, and zeroes lanes whose %mask bit is clear.
define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x32mem_to_4x64:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_4x32mem_to_4x64:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SKX-NEXT: retq
  %a = load <4 x i32>,<4 x i32> *%i,align 1
  %x = zext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}

; Loads <4 x i32> from %i, sign-extends to <4 x i64>, and zeroes lanes whose %mask bit is clear.
; NOTE(review): this and the other *mem* tests load through %i yet are declared
; `readnone` - confirm the attribute is intended.
define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x32mem_to_4x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxdq (%rdi), %ymm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_4x32mem_to_4x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <4 x i32>,<4 x i32> *%i,align 1
  %x = sext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}

; Loads <4 x i32> from %i and sign-extends to <4 x i64> (no mask).
define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
; ALL-LABEL: sext_4x32mem_to_4x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxdq (%rdi), %ymm0
; ALL-NEXT: retq
  %a = load <4 x i32>,<4 x i32> *%i,align 1
  %x = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %x
}

; Sign-extends register operand %a from <4 x i32> to <4 x i64>.
define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
; ALL-LABEL: sext_4x32_to_4x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxdq %xmm0, %ymm0
; ALL-NEXT: retq
  %x = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %x
}

; Zero-extends register operand %a to <4 x i64> and zeroes lanes whose %mask bit is clear.
define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x32_to_4x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_4x32_to_4x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vpmovd2m %xmm1, %k1
; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SKX-NEXT: retq
  %x = zext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}

; Loads <8 x i32> from %i, zero-extends to <8 x i64>, and zeroes lanes whose %mask bit is clear.
define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x32mem_to_8x64:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x32mem_to_8x64:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT: retq
  %a = load <8 x i32>,<8 x i32> *%i,align 1
  %x = zext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

; Loads <8 x i32> from %i, sign-extends to <8 x i64>, and zeroes lanes whose %mask bit is clear.
define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x32mem_to_8x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x32mem_to_8x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = load <8 x i32>,<8 x i32> *%i,align 1
  %x = sext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

; Loads <8 x i32> from %i and sign-extends to <8 x i64> (no mask).
define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
; ALL-LABEL: sext_8x32mem_to_8x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxdq (%rdi), %zmm0
; ALL-NEXT: retq
  %a = load <8 x i32>,<8 x i32> *%i,align 1
  %x = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %x
}

; Sign-extends register operand %a from <8 x i32> to <8 x i64>.
define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
; ALL-LABEL: sext_8x32_to_8x64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovsxdq %ymm0, %zmm0
; ALL-NEXT: retq
  %x = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %x
}

; Zero-extends register operand %a to <8 x i64> and zeroes lanes whose %mask bit is clear.
define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x32_to_8x64mask:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x32_to_8x64mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; SKX-NEXT: retq
  %x = zext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

; Truncates <8 x double> %a to <8 x float>.
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
; ALL-LABEL: fptrunc_test:
; ALL: # %bb.0:
; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0
; ALL-NEXT: retq
  %b = fptrunc <8 x double> %a to <8 x float>
  ret <8 x
float> %b
}

; Extends <8 x float> %a to <8 x double>.
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
; ALL-LABEL: fpext_test:
; ALL: # %bb.0:
; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
; ALL-NEXT: retq
  %b = fpext <8 x float> %a to <8 x double>
  ret <8 x double> %b
}

; Bitcasts i16 %b to <16 x i1> and zero-extends every bit to an i32 lane.
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; KNL-LABEL: zext_16i1_to_16xi32:
; KNL: # %bb.0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpsrld $31, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16i1_to_16xi32:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: vpmovm2d %k0, %zmm0
; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
; SKX-NEXT: retq
  %a = bitcast i16 %b to <16 x i1>
  %c = zext <16 x i1> %a to <16 x i32>
  ret <16 x i32> %c
}

; Bitcasts i8 %b to <8 x i1> and zero-extends every bit to an i64 lane.
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; KNL-LABEL: zext_8i1_to_8xi64:
; KNL: # %bb.0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8i1_to_8xi64:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: vpmovm2q %k0, %zmm0
; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0
; SKX-NEXT: retq
  %a = bitcast i8 %b to <8 x i1>
  %c = zext <8 x i1> %a to <8 x i64>
  ret <8 x i64> %c
}

; Truncates each i8 lane of %a to i1 and returns the <16 x i1> result bitcast to i16.
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; KNL-LABEL: trunc_16i8_to_16i1:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill: def $ax killed $ax killed $eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_16i8_to_16i1:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq
  %mask_b = trunc <16 x i8>%a to <16 x i1>
  %mask = bitcast <16 x i1> %mask_b to i16
  ret i16 %mask
}

; Truncates each i32 lane of %a to i1 and returns the <16 x i1> result bitcast to i16.
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
; KNL-LABEL: trunc_16i32_to_16i1:
; KNL: # %bb.0:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill: def $ax killed $ax killed $eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_16i32_to_16i1:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %zmm0, %zmm0
; SKX-NEXT: vpmovd2m %zmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %mask_b = trunc <16 x i32>%a to <16 x i1>
  %mask = bitcast <16 x i1> %mask_b to i16
  ret i16 %mask
}

; Truncates %a and %b to <4 x i1>, ANDs the two masks, and sign-extends the result to <4 x i32>.
define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
; ALL-LABEL: trunc_4i32_to_4i1:
; ALL: # %bb.0:
; ALL-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL-NEXT: vpslld $31, %xmm0, %xmm0
; ALL-NEXT: vpsrad $31, %xmm0, %xmm0
; ALL-NEXT: retq
  %mask_a = trunc <4 x i32>%a to <4 x i1>
  %mask_b = trunc <4 x i32>%b to <4 x i1>
  %a_and_b = and <4 x i1>%mask_a, %mask_b
  %res = sext <4 x i1>%a_and_b to <4 x i32>
  ret <4 x i32>%res
}


; Truncates each i16 lane of %a to i1 and returns the <8 x i1> result bitcast to i8.
define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
; KNL-LABEL: trunc_8i16_to_8i1:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_8i16_to_8i1:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: retq
%mask_b = trunc <8 x i16>%a to <8 x i1> 1421 %mask = bitcast <8 x i1> %mask_b to i8 1422 ret i8 %mask 1423} 1424 1425define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { 1426; KNL-LABEL: sext_8i1_8i32: 1427; KNL: # %bb.0: 1428; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1429; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 1430; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1431; KNL-NEXT: retq 1432; 1433; SKX-LABEL: sext_8i1_8i32: 1434; SKX: # %bb.0: 1435; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1436; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 1437; SKX-NEXT: retq 1438 %x = icmp slt <8 x i32> %a1, %a2 1439 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 1440 %y = sext <8 x i1> %x1 to <8 x i32> 1441 ret <8 x i32> %y 1442} 1443 1444 1445define i16 @trunc_i32_to_i1(i32 %a) { 1446; KNL-LABEL: trunc_i32_to_i1: 1447; KNL: # %bb.0: 1448; KNL-NEXT: movw $-4, %ax 1449; KNL-NEXT: kmovw %eax, %k0 1450; KNL-NEXT: kshiftrw $1, %k0, %k0 1451; KNL-NEXT: kshiftlw $1, %k0, %k0 1452; KNL-NEXT: andl $1, %edi 1453; KNL-NEXT: kmovw %edi, %k1 1454; KNL-NEXT: korw %k1, %k0, %k0 1455; KNL-NEXT: kmovw %k0, %eax 1456; KNL-NEXT: # kill: def $ax killed $ax killed $eax 1457; KNL-NEXT: retq 1458; 1459; SKX-LABEL: trunc_i32_to_i1: 1460; SKX: # %bb.0: 1461; SKX-NEXT: movw $-4, %ax 1462; SKX-NEXT: kmovd %eax, %k0 1463; SKX-NEXT: kshiftrw $1, %k0, %k0 1464; SKX-NEXT: kshiftlw $1, %k0, %k0 1465; SKX-NEXT: andl $1, %edi 1466; SKX-NEXT: kmovw %edi, %k1 1467; SKX-NEXT: korw %k1, %k0, %k0 1468; SKX-NEXT: kmovd %k0, %eax 1469; SKX-NEXT: # kill: def $ax killed $ax killed $eax 1470; SKX-NEXT: retq 1471 %a_i = trunc i32 %a to i1 1472 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0 1473 %res = bitcast <16 x i1> %maskv to i16 1474 ret i16 %res 1475} 1476 1477define <8 x i16> @sext_8i1_8i16(<8 x 
i32> %a1, <8 x i32> %a2) nounwind { 1478; KNL-LABEL: sext_8i1_8i16: 1479; KNL: # %bb.0: 1480; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1481; KNL-NEXT: vpmovdw %zmm0, %ymm0 1482; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1483; KNL-NEXT: vzeroupper 1484; KNL-NEXT: retq 1485; 1486; SKX-LABEL: sext_8i1_8i16: 1487; SKX: # %bb.0: 1488; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 1489; SKX-NEXT: vpmovm2w %k0, %xmm0 1490; SKX-NEXT: vzeroupper 1491; SKX-NEXT: retq 1492 %x = icmp slt <8 x i32> %a1, %a2 1493 %y = sext <8 x i1> %x to <8 x i16> 1494 ret <8 x i16> %y 1495} 1496 1497define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { 1498; KNL-LABEL: sext_16i1_16i32: 1499; KNL: # %bb.0: 1500; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 1501; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1502; KNL-NEXT: retq 1503; 1504; SKX-LABEL: sext_16i1_16i32: 1505; SKX: # %bb.0: 1506; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 1507; SKX-NEXT: vpmovm2d %k0, %zmm0 1508; SKX-NEXT: retq 1509 %x = icmp slt <16 x i32> %a1, %a2 1510 %y = sext <16 x i1> %x to <16 x i32> 1511 ret <16 x i32> %y 1512} 1513 1514define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { 1515; KNL-LABEL: sext_8i1_8i64: 1516; KNL: # %bb.0: 1517; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1518; KNL-NEXT: vpmovsxdq %ymm0, %zmm0 1519; KNL-NEXT: retq 1520; 1521; SKX-LABEL: sext_8i1_8i64: 1522; SKX: # %bb.0: 1523; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 1524; SKX-NEXT: vpmovm2q %k0, %zmm0 1525; SKX-NEXT: retq 1526 %x = icmp slt <8 x i32> %a1, %a2 1527 %y = sext <8 x i1> %x to <8 x i64> 1528 ret <8 x i64> %y 1529} 1530 1531define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { 1532; ALL-LABEL: extload_v8i64: 1533; ALL: # %bb.0: 1534; ALL-NEXT: vpmovsxbq (%rdi), %zmm0 1535; ALL-NEXT: vmovdqa64 %zmm0, (%rsi) 1536; ALL-NEXT: vzeroupper 1537; ALL-NEXT: retq 1538 %sign_load = load <8 x i8>, <8 x i8>* %a 1539 %c = sext <8 x i8> %sign_load to <8 x i64> 1540 store <8 x i64> %c, <8 x i64>* %res 1541 
ret void 1542} 1543 1544define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { 1545; KNL-LABEL: test21: 1546; KNL: # %bb.0: 1547; KNL-NEXT: vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero 1548; KNL-NEXT: vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero 1549; KNL-NEXT: vpmovzxbw {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero 1550; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero 1551; KNL-NEXT: vpsllw $15, %ymm4, %ymm4 1552; KNL-NEXT: vpsraw $15, %ymm4, %ymm4 1553; KNL-NEXT: vpand %ymm0, %ymm4, %ymm0 1554; KNL-NEXT: vpsllw $15, %ymm5, %ymm4 1555; KNL-NEXT: vpsraw $15, %ymm4, %ymm4 1556; KNL-NEXT: vpand %ymm1, %ymm4, %ymm1 1557; KNL-NEXT: vpsllw $15, %ymm6, %ymm4 1558; KNL-NEXT: vpsraw $15, %ymm4, %ymm4 1559; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2 1560; KNL-NEXT: vpsllw $15, %ymm7, %ymm4 1561; KNL-NEXT: vpsraw $15, %ymm4, %ymm4 1562; KNL-NEXT: vpand %ymm3, %ymm4, %ymm3 1563; KNL-NEXT: retq 1564; 1565; SKX-LABEL: test21: 1566; SKX: # %bb.0: 1567; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 1568; SKX-NEXT: vpmovb2m %zmm2, %k1 1569; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} 1570; SKX-NEXT: kshiftrq $32, %k1, %k1 1571; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} 1572; SKX-NEXT: retq 1573 %ret = select <64 x 
i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer 1574 ret <64 x i16> %ret 1575} 1576 1577define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { 1578; ALL-LABEL: shuffle_zext_16x8_to_16x16: 1579; ALL: # %bb.0: 1580; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1581; ALL-NEXT: retq 1582 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 1583 %2 = bitcast <32 x i8> %1 to <16 x i16> 1584 ret <16 x i16> %2 1585} 1586 1587define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { 1588; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask: 1589; KNL: # %bb.0: 1590; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 1591; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1592; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 1593; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 1594; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 1595; KNL-NEXT: retq 1596; 1597; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: 1598; SKX: # %bb.0: 1599; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 1600; SKX-NEXT: vpmovb2m %xmm1, %k1 1601; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1602; SKX-NEXT: retq 1603 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 1604 %bc = bitcast <32 x i8> %x to <16 x i16> 1605 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer 1606 ret <16 x i16> %ret 1607} 1608 1609define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { 1610; ALL-LABEL: zext_32x8_to_16x16: 1611; ALL: # %bb.0: 1612; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1613; ALL-NEXT: retq 1614 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32> 1615 %2 = bitcast <32 x i8> %1 to <16 x i16> 1616 ret <16 x i16> %2 1617} 1618 1619define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) { 1620; ALL-LABEL: zext_32x8_to_8x32: 1621; ALL: # %bb.0: 1622; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1623; ALL-NEXT: retq 1624 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 
32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32> 1625 %2 = bitcast <32 x i8> %1 to <8 x i32> 1626 ret <8 x i32> %2 1627} 1628 1629define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { 1630; ALL-LABEL: zext_32x8_to_4x64: 1631; ALL: # %bb.0: 1632; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1633; ALL-NEXT: retq 1634 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> 1635 %2 = bitcast <32 x i8> %1 to <4 x i64> 1636 ret <4 x i64> %2 1637} 1638 1639define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { 1640; ALL-LABEL: zext_16x16_to_8x32: 1641; ALL: # %bb.0: 1642; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1643; ALL-NEXT: retq 1644 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16> 1645 %2 = bitcast <16 x i16> %1 to <8 x i32> 1646 ret <8 x i32> %2 1647} 1648 1649define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { 1650; ALL-LABEL: zext_16x16_to_4x64: 1651; ALL: # %bb.0: 1652; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1653; ALL-NEXT: retq 1654 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, 
i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16> 1655 %2 = bitcast <16 x i16> %1 to <4 x i64> 1656 ret <4 x i64> %2 1657} 1658 1659define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { 1660; ALL-LABEL: zext_8x32_to_4x64: 1661; ALL: # %bb.0: 1662; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1663; ALL-NEXT: retq 1664 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8> 1665 %2 = bitcast <8 x i32> %1 to <4 x i64> 1666 ret <4 x i64> %2 1667} 1668 1669define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { 1670; KNL-LABEL: zext_64xi1_to_64xi8: 1671; KNL: # %bb.0: 1672; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 1673; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1674; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0 1675; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1 1676; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 1677; KNL-NEXT: retq 1678; 1679; SKX-LABEL: zext_64xi1_to_64xi8: 1680; SKX: # %bb.0: 1681; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 1682; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} 1683; SKX-NEXT: retq 1684 %mask = icmp eq <64 x i8> %x, %y 1685 %1 = zext <64 x i1> %mask to <64 x i8> 1686 ret <64 x i8> %1 1687} 1688 1689define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { 1690; KNL-LABEL: zext_32xi1_to_32xi16: 1691; KNL: # %bb.0: 1692; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 1693; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0 1694; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 1695; KNL-NEXT: vpsrlw $15, %ymm1, %ymm1 1696; KNL-NEXT: retq 1697; 1698; SKX-LABEL: zext_32xi1_to_32xi16: 1699; SKX: # %bb.0: 1700; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 1701; SKX-NEXT: vpmovm2w %k0, %zmm0 1702; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 1703; SKX-NEXT: retq 1704 %mask = icmp eq <32 x i16> %x, %y 1705 %1 = zext <32 x i1> %mask to <32 x i16> 1706 ret <32 x i16> %1 1707} 1708 1709define <16 x i16> 
@zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { 1710; ALL-LABEL: zext_16xi1_to_16xi16: 1711; ALL: # %bb.0: 1712; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 1713; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0 1714; ALL-NEXT: retq 1715 %mask = icmp eq <16 x i16> %x, %y 1716 %1 = zext <16 x i1> %mask to <16 x i16> 1717 ret <16 x i16> %1 1718} 1719 1720 1721define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { 1722; KNL-LABEL: zext_32xi1_to_32xi8: 1723; KNL: # %bb.0: 1724; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 1725; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 1726; KNL-NEXT: vpmovdb %zmm0, %xmm0 1727; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 1728; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 1729; KNL-NEXT: vpmovdb %zmm1, %xmm1 1730; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1731; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1732; KNL-NEXT: retq 1733; 1734; SKX-LABEL: zext_32xi1_to_32xi8: 1735; SKX: # %bb.0: 1736; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 1737; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} 1738; SKX-NEXT: retq 1739 %mask = icmp eq <32 x i16> %x, %y 1740 %1 = zext <32 x i1> %mask to <32 x i8> 1741 ret <32 x i8> %1 1742} 1743 1744define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { 1745; ALL-LABEL: zext_4xi1_to_4x32: 1746; ALL: # %bb.0: 1747; ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 1748; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 1749; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0 1750; ALL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1751; ALL-NEXT: vpsrld $31, %xmm0, %xmm0 1752; ALL-NEXT: retq 1753 %mask = icmp eq <4 x i8> %x, %y 1754 %1 = zext <4 x i1> %mask to <4 x i32> 1755 ret <4 x i32> %1 1756} 1757 1758define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { 1759; ALL-LABEL: zext_2xi1_to_2xi64: 1760; ALL: # %bb.0: 1761; ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] 1762; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 1763; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0 1764; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 1765; 
ALL-NEXT: vpsrlq $63, %xmm0, %xmm0 1766; ALL-NEXT: retq 1767 %mask = icmp eq <2 x i8> %x, %y 1768 %1 = zext <2 x i1> %mask to <2 x i64> 1769 ret <2 x i64> %1 1770} 1771