; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
;
; Codegen tests for vector zero-extension (zext) and sign-extension (sext),
; both plain and combined with a select against zeroinitializer driven by an
; <N x i1> mask argument. The file is compiled twice, once with -mcpu=knl and
; once with -mcpu=skx; each function is followed by the assembly expected from
; each compilation. The shared check prefix is used only where both CPUs are
; expected to produce the same instructions.
;
; In the expectations below, the skx run folds the mask into the extend
; instruction as a zeroing write-mask ({%k1} {z}) in every masked test, while
; the knl run does so only for the 512-bit results and otherwise uses
; shift/and or blend sequences (presumably because of the narrower AVX-512
; feature set on knl -- confirm against the X86 target definitions).
;
; The check lines are tool-generated: regenerate them with the script named on
; the first line instead of editing them by hand.
;
; NOTE(review): the *mem* tests load through %i yet are declared readnone.
; Left untouched so the generated assertions stay valid -- confirm whether
; readonly was intended.

; ---- i8 -> i16 ----

define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x8mem_to_8x16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}

define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbw (%rdi), %xmm1
; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x8mem_to_8x16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}


define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8mem_to_16x16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x8mem_to_16x16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; SKX-NEXT:    retq
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8mem_to_16x16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpmovsxbw (%rdi), %ymm1
; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_16x8mem_to_16x16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
; ALL-LABEL: zext_16x8_to_16x16:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; ALL-NEXT:    retq
  %x = zext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %x
}

define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8_to_16x16_mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x8_to_16x16_mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
; SKX-NEXT:    vpmovb2m %xmm1, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; SKX-NEXT:    retq
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
; ALL-LABEL: sext_16x8_to_16x16:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxbw %xmm0, %ymm0
; ALL-NEXT:    retq
  %x = sext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %x
}

define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8_to_16x16_mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_16x8_to_16x16_mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
; SKX-NEXT:    vpmovb2m %xmm1, %k1
; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_32x8mem_to_32x16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
; KNL-NEXT:    vpand %ymm2, %ymm3, %ymm2
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm1
; KNL-NEXT:    vmovaps %zmm2, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_32x8mem_to_32x16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
; SKX-NEXT:    retq
  %a = load <32 x i8>,<32 x i8> *%i,align 1
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_32x8mem_to_32x16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbw 16(%rdi), %ymm1
; KNL-NEXT:    vpmovsxbw (%rdi), %ymm2
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
; KNL-NEXT:    vpand %ymm2, %ymm3, %ymm2
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm1
; KNL-NEXT:    vmovaps %zmm2, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_32x8mem_to_32x16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <32 x i8>,<32 x i8> *%i,align 1
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; KNL-LABEL: zext_32x8_to_32x16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vmovaps %zmm2, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_32x8_to_32x16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; SKX-NEXT:    retq
  %x = zext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %x
}

define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_32x8_to_32x16_mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_32x8_to_32x16_mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
; SKX-NEXT:    vpmovb2m %ymm1, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; SKX-NEXT:    retq
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; KNL-LABEL: sext_32x8_to_32x16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm2
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm1
; KNL-NEXT:    vmovaps %zmm2, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_32x8_to_32x16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0
; SKX-NEXT:    retq
  %x = sext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %x
}

define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_32x8_to_32x16_mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
; KNL-NEXT:    vpmovsxbw %xmm2, %ymm2
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_32x8_to_32x16_mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
; SKX-NEXT:    vpmovb2m %ymm1, %k1
; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; ---- i8 -> i32 ----

define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x8mem_to_4x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_4x8mem_to_4x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SKX-NEXT:    retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = zext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x8mem_to_4x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxbd (%rdi), %xmm1
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_4x8mem_to_4x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = sext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x8mem_to_8x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; SKX-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxbd (%rdi), %ymm0
; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x8mem_to_8x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8mem_to_16x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x8mem_to_16x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; SKX-NEXT:    retq
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = zext <16 x i8> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}

define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8mem_to_16x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_16x8mem_to_16x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = sext <16 x i8> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}

define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8_to_16x32_mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x8_to_16x32_mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
; SKX-NEXT:    vpmovb2m %xmm1, %k1
; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; SKX-NEXT:    retq
  %x = zext <16 x i8> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}

define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8_to_16x32_mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_16x8_to_16x32_mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
; SKX-NEXT:    vpmovb2m %xmm1, %k1
; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %x = sext <16 x i8> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}

define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
; ALL-LABEL: zext_16x8_to_16x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; ALL-NEXT:    retq
  %x = zext <16 x i8> %i to <16 x i32>
  ret <16 x i32> %x
}

define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
; ALL-LABEL: sext_16x8_to_16x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxbd %xmm0, %zmm0
; ALL-NEXT:    retq
  %x = sext <16 x i8> %i to <16 x i32>
  ret <16 x i32> %x
}

; ---- i8 -> i64 ----

define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_2x8mem_to_2x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_2x8mem_to_2x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SKX-NEXT:    retq
  %a = load <2 x i8>,<2 x i8> *%i,align 1
  %x = zext <2 x i8> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_2x8mem_to_2x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT:    vpmovsxbq (%rdi), %xmm1
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_2x8mem_to_2x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <2 x i8>,<2 x i8> *%i,align 1
  %x = sext <2 x i8> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
; ALL-LABEL: sext_2x8mem_to_2x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxbq (%rdi), %xmm0
; ALL-NEXT:    retq
  %a = load <2 x i8>,<2 x i8> *%i,align 1
  %x = sext <2 x i8> %a to <2 x i64>
  ret <2 x i64> %x
}

define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x8mem_to_4x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
; KNL-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_4x8mem_to_4x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; SKX-NEXT:    retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = zext <4 x i8> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}

define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x8mem_to_4x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
; KNL-NEXT:    vpmovsxbq (%rdi), %ymm1
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_4x8mem_to_4x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = sext <4 x i8> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}

define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
; ALL-LABEL: sext_4x8mem_to_4x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxbq (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = sext <4 x i8> %a to <4 x i64>
  ret <4 x i64> %x
}

define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x8mem_to_8x64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; SKX-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x64mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x8mem_to_8x64mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}

define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
; ALL-LABEL: sext_8x8mem_to_8x64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
; ALL-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i64>
  ret <8 x i64> %x
}

; ---- i16 -> i32 ----

define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_4x16mem_to_4x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_4x16mem_to_4x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SKX-NEXT:    retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = zext <4 x i16> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_4x16mem_to_4x32mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpmovsxwd (%rdi), %xmm1
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_4x16mem_to_4x32mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = sext <4 x i16> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_4x16mem_to_4x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxwd (%rdi), %xmm0
; ALL-NEXT:    retq
  %a = load <4 x i16>,<4 x i16> *%i,align 1
  %x = sext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %x
}


define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16mem_to_8x32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x16mem_to_8x32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT:    retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = zext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x16mem_to_8x32mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxwd (%rdi), %ymm0
; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x16mem_to_8x32mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = sext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
; ALL-LABEL: sext_8x16mem_to_8x32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovsxwd (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load <8 x i16>,<8 x i16> *%i,align 1
  %x = sext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %x
}

define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x16_to_8x32mask:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x16_to_8x32mask:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX-NEXT:    retq
  %x = zext <8 x i16> %a to <8 x i32>
  %ret = select <8 x
i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 779 ret <8 x i32> %ret 780} 781 782define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { 783; ALL-LABEL: zext_8x16_to_8x32: 784; ALL: ## BB#0: 785; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 786; ALL-NEXT: retq 787 %x = zext <8 x i16> %a to <8 x i32> 788 ret <8 x i32> %x 789} 790 791define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { 792; KNL-LABEL: zext_16x16mem_to_16x32: 793; KNL: ## BB#0: 794; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 795; KNL-NEXT: vpslld $31, %zmm0, %zmm0 796; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 797; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 798; KNL-NEXT: retq 799; 800; SKX-LABEL: zext_16x16mem_to_16x32: 801; SKX: ## BB#0: 802; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 803; SKX-NEXT: vpmovb2m %xmm0, %k1 804; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 805; SKX-NEXT: retq 806 %a = load <16 x i16>,<16 x i16> *%i,align 1 807 %x = zext <16 x i16> %a to <16 x i32> 808 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 809 ret <16 x i32> %ret 810} 811 812define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { 813; KNL-LABEL: sext_16x16mem_to_16x32mask: 814; KNL: ## BB#0: 815; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 816; KNL-NEXT: vpslld $31, %zmm0, %zmm0 817; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 818; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} 819; KNL-NEXT: retq 820; 821; SKX-LABEL: 
sext_16x16mem_to_16x32mask: 822; SKX: ## BB#0: 823; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 824; SKX-NEXT: vpmovb2m %xmm0, %k1 825; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} 826; SKX-NEXT: retq 827 %a = load <16 x i16>,<16 x i16> *%i,align 1 828 %x = sext <16 x i16> %a to <16 x i32> 829 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 830 ret <16 x i32> %ret 831} 832 833define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { 834; ALL-LABEL: sext_16x16mem_to_16x32: 835; ALL: ## BB#0: 836; ALL-NEXT: vpmovsxwd (%rdi), %zmm0 837; ALL-NEXT: retq 838 %a = load <16 x i16>,<16 x i16> *%i,align 1 839 %x = sext <16 x i16> %a to <16 x i32> 840 ret <16 x i32> %x 841} 842define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { 843; KNL-LABEL: zext_16x16_to_16x32mask: 844; KNL: ## BB#0: 845; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 846; KNL-NEXT: vpslld $31, %zmm1, %zmm1 847; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 848; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 849; KNL-NEXT: retq 850; 851; SKX-LABEL: zext_16x16_to_16x32mask: 852; SKX: ## BB#0: 853; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 854; SKX-NEXT: vpmovb2m %xmm1, %k1 855; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 856; SKX-NEXT: retq 857 %x = zext <16 x i16> %a to <16 x i32> 858 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 859 ret <16 x i32> %ret 860} 861 862define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { 863; ALL-LABEL: zext_16x16_to_16x32: 864; ALL: ## BB#0: 865; ALL-NEXT: 
vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 866; ALL-NEXT: retq 867 %x = zext <16 x i16> %a to <16 x i32> 868 ret <16 x i32> %x 869} 870 871define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { 872; KNL-LABEL: zext_2x16mem_to_2x64: 873; KNL: ## BB#0: 874; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 875; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 876; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 877; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 878; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 879; KNL-NEXT: retq 880; 881; SKX-LABEL: zext_2x16mem_to_2x64: 882; SKX: ## BB#0: 883; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 884; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 885; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero 886; SKX-NEXT: retq 887 %a = load <2 x i16>,<2 x i16> *%i,align 1 888 %x = zext <2 x i16> %a to <2 x i64> 889 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 890 ret <2 x i64> %ret 891} 892 893define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { 894; KNL-LABEL: sext_2x16mem_to_2x64mask: 895; KNL: ## BB#0: 896; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 897; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 898; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 899; KNL-NEXT: vpmovsxwq (%rdi), %xmm1 900; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 901; KNL-NEXT: retq 902; 903; SKX-LABEL: sext_2x16mem_to_2x64mask: 904; SKX: ## BB#0: 905; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 906; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 907; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} 908; SKX-NEXT: retq 909 %a = load <2 x i16>,<2 x i16> *%i,align 1 910 %x = sext <2 x i16> %a to <2 x i64> 911 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 912 ret <2 x 
i64> %ret 913} 914 915define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { 916; ALL-LABEL: sext_2x16mem_to_2x64: 917; ALL: ## BB#0: 918; ALL-NEXT: vpmovsxwq (%rdi), %xmm0 919; ALL-NEXT: retq 920 %a = load <2 x i16>,<2 x i16> *%i,align 1 921 %x = sext <2 x i16> %a to <2 x i64> 922 ret <2 x i64> %x 923} 924 925define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 926; KNL-LABEL: zext_4x16mem_to_4x64: 927; KNL: ## BB#0: 928; KNL-NEXT: vpslld $31, %xmm0, %xmm0 929; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 930; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 931; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 932; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 933; KNL-NEXT: retq 934; 935; SKX-LABEL: zext_4x16mem_to_4x64: 936; SKX: ## BB#0: 937; SKX-NEXT: vpslld $31, %xmm0, %xmm0 938; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 939; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 940; SKX-NEXT: retq 941 %a = load <4 x i16>,<4 x i16> *%i,align 1 942 %x = zext <4 x i16> %a to <4 x i64> 943 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 944 ret <4 x i64> %ret 945} 946 947define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 948; KNL-LABEL: sext_4x16mem_to_4x64mask: 949; KNL: ## BB#0: 950; KNL-NEXT: vpslld $31, %xmm0, %xmm0 951; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 952; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 953; KNL-NEXT: vpmovsxwq (%rdi), %ymm1 954; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 955; KNL-NEXT: retq 956; 957; SKX-LABEL: sext_4x16mem_to_4x64mask: 958; SKX: ## BB#0: 959; SKX-NEXT: vpslld $31, %xmm0, %xmm0 960; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 961; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} 962; SKX-NEXT: retq 963 %a = load <4 x i16>,<4 x i16> *%i,align 1 964 %x = sext <4 x i16> %a to <4 x i64> 965 %ret = select <4 x i1> %mask, <4 
x i64> %x, <4 x i64> zeroinitializer 966 ret <4 x i64> %ret 967} 968 969define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { 970; ALL-LABEL: sext_4x16mem_to_4x64: 971; ALL: ## BB#0: 972; ALL-NEXT: vpmovsxwq (%rdi), %ymm0 973; ALL-NEXT: retq 974 %a = load <4 x i16>,<4 x i16> *%i,align 1 975 %x = sext <4 x i16> %a to <4 x i64> 976 ret <4 x i64> %x 977} 978 979define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 980; KNL-LABEL: zext_8x16mem_to_8x64: 981; KNL: ## BB#0: 982; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 983; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 984; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 985; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 986; KNL-NEXT: retq 987; 988; SKX-LABEL: zext_8x16mem_to_8x64: 989; SKX: ## BB#0: 990; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 991; SKX-NEXT: vpmovw2m %xmm0, %k1 992; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 993; SKX-NEXT: retq 994 %a = load <8 x i16>,<8 x i16> *%i,align 1 995 %x = zext <8 x i16> %a to <8 x i64> 996 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 997 ret <8 x i64> %ret 998} 999 1000define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 1001; KNL-LABEL: sext_8x16mem_to_8x64mask: 1002; KNL: ## BB#0: 1003; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1004; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1005; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1006; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} 1007; KNL-NEXT: retq 1008; 1009; SKX-LABEL: sext_8x16mem_to_8x64mask: 1010; SKX: ## BB#0: 1011; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1012; SKX-NEXT: vpmovw2m %xmm0, %k1 1013; SKX-NEXT: vpmovsxwq 
(%rdi), %zmm0 {%k1} {z} 1014; SKX-NEXT: retq 1015 %a = load <8 x i16>,<8 x i16> *%i,align 1 1016 %x = sext <8 x i16> %a to <8 x i64> 1017 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1018 ret <8 x i64> %ret 1019} 1020 1021define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { 1022; ALL-LABEL: sext_8x16mem_to_8x64: 1023; ALL: ## BB#0: 1024; ALL-NEXT: vpmovsxwq (%rdi), %zmm0 1025; ALL-NEXT: retq 1026 %a = load <8 x i16>,<8 x i16> *%i,align 1 1027 %x = sext <8 x i16> %a to <8 x i64> 1028 ret <8 x i64> %x 1029} 1030 1031define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 1032; KNL-LABEL: zext_8x16_to_8x64mask: 1033; KNL: ## BB#0: 1034; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 1035; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 1036; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 1037; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1038; KNL-NEXT: retq 1039; 1040; SKX-LABEL: zext_8x16_to_8x64mask: 1041; SKX: ## BB#0: 1042; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 1043; SKX-NEXT: vpmovw2m %xmm1, %k1 1044; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1045; SKX-NEXT: retq 1046 %x = zext <8 x i16> %a to <8 x i64> 1047 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1048 ret <8 x i64> %ret 1049} 1050 1051define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { 1052; ALL-LABEL: zext_8x16_to_8x64: 1053; ALL: ## BB#0: 1054; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1055; ALL-NEXT: retq 1056 %ret = zext <8 x i16> %a to <8 x i64> 1057 ret <8 x i64> %ret 1058} 1059 1060define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { 1061; KNL-LABEL: zext_2x32mem_to_2x64: 1062; KNL: ## BB#0: 1063; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1064; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 1065; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1066; KNL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1067; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 1068; KNL-NEXT: retq 1069; 1070; SKX-LABEL: zext_2x32mem_to_2x64: 1071; SKX: ## BB#0: 1072; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 1073; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 1074; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero 1075; SKX-NEXT: retq 1076 %a = load <2 x i32>,<2 x i32> *%i,align 1 1077 %x = zext <2 x i32> %a to <2 x i64> 1078 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1079 ret <2 x i64> %ret 1080} 1081 1082define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { 1083; KNL-LABEL: sext_2x32mem_to_2x64mask: 1084; KNL: ## BB#0: 1085; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1086; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 1087; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1088; KNL-NEXT: vpmovsxdq (%rdi), %xmm1 1089; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 1090; KNL-NEXT: retq 1091; 1092; SKX-LABEL: sext_2x32mem_to_2x64mask: 1093; SKX: ## BB#0: 1094; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 1095; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 1096; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} 1097; SKX-NEXT: retq 1098 %a = load <2 x i32>,<2 x i32> *%i,align 1 1099 %x = sext <2 x i32> %a to <2 x i64> 1100 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1101 ret <2 x i64> %ret 1102} 1103 1104define <2 x i64> 
@sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { 1105; ALL-LABEL: sext_2x32mem_to_2x64: 1106; ALL: ## BB#0: 1107; ALL-NEXT: vpmovsxdq (%rdi), %xmm0 1108; ALL-NEXT: retq 1109 %a = load <2 x i32>,<2 x i32> *%i,align 1 1110 %x = sext <2 x i32> %a to <2 x i64> 1111 ret <2 x i64> %x 1112} 1113 1114define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { 1115; KNL-LABEL: zext_4x32mem_to_4x64: 1116; KNL: ## BB#0: 1117; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1118; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 1119; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 1120; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1121; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 1122; KNL-NEXT: retq 1123; 1124; SKX-LABEL: zext_4x32mem_to_4x64: 1125; SKX: ## BB#0: 1126; SKX-NEXT: vpslld $31, %xmm0, %xmm0 1127; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 1128; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1129; SKX-NEXT: retq 1130 %a = load <4 x i32>,<4 x i32> *%i,align 1 1131 %x = zext <4 x i32> %a to <4 x i64> 1132 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1133 ret <4 x i64> %ret 1134} 1135 1136define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { 1137; KNL-LABEL: sext_4x32mem_to_4x64mask: 1138; KNL: ## BB#0: 1139; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1140; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 1141; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 1142; KNL-NEXT: vpmovsxdq (%rdi), %ymm1 1143; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 1144; KNL-NEXT: retq 1145; 1146; SKX-LABEL: sext_4x32mem_to_4x64mask: 1147; SKX: ## BB#0: 1148; SKX-NEXT: vpslld $31, %xmm0, %xmm0 1149; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 1150; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} 1151; SKX-NEXT: retq 1152 %a = load <4 x i32>,<4 x i32> *%i,align 1 1153 %x = sext <4 x i32> %a to <4 x i64> 1154 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1155 ret <4 x i64> %ret 1156} 
1157 1158define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { 1159; ALL-LABEL: sext_4x32mem_to_4x64: 1160; ALL: ## BB#0: 1161; ALL-NEXT: vpmovsxdq (%rdi), %ymm0 1162; ALL-NEXT: retq 1163 %a = load <4 x i32>,<4 x i32> *%i,align 1 1164 %x = sext <4 x i32> %a to <4 x i64> 1165 ret <4 x i64> %x 1166} 1167 1168define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { 1169; ALL-LABEL: sext_4x32_to_4x64: 1170; ALL: ## BB#0: 1171; ALL-NEXT: vpmovsxdq %xmm0, %ymm0 1172; ALL-NEXT: retq 1173 %x = sext <4 x i32> %a to <4 x i64> 1174 ret <4 x i64> %x 1175} 1176 1177define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { 1178; KNL-LABEL: zext_4x32_to_4x64mask: 1179; KNL: ## BB#0: 1180; KNL-NEXT: vpslld $31, %xmm1, %xmm1 1181; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 1182; KNL-NEXT: vpmovsxdq %xmm1, %ymm1 1183; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1184; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 1185; KNL-NEXT: retq 1186; 1187; SKX-LABEL: zext_4x32_to_4x64mask: 1188; SKX: ## BB#0: 1189; SKX-NEXT: vpslld $31, %xmm1, %xmm1 1190; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 1191; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1192; SKX-NEXT: retq 1193 %x = zext <4 x i32> %a to <4 x i64> 1194 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1195 ret <4 x i64> %ret 1196} 1197 1198define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { 1199; KNL-LABEL: zext_8x32mem_to_8x64: 1200; KNL: ## BB#0: 1201; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1202; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1203; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1204; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1205; KNL-NEXT: retq 1206; 1207; SKX-LABEL: zext_8x32mem_to_8x64: 1208; SKX: ## BB#0: 1209; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 
1210; SKX-NEXT: vpmovw2m %xmm0, %k1 1211; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1212; SKX-NEXT: retq 1213 %a = load <8 x i32>,<8 x i32> *%i,align 1 1214 %x = zext <8 x i32> %a to <8 x i64> 1215 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1216 ret <8 x i64> %ret 1217} 1218 1219define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { 1220; KNL-LABEL: sext_8x32mem_to_8x64mask: 1221; KNL: ## BB#0: 1222; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1223; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1224; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1225; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} 1226; KNL-NEXT: retq 1227; 1228; SKX-LABEL: sext_8x32mem_to_8x64mask: 1229; SKX: ## BB#0: 1230; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1231; SKX-NEXT: vpmovw2m %xmm0, %k1 1232; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} 1233; SKX-NEXT: retq 1234 %a = load <8 x i32>,<8 x i32> *%i,align 1 1235 %x = sext <8 x i32> %a to <8 x i64> 1236 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1237 ret <8 x i64> %ret 1238} 1239 1240define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { 1241; ALL-LABEL: sext_8x32mem_to_8x64: 1242; ALL: ## BB#0: 1243; ALL-NEXT: vpmovsxdq (%rdi), %zmm0 1244; ALL-NEXT: retq 1245 %a = load <8 x i32>,<8 x i32> *%i,align 1 1246 %x = sext <8 x i32> %a to <8 x i64> 1247 ret <8 x i64> %x 1248} 1249 1250define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { 1251; ALL-LABEL: sext_8x32_to_8x64: 1252; ALL: ## BB#0: 1253; ALL-NEXT: vpmovsxdq %ymm0, %zmm0 1254; ALL-NEXT: retq 1255 %x = sext <8 x i32> %a to <8 x i64> 1256 ret <8 x i64> %x 1257} 1258 1259define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { 1260; KNL-LABEL: zext_8x32_to_8x64mask: 1261; KNL: ## BB#0: 1262; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 1263; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 1264; 
KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x32_to_8x64mask:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; SKX-NEXT: retq
  %x = zext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
; Floating-point truncation of <8 x double> to <8 x float>.
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
; ALL-LABEL: fptrunc_test:
; ALL: ## BB#0:
; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0
; ALL-NEXT: retq
  %b = fptrunc <8 x double> %a to <8 x float>
  ret <8 x float> %b
}

; Floating-point extension of <8 x float> to <8 x double>.
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
; ALL-LABEL: fpext_test:
; ALL: ## BB#0:
; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
; ALL-NEXT: retq
  %b = fpext <8 x float> %a to <8 x double>
  ret <8 x double> %b
}

; Bitcast a scalar i16 to <16 x i1>, then zero-extend every lane to i32.
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; ALL-LABEL: zext_16i1_to_16xi32:
; ALL: ## BB#0:
; ALL-NEXT: kmovw %edi, %k1
; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; ALL-NEXT: retq
  %a = bitcast i16 %b to <16 x i1>
  %c = zext <16 x i1> %a to <16 x i32>
  ret <16 x i32> %c
}

; Bitcast a scalar i8 to <8 x i1>, then zero-extend every lane to i64.
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; KNL-LABEL: zext_8i1_to_8xi64:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8i1_to_8xi64:
; SKX: ## BB#0:
; SKX-NEXT: kmovb %edi, %k1
; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = bitcast i8 %b to <8 x i1>
  %c = zext <8 x i1> %a to <8 x i64>
  ret <8 x i64> %c
}

; Truncate <16 x i8> to <16 x i1> and return the mask bitcast to i16.
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; KNL-LABEL: trunc_16i8_to_16i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_16i8_to_16i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
  %mask_b = trunc <16 x i8>%a to <16 x i1>
  %mask = bitcast <16 x i1> %mask_b to i16
  ret i16 %mask
}

; Truncate <16 x i32> to <16 x i1> and return the mask bitcast to i16.
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
; ALL-LABEL: trunc_16i32_to_16i1:
; ALL: ## BB#0:
; ALL-NEXT: vpslld $31, %zmm0, %zmm0
; ALL-NEXT: vptestmd %zmm0, %zmm0, %k0
; ALL-NEXT: kmovw %k0, %eax
; ALL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; ALL-NEXT: retq
  %mask_b = trunc <16 x i32>%a to <16 x i1>
  %mask = bitcast <16 x i1> %mask_b to i16
  ret i16 %mask
}

; Truncate both <4 x i32> inputs to <4 x i1>, AND the two masks, and
; sign-extend the combined mask back to <4 x i32>.
define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
; KNL-LABEL: trunc_4i32_to_4i1:
; KNL: ## BB#0:
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_4i32_to_4i1:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpslld $31, %xmm1, %xmm0
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1}
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
  %mask_a = trunc <4 x i32>%a to <4 x i1>
  %mask_b = trunc <4 x i32>%b to <4 x i1>
  %a_and_b = and <4 x i1>%mask_a, %mask_b
  %res = sext <4 x i1>%a_and_b to <4 x i32>
  ret <4 x i32>%res
}


; Truncate <8 x i16> to <8 x i1> and return the mask bitcast to i8.
define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
; KNL-LABEL: trunc_8i16_to_8i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_8i16_to_8i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
  %mask_b = trunc <8 x i16>%a to <8 x i1>
  %mask = bitcast <8 x i1> %mask_b to i8
  ret i8 %mask
}

; Signed less-than compare of two <8 x i32> vectors, inverted by an xor with
; all-ones, then sign-extended from <8 x i1> to <8 x i32>.
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; KNL-LABEL: sext_8i1_8i32:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: knotw %k0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8i1_8i32:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: vpmovm2d %k0, %ymm0
; SKX-NEXT: retq
  %x = icmp slt <8 x i32> %a1, %a2
  %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
  %y = sext <8 x i1> %x1 to <8 x i32>
  ret <8 x i32> %y
}


; Truncate a scalar i32 to i1, insert it as element 0 of a constant <16 x i1>
; vector, and return that vector bitcast to i16.
define i16 @trunc_i32_to_i1(i32 %a) {
; ALL-LABEL: trunc_i32_to_i1:
; ALL: ## BB#0:
; ALL-NEXT: andl $1, %edi
; ALL-NEXT: kmovw %edi, %k0
; ALL-NEXT: movw $-4, %ax
; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: korw %k0, %k1, %k0
; ALL-NEXT: kmovw %k0, %eax
; ALL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; ALL-NEXT: retq
  %a_i = trunc i32 %a to i1
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; Sign-extend the <8 x i1> result of a signed less-than compare on <8 x i32>
; to <8 x i16>.
define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; KNL-LABEL: sext_8i1_8i16:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8i1_8i16:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: retq
  %x = icmp slt <8 x i32> %a1, %a2
  %y = sext <8 x i1> %x to <8 x i16>
  ret <8 x i16> %y
}

; Sign-extend the <16 x i1> result of a signed less-than compare on
; <16 x i32> to <16 x i32>.
define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
; KNL-LABEL: sext_16i1_16i32:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16i1_16i32:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; SKX-NEXT: vpmovm2d %k0, %zmm0
; SKX-NEXT: retq
  %x = icmp slt <16 x i32> %a1, %a2
  %y = sext <16 x i1> %x to <16 x i32>
  ret <16 x i32> %y
}

; Sign-extend the <8 x i1> result of a signed less-than compare on <8 x i32>
; to <8 x i64>.
define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; KNL-LABEL: sext_8i1_8i64:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8i1_8i64:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
; SKX-NEXT: vpmovm2q %k0, %zmm0
; SKX-NEXT: retq
  %x = icmp slt <8 x i32> %a1, %a2
  %y = sext <8 x i1> %x to <8 x i64>
  ret <8 x i64> %y
}

; Load <8 x i8> from %a, sign-extend it to <8 x i64>, and store the result
; through %res.
define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
; ALL-LABEL: extload_v8i64:
; ALL: ## BB#0:
; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
; ALL-NEXT: vmovdqa64 %zmm0, (%rsi)
; ALL-NEXT: retq
  %sign_load = load <8 x i8>, <8 x i8>* %a
  %c = sext <8 x i8> %sign_load to <8 x i64>
  store <8 x i64> %c, <8 x i64>* %res
  ret void
}

; Select each i16 lane of %x under a <64 x i1> mask, producing zero in the
; lanes where the mask bit is false.
define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-LABEL: test21:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: pushq %r15
; KNL-NEXT: pushq %r14
; KNL-NEXT: pushq %r13
; KNL-NEXT: pushq %r12
; KNL-NEXT: pushq %rbx
; KNL-NEXT: vpmovsxbd %xmm7, %zmm7
; KNL-NEXT: vpslld $31, %zmm7, %zmm7
; KNL-NEXT: vpmovsxbd %xmm6, %zmm6
; KNL-NEXT: vpslld $31, %zmm6, %zmm6
; KNL-NEXT: vpmovsxbd %xmm5, %zmm5
; KNL-NEXT: vpslld $31, %zmm5, %zmm5
; KNL-NEXT: vpmovsxbd %xmm4, %zmm4
; KNL-NEXT: vpslld $31, %zmm4, %zmm4
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k2
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r15d, %xmm4
; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $1, %ecx, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $15, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $2, %r12d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $13, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: kshiftlw $12, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $4, %r13d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $11, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $10, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $6, %esi, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $9, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $7, %edi, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kshiftlw $8, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $8, %r8d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %edi
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $9, %r9d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $10, %r10d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kshiftlw $5, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $11, %r11d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kshiftlw $4, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $12, %ebx, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: kshiftlw $3, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $13, %ebp, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $14, %r14d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $15, %r15d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: vptestmd %zmm6, %zmm6, %k1
; KNL-NEXT: kshiftlw $0, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %eax, %xmm5
; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $1, %ecx, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $2, %r12d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $3, %edx, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $6, %esi, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $7, %edi, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $8, %r8d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %edi
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $9, %r9d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $10, %r10d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $11, %ebx, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $12, %ebp, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $13, %r11d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $14, %r14d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $15, %r15d, %xmm5, %xmm5
; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: vptestmd %zmm7, %zmm7, %k0
; KNL-NEXT: kshiftlw $0, %k1, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vmovd %eax, %xmm6
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $1, %ecx, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $2, %r12d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $3, %edx, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $4, %r13d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $6, %esi, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $7, %edi, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $8, %r8d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $9, %r9d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $10, %ebx, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $11, %ebp, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $13, %r11d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $14, %r14d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $15, %r15d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r12d, %xmm7
; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: vpinsrb $1, %ecx, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $2, %edx, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $3, %r13d, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $4, %eax, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $5, %esi, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $6, %edi, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $7, %r8d, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $8, %r9d, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $9, %ebx, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $10, %ebp, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $11, %r10d, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $12, %r11d, %xmm7, %xmm7
; KNL-NEXT: vpinsrb $13, %r14d, %xmm7, %xmm7
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm0, %ymm4, %ymm0
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm1, %ymm4, %ymm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2
; KNL-NEXT: vpinsrb $14, %r15d, %xmm7, %xmm4
; KNL-NEXT: vpinsrb $15, %r12d, %xmm4, %xmm4
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm3, %ymm4, %ymm3
; KNL-NEXT: popq %rbx
; KNL-NEXT: popq %r12
; KNL-NEXT: popq %r13
; KNL-NEXT: popq %r14
; KNL-NEXT: popq %r15
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test21:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %zmm2, %zmm2
; SKX-NEXT: vpmovb2m %zmm2, %k1
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: kshiftrq $32, %k1, %k1
; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z}
; SKX-NEXT: retq
  %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
  ret <64 x i16> %ret
}

; Zero-extension of i8 to i16 written as a shufflevector that interleaves the
; 16 bytes of %a with bytes of a zero vector, followed by a bitcast.
define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
; ALL-LABEL: shuffle_zext_16x8_to_16x16:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; ALL-NEXT: retq
  %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
  %2 = bitcast <32 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

; Same interleave-with-zero pattern on a <32 x i8> source: the mask reads
; elements 0-15 of %a, each followed by a zero byte, then bitcasts to
; <16 x i16>.
define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
; ALL-LABEL: zext_32x8_to_16x16:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; ALL-NEXT: retq
  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
  %2 = bitcast <32 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

; Shuffle elements 0-7 of %a so that each is followed by three zero bytes,
; then bitcast the result to <8 x i32>.
define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
; ALL-LABEL: zext_32x8_to_8x32:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; ALL-NEXT: retq
  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
  %2 = bitcast <32 x i8> %1 to <8 x i32>
  ret <8 x i32> %2
}

; Shuffle elements 0-3 of %a so that each is followed by seven zero bytes,
; then bitcast the result to <4 x i64>.
define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
; ALL-LABEL: zext_32x8_to_4x64:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; ALL-NEXT: retq
  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Shuffle elements 0-7 of the <16 x i16> source so that each is followed by a
; zero element, then bitcast the result to <8 x i32>.
define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
; ALL-LABEL: zext_16x16_to_8x32:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT: retq
  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
  %2 = bitcast <16 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

; Shuffle elements 0-3 of the <16 x i16> source so that each is followed by
; three zero elements, then bitcast the result to <4 x i64>.
define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
; ALL-LABEL: zext_16x16_to_4x64:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; ALL-NEXT: retq
  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Shuffle elements 0-3 of the <8 x i32> source so that each is followed by a
; zero element, then bitcast the result to <4 x i64>.
define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
; ALL-LABEL: zext_8x32_to_4x64:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; ALL-NEXT: retq
  %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
  %2 = bitcast <8 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Zero-extend the <64 x i1> result of an equality compare on <64 x i8> to
; <64 x i8>.
define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
; KNL-LABEL: zext_64xi1_to_64xi8:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: zext_64xi1_to_64xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <64 x i8> %x, %y
  %1 = zext <64 x i1> %mask to <64 x i8>
  ret <64 x i8> %1
}

; Zero-extend the <32 x i1> result of an equality compare on <32 x i16> to
; <32 x i16>.
define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
; KNL-LABEL: zext_32xi1_to_32xi16:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0
; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; KNL-NEXT: vpsrlw $15, %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32xi1_to_32xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <32 x i16> %x, %y
  %1 = zext <32 x i1> %mask to <32 x i16>
  ret <32 x i16> %1
}

; Zero-extend the <16 x i1> result of an equality compare on <16 x i16> to
; <16 x i16>.
define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
; KNL-LABEL: zext_16xi1_to_16xi16:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16xi1_to_16xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <16 x i16> %x, %y
  %1 = zext <16 x i1> %mask to <16 x i16>
  ret <16 x i16> %1
}


; Zero-extend the <32 x i1> result of an equality compare on <32 x i16> to
; <32 x i8> (the result element type is narrower than the compared type).
define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
; KNL-LABEL: zext_32xi1_to_32xi8:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32xi1_to_32xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <32 x i16> %x, %y
  %1 = zext <32 x i1> %mask to <32 x i8>
  ret <32 x i8> %1
}

; Zero-extend the <4 x i1> result of an equality compare on <4 x i8> to
; <4 x i32>.
define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
; KNL-LABEL: zext_4xi1_to_4x32:
; KNL: ## BB#0:
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_4xi1_to_4x32:
; SKX: ## BB#0:
; SKX-NEXT: vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SKX-NEXT: vpandq %xmm2, %xmm1, %xmm1
; SKX-NEXT: vpandq %xmm2, %xmm0, %xmm0
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <4 x i8> %x, %y
  %1 = zext <4 x i1> %mask to <4 x i32>
  ret <4 x i32> %1
}

; Zero-extend the <2 x i1> result of an equality compare on <2 x i8> to
; <2 x i64>.
define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
; KNL-LABEL: zext_2xi1_to_2xi64:
; KNL: ## BB#0:
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_2xi1_to_2xi64:
; SKX: ## BB#0:
; SKX-NEXT: vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SKX-NEXT: vpandq %xmm2, %xmm1, %xmm1
; SKX-NEXT: vpandq %xmm2, %xmm0, %xmm0
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <2 x i8> %x, %y
  %1 = zext <2 x i1> %mask to <2 x i64>
  ret <2 x i64> %1
}