; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F

define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}

; PR17654
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_16i16:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512-NEXT: retq
entry:
  %B = zext <16 x i8> %A to <16 x i16>
  ret <16 x i16> %B
}

define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i32>
  ret <4 x i32> %C
}

define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i32>
  ret <8 x i32> %C
}

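; For the i8 -> i64 cases below, SSSE3 can do the whole zero extension with a
; single pshufb that places the source bytes and zeroes the remaining lanes,
; whereas SSE2 needs a chain of punpck unpacks against a zeroed register.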
define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_16i8_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i8> %B to <2 x i64>
  ret <2 x i64> %C
}

define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_16i8_to_4i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i64>
  ret <4 x i64> %C
}

define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_8i16_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}

define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_8i16_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}

define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_8i16_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i16> %B to <2 x i64>
  ret <2 x i64> %C
}

define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_8i16_to_4i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i64>
  ret <4 x i64> %C
}

define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_4i32_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i32> %B to <2 x i64>
  ret <2 x i64> %C
}

define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_4i32_to_4i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT: retq
entry:
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}

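; The load_zext tests below start from a memory operand; with SSE4.1/AVX the
; load is folded directly into pmovzx (shown as mem[...] operands in the checks).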
define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
; SSE2-LABEL: load_zext_2i8_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i8_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i8_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_2i8_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <2 x i8>, <2 x i8>* %ptr
  %Y = zext <2 x i8> %X to <2 x i64>
  ret <2 x i64> %Y
}

define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i8_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_4i8_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = zext <4 x i8> %X to <4 x i32>
  ret <4 x i32> %Y
}

define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i8_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_4i8_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_4i8_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_zext_4i8_to_4i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: retq
entry:
  %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = zext <4 x i8> %X to <4 x i64>
  ret <4 x i64> %Y
}

define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i8_to_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_8i8_to_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT: retq
entry:
  %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = zext <8 x i8> %X to <8 x i16>
  ret <8 x i16> %Y
}

define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_zext_8i8_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = zext <8 x i8> %X to <8 x i32>
  ret <8 x i32> %Y
}

define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_16i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_16i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa (%rdi), %xmm1
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_16i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_16i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_zext_16i8_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %X = load <16 x i8>, <16 x i8>* %ptr
  %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %Z = zext <8 x i8> %Y to <8 x i32>
  ret <8 x i32> %Z
}

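; Widening 8 x i8 from memory all the way to 8 x i64 takes four 128-bit
; pmovzxbq on SSE4.1/AVX1, two 256-bit ones on AVX2, and a single zmm
; pmovzxbq on AVX-512.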
define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm4, %xmm0
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSSE3-NEXT: pshufb %xmm5, %xmm1
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: pshufb %xmm4, %xmm2
; SSSE3-NEXT: pshufb %xmm5, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i8_to_8i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_8i8_to_8i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_8i8_to_8i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_zext_8i8_to_8i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: retq
entry:
  %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = zext <8 x i8> %X to <8 x i64>
  ret <8 x i64> %Y
}

define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_zext_16i8_to_16i16:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512-NEXT: retq
entry:
  %X = load <16 x i8>, <16 x i8>* %ptr
  %Y = zext <16 x i8> %X to <16 x i16>
  ret <16 x i16> %Y
}

define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
; SSE2-LABEL: load_zext_2i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i16_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_2i16_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT: retq
entry:
  %X = load <2 x i16>, <2 x i16>* %ptr
  %Y = zext <2 x i16> %X to <2 x i64>
  ret <2 x i64> %Y
}

define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
; SSE2-LABEL: load_zext_4i16_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i16_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i16_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_4i16_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT: retq
entry:
  %X = load <4 x i16>, <4 x i16>* %ptr
  %Y = zext <4 x i16> %X to <4 x i32>
  ret <4 x i32> %Y
}

define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
; SSE2-LABEL: load_zext_4i16_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i16_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i16_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_4i16_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_4i16_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_zext_4i16_to_4i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %X = load <4 x i16>, <4 x i16>* %ptr
  %Y = zext <4 x i16> %X to <4 x i64>
  ret <4 x i64> %Y
}

define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-LABEL: load_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_zext_8i16_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT: retq
entry:
  %X = load <8 x i16>, <8 x i16>* %ptr
  %Y = zext <8 x i16> %X to <8 x i32>
  ret <8 x i32> %Y
}

define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
; SSE2-LABEL: load_zext_2i32_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i32_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i32_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: load_zext_2i32_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX-NEXT: retq
entry:
  %X = load <2 x i32>, <2 x i32>* %ptr
  %Y = zext <2 x i32> %X to <2 x i64>
  ret <2 x i64> %Y
}

define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_zext_4i32_to_4i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512-NEXT: retq
entry:
  %X = load <4 x i32>, <4 x i32>* %ptr
  %Y = zext <4 x i32> %X to <4 x i64>
  ret <4 x i64> %Y
}

define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
; SSE2-LABEL: zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_8i8_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: retq
entry:
  %t = zext <8 x i8> %z to <8 x i32>
  ret <8 x i32> %t
}

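; The shuf_zext tests below express the zero extension as a shufflevector with
; a zero vector followed by a bitcast of the result.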
define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: shuf_zext_8i16_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}

define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: shuf_zext_4i32_to_4i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}

define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; SSE2-LABEL: shuf_zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: packuswb %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0],zero,xmm1[2],zero,xmm1[4],zero,xmm1[6],zero,xmm1[8],zero,xmm1[10],zero,xmm1[12],zero,xmm1[14],zero
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: shuf_zext_8i8_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT: retq
entry:
  %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
  %Z = bitcast <32 x i8> %B to <8 x i32>
  ret <8 x i32> %Z
}

define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: psrlq $48, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %Z = bitcast <16 x i8> %B to <2 x i64>
  ret <2 x i64> %Z
}

define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 =
xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1348; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1349; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1350; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1351; AVX1-NEXT: retq 1352; 1353; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1354; AVX2: # BB#0: # %entry 1355; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1356; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1357; AVX2-NEXT: retq 1358; 1359; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11: 1360; AVX512: # BB#0: # %entry 1361; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1362; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1363; AVX512-NEXT: retq 1364entry: 1365 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1366 %Z = bitcast <32 x i8> %B to <4 x i64> 1367 ret <4 x i64> %Z 1368} 1369 1370define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp { 1371; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6: 1372; SSE2: # BB#0: # %entry 1373; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1374; SSE2-NEXT: pxor %xmm1, %xmm1 1375; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1376; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1377; SSE2-NEXT: retq 1378; 1379; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6: 1380; SSSE3: # BB#0: # %entry 1381; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1382; SSSE3-NEXT: retq 1383; 1384; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6: 1385; SSE41: # BB#0: # %entry 1386; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1387; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1388; SSE41-NEXT: retq 1389; 1390; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6: 1391; AVX: # BB#0: # %entry 1392; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1393; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1394; AVX-NEXT: retq 1395entry: 1396 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8> 1397 %Z = bitcast <8 x i16> %B to <2 x i64> 1398 ret <2 x i64> %Z 1399} 1400 1401define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp { 1402; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2: 1403; SSE2: # BB#0: # %entry 1404; SSE2-NEXT: movdqa %xmm0, %xmm1 1405; SSE2-NEXT: pxor %xmm2, %xmm2 1406; SSE2-NEXT: punpcklwd 
{{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1407; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1408; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1409; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1410; SSE2-NEXT: retq 1411; 1412; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2: 1413; SSSE3: # BB#0: # %entry 1414; SSSE3-NEXT: movdqa %xmm0, %xmm1 1415; SSSE3-NEXT: pxor %xmm2, %xmm2 1416; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1417; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1418; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1419; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1420; SSSE3-NEXT: retq 1421; 1422; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2: 1423; SSE41: # BB#0: # %entry 1424; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1425; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 1426; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1427; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1428; SSE41-NEXT: movdqa %xmm2, %xmm0 1429; SSE41-NEXT: retq 1430; 1431; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2: 1432; AVX1: # BB#0: # %entry 1433; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1434; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 1435; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1436; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1437; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1438; AVX1-NEXT: retq 1439; 1440; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2: 1441; AVX2: # BB#0: # %entry 1442; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] 1443; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1444; AVX2-NEXT: retq 1445; 1446; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: 1447; AVX512: # BB#0: # %entry 1448; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] 1449; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1450; AVX512-NEXT: retq 1451entry: 1452 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8> 1453 %Z = bitcast <16 x i16> %B to <4 x i64> 1454 ret <4 x i64> %Z 1455} 1456 1457define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp { 1458; SSE-LABEL: shuf_zext_8i16_to_4i32_offset1: 1459; SSE: # BB#0: # %entry 1460; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1461; SSE-NEXT: pxor %xmm1, %xmm1 1462; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1463; SSE-NEXT: retq 1464; 1465; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1: 1466; AVX: # BB#0: # %entry 1467; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1468; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1469; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1470; AVX-NEXT: retq 1471entry: 1472 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, 
i32 8, i32 3, i32 8, i32 4, i32 8> 1473 %Z = bitcast <8 x i16> %B to <4 x i32> 1474 ret <4 x i32> %Z 1475} 1476 1477define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp { 1478; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3: 1479; SSE2: # BB#0: # %entry 1480; SSE2-NEXT: movdqa %xmm0, %xmm1 1481; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1482; SSE2-NEXT: pxor %xmm2, %xmm2 1483; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1484; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1485; SSE2-NEXT: retq 1486; 1487; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3: 1488; SSSE3: # BB#0: # %entry 1489; SSSE3-NEXT: movdqa %xmm0, %xmm1 1490; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1491; SSSE3-NEXT: pxor %xmm2, %xmm2 1492; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1493; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1494; SSSE3-NEXT: retq 1495; 1496; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3: 1497; SSE41: # BB#0: # %entry 1498; SSE41-NEXT: movdqa %xmm0, %xmm1 1499; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1500; SSE41-NEXT: pxor %xmm2, %xmm2 1501; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1502; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1503; SSE41-NEXT: retq 1504; 1505; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3: 1506; AVX1: # BB#0: # %entry 1507; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1508; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1509; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1510; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1511; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1512; AVX1-NEXT: retq 1513; 1514; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3: 1515; AVX2: # BB#0: # %entry 1516; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1517; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1518; AVX2-NEXT: retq 1519; 1520; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3: 1521; AVX512: # BB#0: # %entry 1522; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1523; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1524; AVX512-NEXT: retq 1525entry: 1526 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8> 1527 %Z = bitcast <16 x i16> %B to <8 x i32> 1528 ret <8 x i32> %Z 1529} 1530 1531define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp { 1532; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8: 1533; SSE2: # BB#0: # %entry 1534; SSE2-NEXT: pxor %xmm2, %xmm2 1535; SSE2-NEXT: movdqa %xmm1, %xmm0 1536; SSE2-NEXT: punpcklwd 
{{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1537; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1538; SSE2-NEXT: retq 1539; 1540; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8: 1541; SSSE3: # BB#0: # %entry 1542; SSSE3-NEXT: pxor %xmm2, %xmm2 1543; SSSE3-NEXT: movdqa %xmm1, %xmm0 1544; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1545; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1546; SSSE3-NEXT: retq 1547; 1548; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8: 1549; SSE41: # BB#0: # %entry 1550; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 1551; SSE41-NEXT: pxor %xmm2, %xmm2 1552; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] 1553; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 1554; SSE41-NEXT: movdqa %xmm2, %xmm1 1555; SSE41-NEXT: retq 1556; 1557; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8: 1558; AVX1: # BB#0: # %entry 1559; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1560; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3] 1561; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1562; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] 1563; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1564; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1565; AVX1-NEXT: retq 1566; 1567; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8: 1568; AVX2: # BB#0: # %entry 1569; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1570; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1571; AVX2-NEXT: retq 1572; 1573; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8: 1574; AVX512: # BB#0: # %entry 1575; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 1576; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1577; AVX512-NEXT: retq 1578entry: 1579 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16> 1580 %Z = bitcast <16 x i16> %B to <8 x i32> 1581 ret <8 x i32> %Z 1582} 1583 1584define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp { 1585; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2: 1586; SSE: # BB#0: # %entry 1587; SSE-NEXT: pxor %xmm1, %xmm1 1588; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1589; SSE-NEXT: retq 1590; 1591; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2: 1592; AVX: # BB#0: # %entry 1593; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1594; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1595; AVX-NEXT: retq 1596entry: 1597 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4> 1598 %Z = bitcast <4 x i32> %B to <2 x i64> 1599 ret <2 x i64> %Z 1600} 1601 1602define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp { 1603; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1: 1604; SSE2: # BB#0: # %entry 1605; SSE2-NEXT: movdqa %xmm0, %xmm1 1606; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0] 1607; SSE2-NEXT: pand %xmm1, %xmm0 1608; SSE2-NEXT: psrldq {{.*#+}} xmm1 = 
xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1609; SSE2-NEXT: retq 1610; 1611; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1: 1612; SSSE3: # BB#0: # %entry 1613; SSSE3-NEXT: movdqa %xmm0, %xmm1 1614; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0] 1615; SSSE3-NEXT: pand %xmm1, %xmm0 1616; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1617; SSSE3-NEXT: retq 1618; 1619; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1: 1620; SSE41: # BB#0: # %entry 1621; SSE41-NEXT: movdqa %xmm0, %xmm1 1622; SSE41-NEXT: pxor %xmm0, %xmm0 1623; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 1624; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1625; SSE41-NEXT: retq 1626; 1627; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1: 1628; AVX1: # BB#0: # %entry 1629; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero 1630; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 1631; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3] 1632; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1633; AVX1-NEXT: retq 1634; 1635; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1: 1636; AVX2: # BB#0: # %entry 1637; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] 1638; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1639; AVX2-NEXT: retq 1640; 1641; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1: 1642; AVX512: # BB#0: # %entry 1643; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] 1644; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1645; AVX512-NEXT: retq 1646entry: 1647 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4> 1648 %Z = bitcast <8 x i32> %B to <4 x i64> 1649 ret <4 x i64> %Z 1650} 1651