1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW 9; 10; Just two 32-bit runs to make sure we do reasonable things there. 11; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2 12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE41 13 14define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 15; SSE2-LABEL: sext_16i8_to_8i16: 16; SSE2: # %bb.0: # %entry 17; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 18; SSE2-NEXT: psraw $8, %xmm0 19; SSE2-NEXT: retq 20; 21; SSSE3-LABEL: sext_16i8_to_8i16: 22; SSSE3: # %bb.0: # %entry 23; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 24; SSSE3-NEXT: psraw $8, %xmm0 25; SSSE3-NEXT: retq 26; 27; SSE41-LABEL: sext_16i8_to_8i16: 28; SSE41: # %bb.0: # %entry 29; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 30; SSE41-NEXT: retq 31; 32; AVX-LABEL: sext_16i8_to_8i16: 33; AVX: # %bb.0: # %entry 34; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 35; AVX-NEXT: retq 36; 37; X86-SSE2-LABEL: sext_16i8_to_8i16: 38; X86-SSE2: # %bb.0: # %entry 39; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 40; X86-SSE2-NEXT: psraw $8, %xmm0 41; X86-SSE2-NEXT: retl 42; 43; X86-SSE41-LABEL: sext_16i8_to_8i16: 44; X86-SSE41: # %bb.0: # %entry 45; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 46; X86-SSE41-NEXT: retl 47entry: 48 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 49 %C = sext <8 x i8> %B to <8 x i16> 50 ret <8 x i16> %C 51} 52 53define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp { 54; SSE2-LABEL: sext_16i8_to_16i16: 55; SSE2: # %bb.0: # %entry 56; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 57; SSE2-NEXT: psraw $8, %xmm2 58; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 59; SSE2-NEXT: psraw $8, %xmm1 60; SSE2-NEXT: movdqa %xmm2, %xmm0 61; SSE2-NEXT: retq 62; 63; SSSE3-LABEL: sext_16i8_to_16i16: 64; SSSE3: # %bb.0: # %entry 65; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 66; SSSE3-NEXT: psraw $8, %xmm2 67; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 68; SSSE3-NEXT: psraw $8, %xmm1 69; SSSE3-NEXT: movdqa %xmm2, %xmm0 70; SSSE3-NEXT: retq 71; 72; SSE41-LABEL: sext_16i8_to_16i16: 73; SSE41: # %bb.0: # %entry 74; SSE41-NEXT: pmovsxbw %xmm0, %xmm2 75; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 76; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 77; SSE41-NEXT: movdqa %xmm2, %xmm0 78; SSE41-NEXT: retq 79; 80; AVX1-LABEL: sext_16i8_to_16i16: 81; AVX1: # %bb.0: # %entry 82; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 83; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = 
xmm0[2,3,2,3] 84; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 85; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 86; AVX1-NEXT: retq 87; 88; AVX2-LABEL: sext_16i8_to_16i16: 89; AVX2: # %bb.0: # %entry 90; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 91; AVX2-NEXT: retq 92; 93; AVX512-LABEL: sext_16i8_to_16i16: 94; AVX512: # %bb.0: # %entry 95; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0 96; AVX512-NEXT: retq 97; 98; X86-SSE2-LABEL: sext_16i8_to_16i16: 99; X86-SSE2: # %bb.0: # %entry 100; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 101; X86-SSE2-NEXT: psraw $8, %xmm2 102; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 103; X86-SSE2-NEXT: psraw $8, %xmm1 104; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 105; X86-SSE2-NEXT: retl 106; 107; X86-SSE41-LABEL: sext_16i8_to_16i16: 108; X86-SSE41: # %bb.0: # %entry 109; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm2 110; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 111; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm1 112; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 113; X86-SSE41-NEXT: retl 114entry: 115 %B = sext <16 x i8> %A to <16 x i16> 116 ret <16 x i16> %B 117} 118 119define <32 x i16> @sext_32i8_to_32i16(<32 x i8> %A) nounwind uwtable readnone ssp { 120; SSE2-LABEL: sext_32i8_to_32i16: 121; SSE2: # %bb.0: # %entry 122; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 123; SSE2-NEXT: psraw $8, %xmm4 124; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 125; SSE2-NEXT: psraw $8, %xmm5 126; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 127; SSE2-NEXT: psraw $8, %xmm2 128; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 129; SSE2-NEXT: psraw $8, %xmm3 130; SSE2-NEXT: movdqa %xmm4, %xmm0 131; SSE2-NEXT: movdqa %xmm5, %xmm1 132; SSE2-NEXT: retq 133; 134; SSSE3-LABEL: sext_32i8_to_32i16: 135; SSSE3: # %bb.0: # %entry 136; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 137; SSSE3-NEXT: psraw $8, %xmm4 138; SSSE3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 139; SSSE3-NEXT: psraw $8, %xmm5 140; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 141; SSSE3-NEXT: psraw $8, %xmm2 142; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 143; SSSE3-NEXT: psraw $8, %xmm3 144; SSSE3-NEXT: movdqa %xmm4, %xmm0 145; SSSE3-NEXT: movdqa %xmm5, %xmm1 146; SSSE3-NEXT: retq 147; 148; SSE41-LABEL: sext_32i8_to_32i16: 149; SSE41: # %bb.0: # %entry 150; SSE41-NEXT: pmovsxbw %xmm0, %xmm5 151; SSE41-NEXT: pmovsxbw %xmm1, %xmm2 152; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 153; SSE41-NEXT: pmovsxbw %xmm0, %xmm4 154; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 155; SSE41-NEXT: pmovsxbw %xmm0, %xmm3 156; SSE41-NEXT: movdqa %xmm5, %xmm0 157; SSE41-NEXT: movdqa %xmm4, %xmm1 158; SSE41-NEXT: retq 159; 160; AVX1-LABEL: sext_32i8_to_32i16: 161; AVX1: # %bb.0: # %entry 162; AVX1-NEXT: 
vpmovsxbw %xmm0, %xmm1 163; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 164; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2 165; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 166; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 167; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 168; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 169; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 170; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 171; AVX1-NEXT: vmovaps %ymm2, %ymm0 172; AVX1-NEXT: retq 173; 174; AVX2-LABEL: sext_32i8_to_32i16: 175; AVX2: # %bb.0: # %entry 176; AVX2-NEXT: vpmovsxbw %xmm0, %ymm2 177; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 178; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1 179; AVX2-NEXT: vmovdqa %ymm2, %ymm0 180; AVX2-NEXT: retq 181; 182; AVX512F-LABEL: sext_32i8_to_32i16: 183; AVX512F: # %bb.0: # %entry 184; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1 185; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 186; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 187; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 188; AVX512F-NEXT: retq 189; 190; AVX512BW-LABEL: sext_32i8_to_32i16: 191; AVX512BW: # %bb.0: # %entry 192; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 193; AVX512BW-NEXT: retq 194; 195; X86-SSE2-LABEL: sext_32i8_to_32i16: 196; X86-SSE2: # %bb.0: # %entry 197; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 198; X86-SSE2-NEXT: psraw $8, %xmm4 199; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 200; X86-SSE2-NEXT: psraw $8, %xmm5 201; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 202; X86-SSE2-NEXT: psraw $8, %xmm2 203; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = 
xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 204; X86-SSE2-NEXT: psraw $8, %xmm3 205; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 206; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 207; X86-SSE2-NEXT: retl 208; 209; X86-SSE41-LABEL: sext_32i8_to_32i16: 210; X86-SSE41: # %bb.0: # %entry 211; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm5 212; X86-SSE41-NEXT: pmovsxbw %xmm1, %xmm2 213; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 214; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm4 215; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 216; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm3 217; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 218; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 219; X86-SSE41-NEXT: retl 220entry: 221 %B = sext <32 x i8> %A to <32 x i16> 222 ret <32 x i16> %B 223} 224 225define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 226; SSE2-LABEL: sext_16i8_to_4i32: 227; SSE2: # %bb.0: # %entry 228; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 229; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 230; SSE2-NEXT: psrad $24, %xmm0 231; SSE2-NEXT: retq 232; 233; SSSE3-LABEL: sext_16i8_to_4i32: 234; SSSE3: # %bb.0: # %entry 235; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 236; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 237; SSSE3-NEXT: psrad $24, %xmm0 238; SSSE3-NEXT: retq 239; 240; SSE41-LABEL: sext_16i8_to_4i32: 241; SSE41: # %bb.0: # %entry 242; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 243; SSE41-NEXT: retq 244; 245; AVX-LABEL: sext_16i8_to_4i32: 246; AVX: # %bb.0: # %entry 247; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 248; AVX-NEXT: retq 249; 250; X86-SSE2-LABEL: sext_16i8_to_4i32: 251; X86-SSE2: # %bb.0: # %entry 252; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 253; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 254; X86-SSE2-NEXT: psrad $24, %xmm0 255; 
X86-SSE2-NEXT: retl 256; 257; X86-SSE41-LABEL: sext_16i8_to_4i32: 258; X86-SSE41: # %bb.0: # %entry 259; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 260; X86-SSE41-NEXT: retl 261entry: 262 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 263 %C = sext <4 x i8> %B to <4 x i32> 264 ret <4 x i32> %C 265} 266 267define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 268; SSE2-LABEL: sext_16i8_to_8i32: 269; SSE2: # %bb.0: # %entry 270; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 271; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 272; SSE2-NEXT: psrad $24, %xmm0 273; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 274; SSE2-NEXT: psrad $24, %xmm1 275; SSE2-NEXT: retq 276; 277; SSSE3-LABEL: sext_16i8_to_8i32: 278; SSSE3: # %bb.0: # %entry 279; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 280; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 281; SSSE3-NEXT: psrad $24, %xmm0 282; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 283; SSSE3-NEXT: psrad $24, %xmm1 284; SSSE3-NEXT: retq 285; 286; SSE41-LABEL: sext_16i8_to_8i32: 287; SSE41: # %bb.0: # %entry 288; SSE41-NEXT: pmovsxbd %xmm0, %xmm2 289; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 290; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 291; SSE41-NEXT: movdqa %xmm2, %xmm0 292; SSE41-NEXT: retq 293; 294; AVX1-LABEL: sext_16i8_to_8i32: 295; AVX1: # %bb.0: # %entry 296; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 297; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 298; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 299; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 300; AVX1-NEXT: retq 301; 302; AVX2-LABEL: 
sext_16i8_to_8i32: 303; AVX2: # %bb.0: # %entry 304; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 305; AVX2-NEXT: retq 306; 307; AVX512-LABEL: sext_16i8_to_8i32: 308; AVX512: # %bb.0: # %entry 309; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0 310; AVX512-NEXT: retq 311; 312; X86-SSE2-LABEL: sext_16i8_to_8i32: 313; X86-SSE2: # %bb.0: # %entry 314; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 315; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 316; X86-SSE2-NEXT: psrad $24, %xmm0 317; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 318; X86-SSE2-NEXT: psrad $24, %xmm1 319; X86-SSE2-NEXT: retl 320; 321; X86-SSE41-LABEL: sext_16i8_to_8i32: 322; X86-SSE41: # %bb.0: # %entry 323; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm2 324; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 325; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm1 326; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 327; X86-SSE41-NEXT: retl 328entry: 329 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 330 %C = sext <8 x i8> %B to <8 x i32> 331 ret <8 x i32> %C 332} 333 334define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp { 335; SSE2-LABEL: sext_16i8_to_16i32: 336; SSE2: # %bb.0: # %entry 337; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 338; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 339; SSE2-NEXT: psrad $24, %xmm4 340; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 341; SSE2-NEXT: psrad $24, %xmm1 342; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 343; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 344; SSE2-NEXT: psrad $24, %xmm2 345; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 346; SSE2-NEXT: psrad $24, %xmm3 347; SSE2-NEXT: movdqa %xmm4, %xmm0 348; SSE2-NEXT: retq 349; 350; SSSE3-LABEL: sext_16i8_to_16i32: 351; SSSE3: # %bb.0: # %entry 352; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 353; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 354; SSSE3-NEXT: psrad $24, %xmm4 355; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 356; SSSE3-NEXT: psrad $24, %xmm1 357; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 358; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 359; SSSE3-NEXT: psrad $24, %xmm2 360; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 361; SSSE3-NEXT: psrad $24, %xmm3 362; SSSE3-NEXT: movdqa %xmm4, %xmm0 363; SSSE3-NEXT: retq 364; 365; SSE41-LABEL: sext_16i8_to_16i32: 366; SSE41: # %bb.0: # %entry 367; SSE41-NEXT: pmovsxbd %xmm0, %xmm4 368; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 369; SSE41-NEXT: pmovsxbd %xmm1, %xmm1 370; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 371; SSE41-NEXT: pmovsxbd %xmm2, %xmm2 372; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 373; SSE41-NEXT: pmovsxbd %xmm0, %xmm3 374; SSE41-NEXT: movdqa %xmm4, %xmm0 375; SSE41-NEXT: retq 376; 377; AVX1-LABEL: sext_16i8_to_16i32: 378; AVX1: # %bb.0: # %entry 379; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 380; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 381; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2 382; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 383; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 384; AVX1-NEXT: vpmovsxbd 
%xmm1, %xmm1 385; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 386; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 387; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 388; AVX1-NEXT: vmovaps %ymm2, %ymm0 389; AVX1-NEXT: retq 390; 391; AVX2-LABEL: sext_16i8_to_16i32: 392; AVX2: # %bb.0: # %entry 393; AVX2-NEXT: vpmovsxbd %xmm0, %ymm2 394; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 395; AVX2-NEXT: vpmovsxbd %xmm0, %ymm1 396; AVX2-NEXT: vmovdqa %ymm2, %ymm0 397; AVX2-NEXT: retq 398; 399; AVX512-LABEL: sext_16i8_to_16i32: 400; AVX512: # %bb.0: # %entry 401; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 402; AVX512-NEXT: retq 403; 404; X86-SSE2-LABEL: sext_16i8_to_16i32: 405; X86-SSE2: # %bb.0: # %entry 406; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 407; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 408; X86-SSE2-NEXT: psrad $24, %xmm4 409; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 410; X86-SSE2-NEXT: psrad $24, %xmm1 411; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 412; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 413; X86-SSE2-NEXT: psrad $24, %xmm2 414; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 415; X86-SSE2-NEXT: psrad $24, %xmm3 416; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 417; X86-SSE2-NEXT: retl 418; 419; X86-SSE41-LABEL: sext_16i8_to_16i32: 420; X86-SSE41: # %bb.0: # %entry 421; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm4 422; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 423; X86-SSE41-NEXT: pmovsxbd %xmm1, %xmm1 424; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 425; X86-SSE41-NEXT: pmovsxbd %xmm2, %xmm2 426; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 427; X86-SSE41-NEXT: pmovsxbd %xmm0, 
%xmm3 428; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 429; X86-SSE41-NEXT: retl 430entry: 431 %B = sext <16 x i8> %A to <16 x i32> 432 ret <16 x i32> %B 433} 434 435define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 436; SSE2-LABEL: sext_16i8_to_2i64: 437; SSE2: # %bb.0: # %entry 438; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 439; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 440; SSE2-NEXT: pxor %xmm1, %xmm1 441; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 442; SSE2-NEXT: psrad $24, %xmm0 443; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 444; SSE2-NEXT: retq 445; 446; SSSE3-LABEL: sext_16i8_to_2i64: 447; SSSE3: # %bb.0: # %entry 448; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 449; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 450; SSSE3-NEXT: pxor %xmm1, %xmm1 451; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 452; SSSE3-NEXT: psrad $24, %xmm0 453; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 454; SSSE3-NEXT: retq 455; 456; SSE41-LABEL: sext_16i8_to_2i64: 457; SSE41: # %bb.0: # %entry 458; SSE41-NEXT: pmovsxbq %xmm0, %xmm0 459; SSE41-NEXT: retq 460; 461; AVX-LABEL: sext_16i8_to_2i64: 462; AVX: # %bb.0: # %entry 463; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 464; AVX-NEXT: retq 465; 466; X86-SSE2-LABEL: sext_16i8_to_2i64: 467; X86-SSE2: # %bb.0: # %entry 468; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 469; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 470; X86-SSE2-NEXT: pxor %xmm1, %xmm1 471; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 472; X86-SSE2-NEXT: psrad $24, %xmm0 473; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 474; X86-SSE2-NEXT: retl 475; 476; X86-SSE41-LABEL: sext_16i8_to_2i64: 477; X86-SSE41: # %bb.0: # %entry 478; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm0 479; X86-SSE41-NEXT: retl 480entry: 481 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x 
i32> <i32 0, i32 1> 482 %C = sext <2 x i8> %B to <2 x i64> 483 ret <2 x i64> %C 484} 485 486define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 487; SSE2-LABEL: sext_16i8_to_4i64: 488; SSE2: # %bb.0: # %entry 489; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 490; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 491; SSE2-NEXT: psrad $24, %xmm1 492; SSE2-NEXT: pxor %xmm2, %xmm2 493; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 494; SSE2-NEXT: movdqa %xmm1, %xmm0 495; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 496; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 497; SSE2-NEXT: retq 498; 499; SSSE3-LABEL: sext_16i8_to_4i64: 500; SSSE3: # %bb.0: # %entry 501; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 502; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 503; SSSE3-NEXT: psrad $24, %xmm1 504; SSSE3-NEXT: pxor %xmm2, %xmm2 505; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 506; SSSE3-NEXT: movdqa %xmm1, %xmm0 507; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 508; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 509; SSSE3-NEXT: retq 510; 511; SSE41-LABEL: sext_16i8_to_4i64: 512; SSE41: # %bb.0: # %entry 513; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 514; SSE41-NEXT: psrld $16, %xmm0 515; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 516; SSE41-NEXT: movdqa %xmm2, %xmm0 517; SSE41-NEXT: retq 518; 519; AVX1-LABEL: sext_16i8_to_4i64: 520; AVX1: # %bb.0: # %entry 521; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 522; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 523; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 524; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 525; AVX1-NEXT: retq 526; 527; AVX2-LABEL: sext_16i8_to_4i64: 528; AVX2: # %bb.0: # %entry 529; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0 530; AVX2-NEXT: retq 531; 532; AVX512-LABEL: sext_16i8_to_4i64: 533; 
AVX512: # %bb.0: # %entry 534; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0 535; AVX512-NEXT: retq 536; 537; X86-SSE2-LABEL: sext_16i8_to_4i64: 538; X86-SSE2: # %bb.0: # %entry 539; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 540; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 541; X86-SSE2-NEXT: psrad $24, %xmm1 542; X86-SSE2-NEXT: pxor %xmm2, %xmm2 543; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 544; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 545; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 546; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 547; X86-SSE2-NEXT: retl 548; 549; X86-SSE41-LABEL: sext_16i8_to_4i64: 550; X86-SSE41: # %bb.0: # %entry 551; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm2 552; X86-SSE41-NEXT: psrld $16, %xmm0 553; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm1 554; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 555; X86-SSE41-NEXT: retl 556entry: 557 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 558 %C = sext <4 x i8> %B to <4 x i64> 559 ret <4 x i64> %C 560} 561 562define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp { 563; SSE2-LABEL: sext_16i8_to_8i64: 564; SSE2: # %bb.0: # %entry 565; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 566; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 567; SSE2-NEXT: psrad $24, %xmm1 568; SSE2-NEXT: pxor %xmm4, %xmm4 569; SSE2-NEXT: pxor %xmm3, %xmm3 570; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 571; SSE2-NEXT: movdqa %xmm1, %xmm0 572; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 573; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 574; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 
575; SSE2-NEXT: psrad $24, %xmm3 576; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 577; SSE2-NEXT: movdqa %xmm3, %xmm2 578; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 579; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 580; SSE2-NEXT: retq 581; 582; SSSE3-LABEL: sext_16i8_to_8i64: 583; SSSE3: # %bb.0: # %entry 584; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 585; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 586; SSSE3-NEXT: psrad $24, %xmm1 587; SSSE3-NEXT: pxor %xmm4, %xmm4 588; SSSE3-NEXT: pxor %xmm3, %xmm3 589; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 590; SSSE3-NEXT: movdqa %xmm1, %xmm0 591; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 592; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 593; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 594; SSSE3-NEXT: psrad $24, %xmm3 595; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 596; SSSE3-NEXT: movdqa %xmm3, %xmm2 597; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 598; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 599; SSSE3-NEXT: retq 600; 601; SSE41-LABEL: sext_16i8_to_8i64: 602; SSE41: # %bb.0: # %entry 603; SSE41-NEXT: pmovsxbq %xmm0, %xmm4 604; SSE41-NEXT: movdqa %xmm0, %xmm1 605; SSE41-NEXT: psrld $16, %xmm1 606; SSE41-NEXT: pmovsxbq %xmm1, %xmm1 607; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 608; SSE41-NEXT: pmovsxbq %xmm2, %xmm2 609; SSE41-NEXT: psrlq $48, %xmm0 610; SSE41-NEXT: pmovsxbq %xmm0, %xmm3 611; SSE41-NEXT: movdqa %xmm4, %xmm0 612; SSE41-NEXT: retq 613; 614; AVX1-LABEL: sext_16i8_to_8i64: 615; AVX1: # %bb.0: # %entry 616; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 617; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 618; AVX1-NEXT: vpmovsxbq %xmm2, %xmm2 619; AVX1-NEXT: 
vinsertf128 $1, %xmm2, %ymm1, %ymm2 620; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 621; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 622; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 623; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 624; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 625; AVX1-NEXT: vmovaps %ymm2, %ymm0 626; AVX1-NEXT: retq 627; 628; AVX2-LABEL: sext_16i8_to_8i64: 629; AVX2: # %bb.0: # %entry 630; AVX2-NEXT: vpmovsxbq %xmm0, %ymm2 631; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 632; AVX2-NEXT: vpmovsxbq %xmm0, %ymm1 633; AVX2-NEXT: vmovdqa %ymm2, %ymm0 634; AVX2-NEXT: retq 635; 636; AVX512-LABEL: sext_16i8_to_8i64: 637; AVX512: # %bb.0: # %entry 638; AVX512-NEXT: vpmovsxbq %xmm0, %zmm0 639; AVX512-NEXT: retq 640; 641; X86-SSE2-LABEL: sext_16i8_to_8i64: 642; X86-SSE2: # %bb.0: # %entry 643; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 644; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 645; X86-SSE2-NEXT: psrad $24, %xmm1 646; X86-SSE2-NEXT: pxor %xmm4, %xmm4 647; X86-SSE2-NEXT: pxor %xmm3, %xmm3 648; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 649; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 650; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 651; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 652; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 653; X86-SSE2-NEXT: psrad $24, %xmm3 654; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 655; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 656; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 657; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 658; X86-SSE2-NEXT: retl 659; 660; X86-SSE41-LABEL: sext_16i8_to_8i64: 661; X86-SSE41: # %bb.0: # %entry 662; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm4 663; X86-SSE41-NEXT: movdqa %xmm0, 
%xmm1 664; X86-SSE41-NEXT: psrld $16, %xmm1 665; X86-SSE41-NEXT: pmovsxbq %xmm1, %xmm1 666; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 667; X86-SSE41-NEXT: pmovsxbq %xmm2, %xmm2 668; X86-SSE41-NEXT: psrlq $48, %xmm0 669; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm3 670; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 671; X86-SSE41-NEXT: retl 672entry: 673 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 674 %C = sext <8 x i8> %B to <8 x i64> 675 ret <8 x i64> %C 676} 677 678define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 679; SSE2-LABEL: sext_8i16_to_4i32: 680; SSE2: # %bb.0: # %entry 681; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 682; SSE2-NEXT: psrad $16, %xmm0 683; SSE2-NEXT: retq 684; 685; SSSE3-LABEL: sext_8i16_to_4i32: 686; SSSE3: # %bb.0: # %entry 687; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 688; SSSE3-NEXT: psrad $16, %xmm0 689; SSSE3-NEXT: retq 690; 691; SSE41-LABEL: sext_8i16_to_4i32: 692; SSE41: # %bb.0: # %entry 693; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 694; SSE41-NEXT: retq 695; 696; AVX-LABEL: sext_8i16_to_4i32: 697; AVX: # %bb.0: # %entry 698; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 699; AVX-NEXT: retq 700; 701; X86-SSE2-LABEL: sext_8i16_to_4i32: 702; X86-SSE2: # %bb.0: # %entry 703; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 704; X86-SSE2-NEXT: psrad $16, %xmm0 705; X86-SSE2-NEXT: retl 706; 707; X86-SSE41-LABEL: sext_8i16_to_4i32: 708; X86-SSE41: # %bb.0: # %entry 709; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm0 710; X86-SSE41-NEXT: retl 711entry: 712 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 713 %C = sext <4 x i16> %B to <4 x i32> 714 ret <4 x i32> %C 715} 716 717define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 718; SSE2-LABEL: sext_8i16_to_8i32: 719; SSE2: # %bb.0: # %entry 720; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 721; SSE2-NEXT: psrad $16, %xmm2 722; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 723; SSE2-NEXT: psrad $16, %xmm1 724; SSE2-NEXT: movdqa %xmm2, %xmm0 725; SSE2-NEXT: retq 726; 727; SSSE3-LABEL: sext_8i16_to_8i32: 728; SSSE3: # %bb.0: # %entry 729; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 730; SSSE3-NEXT: psrad $16, %xmm2 731; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 732; SSSE3-NEXT: psrad $16, %xmm1 733; SSSE3-NEXT: movdqa %xmm2, %xmm0 734; SSSE3-NEXT: retq 735; 736; SSE41-LABEL: sext_8i16_to_8i32: 737; SSE41: # %bb.0: # %entry 738; SSE41-NEXT: pmovsxwd %xmm0, %xmm2 739; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 740; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 741; SSE41-NEXT: movdqa %xmm2, %xmm0 742; SSE41-NEXT: retq 743; 744; AVX1-LABEL: sext_8i16_to_8i32: 745; AVX1: # %bb.0: # %entry 746; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 747; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 748; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 749; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 750; AVX1-NEXT: retq 751; 752; AVX2-LABEL: sext_8i16_to_8i32: 753; AVX2: # %bb.0: # %entry 754; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 755; AVX2-NEXT: retq 756; 757; AVX512-LABEL: sext_8i16_to_8i32: 758; AVX512: # %bb.0: # %entry 759; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 760; AVX512-NEXT: retq 761; 762; X86-SSE2-LABEL: sext_8i16_to_8i32: 763; X86-SSE2: # %bb.0: # %entry 764; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 765; X86-SSE2-NEXT: psrad $16, %xmm2 766; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 767; X86-SSE2-NEXT: psrad $16, %xmm1 768; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 769; X86-SSE2-NEXT: retl 770; 771; X86-SSE41-LABEL: sext_8i16_to_8i32: 772; 
X86-SSE41: # %bb.0: # %entry 773; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm2 774; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 775; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm1 776; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 777; X86-SSE41-NEXT: retl 778entry: 779 %B = sext <8 x i16> %A to <8 x i32> 780 ret <8 x i32> %B 781} 782 783define <16 x i32> @sext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp { 784; SSE2-LABEL: sext_16i16_to_16i32: 785; SSE2: # %bb.0: # %entry 786; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 787; SSE2-NEXT: psrad $16, %xmm4 788; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 789; SSE2-NEXT: psrad $16, %xmm5 790; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 791; SSE2-NEXT: psrad $16, %xmm2 792; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 793; SSE2-NEXT: psrad $16, %xmm3 794; SSE2-NEXT: movdqa %xmm4, %xmm0 795; SSE2-NEXT: movdqa %xmm5, %xmm1 796; SSE2-NEXT: retq 797; 798; SSSE3-LABEL: sext_16i16_to_16i32: 799; SSSE3: # %bb.0: # %entry 800; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 801; SSSE3-NEXT: psrad $16, %xmm4 802; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 803; SSSE3-NEXT: psrad $16, %xmm5 804; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 805; SSSE3-NEXT: psrad $16, %xmm2 806; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 807; SSSE3-NEXT: psrad $16, %xmm3 808; SSSE3-NEXT: movdqa %xmm4, %xmm0 809; SSSE3-NEXT: movdqa %xmm5, %xmm1 810; SSSE3-NEXT: retq 811; 812; SSE41-LABEL: sext_16i16_to_16i32: 813; SSE41: # %bb.0: # %entry 814; SSE41-NEXT: pmovsxwd %xmm0, %xmm5 815; SSE41-NEXT: pmovsxwd 
%xmm1, %xmm2 816; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 817; SSE41-NEXT: pmovsxwd %xmm0, %xmm4 818; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 819; SSE41-NEXT: pmovsxwd %xmm0, %xmm3 820; SSE41-NEXT: movdqa %xmm5, %xmm0 821; SSE41-NEXT: movdqa %xmm4, %xmm1 822; SSE41-NEXT: retq 823; 824; AVX1-LABEL: sext_16i16_to_16i32: 825; AVX1: # %bb.0: # %entry 826; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 827; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 828; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2 829; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 830; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 831; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 832; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 833; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 834; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 835; AVX1-NEXT: vmovaps %ymm2, %ymm0 836; AVX1-NEXT: retq 837; 838; AVX2-LABEL: sext_16i16_to_16i32: 839; AVX2: # %bb.0: # %entry 840; AVX2-NEXT: vpmovsxwd %xmm0, %ymm2 841; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 842; AVX2-NEXT: vpmovsxwd %xmm0, %ymm1 843; AVX2-NEXT: vmovdqa %ymm2, %ymm0 844; AVX2-NEXT: retq 845; 846; AVX512-LABEL: sext_16i16_to_16i32: 847; AVX512: # %bb.0: # %entry 848; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 849; AVX512-NEXT: retq 850; 851; X86-SSE2-LABEL: sext_16i16_to_16i32: 852; X86-SSE2: # %bb.0: # %entry 853; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 854; X86-SSE2-NEXT: psrad $16, %xmm4 855; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 856; X86-SSE2-NEXT: psrad $16, %xmm5 857; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 858; X86-SSE2-NEXT: psrad $16, %xmm2 859; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 860; X86-SSE2-NEXT: psrad $16, %xmm3 861; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 862; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 863; 
X86-SSE2-NEXT: retl 864; 865; X86-SSE41-LABEL: sext_16i16_to_16i32: 866; X86-SSE41: # %bb.0: # %entry 867; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm5 868; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm2 869; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 870; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm4 871; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 872; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm3 873; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 874; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 875; X86-SSE41-NEXT: retl 876entry: 877 %B = sext <16 x i16> %A to <16 x i32> 878 ret <16 x i32> %B 879} 880 881define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 882; SSE2-LABEL: sext_8i16_to_2i64: 883; SSE2: # %bb.0: # %entry 884; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 885; SSE2-NEXT: pxor %xmm1, %xmm1 886; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 887; SSE2-NEXT: psrad $16, %xmm0 888; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 889; SSE2-NEXT: retq 890; 891; SSSE3-LABEL: sext_8i16_to_2i64: 892; SSSE3: # %bb.0: # %entry 893; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 894; SSSE3-NEXT: pxor %xmm1, %xmm1 895; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 896; SSSE3-NEXT: psrad $16, %xmm0 897; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 898; SSSE3-NEXT: retq 899; 900; SSE41-LABEL: sext_8i16_to_2i64: 901; SSE41: # %bb.0: # %entry 902; SSE41-NEXT: pmovsxwq %xmm0, %xmm0 903; SSE41-NEXT: retq 904; 905; AVX-LABEL: sext_8i16_to_2i64: 906; AVX: # %bb.0: # %entry 907; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 908; AVX-NEXT: retq 909; 910; X86-SSE2-LABEL: sext_8i16_to_2i64: 911; X86-SSE2: # %bb.0: # %entry 912; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 913; X86-SSE2-NEXT: pxor %xmm1, %xmm1 914; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 915; X86-SSE2-NEXT: psrad $16, %xmm0 916; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 917; X86-SSE2-NEXT: retl 918; 919; X86-SSE41-LABEL: 
sext_8i16_to_2i64: 920; X86-SSE41: # %bb.0: # %entry 921; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm0 922; X86-SSE41-NEXT: retl 923entry: 924 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 925 %C = sext <2 x i16> %B to <2 x i64> 926 ret <2 x i64> %C 927} 928 929define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 930; SSE2-LABEL: sext_8i16_to_4i64: 931; SSE2: # %bb.0: # %entry 932; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 933; SSE2-NEXT: psrad $16, %xmm1 934; SSE2-NEXT: pxor %xmm2, %xmm2 935; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 936; SSE2-NEXT: movdqa %xmm1, %xmm0 937; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 938; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 939; SSE2-NEXT: retq 940; 941; SSSE3-LABEL: sext_8i16_to_4i64: 942; SSSE3: # %bb.0: # %entry 943; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 944; SSSE3-NEXT: psrad $16, %xmm1 945; SSSE3-NEXT: pxor %xmm2, %xmm2 946; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 947; SSSE3-NEXT: movdqa %xmm1, %xmm0 948; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 949; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 950; SSSE3-NEXT: retq 951; 952; SSE41-LABEL: sext_8i16_to_4i64: 953; SSE41: # %bb.0: # %entry 954; SSE41-NEXT: pmovsxwq %xmm0, %xmm2 955; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 956; SSE41-NEXT: pmovsxwq %xmm0, %xmm1 957; SSE41-NEXT: movdqa %xmm2, %xmm0 958; SSE41-NEXT: retq 959; 960; AVX1-LABEL: sext_8i16_to_4i64: 961; AVX1: # %bb.0: # %entry 962; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 963; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 964; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 965; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 966; AVX1-NEXT: retq 967; 968; AVX2-LABEL: sext_8i16_to_4i64: 969; AVX2: # %bb.0: # %entry 970; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 971; 
AVX2-NEXT: retq 972; 973; AVX512-LABEL: sext_8i16_to_4i64: 974; AVX512: # %bb.0: # %entry 975; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0 976; AVX512-NEXT: retq 977; 978; X86-SSE2-LABEL: sext_8i16_to_4i64: 979; X86-SSE2: # %bb.0: # %entry 980; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 981; X86-SSE2-NEXT: psrad $16, %xmm1 982; X86-SSE2-NEXT: pxor %xmm2, %xmm2 983; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 984; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 985; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 986; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 987; X86-SSE2-NEXT: retl 988; 989; X86-SSE41-LABEL: sext_8i16_to_4i64: 990; X86-SSE41: # %bb.0: # %entry 991; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm2 992; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 993; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm1 994; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 995; X86-SSE41-NEXT: retl 996entry: 997 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 998 %C = sext <4 x i16> %B to <4 x i64> 999 ret <4 x i64> %C 1000} 1001 1002define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 1003; SSE2-LABEL: sext_8i16_to_8i64: 1004; SSE2: # %bb.0: # %entry 1005; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1006; SSE2-NEXT: psrad $16, %xmm1 1007; SSE2-NEXT: pxor %xmm5, %xmm5 1008; SSE2-NEXT: pxor %xmm2, %xmm2 1009; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1010; SSE2-NEXT: movdqa %xmm1, %xmm4 1011; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1012; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1013; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1014; SSE2-NEXT: psrad $16, %xmm3 1015; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1016; SSE2-NEXT: movdqa %xmm3, %xmm2 1017; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = 
xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1018; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1019; SSE2-NEXT: movdqa %xmm4, %xmm0 1020; SSE2-NEXT: retq 1021; 1022; SSSE3-LABEL: sext_8i16_to_8i64: 1023; SSSE3: # %bb.0: # %entry 1024; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1025; SSSE3-NEXT: psrad $16, %xmm1 1026; SSSE3-NEXT: pxor %xmm5, %xmm5 1027; SSSE3-NEXT: pxor %xmm2, %xmm2 1028; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 1029; SSSE3-NEXT: movdqa %xmm1, %xmm4 1030; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1031; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1032; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1033; SSSE3-NEXT: psrad $16, %xmm3 1034; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5 1035; SSSE3-NEXT: movdqa %xmm3, %xmm2 1036; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1037; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1038; SSSE3-NEXT: movdqa %xmm4, %xmm0 1039; SSSE3-NEXT: retq 1040; 1041; SSE41-LABEL: sext_8i16_to_8i64: 1042; SSE41: # %bb.0: # %entry 1043; SSE41-NEXT: pmovsxwq %xmm0, %xmm4 1044; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1045; SSE41-NEXT: pmovsxwq %xmm1, %xmm1 1046; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1047; SSE41-NEXT: pmovsxwq %xmm2, %xmm2 1048; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1049; SSE41-NEXT: pmovsxwq %xmm0, %xmm3 1050; SSE41-NEXT: movdqa %xmm4, %xmm0 1051; SSE41-NEXT: retq 1052; 1053; AVX1-LABEL: sext_8i16_to_8i64: 1054; AVX1: # %bb.0: # %entry 1055; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 1056; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 1057; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2 1058; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 1059; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1060; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1 1061; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1062; 
AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 1063; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1064; AVX1-NEXT: vmovaps %ymm2, %ymm0 1065; AVX1-NEXT: retq 1066; 1067; AVX2-LABEL: sext_8i16_to_8i64: 1068; AVX2: # %bb.0: # %entry 1069; AVX2-NEXT: vpmovsxwq %xmm0, %ymm2 1070; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1071; AVX2-NEXT: vpmovsxwq %xmm0, %ymm1 1072; AVX2-NEXT: vmovdqa %ymm2, %ymm0 1073; AVX2-NEXT: retq 1074; 1075; AVX512-LABEL: sext_8i16_to_8i64: 1076; AVX512: # %bb.0: # %entry 1077; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0 1078; AVX512-NEXT: retq 1079; 1080; X86-SSE2-LABEL: sext_8i16_to_8i64: 1081; X86-SSE2: # %bb.0: # %entry 1082; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1083; X86-SSE2-NEXT: psrad $16, %xmm1 1084; X86-SSE2-NEXT: pxor %xmm5, %xmm5 1085; X86-SSE2-NEXT: pxor %xmm2, %xmm2 1086; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1087; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1088; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1089; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1090; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1091; X86-SSE2-NEXT: psrad $16, %xmm3 1092; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1093; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 1094; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1095; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1096; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 1097; X86-SSE2-NEXT: retl 1098; 1099; X86-SSE41-LABEL: sext_8i16_to_8i64: 1100; X86-SSE41: # %bb.0: # %entry 1101; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm4 1102; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1103; X86-SSE41-NEXT: pmovsxwq %xmm1, %xmm1 1104; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1105; X86-SSE41-NEXT: pmovsxwq %xmm2, %xmm2 1106; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1107; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm3 
1108; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 1109; X86-SSE41-NEXT: retl 1110entry: 1111 %B = sext <8 x i16> %A to <8 x i64> 1112 ret <8 x i64> %B 1113} 1114 1115define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1116; SSE2-LABEL: sext_4i32_to_2i64: 1117; SSE2: # %bb.0: # %entry 1118; SSE2-NEXT: pxor %xmm1, %xmm1 1119; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1120; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1121; SSE2-NEXT: retq 1122; 1123; SSSE3-LABEL: sext_4i32_to_2i64: 1124; SSSE3: # %bb.0: # %entry 1125; SSSE3-NEXT: pxor %xmm1, %xmm1 1126; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 1127; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1128; SSSE3-NEXT: retq 1129; 1130; SSE41-LABEL: sext_4i32_to_2i64: 1131; SSE41: # %bb.0: # %entry 1132; SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1133; SSE41-NEXT: retq 1134; 1135; AVX-LABEL: sext_4i32_to_2i64: 1136; AVX: # %bb.0: # %entry 1137; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 1138; AVX-NEXT: retq 1139; 1140; X86-SSE2-LABEL: sext_4i32_to_2i64: 1141; X86-SSE2: # %bb.0: # %entry 1142; X86-SSE2-NEXT: pxor %xmm1, %xmm1 1143; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1144; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1145; X86-SSE2-NEXT: retl 1146; 1147; X86-SSE41-LABEL: sext_4i32_to_2i64: 1148; X86-SSE41: # %bb.0: # %entry 1149; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1150; X86-SSE41-NEXT: retl 1151entry: 1152 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 1153 %C = sext <2 x i32> %B to <2 x i64> 1154 ret <2 x i64> %C 1155} 1156 1157define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1158; SSE2-LABEL: sext_4i32_to_4i64: 1159; SSE2: # %bb.0: # %entry 1160; SSE2-NEXT: pxor %xmm2, %xmm2 1161; SSE2-NEXT: pxor %xmm3, %xmm3 1162; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1163; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1164; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1165; SSE2-NEXT: pcmpgtd 
%xmm1, %xmm2 1166; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1167; SSE2-NEXT: retq 1168; 1169; SSSE3-LABEL: sext_4i32_to_4i64: 1170; SSSE3: # %bb.0: # %entry 1171; SSSE3-NEXT: pxor %xmm2, %xmm2 1172; SSSE3-NEXT: pxor %xmm3, %xmm3 1173; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 1174; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1175; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1176; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 1177; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1178; SSSE3-NEXT: retq 1179; 1180; SSE41-LABEL: sext_4i32_to_4i64: 1181; SSE41: # %bb.0: # %entry 1182; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 1183; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1184; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1185; SSE41-NEXT: movdqa %xmm2, %xmm0 1186; SSE41-NEXT: retq 1187; 1188; AVX1-LABEL: sext_4i32_to_4i64: 1189; AVX1: # %bb.0: # %entry 1190; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1191; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1192; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1193; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1194; AVX1-NEXT: retq 1195; 1196; AVX2-LABEL: sext_4i32_to_4i64: 1197; AVX2: # %bb.0: # %entry 1198; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 1199; AVX2-NEXT: retq 1200; 1201; AVX512-LABEL: sext_4i32_to_4i64: 1202; AVX512: # %bb.0: # %entry 1203; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 1204; AVX512-NEXT: retq 1205; 1206; X86-SSE2-LABEL: sext_4i32_to_4i64: 1207; X86-SSE2: # %bb.0: # %entry 1208; X86-SSE2-NEXT: pxor %xmm2, %xmm2 1209; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1210; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1211; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1212; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1213; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1214; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1215; X86-SSE2-NEXT: retl 1216; 1217; X86-SSE41-LABEL: sext_4i32_to_4i64: 1218; X86-SSE41: # %bb.0: # %entry 1219; X86-SSE41-NEXT: pmovsxdq 
%xmm0, %xmm2 1220; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1221; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1222; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 1223; X86-SSE41-NEXT: retl 1224entry: 1225 %B = sext <4 x i32> %A to <4 x i64> 1226 ret <4 x i64> %B 1227} 1228 1229define <8 x i64> @sext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 1230; SSE2-LABEL: sext_8i32_to_8i64: 1231; SSE2: # %bb.0: # %entry 1232; SSE2-NEXT: movdqa %xmm1, %xmm2 1233; SSE2-NEXT: pxor %xmm4, %xmm4 1234; SSE2-NEXT: pxor %xmm3, %xmm3 1235; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1236; SSE2-NEXT: pxor %xmm5, %xmm5 1237; SSE2-NEXT: pcmpgtd %xmm1, %xmm5 1238; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1239; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1240; SSE2-NEXT: pxor %xmm3, %xmm3 1241; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1242; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1243; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1244; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1245; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1246; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1247; SSE2-NEXT: retq 1248; 1249; SSSE3-LABEL: sext_8i32_to_8i64: 1250; SSSE3: # %bb.0: # %entry 1251; SSSE3-NEXT: movdqa %xmm1, %xmm2 1252; SSSE3-NEXT: pxor %xmm4, %xmm4 1253; SSSE3-NEXT: pxor %xmm3, %xmm3 1254; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 1255; SSSE3-NEXT: pxor %xmm5, %xmm5 1256; SSSE3-NEXT: pcmpgtd %xmm1, %xmm5 1257; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1258; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1259; SSSE3-NEXT: pxor %xmm3, %xmm3 1260; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 1261; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1262; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1263; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1264; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 1265; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = 
xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1266; SSSE3-NEXT: retq 1267; 1268; SSE41-LABEL: sext_8i32_to_8i64: 1269; SSE41: # %bb.0: # %entry 1270; SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1271; SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1272; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1273; SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1274; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1275; SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1276; SSE41-NEXT: movdqa %xmm5, %xmm0 1277; SSE41-NEXT: movdqa %xmm4, %xmm1 1278; SSE41-NEXT: retq 1279; 1280; AVX1-LABEL: sext_8i32_to_8i64: 1281; AVX1: # %bb.0: # %entry 1282; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1283; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1284; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 1285; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 1286; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1287; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1288; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1289; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1290; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1291; AVX1-NEXT: vmovaps %ymm2, %ymm0 1292; AVX1-NEXT: retq 1293; 1294; AVX2-LABEL: sext_8i32_to_8i64: 1295; AVX2: # %bb.0: # %entry 1296; AVX2-NEXT: vpmovsxdq %xmm0, %ymm2 1297; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1298; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1 1299; AVX2-NEXT: vmovdqa %ymm2, %ymm0 1300; AVX2-NEXT: retq 1301; 1302; AVX512-LABEL: sext_8i32_to_8i64: 1303; AVX512: # %bb.0: # %entry 1304; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0 1305; AVX512-NEXT: retq 1306; 1307; X86-SSE2-LABEL: sext_8i32_to_8i64: 1308; X86-SSE2: # %bb.0: # %entry 1309; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1310; X86-SSE2-NEXT: pxor %xmm4, %xmm4 1311; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1312; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1313; X86-SSE2-NEXT: pxor %xmm5, %xmm5 1314; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm5 1315; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1316; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1317; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1318; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 
1319; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1320; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1321; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1322; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1323; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1324; X86-SSE2-NEXT: retl 1325; 1326; X86-SSE41-LABEL: sext_8i32_to_8i64: 1327; X86-SSE41: # %bb.0: # %entry 1328; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1329; X86-SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1330; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1331; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1332; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1333; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1334; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 1335; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 1336; X86-SSE41-NEXT: retl 1337entry: 1338 %B = sext <8 x i32> %A to <8 x i64> 1339 ret <8 x i64> %B 1340} 1341 1342define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { 1343; SSE-LABEL: load_sext_2i1_to_2i64: 1344; SSE: # %bb.0: # %entry 1345; SSE-NEXT: movb (%rdi), %al 1346; SSE-NEXT: movzbl %al, %ecx 1347; SSE-NEXT: shrb %al 1348; SSE-NEXT: movzbl %al, %eax 1349; SSE-NEXT: negq %rax 1350; SSE-NEXT: movq %rax, %xmm1 1351; SSE-NEXT: andl $1, %ecx 1352; SSE-NEXT: negq %rcx 1353; SSE-NEXT: movq %rcx, %xmm0 1354; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1355; SSE-NEXT: retq 1356; 1357; AVX1-LABEL: load_sext_2i1_to_2i64: 1358; AVX1: # %bb.0: # %entry 1359; AVX1-NEXT: movb (%rdi), %al 1360; AVX1-NEXT: movzbl %al, %ecx 1361; AVX1-NEXT: shrb %al 1362; AVX1-NEXT: movzbl %al, %eax 1363; AVX1-NEXT: negq %rax 1364; AVX1-NEXT: vmovq %rax, %xmm0 1365; AVX1-NEXT: andl $1, %ecx 1366; AVX1-NEXT: negq %rcx 1367; AVX1-NEXT: vmovq %rcx, %xmm1 1368; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1369; AVX1-NEXT: retq 1370; 1371; AVX2-LABEL: load_sext_2i1_to_2i64: 1372; AVX2: # %bb.0: # %entry 1373; AVX2-NEXT: movb (%rdi), %al 1374; AVX2-NEXT: movzbl 
%al, %ecx 1375; AVX2-NEXT: shrb %al 1376; AVX2-NEXT: movzbl %al, %eax 1377; AVX2-NEXT: negq %rax 1378; AVX2-NEXT: vmovq %rax, %xmm0 1379; AVX2-NEXT: andl $1, %ecx 1380; AVX2-NEXT: negq %rcx 1381; AVX2-NEXT: vmovq %rcx, %xmm1 1382; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1383; AVX2-NEXT: retq 1384; 1385; AVX512-LABEL: load_sext_2i1_to_2i64: 1386; AVX512: # %bb.0: # %entry 1387; AVX512-NEXT: kmovw (%rdi), %k1 1388; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1389; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1390; AVX512-NEXT: vzeroupper 1391; AVX512-NEXT: retq 1392; 1393; X86-SSE2-LABEL: load_sext_2i1_to_2i64: 1394; X86-SSE2: # %bb.0: # %entry 1395; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1396; X86-SSE2-NEXT: movb (%eax), %al 1397; X86-SSE2-NEXT: movzbl %al, %ecx 1398; X86-SSE2-NEXT: shrb %al 1399; X86-SSE2-NEXT: movzbl %al, %eax 1400; X86-SSE2-NEXT: negl %eax 1401; X86-SSE2-NEXT: movd %eax, %xmm0 1402; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] 1403; X86-SSE2-NEXT: andl $1, %ecx 1404; X86-SSE2-NEXT: negl %ecx 1405; X86-SSE2-NEXT: movd %ecx, %xmm0 1406; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 1407; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1408; X86-SSE2-NEXT: retl 1409; 1410; X86-SSE41-LABEL: load_sext_2i1_to_2i64: 1411; X86-SSE41: # %bb.0: # %entry 1412; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1413; X86-SSE41-NEXT: movb (%eax), %al 1414; X86-SSE41-NEXT: movzbl %al, %ecx 1415; X86-SSE41-NEXT: andl $1, %ecx 1416; X86-SSE41-NEXT: negl %ecx 1417; X86-SSE41-NEXT: movd %ecx, %xmm0 1418; X86-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1419; X86-SSE41-NEXT: shrb %al 1420; X86-SSE41-NEXT: movzbl %al, %eax 1421; X86-SSE41-NEXT: negl %eax 1422; X86-SSE41-NEXT: pinsrd $2, %eax, %xmm0 1423; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1424; X86-SSE41-NEXT: retl 1425entry: 1426 %X = load <2 x i1>, <2 x i1>* %ptr 1427 %Y = sext <2 x i1> %X to <2 x i64> 1428 ret <2 x i64> %Y 1429} 1430 1431define <2 x i64> 
@load_sext_2i8_to_2i64(<2 x i8> *%ptr) { 1432; SSE2-LABEL: load_sext_2i8_to_2i64: 1433; SSE2: # %bb.0: # %entry 1434; SSE2-NEXT: movzwl (%rdi), %eax 1435; SSE2-NEXT: movd %eax, %xmm0 1436; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1437; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1438; SSE2-NEXT: pxor %xmm1, %xmm1 1439; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1440; SSE2-NEXT: psrad $24, %xmm0 1441; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1442; SSE2-NEXT: retq 1443; 1444; SSSE3-LABEL: load_sext_2i8_to_2i64: 1445; SSSE3: # %bb.0: # %entry 1446; SSSE3-NEXT: movzwl (%rdi), %eax 1447; SSSE3-NEXT: movd %eax, %xmm0 1448; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1449; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1450; SSSE3-NEXT: pxor %xmm1, %xmm1 1451; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 1452; SSSE3-NEXT: psrad $24, %xmm0 1453; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1454; SSSE3-NEXT: retq 1455; 1456; SSE41-LABEL: load_sext_2i8_to_2i64: 1457; SSE41: # %bb.0: # %entry 1458; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1459; SSE41-NEXT: retq 1460; 1461; AVX-LABEL: load_sext_2i8_to_2i64: 1462; AVX: # %bb.0: # %entry 1463; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 1464; AVX-NEXT: retq 1465; 1466; X86-SSE2-LABEL: load_sext_2i8_to_2i64: 1467; X86-SSE2: # %bb.0: # %entry 1468; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1469; X86-SSE2-NEXT: movzwl (%eax), %eax 1470; X86-SSE2-NEXT: movd %eax, %xmm0 1471; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1472; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1473; X86-SSE2-NEXT: pxor %xmm1, %xmm1 1474; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1475; X86-SSE2-NEXT: psrad $24, %xmm0 1476; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1477; X86-SSE2-NEXT: retl 1478; 1479; X86-SSE41-LABEL: load_sext_2i8_to_2i64: 1480; X86-SSE41: # %bb.0: # %entry 
1481; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1482; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1483; X86-SSE41-NEXT: retl 1484entry: 1485 %X = load <2 x i8>, <2 x i8>* %ptr 1486 %Y = sext <2 x i8> %X to <2 x i64> 1487 ret <2 x i64> %Y 1488} 1489 1490define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { 1491; SSE2-LABEL: load_sext_4i1_to_4i32: 1492; SSE2: # %bb.0: # %entry 1493; SSE2-NEXT: movb (%rdi), %al 1494; SSE2-NEXT: movl %eax, %ecx 1495; SSE2-NEXT: shrb $3, %cl 1496; SSE2-NEXT: movzbl %cl, %ecx 1497; SSE2-NEXT: negl %ecx 1498; SSE2-NEXT: movd %ecx, %xmm0 1499; SSE2-NEXT: movzbl %al, %ecx 1500; SSE2-NEXT: shrb $2, %al 1501; SSE2-NEXT: movzbl %al, %eax 1502; SSE2-NEXT: andl $1, %eax 1503; SSE2-NEXT: negl %eax 1504; SSE2-NEXT: movd %eax, %xmm1 1505; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1506; SSE2-NEXT: movl %ecx, %eax 1507; SSE2-NEXT: andl $1, %eax 1508; SSE2-NEXT: negl %eax 1509; SSE2-NEXT: movd %eax, %xmm0 1510; SSE2-NEXT: shrb %cl 1511; SSE2-NEXT: movzbl %cl, %eax 1512; SSE2-NEXT: andl $1, %eax 1513; SSE2-NEXT: negl %eax 1514; SSE2-NEXT: movd %eax, %xmm2 1515; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1516; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1517; SSE2-NEXT: retq 1518; 1519; SSSE3-LABEL: load_sext_4i1_to_4i32: 1520; SSSE3: # %bb.0: # %entry 1521; SSSE3-NEXT: movb (%rdi), %al 1522; SSSE3-NEXT: movl %eax, %ecx 1523; SSSE3-NEXT: shrb $3, %cl 1524; SSSE3-NEXT: movzbl %cl, %ecx 1525; SSSE3-NEXT: negl %ecx 1526; SSSE3-NEXT: movd %ecx, %xmm0 1527; SSSE3-NEXT: movzbl %al, %ecx 1528; SSSE3-NEXT: shrb $2, %al 1529; SSSE3-NEXT: movzbl %al, %eax 1530; SSSE3-NEXT: andl $1, %eax 1531; SSSE3-NEXT: negl %eax 1532; SSSE3-NEXT: movd %eax, %xmm1 1533; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1534; SSSE3-NEXT: movl %ecx, %eax 1535; SSSE3-NEXT: andl $1, %eax 1536; SSSE3-NEXT: negl %eax 1537; SSSE3-NEXT: movd %eax, %xmm0 1538; SSSE3-NEXT: shrb %cl 1539; SSSE3-NEXT: movzbl 
%cl, %eax 1540; SSSE3-NEXT: andl $1, %eax 1541; SSSE3-NEXT: negl %eax 1542; SSSE3-NEXT: movd %eax, %xmm2 1543; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1544; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1545; SSSE3-NEXT: retq 1546; 1547; SSE41-LABEL: load_sext_4i1_to_4i32: 1548; SSE41: # %bb.0: # %entry 1549; SSE41-NEXT: movb (%rdi), %al 1550; SSE41-NEXT: movzbl %al, %ecx 1551; SSE41-NEXT: shrb %al 1552; SSE41-NEXT: movzbl %al, %eax 1553; SSE41-NEXT: andl $1, %eax 1554; SSE41-NEXT: negl %eax 1555; SSE41-NEXT: movl %ecx, %edx 1556; SSE41-NEXT: andl $1, %edx 1557; SSE41-NEXT: negl %edx 1558; SSE41-NEXT: movd %edx, %xmm0 1559; SSE41-NEXT: pinsrd $1, %eax, %xmm0 1560; SSE41-NEXT: movl %ecx, %eax 1561; SSE41-NEXT: shrb $2, %al 1562; SSE41-NEXT: movzbl %al, %eax 1563; SSE41-NEXT: andl $1, %eax 1564; SSE41-NEXT: negl %eax 1565; SSE41-NEXT: pinsrd $2, %eax, %xmm0 1566; SSE41-NEXT: shrb $3, %cl 1567; SSE41-NEXT: movzbl %cl, %eax 1568; SSE41-NEXT: negl %eax 1569; SSE41-NEXT: pinsrd $3, %eax, %xmm0 1570; SSE41-NEXT: retq 1571; 1572; AVX1-LABEL: load_sext_4i1_to_4i32: 1573; AVX1: # %bb.0: # %entry 1574; AVX1-NEXT: movb (%rdi), %al 1575; AVX1-NEXT: movzbl %al, %ecx 1576; AVX1-NEXT: shrb %al 1577; AVX1-NEXT: movzbl %al, %eax 1578; AVX1-NEXT: andl $1, %eax 1579; AVX1-NEXT: negl %eax 1580; AVX1-NEXT: movl %ecx, %edx 1581; AVX1-NEXT: andl $1, %edx 1582; AVX1-NEXT: negl %edx 1583; AVX1-NEXT: vmovd %edx, %xmm0 1584; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1585; AVX1-NEXT: movl %ecx, %eax 1586; AVX1-NEXT: shrb $2, %al 1587; AVX1-NEXT: movzbl %al, %eax 1588; AVX1-NEXT: andl $1, %eax 1589; AVX1-NEXT: negl %eax 1590; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1591; AVX1-NEXT: shrb $3, %cl 1592; AVX1-NEXT: movzbl %cl, %eax 1593; AVX1-NEXT: negl %eax 1594; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1595; AVX1-NEXT: retq 1596; 1597; AVX2-LABEL: load_sext_4i1_to_4i32: 1598; AVX2: # %bb.0: # %entry 1599; AVX2-NEXT: movb (%rdi), %al 1600; AVX2-NEXT: 
movzbl %al, %ecx 1601; AVX2-NEXT: shrb %al 1602; AVX2-NEXT: movzbl %al, %eax 1603; AVX2-NEXT: andl $1, %eax 1604; AVX2-NEXT: negl %eax 1605; AVX2-NEXT: movl %ecx, %edx 1606; AVX2-NEXT: andl $1, %edx 1607; AVX2-NEXT: negl %edx 1608; AVX2-NEXT: vmovd %edx, %xmm0 1609; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1610; AVX2-NEXT: movl %ecx, %eax 1611; AVX2-NEXT: shrb $2, %al 1612; AVX2-NEXT: movzbl %al, %eax 1613; AVX2-NEXT: andl $1, %eax 1614; AVX2-NEXT: negl %eax 1615; AVX2-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1616; AVX2-NEXT: shrb $3, %cl 1617; AVX2-NEXT: movzbl %cl, %eax 1618; AVX2-NEXT: negl %eax 1619; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1620; AVX2-NEXT: retq 1621; 1622; AVX512-LABEL: load_sext_4i1_to_4i32: 1623; AVX512: # %bb.0: # %entry 1624; AVX512-NEXT: kmovw (%rdi), %k1 1625; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1626; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1627; AVX512-NEXT: vzeroupper 1628; AVX512-NEXT: retq 1629; 1630; X86-SSE2-LABEL: load_sext_4i1_to_4i32: 1631; X86-SSE2: # %bb.0: # %entry 1632; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1633; X86-SSE2-NEXT: movb (%eax), %al 1634; X86-SSE2-NEXT: movl %eax, %ecx 1635; X86-SSE2-NEXT: shrb $3, %cl 1636; X86-SSE2-NEXT: movzbl %cl, %ecx 1637; X86-SSE2-NEXT: negl %ecx 1638; X86-SSE2-NEXT: movd %ecx, %xmm0 1639; X86-SSE2-NEXT: movl %eax, %ecx 1640; X86-SSE2-NEXT: shrb $2, %cl 1641; X86-SSE2-NEXT: movzbl %cl, %ecx 1642; X86-SSE2-NEXT: andl $1, %ecx 1643; X86-SSE2-NEXT: negl %ecx 1644; X86-SSE2-NEXT: movd %ecx, %xmm1 1645; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1646; X86-SSE2-NEXT: movzbl %al, %ecx 1647; X86-SSE2-NEXT: andl $1, %ecx 1648; X86-SSE2-NEXT: negl %ecx 1649; X86-SSE2-NEXT: movd %ecx, %xmm0 1650; X86-SSE2-NEXT: shrb %al 1651; X86-SSE2-NEXT: movzbl %al, %eax 1652; X86-SSE2-NEXT: andl $1, %eax 1653; X86-SSE2-NEXT: negl %eax 1654; X86-SSE2-NEXT: movd %eax, %xmm2 1655; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1656; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1657; X86-SSE2-NEXT: retl 1658; 1659; X86-SSE41-LABEL: load_sext_4i1_to_4i32: 1660; X86-SSE41: # %bb.0: # %entry 1661; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1662; X86-SSE41-NEXT: movb (%eax), %al 1663; X86-SSE41-NEXT: movl %eax, %ecx 1664; X86-SSE41-NEXT: shrb %cl 1665; X86-SSE41-NEXT: movzbl %cl, %ecx 1666; X86-SSE41-NEXT: andl $1, %ecx 1667; X86-SSE41-NEXT: negl %ecx 1668; X86-SSE41-NEXT: movzbl %al, %edx 1669; X86-SSE41-NEXT: andl $1, %edx 1670; X86-SSE41-NEXT: negl %edx 1671; X86-SSE41-NEXT: movd %edx, %xmm0 1672; X86-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1673; X86-SSE41-NEXT: movl %eax, %ecx 1674; X86-SSE41-NEXT: shrb $2, %cl 1675; X86-SSE41-NEXT: movzbl %cl, %ecx 1676; X86-SSE41-NEXT: andl $1, %ecx 1677; X86-SSE41-NEXT: negl %ecx 1678; X86-SSE41-NEXT: pinsrd $2, %ecx, %xmm0 1679; X86-SSE41-NEXT: shrb $3, %al 1680; X86-SSE41-NEXT: movzbl %al, %eax 1681; X86-SSE41-NEXT: negl %eax 1682; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1683; X86-SSE41-NEXT: retl 1684entry: 1685 %X = load <4 x i1>, <4 x i1>* %ptr 1686 %Y = sext <4 x i1> %X to <4 x i32> 1687 ret <4 x i32> %Y 1688} 1689 1690define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) { 1691; SSE2-LABEL: load_sext_4i8_to_4i32: 1692; SSE2: # %bb.0: # %entry 1693; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1694; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1695; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1696; SSE2-NEXT: psrad $24, %xmm0 1697; SSE2-NEXT: retq 1698; 1699; SSSE3-LABEL: load_sext_4i8_to_4i32: 1700; SSSE3: # %bb.0: # %entry 1701; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1702; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1703; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1704; SSSE3-NEXT: psrad $24, %xmm0 1705; SSSE3-NEXT: retq 1706; 1707; SSE41-LABEL: load_sext_4i8_to_4i32: 1708; SSE41: # 
%bb.0: # %entry 1709; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 1710; SSE41-NEXT: retq 1711; 1712; AVX-LABEL: load_sext_4i8_to_4i32: 1713; AVX: # %bb.0: # %entry 1714; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 1715; AVX-NEXT: retq 1716; 1717; X86-SSE2-LABEL: load_sext_4i8_to_4i32: 1718; X86-SSE2: # %bb.0: # %entry 1719; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1720; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1721; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1722; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1723; X86-SSE2-NEXT: psrad $24, %xmm0 1724; X86-SSE2-NEXT: retl 1725; 1726; X86-SSE41-LABEL: load_sext_4i8_to_4i32: 1727; X86-SSE41: # %bb.0: # %entry 1728; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1729; X86-SSE41-NEXT: pmovsxbd (%eax), %xmm0 1730; X86-SSE41-NEXT: retl 1731entry: 1732 %X = load <4 x i8>, <4 x i8>* %ptr 1733 %Y = sext <4 x i8> %X to <4 x i32> 1734 ret <4 x i32> %Y 1735} 1736 1737define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { 1738; SSE2-LABEL: load_sext_4i1_to_4i64: 1739; SSE2: # %bb.0: # %entry 1740; SSE2-NEXT: movb (%rdi), %al 1741; SSE2-NEXT: movl %eax, %ecx 1742; SSE2-NEXT: shrb %cl 1743; SSE2-NEXT: andb $1, %cl 1744; SSE2-NEXT: movzbl %cl, %ecx 1745; SSE2-NEXT: movl %eax, %edx 1746; SSE2-NEXT: andb $1, %dl 1747; SSE2-NEXT: movzbl %dl, %edx 1748; SSE2-NEXT: movd %edx, %xmm1 1749; SSE2-NEXT: pinsrw $2, %ecx, %xmm1 1750; SSE2-NEXT: movl %eax, %ecx 1751; SSE2-NEXT: shrb $2, %cl 1752; SSE2-NEXT: andb $1, %cl 1753; SSE2-NEXT: movzbl %cl, %ecx 1754; SSE2-NEXT: pinsrw $4, %ecx, %xmm1 1755; SSE2-NEXT: shrb $3, %al 1756; SSE2-NEXT: movzbl %al, %eax 1757; SSE2-NEXT: pinsrw $6, %eax, %xmm1 1758; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1759; SSE2-NEXT: psllq $63, %xmm0 1760; SSE2-NEXT: psrad $31, %xmm0 1761; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1762; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1763; SSE2-NEXT: psllq $63, %xmm1 1764; SSE2-NEXT: psrad $31, %xmm1 
1765; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1766; SSE2-NEXT: retq 1767; 1768; SSSE3-LABEL: load_sext_4i1_to_4i64: 1769; SSSE3: # %bb.0: # %entry 1770; SSSE3-NEXT: movb (%rdi), %al 1771; SSSE3-NEXT: movl %eax, %ecx 1772; SSSE3-NEXT: shrb %cl 1773; SSSE3-NEXT: andb $1, %cl 1774; SSSE3-NEXT: movzbl %cl, %ecx 1775; SSSE3-NEXT: movl %eax, %edx 1776; SSSE3-NEXT: andb $1, %dl 1777; SSSE3-NEXT: movzbl %dl, %edx 1778; SSSE3-NEXT: movd %edx, %xmm1 1779; SSSE3-NEXT: pinsrw $2, %ecx, %xmm1 1780; SSSE3-NEXT: movl %eax, %ecx 1781; SSSE3-NEXT: shrb $2, %cl 1782; SSSE3-NEXT: andb $1, %cl 1783; SSSE3-NEXT: movzbl %cl, %ecx 1784; SSSE3-NEXT: pinsrw $4, %ecx, %xmm1 1785; SSSE3-NEXT: shrb $3, %al 1786; SSSE3-NEXT: movzbl %al, %eax 1787; SSSE3-NEXT: pinsrw $6, %eax, %xmm1 1788; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1789; SSSE3-NEXT: psllq $63, %xmm0 1790; SSSE3-NEXT: psrad $31, %xmm0 1791; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1792; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1793; SSSE3-NEXT: psllq $63, %xmm1 1794; SSSE3-NEXT: psrad $31, %xmm1 1795; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1796; SSSE3-NEXT: retq 1797; 1798; SSE41-LABEL: load_sext_4i1_to_4i64: 1799; SSE41: # %bb.0: # %entry 1800; SSE41-NEXT: movb (%rdi), %al 1801; SSE41-NEXT: movl %eax, %ecx 1802; SSE41-NEXT: shrb %cl 1803; SSE41-NEXT: andb $1, %cl 1804; SSE41-NEXT: movzbl %cl, %ecx 1805; SSE41-NEXT: movl %eax, %edx 1806; SSE41-NEXT: andb $1, %dl 1807; SSE41-NEXT: movzbl %dl, %edx 1808; SSE41-NEXT: movd %edx, %xmm1 1809; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 1810; SSE41-NEXT: movl %eax, %ecx 1811; SSE41-NEXT: shrb $2, %cl 1812; SSE41-NEXT: andb $1, %cl 1813; SSE41-NEXT: movzbl %cl, %ecx 1814; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1815; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 1816; SSE41-NEXT: shrb $3, %al 1817; SSE41-NEXT: movzbl %al, %eax 1818; SSE41-NEXT: pinsrb $12, %eax, %xmm1 1819; SSE41-NEXT: psllq $63, %xmm0 1820; SSE41-NEXT: psrad $31, %xmm0 1821; 
SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1822; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1823; SSE41-NEXT: psllq $63, %xmm1 1824; SSE41-NEXT: psrad $31, %xmm1 1825; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1826; SSE41-NEXT: retq 1827; 1828; AVX1-LABEL: load_sext_4i1_to_4i64: 1829; AVX1: # %bb.0: # %entry 1830; AVX1-NEXT: movb (%rdi), %al 1831; AVX1-NEXT: movzbl %al, %ecx 1832; AVX1-NEXT: shrb %al 1833; AVX1-NEXT: movzbl %al, %eax 1834; AVX1-NEXT: andl $1, %eax 1835; AVX1-NEXT: negl %eax 1836; AVX1-NEXT: movl %ecx, %edx 1837; AVX1-NEXT: andl $1, %edx 1838; AVX1-NEXT: negl %edx 1839; AVX1-NEXT: vmovd %edx, %xmm0 1840; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1841; AVX1-NEXT: movl %ecx, %eax 1842; AVX1-NEXT: shrb $2, %al 1843; AVX1-NEXT: movzbl %al, %eax 1844; AVX1-NEXT: andl $1, %eax 1845; AVX1-NEXT: negl %eax 1846; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1847; AVX1-NEXT: shrb $3, %cl 1848; AVX1-NEXT: movzbl %cl, %eax 1849; AVX1-NEXT: negl %eax 1850; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1851; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1852; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1853; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1854; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1855; AVX1-NEXT: retq 1856; 1857; AVX2-LABEL: load_sext_4i1_to_4i64: 1858; AVX2: # %bb.0: # %entry 1859; AVX2-NEXT: movb (%rdi), %al 1860; AVX2-NEXT: movl %eax, %ecx 1861; AVX2-NEXT: shrb $3, %cl 1862; AVX2-NEXT: movzbl %cl, %ecx 1863; AVX2-NEXT: negq %rcx 1864; AVX2-NEXT: vmovq %rcx, %xmm0 1865; AVX2-NEXT: movzbl %al, %ecx 1866; AVX2-NEXT: shrb $2, %al 1867; AVX2-NEXT: movzbl %al, %eax 1868; AVX2-NEXT: andl $1, %eax 1869; AVX2-NEXT: negq %rax 1870; AVX2-NEXT: vmovq %rax, %xmm1 1871; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1872; AVX2-NEXT: movl %ecx, %eax 1873; AVX2-NEXT: andl $1, %eax 1874; AVX2-NEXT: negq %rax 1875; AVX2-NEXT: vmovq %rax, %xmm1 1876; AVX2-NEXT: shrb %cl 1877; AVX2-NEXT: movzbl %cl, %eax 1878; AVX2-NEXT: andl $1, %eax 1879; AVX2-NEXT: 
negq %rax 1880; AVX2-NEXT: vmovq %rax, %xmm2 1881; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1882; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1883; AVX2-NEXT: retq 1884; 1885; AVX512-LABEL: load_sext_4i1_to_4i64: 1886; AVX512: # %bb.0: # %entry 1887; AVX512-NEXT: kmovw (%rdi), %k1 1888; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1889; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1890; AVX512-NEXT: retq 1891; 1892; X86-SSE2-LABEL: load_sext_4i1_to_4i64: 1893; X86-SSE2: # %bb.0: # %entry 1894; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1895; X86-SSE2-NEXT: movb (%eax), %al 1896; X86-SSE2-NEXT: movl %eax, %ecx 1897; X86-SSE2-NEXT: shrb %cl 1898; X86-SSE2-NEXT: andb $1, %cl 1899; X86-SSE2-NEXT: movzbl %cl, %ecx 1900; X86-SSE2-NEXT: movl %eax, %edx 1901; X86-SSE2-NEXT: andb $1, %dl 1902; X86-SSE2-NEXT: movzbl %dl, %edx 1903; X86-SSE2-NEXT: movd %edx, %xmm1 1904; X86-SSE2-NEXT: pinsrw $2, %ecx, %xmm1 1905; X86-SSE2-NEXT: movl %eax, %ecx 1906; X86-SSE2-NEXT: shrb $2, %cl 1907; X86-SSE2-NEXT: andb $1, %cl 1908; X86-SSE2-NEXT: movzbl %cl, %ecx 1909; X86-SSE2-NEXT: pinsrw $4, %ecx, %xmm1 1910; X86-SSE2-NEXT: shrb $3, %al 1911; X86-SSE2-NEXT: movzbl %al, %eax 1912; X86-SSE2-NEXT: pinsrw $6, %eax, %xmm1 1913; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1914; X86-SSE2-NEXT: psllq $63, %xmm0 1915; X86-SSE2-NEXT: psrad $31, %xmm0 1916; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1917; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1918; X86-SSE2-NEXT: psllq $63, %xmm1 1919; X86-SSE2-NEXT: psrad $31, %xmm1 1920; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1921; X86-SSE2-NEXT: retl 1922; 1923; X86-SSE41-LABEL: load_sext_4i1_to_4i64: 1924; X86-SSE41: # %bb.0: # %entry 1925; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1926; X86-SSE41-NEXT: movb (%eax), %al 1927; X86-SSE41-NEXT: movl %eax, %ecx 1928; X86-SSE41-NEXT: shrb %cl 1929; X86-SSE41-NEXT: andb $1, %cl 1930; X86-SSE41-NEXT: movzbl %cl, %ecx 1931; 
X86-SSE41-NEXT: movl %eax, %edx 1932; X86-SSE41-NEXT: andb $1, %dl 1933; X86-SSE41-NEXT: movzbl %dl, %edx 1934; X86-SSE41-NEXT: movd %edx, %xmm1 1935; X86-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 1936; X86-SSE41-NEXT: movl %eax, %ecx 1937; X86-SSE41-NEXT: shrb $2, %cl 1938; X86-SSE41-NEXT: andb $1, %cl 1939; X86-SSE41-NEXT: movzbl %cl, %ecx 1940; X86-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1941; X86-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 1942; X86-SSE41-NEXT: shrb $3, %al 1943; X86-SSE41-NEXT: movzbl %al, %eax 1944; X86-SSE41-NEXT: pinsrb $12, %eax, %xmm1 1945; X86-SSE41-NEXT: psllq $63, %xmm0 1946; X86-SSE41-NEXT: psrad $31, %xmm0 1947; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1948; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1949; X86-SSE41-NEXT: psllq $63, %xmm1 1950; X86-SSE41-NEXT: psrad $31, %xmm1 1951; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1952; X86-SSE41-NEXT: retl 1953entry: 1954 %X = load <4 x i1>, <4 x i1>* %ptr 1955 %Y = sext <4 x i1> %X to <4 x i64> 1956 ret <4 x i64> %Y 1957} 1958 1959define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { 1960; SSE2-LABEL: load_sext_4i8_to_4i64: 1961; SSE2: # %bb.0: # %entry 1962; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1963; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1964; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1965; SSE2-NEXT: psrad $24, %xmm1 1966; SSE2-NEXT: pxor %xmm2, %xmm2 1967; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1968; SSE2-NEXT: movdqa %xmm1, %xmm0 1969; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1970; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1971; SSE2-NEXT: retq 1972; 1973; SSSE3-LABEL: load_sext_4i8_to_4i64: 1974; SSSE3: # %bb.0: # %entry 1975; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1976; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1977; SSSE3-NEXT: 
punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1978; SSSE3-NEXT: psrad $24, %xmm1 1979; SSSE3-NEXT: pxor %xmm2, %xmm2 1980; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 1981; SSSE3-NEXT: movdqa %xmm1, %xmm0 1982; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1983; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1984; SSSE3-NEXT: retq 1985; 1986; SSE41-LABEL: load_sext_4i8_to_4i64: 1987; SSE41: # %bb.0: # %entry 1988; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1989; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 1990; SSE41-NEXT: retq 1991; 1992; AVX1-LABEL: load_sext_4i8_to_4i64: 1993; AVX1: # %bb.0: # %entry 1994; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 1995; AVX1-NEXT: vpmovsxbq (%rdi), %xmm1 1996; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1997; AVX1-NEXT: retq 1998; 1999; AVX2-LABEL: load_sext_4i8_to_4i64: 2000; AVX2: # %bb.0: # %entry 2001; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2002; AVX2-NEXT: retq 2003; 2004; AVX512-LABEL: load_sext_4i8_to_4i64: 2005; AVX512: # %bb.0: # %entry 2006; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 2007; AVX512-NEXT: retq 2008; 2009; X86-SSE2-LABEL: load_sext_4i8_to_4i64: 2010; X86-SSE2: # %bb.0: # %entry 2011; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2012; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2013; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2014; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2015; X86-SSE2-NEXT: psrad $24, %xmm1 2016; X86-SSE2-NEXT: pxor %xmm2, %xmm2 2017; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2018; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2019; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2020; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2021; X86-SSE2-NEXT: retl 2022; 2023; X86-SSE41-LABEL: load_sext_4i8_to_4i64: 2024; X86-SSE41: # %bb.0: # %entry 2025; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 
2026; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 2027; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 2028; X86-SSE41-NEXT: retl 2029entry: 2030 %X = load <4 x i8>, <4 x i8>* %ptr 2031 %Y = sext <4 x i8> %X to <4 x i64> 2032 ret <4 x i64> %Y 2033} 2034 2035define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) { 2036; SSE2-LABEL: load_sext_4i8_to_4i64_extract: 2037; SSE2: # %bb.0: 2038; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2039; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2040; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2041; SSE2-NEXT: psrad $24, %xmm0 2042; SSE2-NEXT: pxor %xmm1, %xmm1 2043; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2044; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2045; SSE2-NEXT: retq 2046; 2047; SSSE3-LABEL: load_sext_4i8_to_4i64_extract: 2048; SSSE3: # %bb.0: 2049; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2050; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2051; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2052; SSSE3-NEXT: psrad $24, %xmm0 2053; SSSE3-NEXT: pxor %xmm1, %xmm1 2054; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 2055; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2056; SSSE3-NEXT: retq 2057; 2058; SSE41-LABEL: load_sext_4i8_to_4i64_extract: 2059; SSE41: # %bb.0: 2060; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0 2061; SSE41-NEXT: retq 2062; 2063; AVX1-LABEL: load_sext_4i8_to_4i64_extract: 2064; AVX1: # %bb.0: 2065; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 2066; AVX1-NEXT: retq 2067; 2068; AVX2-LABEL: load_sext_4i8_to_4i64_extract: 2069; AVX2: # %bb.0: 2070; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2071; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2072; AVX2-NEXT: vzeroupper 2073; AVX2-NEXT: retq 2074; 2075; AVX512-LABEL: load_sext_4i8_to_4i64_extract: 2076; AVX512: # %bb.0: 2077; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 2078; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 2079; AVX512-NEXT: vzeroupper 2080; 
AVX512-NEXT: retq 2081; 2082; X86-SSE2-LABEL: load_sext_4i8_to_4i64_extract: 2083; X86-SSE2: # %bb.0: 2084; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2085; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2086; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2087; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2088; X86-SSE2-NEXT: psrad $24, %xmm0 2089; X86-SSE2-NEXT: pxor %xmm1, %xmm1 2090; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2091; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2092; X86-SSE2-NEXT: retl 2093; 2094; X86-SSE41-LABEL: load_sext_4i8_to_4i64_extract: 2095; X86-SSE41: # %bb.0: 2096; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2097; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0 2098; X86-SSE41-NEXT: retl 2099 %ld = load <4 x i8>, <4 x i8>* %ptr 2100 %sext = sext <4 x i8> %ld to <4 x i64> 2101 %extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 2102 ret <2 x i64> %extract 2103} 2104 2105define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { 2106; SSE-LABEL: load_sext_8i1_to_8i16: 2107; SSE: # %bb.0: # %entry 2108; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2109; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2110; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2111; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2112; SSE-NEXT: pand %xmm1, %xmm0 2113; SSE-NEXT: pcmpeqw %xmm1, %xmm0 2114; SSE-NEXT: retq 2115; 2116; AVX1-LABEL: load_sext_8i1_to_8i16: 2117; AVX1: # %bb.0: # %entry 2118; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2119; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2120; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2121; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2122; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2123; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2124; AVX1-NEXT: retq 2125; 2126; AVX2-LABEL: load_sext_8i1_to_8i16: 2127; AVX2: # %bb.0: # %entry 2128; AVX2-NEXT: 
vpbroadcastb (%rdi), %xmm0 2129; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2130; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2131; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2132; AVX2-NEXT: retq 2133; 2134; AVX512F-LABEL: load_sext_8i1_to_8i16: 2135; AVX512F: # %bb.0: # %entry 2136; AVX512F-NEXT: kmovw (%rdi), %k1 2137; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2138; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2139; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2140; AVX512F-NEXT: vzeroupper 2141; AVX512F-NEXT: retq 2142; 2143; AVX512BW-LABEL: load_sext_8i1_to_8i16: 2144; AVX512BW: # %bb.0: # %entry 2145; AVX512BW-NEXT: kmovw (%rdi), %k0 2146; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2147; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2148; AVX512BW-NEXT: vzeroupper 2149; AVX512BW-NEXT: retq 2150; 2151; X86-SSE-LABEL: load_sext_8i1_to_8i16: 2152; X86-SSE: # %bb.0: # %entry 2153; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2154; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2155; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2156; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2157; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2158; X86-SSE-NEXT: pand %xmm1, %xmm0 2159; X86-SSE-NEXT: pcmpeqw %xmm1, %xmm0 2160; X86-SSE-NEXT: retl 2161entry: 2162 %X = load <8 x i1>, <8 x i1>* %ptr 2163 %Y = sext <8 x i1> %X to <8 x i16> 2164 ret <8 x i16> %Y 2165} 2166 2167define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) { 2168; SSE2-LABEL: load_sext_8i8_to_8i16: 2169; SSE2: # %bb.0: # %entry 2170; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2171; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2172; SSE2-NEXT: psraw $8, %xmm0 2173; SSE2-NEXT: retq 2174; 2175; SSSE3-LABEL: load_sext_8i8_to_8i16: 2176; SSSE3: # %bb.0: # %entry 2177; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2178; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2179; SSSE3-NEXT: 
psraw $8, %xmm0 2180; SSSE3-NEXT: retq 2181; 2182; SSE41-LABEL: load_sext_8i8_to_8i16: 2183; SSE41: # %bb.0: # %entry 2184; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 2185; SSE41-NEXT: retq 2186; 2187; AVX-LABEL: load_sext_8i8_to_8i16: 2188; AVX: # %bb.0: # %entry 2189; AVX-NEXT: vpmovsxbw (%rdi), %xmm0 2190; AVX-NEXT: retq 2191; 2192; X86-SSE2-LABEL: load_sext_8i8_to_8i16: 2193; X86-SSE2: # %bb.0: # %entry 2194; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2195; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2196; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2197; X86-SSE2-NEXT: psraw $8, %xmm0 2198; X86-SSE2-NEXT: retl 2199; 2200; X86-SSE41-LABEL: load_sext_8i8_to_8i16: 2201; X86-SSE41: # %bb.0: # %entry 2202; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2203; X86-SSE41-NEXT: pmovsxbw (%eax), %xmm0 2204; X86-SSE41-NEXT: retl 2205entry: 2206 %X = load <8 x i8>, <8 x i8>* %ptr 2207 %Y = sext <8 x i8> %X to <8 x i16> 2208 ret <8 x i16> %Y 2209} 2210 2211define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) { 2212; SSE2-LABEL: load_sext_8i8_to_8i64: 2213; SSE2: # %bb.0: # %entry 2214; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2215; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2216; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2217; SSE2-NEXT: psrad $24, %xmm1 2218; SSE2-NEXT: pxor %xmm4, %xmm4 2219; SSE2-NEXT: pxor %xmm3, %xmm3 2220; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 2221; SSE2-NEXT: movdqa %xmm1, %xmm0 2222; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2223; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2224; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2225; SSE2-NEXT: psrad $24, %xmm3 2226; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 2227; SSE2-NEXT: movdqa %xmm3, %xmm2 2228; 
SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2229; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2230; SSE2-NEXT: retq 2231; 2232; SSSE3-LABEL: load_sext_8i8_to_8i64: 2233; SSSE3: # %bb.0: # %entry 2234; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2235; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2236; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2237; SSSE3-NEXT: psrad $24, %xmm1 2238; SSSE3-NEXT: pxor %xmm4, %xmm4 2239; SSSE3-NEXT: pxor %xmm3, %xmm3 2240; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 2241; SSSE3-NEXT: movdqa %xmm1, %xmm0 2242; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2243; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2244; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2245; SSSE3-NEXT: psrad $24, %xmm3 2246; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 2247; SSSE3-NEXT: movdqa %xmm3, %xmm2 2248; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2249; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2250; SSSE3-NEXT: retq 2251; 2252; SSE41-LABEL: load_sext_8i8_to_8i64: 2253; SSE41: # %bb.0: # %entry 2254; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 2255; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 2256; SSE41-NEXT: pmovsxbq 4(%rdi), %xmm2 2257; SSE41-NEXT: pmovsxbq 6(%rdi), %xmm3 2258; SSE41-NEXT: retq 2259; 2260; AVX1-LABEL: load_sext_8i8_to_8i64: 2261; AVX1: # %bb.0: # %entry 2262; AVX1-NEXT: vpmovsxbq 6(%rdi), %xmm1 2263; AVX1-NEXT: vpmovsxbq 4(%rdi), %xmm2 2264; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 2265; AVX1-NEXT: vpmovsxbq (%rdi), %xmm3 2266; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 2267; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2268; AVX1-NEXT: retq 2269; 2270; AVX2-LABEL: load_sext_8i8_to_8i64: 
2271; AVX2: # %bb.0: # %entry 2272; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2273; AVX2-NEXT: vpmovsxbq 4(%rdi), %ymm1 2274; AVX2-NEXT: retq 2275; 2276; AVX512-LABEL: load_sext_8i8_to_8i64: 2277; AVX512: # %bb.0: # %entry 2278; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0 2279; AVX512-NEXT: retq 2280; 2281; X86-SSE2-LABEL: load_sext_8i8_to_8i64: 2282; X86-SSE2: # %bb.0: # %entry 2283; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2284; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2285; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2286; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2287; X86-SSE2-NEXT: psrad $24, %xmm1 2288; X86-SSE2-NEXT: pxor %xmm4, %xmm4 2289; X86-SSE2-NEXT: pxor %xmm3, %xmm3 2290; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 2291; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2292; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2293; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2294; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2295; X86-SSE2-NEXT: psrad $24, %xmm3 2296; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 2297; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 2298; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2299; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2300; X86-SSE2-NEXT: retl 2301; 2302; X86-SSE41-LABEL: load_sext_8i8_to_8i64: 2303; X86-SSE41: # %bb.0: # %entry 2304; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2305; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 2306; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 2307; X86-SSE41-NEXT: pmovsxbq 4(%eax), %xmm2 2308; X86-SSE41-NEXT: pmovsxbq 6(%eax), %xmm3 2309; X86-SSE41-NEXT: retl 2310entry: 2311 %X = load <8 x i8>, <8 x i8>* %ptr 2312 %Y = sext <8 x i8> %X to <8 x i64> 2313 ret <8 x i64> %Y 2314} 
2315 2316define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { 2317; SSE-LABEL: load_sext_8i1_to_8i32: 2318; SSE: # %bb.0: # %entry 2319; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2320; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2321; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8] 2322; SSE-NEXT: movdqa %xmm1, %xmm0 2323; SSE-NEXT: pand %xmm2, %xmm0 2324; SSE-NEXT: pcmpeqd %xmm2, %xmm0 2325; SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128] 2326; SSE-NEXT: pand %xmm2, %xmm1 2327; SSE-NEXT: pcmpeqd %xmm2, %xmm1 2328; SSE-NEXT: retq 2329; 2330; AVX1-LABEL: load_sext_8i1_to_8i32: 2331; AVX1: # %bb.0: # %entry 2332; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2333; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 2334; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2335; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 2336; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2337; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 2338; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 2339; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 2340; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1 2341; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 2342; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0 2343; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2344; AVX1-NEXT: retq 2345; 2346; AVX2-LABEL: load_sext_8i1_to_8i32: 2347; AVX2: # %bb.0: # %entry 2348; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0 2349; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128] 2350; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2351; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 2352; AVX2-NEXT: retq 2353; 2354; AVX512-LABEL: load_sext_8i1_to_8i32: 2355; AVX512: # %bb.0: # %entry 2356; AVX512-NEXT: kmovw (%rdi), %k1 2357; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2358; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2359; AVX512-NEXT: retq 2360; 2361; X86-SSE-LABEL: load_sext_8i1_to_8i32: 2362; X86-SSE: # %bb.0: # %entry 2363; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2364; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 
2365; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2366; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8] 2367; X86-SSE-NEXT: movdqa %xmm1, %xmm0 2368; X86-SSE-NEXT: pand %xmm2, %xmm0 2369; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm0 2370; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128] 2371; X86-SSE-NEXT: pand %xmm2, %xmm1 2372; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm1 2373; X86-SSE-NEXT: retl 2374entry: 2375 %X = load <8 x i1>, <8 x i1>* %ptr 2376 %Y = sext <8 x i1> %X to <8 x i32> 2377 ret <8 x i32> %Y 2378} 2379 2380define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) { 2381; SSE2-LABEL: load_sext_8i8_to_8i32: 2382; SSE2: # %bb.0: # %entry 2383; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2384; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2385; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2386; SSE2-NEXT: psrad $24, %xmm0 2387; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2388; SSE2-NEXT: psrad $24, %xmm1 2389; SSE2-NEXT: retq 2390; 2391; SSSE3-LABEL: load_sext_8i8_to_8i32: 2392; SSSE3: # %bb.0: # %entry 2393; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2394; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2395; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2396; SSSE3-NEXT: psrad $24, %xmm0 2397; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2398; SSSE3-NEXT: psrad $24, %xmm1 2399; SSSE3-NEXT: retq 2400; 2401; SSE41-LABEL: load_sext_8i8_to_8i32: 2402; SSE41: # %bb.0: # %entry 2403; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 2404; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 2405; SSE41-NEXT: retq 2406; 2407; AVX1-LABEL: load_sext_8i8_to_8i32: 2408; AVX1: # %bb.0: # %entry 2409; AVX1-NEXT: vpmovsxbd 4(%rdi), 
%xmm0 2410; AVX1-NEXT: vpmovsxbd (%rdi), %xmm1 2411; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2412; AVX1-NEXT: retq 2413; 2414; AVX2-LABEL: load_sext_8i8_to_8i32: 2415; AVX2: # %bb.0: # %entry 2416; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 2417; AVX2-NEXT: retq 2418; 2419; AVX512-LABEL: load_sext_8i8_to_8i32: 2420; AVX512: # %bb.0: # %entry 2421; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0 2422; AVX512-NEXT: retq 2423; 2424; X86-SSE2-LABEL: load_sext_8i8_to_8i32: 2425; X86-SSE2: # %bb.0: # %entry 2426; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2427; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2428; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2429; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2430; X86-SSE2-NEXT: psrad $24, %xmm0 2431; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2432; X86-SSE2-NEXT: psrad $24, %xmm1 2433; X86-SSE2-NEXT: retl 2434; 2435; X86-SSE41-LABEL: load_sext_8i8_to_8i32: 2436; X86-SSE41: # %bb.0: # %entry 2437; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2438; X86-SSE41-NEXT: pmovsxbd (%eax), %xmm0 2439; X86-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1 2440; X86-SSE41-NEXT: retl 2441entry: 2442 %X = load <8 x i8>, <8 x i8>* %ptr 2443 %Y = sext <8 x i8> %X to <8 x i32> 2444 ret <8 x i32> %Y 2445} 2446; Sign-extend a <16 x i1> mask loaded from %ptr to <16 x i8>. The function is marked readonly, not readnone, because its body loads through %ptr and a memory access inside a readnone function is undefined behaviour. 2447define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readonly { 2448; SSE2-LABEL: load_sext_16i1_to_16i8: 2449; SSE2: # %bb.0: # %entry 2450; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2451; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2452; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] 2453; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2454; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2455; SSE2-NEXT: pand %xmm1, %xmm0 2456; SSE2-NEXT: pcmpeqb %xmm1,
%xmm0 2457; SSE2-NEXT: retq 2458; 2459; SSSE3-LABEL: load_sext_16i1_to_16i8: 2460; SSSE3: # %bb.0: # %entry 2461; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2462; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2463; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2464; SSSE3-NEXT: pand %xmm1, %xmm0 2465; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0 2466; SSSE3-NEXT: retq 2467; 2468; SSE41-LABEL: load_sext_16i1_to_16i8: 2469; SSE41: # %bb.0: # %entry 2470; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2471; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2472; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2473; SSE41-NEXT: pand %xmm1, %xmm0 2474; SSE41-NEXT: pcmpeqb %xmm1, %xmm0 2475; SSE41-NEXT: retq 2476; 2477; AVX1-LABEL: load_sext_16i1_to_16i8: 2478; AVX1: # %bb.0: # %entry 2479; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2480; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2481; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] 2482; AVX1-NEXT: # xmm1 = mem[0,0] 2483; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2484; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2485; AVX1-NEXT: retq 2486; 2487; AVX2-LABEL: load_sext_16i1_to_16i8: 2488; AVX2: # %bb.0: # %entry 2489; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2490; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2491; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] 2492; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2493; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2494; AVX2-NEXT: retq 2495; 2496; AVX512F-LABEL: load_sext_16i1_to_16i8: 2497; AVX512F: # %bb.0: # %entry 2498; AVX512F-NEXT: kmovw (%rdi), %k1 2499; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2500; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2501; AVX512F-NEXT: vzeroupper 2502; AVX512F-NEXT: retq 2503; 2504;
AVX512BW-LABEL: load_sext_16i1_to_16i8: 2505; AVX512BW: # %bb.0: # %entry 2506; AVX512BW-NEXT: kmovw (%rdi), %k0 2507; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2508; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2509; AVX512BW-NEXT: vzeroupper 2510; AVX512BW-NEXT: retq 2511; 2512; X86-SSE2-LABEL: load_sext_16i1_to_16i8: 2513; X86-SSE2: # %bb.0: # %entry 2514; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2515; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2516; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2517; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] 2518; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2519; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2520; X86-SSE2-NEXT: pand %xmm1, %xmm0 2521; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 2522; X86-SSE2-NEXT: retl 2523; 2524; X86-SSE41-LABEL: load_sext_16i1_to_16i8: 2525; X86-SSE41: # %bb.0: # %entry 2526; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2527; X86-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2528; X86-SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2529; X86-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2530; X86-SSE41-NEXT: pand %xmm1, %xmm0 2531; X86-SSE41-NEXT: pcmpeqb %xmm1, %xmm0 2532; X86-SSE41-NEXT: retl 2533entry: 2534 %X = load <16 x i1>, <16 x i1>* %ptr 2535 %Y = sext <16 x i1> %X to <16 x i8> 2536 ret <16 x i8> %Y 2537} 2538 2539define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { 2540; SSE-LABEL: load_sext_16i1_to_16i16: 2541; SSE: # %bb.0: # %entry 2542; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2543; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2544; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2545; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128] 2546; SSE-NEXT: movdqa %xmm1, %xmm0 2547; SSE-NEXT: pand %xmm2, %xmm0 2548; SSE-NEXT: pcmpeqw %xmm2, %xmm0 2549; SSE-NEXT: movdqa
{{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768] 2550; SSE-NEXT: pand %xmm2, %xmm1 2551; SSE-NEXT: pcmpeqw %xmm2, %xmm1 2552; SSE-NEXT: retq 2553; 2554; AVX1-LABEL: load_sext_16i1_to_16i16: 2555; AVX1: # %bb.0: # %entry 2556; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2557; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2558; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2559; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2560; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 2561; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2562; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 2563; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 2564; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 2565; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1 2566; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 2567; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0 2568; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2569; AVX1-NEXT: retq 2570; 2571; AVX2-LABEL: load_sext_16i1_to_16i16: 2572; AVX2: # %bb.0: # %entry 2573; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0 2574; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 2575; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2576; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2577; AVX2-NEXT: retq 2578; 2579; AVX512F-LABEL: load_sext_16i1_to_16i16: 2580; AVX512F: # %bb.0: # %entry 2581; AVX512F-NEXT: kmovw (%rdi), %k1 2582; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2583; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2584; AVX512F-NEXT: retq 2585; 2586; AVX512BW-LABEL: load_sext_16i1_to_16i16: 2587; AVX512BW: # %bb.0: # %entry 2588; AVX512BW-NEXT: kmovw (%rdi), %k0 2589; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2590; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2591; AVX512BW-NEXT: retq 2592; 2593; X86-SSE-LABEL: load_sext_16i1_to_16i16: 2594; X86-SSE: # %bb.0: # %entry 2595; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2596; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2597; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = 
xmm0[0,0,0,0,4,5,6,7] 2598; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2599; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128] 2600; X86-SSE-NEXT: movdqa %xmm1, %xmm0 2601; X86-SSE-NEXT: pand %xmm2, %xmm0 2602; X86-SSE-NEXT: pcmpeqw %xmm2, %xmm0 2603; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768] 2604; X86-SSE-NEXT: pand %xmm2, %xmm1 2605; X86-SSE-NEXT: pcmpeqw %xmm2, %xmm1 2606; X86-SSE-NEXT: retl 2607entry: 2608 %X = load <16 x i1>, <16 x i1>* %ptr 2609 %Y = sext <16 x i1> %X to <16 x i16> 2610 ret <16 x i16> %Y 2611} 2612 2613define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { 2614; SSE-LABEL: load_sext_32i1_to_32i8: 2615; SSE: # %bb.0: # %entry 2616; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2617; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2618; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7] 2619; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2620; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2621; SSE-NEXT: pand %xmm2, %xmm0 2622; SSE-NEXT: pcmpeqb %xmm2, %xmm0 2623; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7] 2624; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 2625; SSE-NEXT: pand %xmm2, %xmm1 2626; SSE-NEXT: pcmpeqb %xmm2, %xmm1 2627; SSE-NEXT: retq 2628; 2629; AVX1-LABEL: load_sext_32i1_to_32i8: 2630; AVX1: # %bb.0: # %entry 2631; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2632; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2633; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7] 2634; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7] 2635; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2636; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] 2637; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 2638; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2639; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 2640; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, 
%xmm1 2641; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 2642; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1 2643; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 2644; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0 2645; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2646; AVX1-NEXT: retq 2647; 2648; AVX2-LABEL: load_sext_32i1_to_32i8: 2649; AVX2: # %bb.0: # %entry 2650; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2651; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2652; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] 2653; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] 2654; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2655; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 2656; AVX2-NEXT: retq 2657; 2658; AVX512F-LABEL: load_sext_32i1_to_32i8: 2659; AVX512F: # %bb.0: # %entry 2660; AVX512F-NEXT: kmovw (%rdi), %k1 2661; AVX512F-NEXT: kmovw 2(%rdi), %k2 2662; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2663; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2664; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} 2665; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 2666; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2667; AVX512F-NEXT: retq 2668; 2669; AVX512BW-LABEL: load_sext_32i1_to_32i8: 2670; AVX512BW: # %bb.0: # %entry 2671; AVX512BW-NEXT: kmovd (%rdi), %k0 2672; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2673; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2674; AVX512BW-NEXT: retq 2675; 2676; X86-SSE-LABEL: load_sext_32i1_to_32i8: 2677; X86-SSE: # %bb.0: # %entry 2678; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2679; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2680; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2681; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7] 2682; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2683; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = 
[1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2684; X86-SSE-NEXT: pand %xmm2, %xmm0 2685; X86-SSE-NEXT: pcmpeqb %xmm2, %xmm0 2686; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7] 2687; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 2688; X86-SSE-NEXT: pand %xmm2, %xmm1 2689; X86-SSE-NEXT: pcmpeqb %xmm2, %xmm1 2690; X86-SSE-NEXT: retl 2691entry: 2692 %X = load <32 x i1>, <32 x i1>* %ptr 2693 %Y = sext <32 x i1> %X to <32 x i8> 2694 ret <32 x i8> %Y 2695} 2696 2697define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) { 2698; SSE2-LABEL: load_sext_16i8_to_16i16: 2699; SSE2: # %bb.0: # %entry 2700; SSE2-NEXT: movdqa (%rdi), %xmm1 2701; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2702; SSE2-NEXT: psraw $8, %xmm0 2703; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2704; SSE2-NEXT: psraw $8, %xmm1 2705; SSE2-NEXT: retq 2706; 2707; SSSE3-LABEL: load_sext_16i8_to_16i16: 2708; SSSE3: # %bb.0: # %entry 2709; SSSE3-NEXT: movdqa (%rdi), %xmm1 2710; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2711; SSSE3-NEXT: psraw $8, %xmm0 2712; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2713; SSSE3-NEXT: psraw $8, %xmm1 2714; SSSE3-NEXT: retq 2715; 2716; SSE41-LABEL: load_sext_16i8_to_16i16: 2717; SSE41: # %bb.0: # %entry 2718; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 2719; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1 2720; SSE41-NEXT: retq 2721; 2722; AVX1-LABEL: load_sext_16i8_to_16i16: 2723; AVX1: # %bb.0: # %entry 2724; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm0 2725; AVX1-NEXT: vpmovsxbw (%rdi), %xmm1 2726; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2727; AVX1-NEXT: retq 2728; 2729; AVX2-LABEL: load_sext_16i8_to_16i16: 2730; AVX2: # %bb.0: # %entry 2731; 
AVX2-NEXT: vpmovsxbw (%rdi), %ymm0 2732; AVX2-NEXT: retq 2733; 2734; AVX512-LABEL: load_sext_16i8_to_16i16: 2735; AVX512: # %bb.0: # %entry 2736; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0 2737; AVX512-NEXT: retq 2738; 2739; X86-SSE2-LABEL: load_sext_16i8_to_16i16: 2740; X86-SSE2: # %bb.0: # %entry 2741; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2742; X86-SSE2-NEXT: movdqa (%eax), %xmm1 2743; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2744; X86-SSE2-NEXT: psraw $8, %xmm0 2745; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2746; X86-SSE2-NEXT: psraw $8, %xmm1 2747; X86-SSE2-NEXT: retl 2748; 2749; X86-SSE41-LABEL: load_sext_16i8_to_16i16: 2750; X86-SSE41: # %bb.0: # %entry 2751; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2752; X86-SSE41-NEXT: pmovsxbw (%eax), %xmm0 2753; X86-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1 2754; X86-SSE41-NEXT: retl 2755entry: 2756 %X = load <16 x i8>, <16 x i8>* %ptr 2757 %Y = sext <16 x i8> %X to <16 x i16> 2758 ret <16 x i16> %Y 2759} 2760 2761define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { 2762; SSE2-LABEL: load_sext_2i16_to_2i64: 2763; SSE2: # %bb.0: # %entry 2764; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2765; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2766; SSE2-NEXT: pxor %xmm1, %xmm1 2767; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2768; SSE2-NEXT: psrad $16, %xmm0 2769; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2770; SSE2-NEXT: retq 2771; 2772; SSSE3-LABEL: load_sext_2i16_to_2i64: 2773; SSSE3: # %bb.0: # %entry 2774; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2775; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2776; SSSE3-NEXT: pxor %xmm1, %xmm1 2777; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 2778; SSSE3-NEXT: psrad $16, %xmm0 2779; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 
2780; SSSE3-NEXT: retq 2781; 2782; SSE41-LABEL: load_sext_2i16_to_2i64: 2783; SSE41: # %bb.0: # %entry 2784; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 2785; SSE41-NEXT: retq 2786; 2787; AVX-LABEL: load_sext_2i16_to_2i64: 2788; AVX: # %bb.0: # %entry 2789; AVX-NEXT: vpmovsxwq (%rdi), %xmm0 2790; AVX-NEXT: retq 2791; 2792; X86-SSE2-LABEL: load_sext_2i16_to_2i64: 2793; X86-SSE2: # %bb.0: # %entry 2794; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2795; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2796; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2797; X86-SSE2-NEXT: pxor %xmm1, %xmm1 2798; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2799; X86-SSE2-NEXT: psrad $16, %xmm0 2800; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2801; X86-SSE2-NEXT: retl 2802; 2803; X86-SSE41-LABEL: load_sext_2i16_to_2i64: 2804; X86-SSE41: # %bb.0: # %entry 2805; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2806; X86-SSE41-NEXT: pmovsxwq (%eax), %xmm0 2807; X86-SSE41-NEXT: retl 2808entry: 2809 %X = load <2 x i16>, <2 x i16>* %ptr 2810 %Y = sext <2 x i16> %X to <2 x i64> 2811 ret <2 x i64> %Y 2812} 2813 2814define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) { 2815; SSE2-LABEL: load_sext_4i16_to_4i32: 2816; SSE2: # %bb.0: # %entry 2817; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2818; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2819; SSE2-NEXT: psrad $16, %xmm0 2820; SSE2-NEXT: retq 2821; 2822; SSSE3-LABEL: load_sext_4i16_to_4i32: 2823; SSSE3: # %bb.0: # %entry 2824; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2825; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2826; SSSE3-NEXT: psrad $16, %xmm0 2827; SSSE3-NEXT: retq 2828; 2829; SSE41-LABEL: load_sext_4i16_to_4i32: 2830; SSE41: # %bb.0: # %entry 2831; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 2832; SSE41-NEXT: retq 2833; 2834; AVX-LABEL: load_sext_4i16_to_4i32: 2835; AVX: # %bb.0: # %entry 2836; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 2837; AVX-NEXT: retq 2838; 2839; 
X86-SSE2-LABEL: load_sext_4i16_to_4i32: 2840; X86-SSE2: # %bb.0: # %entry 2841; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2842; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2843; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2844; X86-SSE2-NEXT: psrad $16, %xmm0 2845; X86-SSE2-NEXT: retl 2846; 2847; X86-SSE41-LABEL: load_sext_4i16_to_4i32: 2848; X86-SSE41: # %bb.0: # %entry 2849; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2850; X86-SSE41-NEXT: pmovsxwd (%eax), %xmm0 2851; X86-SSE41-NEXT: retl 2852entry: 2853 %X = load <4 x i16>, <4 x i16>* %ptr 2854 %Y = sext <4 x i16> %X to <4 x i32> 2855 ret <4 x i32> %Y 2856} 2857 2858define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { 2859; SSE2-LABEL: load_sext_4i16_to_4i64: 2860; SSE2: # %bb.0: # %entry 2861; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2862; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2863; SSE2-NEXT: psrad $16, %xmm1 2864; SSE2-NEXT: pxor %xmm2, %xmm2 2865; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2866; SSE2-NEXT: movdqa %xmm1, %xmm0 2867; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2868; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2869; SSE2-NEXT: retq 2870; 2871; SSSE3-LABEL: load_sext_4i16_to_4i64: 2872; SSSE3: # %bb.0: # %entry 2873; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2874; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2875; SSSE3-NEXT: psrad $16, %xmm1 2876; SSSE3-NEXT: pxor %xmm2, %xmm2 2877; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 2878; SSSE3-NEXT: movdqa %xmm1, %xmm0 2879; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2880; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2881; SSSE3-NEXT: retq 2882; 2883; SSE41-LABEL: load_sext_4i16_to_4i64: 2884; SSE41: # %bb.0: # %entry 2885; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 2886; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1 2887; SSE41-NEXT: 
retq 2888; 2889; AVX1-LABEL: load_sext_4i16_to_4i64: 2890; AVX1: # %bb.0: # %entry 2891; AVX1-NEXT: vpmovsxwq 4(%rdi), %xmm0 2892; AVX1-NEXT: vpmovsxwq (%rdi), %xmm1 2893; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2894; AVX1-NEXT: retq 2895; 2896; AVX2-LABEL: load_sext_4i16_to_4i64: 2897; AVX2: # %bb.0: # %entry 2898; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0 2899; AVX2-NEXT: retq 2900; 2901; AVX512-LABEL: load_sext_4i16_to_4i64: 2902; AVX512: # %bb.0: # %entry 2903; AVX512-NEXT: vpmovsxwq (%rdi), %ymm0 2904; AVX512-NEXT: retq 2905; 2906; X86-SSE2-LABEL: load_sext_4i16_to_4i64: 2907; X86-SSE2: # %bb.0: # %entry 2908; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2909; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2910; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2911; X86-SSE2-NEXT: psrad $16, %xmm1 2912; X86-SSE2-NEXT: pxor %xmm2, %xmm2 2913; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2914; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2915; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2916; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2917; X86-SSE2-NEXT: retl 2918; 2919; X86-SSE41-LABEL: load_sext_4i16_to_4i64: 2920; X86-SSE41: # %bb.0: # %entry 2921; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2922; X86-SSE41-NEXT: pmovsxwq (%eax), %xmm0 2923; X86-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1 2924; X86-SSE41-NEXT: retl 2925entry: 2926 %X = load <4 x i16>, <4 x i16>* %ptr 2927 %Y = sext <4 x i16> %X to <4 x i64> 2928 ret <4 x i64> %Y 2929} 2930 2931define <8 x i32> @load_sext_8i16_to_8i32(<8 x i16> *%ptr) { 2932; SSE2-LABEL: load_sext_8i16_to_8i32: 2933; SSE2: # %bb.0: # %entry 2934; SSE2-NEXT: movdqa (%rdi), %xmm1 2935; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2936; SSE2-NEXT: psrad $16, %xmm0 2937; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2938; SSE2-NEXT: psrad $16, %xmm1 2939; SSE2-NEXT: retq 2940; 
2941; SSSE3-LABEL: load_sext_8i16_to_8i32: 2942; SSSE3: # %bb.0: # %entry 2943; SSSE3-NEXT: movdqa (%rdi), %xmm1 2944; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2945; SSSE3-NEXT: psrad $16, %xmm0 2946; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2947; SSSE3-NEXT: psrad $16, %xmm1 2948; SSSE3-NEXT: retq 2949; 2950; SSE41-LABEL: load_sext_8i16_to_8i32: 2951; SSE41: # %bb.0: # %entry 2952; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 2953; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 2954; SSE41-NEXT: retq 2955; 2956; AVX1-LABEL: load_sext_8i16_to_8i32: 2957; AVX1: # %bb.0: # %entry 2958; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0 2959; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1 2960; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2961; AVX1-NEXT: retq 2962; 2963; AVX2-LABEL: load_sext_8i16_to_8i32: 2964; AVX2: # %bb.0: # %entry 2965; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 2966; AVX2-NEXT: retq 2967; 2968; AVX512-LABEL: load_sext_8i16_to_8i32: 2969; AVX512: # %bb.0: # %entry 2970; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0 2971; AVX512-NEXT: retq 2972; 2973; X86-SSE2-LABEL: load_sext_8i16_to_8i32: 2974; X86-SSE2: # %bb.0: # %entry 2975; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2976; X86-SSE2-NEXT: movdqa (%eax), %xmm1 2977; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2978; X86-SSE2-NEXT: psrad $16, %xmm0 2979; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2980; X86-SSE2-NEXT: psrad $16, %xmm1 2981; X86-SSE2-NEXT: retl 2982; 2983; X86-SSE41-LABEL: load_sext_8i16_to_8i32: 2984; X86-SSE41: # %bb.0: # %entry 2985; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2986; X86-SSE41-NEXT: pmovsxwd (%eax), %xmm0 2987; X86-SSE41-NEXT: pmovsxwd 8(%eax), %xmm1 2988; X86-SSE41-NEXT: retl 2989entry: 2990 %X = load <8 x i16>, <8 x i16>* %ptr 2991 %Y = sext <8 x i16> %X to <8 x i32> 2992 ret <8 x i32> %Y 2993} 2994 2995define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) { 
2996; SSE2-LABEL: load_sext_2i32_to_2i64: 2997; SSE2: # %bb.0: # %entry 2998; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2999; SSE2-NEXT: pxor %xmm1, %xmm1 3000; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 3001; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3002; SSE2-NEXT: retq 3003; 3004; SSSE3-LABEL: load_sext_2i32_to_2i64: 3005; SSSE3: # %bb.0: # %entry 3006; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3007; SSSE3-NEXT: pxor %xmm1, %xmm1 3008; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 3009; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3010; SSSE3-NEXT: retq 3011; 3012; SSE41-LABEL: load_sext_2i32_to_2i64: 3013; SSE41: # %bb.0: # %entry 3014; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 3015; SSE41-NEXT: retq 3016; 3017; AVX-LABEL: load_sext_2i32_to_2i64: 3018; AVX: # %bb.0: # %entry 3019; AVX-NEXT: vpmovsxdq (%rdi), %xmm0 3020; AVX-NEXT: retq 3021; 3022; X86-SSE2-LABEL: load_sext_2i32_to_2i64: 3023; X86-SSE2: # %bb.0: # %entry 3024; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3025; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3026; X86-SSE2-NEXT: pxor %xmm1, %xmm1 3027; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 3028; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3029; X86-SSE2-NEXT: retl 3030; 3031; X86-SSE41-LABEL: load_sext_2i32_to_2i64: 3032; X86-SSE41: # %bb.0: # %entry 3033; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3034; X86-SSE41-NEXT: pmovsxdq (%eax), %xmm0 3035; X86-SSE41-NEXT: retl 3036entry: 3037 %X = load <2 x i32>, <2 x i32>* %ptr 3038 %Y = sext <2 x i32> %X to <2 x i64> 3039 ret <2 x i64> %Y 3040} 3041 3042define <4 x i64> @load_sext_4i32_to_4i64(<4 x i32> *%ptr) { 3043; SSE2-LABEL: load_sext_4i32_to_4i64: 3044; SSE2: # %bb.0: # %entry 3045; SSE2-NEXT: movdqa (%rdi), %xmm0 3046; SSE2-NEXT: pxor %xmm2, %xmm2 3047; SSE2-NEXT: pxor %xmm3, %xmm3 3048; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3049; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3050; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3051; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3052; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3053; SSE2-NEXT: retq 3054; 3055; SSSE3-LABEL: load_sext_4i32_to_4i64: 3056; SSSE3: # %bb.0: # %entry 3057; SSSE3-NEXT: movdqa (%rdi), %xmm0 3058; SSSE3-NEXT: pxor %xmm2, %xmm2 3059; SSSE3-NEXT: pxor %xmm3, %xmm3 3060; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 3061; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3062; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3063; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3064; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3065; SSSE3-NEXT: retq 3066; 3067; SSE41-LABEL: load_sext_4i32_to_4i64: 3068; SSE41: # %bb.0: # %entry 3069; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 3070; SSE41-NEXT: pmovsxdq 8(%rdi), %xmm1 3071; SSE41-NEXT: retq 3072; 3073; AVX1-LABEL: load_sext_4i32_to_4i64: 3074; AVX1: # %bb.0: # %entry 3075; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm0 3076; AVX1-NEXT: vpmovsxdq (%rdi), %xmm1 3077; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3078; AVX1-NEXT: retq 3079; 3080; AVX2-LABEL: load_sext_4i32_to_4i64: 3081; AVX2: # %bb.0: # %entry 3082; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0 3083; AVX2-NEXT: retq 3084; 3085; AVX512-LABEL: load_sext_4i32_to_4i64: 3086; AVX512: # %bb.0: # %entry 3087; AVX512-NEXT: vpmovsxdq (%rdi), %ymm0 3088; AVX512-NEXT: retq 3089; 3090; X86-SSE2-LABEL: load_sext_4i32_to_4i64: 3091; X86-SSE2: # %bb.0: # %entry 3092; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3093; X86-SSE2-NEXT: movdqa (%eax), %xmm0 3094; X86-SSE2-NEXT: pxor %xmm2, %xmm2 3095; X86-SSE2-NEXT: pxor %xmm3, %xmm3 3096; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3097; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3098; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3099; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3100; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3101; X86-SSE2-NEXT: retl 3102; 3103; X86-SSE41-LABEL: 
load_sext_4i32_to_4i64: 3104; X86-SSE41: # %bb.0: # %entry 3105; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3106; X86-SSE41-NEXT: pmovsxdq (%eax), %xmm0 3107; X86-SSE41-NEXT: pmovsxdq 8(%eax), %xmm1 3108; X86-SSE41-NEXT: retl 3109entry: 3110 %X = load <4 x i32>, <4 x i32>* %ptr 3111 %Y = sext <4 x i32> %X to <4 x i64> 3112 ret <4 x i64> %Y 3113} 3114 3115define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { 3116; SSE2-LABEL: sext_2i8_to_i32: 3117; SSE2: # %bb.0: # %entry 3118; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3119; SSE2-NEXT: psraw $8, %xmm0 3120; SSE2-NEXT: movd %xmm0, %eax 3121; SSE2-NEXT: retq 3122; 3123; SSSE3-LABEL: sext_2i8_to_i32: 3124; SSSE3: # %bb.0: # %entry 3125; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3126; SSSE3-NEXT: psraw $8, %xmm0 3127; SSSE3-NEXT: movd %xmm0, %eax 3128; SSSE3-NEXT: retq 3129; 3130; SSE41-LABEL: sext_2i8_to_i32: 3131; SSE41: # %bb.0: # %entry 3132; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 3133; SSE41-NEXT: movd %xmm0, %eax 3134; SSE41-NEXT: retq 3135; 3136; AVX-LABEL: sext_2i8_to_i32: 3137; AVX: # %bb.0: # %entry 3138; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 3139; AVX-NEXT: vmovd %xmm0, %eax 3140; AVX-NEXT: retq 3141; 3142; X86-SSE2-LABEL: sext_2i8_to_i32: 3143; X86-SSE2: # %bb.0: # %entry 3144; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3145; X86-SSE2-NEXT: psraw $8, %xmm0 3146; X86-SSE2-NEXT: movd %xmm0, %eax 3147; X86-SSE2-NEXT: retl 3148; 3149; X86-SSE41-LABEL: sext_2i8_to_i32: 3150; X86-SSE41: # %bb.0: # %entry 3151; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 3152; X86-SSE41-NEXT: movd %xmm0, %eax 3153; X86-SSE41-NEXT: retl 3154entry: 3155 %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 3156 %Ex = sext <2 x i8> %Shuf to <2 x i16> 3157 %Bc = bitcast <2 x i16> %Ex to i32 3158 ret i32 %Bc 3159} 3160 3161define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) { 3162; SSE2-LABEL: 
sext_4i1_to_4i64: 3163; SSE2: # %bb.0: 3164; SSE2-NEXT: pslld $31, %xmm0 3165; SSE2-NEXT: psrad $31, %xmm0 3166; SSE2-NEXT: pxor %xmm2, %xmm2 3167; SSE2-NEXT: pxor %xmm3, %xmm3 3168; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3169; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3170; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3171; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3172; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3173; SSE2-NEXT: retq 3174; 3175; SSSE3-LABEL: sext_4i1_to_4i64: 3176; SSSE3: # %bb.0: 3177; SSSE3-NEXT: pslld $31, %xmm0 3178; SSSE3-NEXT: psrad $31, %xmm0 3179; SSSE3-NEXT: pxor %xmm2, %xmm2 3180; SSSE3-NEXT: pxor %xmm3, %xmm3 3181; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 3182; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3183; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3184; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3185; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3186; SSSE3-NEXT: retq 3187; 3188; SSE41-LABEL: sext_4i1_to_4i64: 3189; SSE41: # %bb.0: 3190; SSE41-NEXT: pslld $31, %xmm0 3191; SSE41-NEXT: psrad $31, %xmm0 3192; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3193; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3194; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3195; SSE41-NEXT: movdqa %xmm2, %xmm0 3196; SSE41-NEXT: retq 3197; 3198; AVX1-LABEL: sext_4i1_to_4i64: 3199; AVX1: # %bb.0: 3200; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 3201; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 3202; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 3203; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3204; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 3205; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3206; AVX1-NEXT: retq 3207; 3208; AVX2-LABEL: sext_4i1_to_4i64: 3209; AVX2: # %bb.0: 3210; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 3211; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 3212; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 3213; AVX2-NEXT: retq 3214; 3215; AVX512-LABEL: sext_4i1_to_4i64: 3216; AVX512: # %bb.0: 3217; AVX512-NEXT: vpslld $31, %xmm0, 
%xmm0 3218; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 3219; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 3220; AVX512-NEXT: retq 3221; 3222; X86-SSE2-LABEL: sext_4i1_to_4i64: 3223; X86-SSE2: # %bb.0: 3224; X86-SSE2-NEXT: pslld $31, %xmm0 3225; X86-SSE2-NEXT: psrad $31, %xmm0 3226; X86-SSE2-NEXT: pxor %xmm2, %xmm2 3227; X86-SSE2-NEXT: pxor %xmm3, %xmm3 3228; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3229; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3230; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3231; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3232; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3233; X86-SSE2-NEXT: retl 3234; 3235; X86-SSE41-LABEL: sext_4i1_to_4i64: 3236; X86-SSE41: # %bb.0: 3237; X86-SSE41-NEXT: pslld $31, %xmm0 3238; X86-SSE41-NEXT: psrad $31, %xmm0 3239; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3240; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3241; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3242; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 3243; X86-SSE41-NEXT: retl 3244 %extmask = sext <4 x i1> %mask to <4 x i64> 3245 ret <4 x i64> %extmask 3246} 3247 3248define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) { 3249; SSE2-LABEL: sext_4i8_to_4i64: 3250; SSE2: # %bb.0: 3251; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3252; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3253; SSE2-NEXT: psrad $24, %xmm1 3254; SSE2-NEXT: pxor %xmm2, %xmm2 3255; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3256; SSE2-NEXT: movdqa %xmm1, %xmm0 3257; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3258; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3259; SSE2-NEXT: retq 3260; 3261; SSSE3-LABEL: sext_4i8_to_4i64: 3262; SSSE3: # %bb.0: 3263; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3264; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 
3265; SSSE3-NEXT: psrad $24, %xmm1 3266; SSSE3-NEXT: pxor %xmm2, %xmm2 3267; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3268; SSSE3-NEXT: movdqa %xmm1, %xmm0 3269; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3270; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3271; SSSE3-NEXT: retq 3272; 3273; SSE41-LABEL: sext_4i8_to_4i64: 3274; SSE41: # %bb.0: 3275; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 3276; SSE41-NEXT: psrld $16, %xmm0 3277; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 3278; SSE41-NEXT: movdqa %xmm2, %xmm0 3279; SSE41-NEXT: retq 3280; 3281; AVX1-LABEL: sext_4i8_to_4i64: 3282; AVX1: # %bb.0: 3283; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 3284; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 3285; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 3286; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3287; AVX1-NEXT: retq 3288; 3289; AVX2-LABEL: sext_4i8_to_4i64: 3290; AVX2: # %bb.0: 3291; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0 3292; AVX2-NEXT: retq 3293; 3294; AVX512-LABEL: sext_4i8_to_4i64: 3295; AVX512: # %bb.0: 3296; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0 3297; AVX512-NEXT: retq 3298; 3299; X86-SSE2-LABEL: sext_4i8_to_4i64: 3300; X86-SSE2: # %bb.0: 3301; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3302; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3303; X86-SSE2-NEXT: psrad $24, %xmm1 3304; X86-SSE2-NEXT: pxor %xmm2, %xmm2 3305; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3306; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 3307; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3308; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3309; X86-SSE2-NEXT: retl 3310; 3311; X86-SSE41-LABEL: sext_4i8_to_4i64: 3312; X86-SSE41: # %bb.0: 3313; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm2 3314; X86-SSE41-NEXT: psrld $16, %xmm0 3315; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm1 3316; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 3317; X86-SSE41-NEXT: retl 3318 %extmask = sext <4 x i8> %mask 
to <4 x i64> 3319 ret <4 x i64> %extmask 3320} 3321 3322define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind { 3323; SSE-LABEL: sext_32xi1_to_32xi8: 3324; SSE: # %bb.0: 3325; SSE-NEXT: pcmpeqw %xmm5, %xmm1 3326; SSE-NEXT: pcmpeqw %xmm4, %xmm0 3327; SSE-NEXT: packsswb %xmm1, %xmm0 3328; SSE-NEXT: pcmpeqw %xmm7, %xmm3 3329; SSE-NEXT: pcmpeqw %xmm6, %xmm2 3330; SSE-NEXT: packsswb %xmm3, %xmm2 3331; SSE-NEXT: movdqa %xmm2, %xmm1 3332; SSE-NEXT: retq 3333; 3334; AVX1-LABEL: sext_32xi1_to_32xi8: 3335; AVX1: # %bb.0: 3336; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 3337; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 3338; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4 3339; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 3340; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1 3341; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 3342; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 3343; AVX1-NEXT: vpcmpeqw %xmm3, %xmm4, %xmm3 3344; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 3345; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0 3346; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3347; AVX1-NEXT: retq 3348; 3349; AVX2-LABEL: sext_32xi1_to_32xi8: 3350; AVX2: # %bb.0: 3351; AVX2-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 3352; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 3353; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 3354; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 3355; AVX2-NEXT: retq 3356; 3357; AVX512F-LABEL: sext_32xi1_to_32xi8: 3358; AVX512F: # %bb.0: 3359; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 3360; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 3361; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 3362; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 3363; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 3364; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 3365; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = 
ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero 3366; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 3367; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3368; AVX512F-NEXT: retq 3369; 3370; AVX512BW-LABEL: sext_32xi1_to_32xi8: 3371; AVX512BW: # %bb.0: 3372; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 3373; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 3374; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3375; AVX512BW-NEXT: retq 3376; 3377; X86-SSE-LABEL: sext_32xi1_to_32xi8: 3378; X86-SSE: # %bb.0: 3379; X86-SSE-NEXT: pushl %ebp 3380; X86-SSE-NEXT: movl %esp, %ebp 3381; X86-SSE-NEXT: andl $-16, %esp 3382; X86-SSE-NEXT: subl $16, %esp 3383; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3 3384; X86-SSE-NEXT: pcmpeqw 40(%ebp), %xmm1 3385; X86-SSE-NEXT: pcmpeqw 24(%ebp), %xmm0 3386; X86-SSE-NEXT: packsswb %xmm1, %xmm0 3387; X86-SSE-NEXT: pcmpeqw 72(%ebp), %xmm3 3388; X86-SSE-NEXT: pcmpeqw 56(%ebp), %xmm2 3389; X86-SSE-NEXT: packsswb %xmm3, %xmm2 3390; X86-SSE-NEXT: movdqa %xmm2, %xmm1 3391; X86-SSE-NEXT: movl %ebp, %esp 3392; X86-SSE-NEXT: popl %ebp 3393; X86-SSE-NEXT: retl 3394 %a = icmp eq <32 x i16> %c1, %c2 3395 %b = sext <32 x i1> %a to <32 x i8> 3396 ret <32 x i8> %b 3397} 3398 3399define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) { 3400; SSE2-LABEL: sext_2i8_to_2i32: 3401; SSE2: # %bb.0: 3402; SSE2-NEXT: movzwl (%rdi), %eax 3403; SSE2-NEXT: movd %eax, %xmm0 3404; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3405; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3406; SSE2-NEXT: psrad $24, %xmm0 3407; SSE2-NEXT: paddd %xmm0, %xmm0 3408; SSE2-NEXT: retq 3409; 3410; SSSE3-LABEL: sext_2i8_to_2i32: 3411; SSSE3: # %bb.0: 3412; SSSE3-NEXT: movzwl (%rdi), %eax 3413; SSSE3-NEXT: movd %eax, %xmm0 3414; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3415; 
SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3416; SSSE3-NEXT: psrad $24, %xmm0 3417; SSSE3-NEXT: paddd %xmm0, %xmm0 3418; SSSE3-NEXT: retq 3419; 3420; SSE41-LABEL: sext_2i8_to_2i32: 3421; SSE41: # %bb.0: 3422; SSE41-NEXT: movzwl (%rdi), %eax 3423; SSE41-NEXT: movd %eax, %xmm0 3424; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 3425; SSE41-NEXT: paddd %xmm0, %xmm0 3426; SSE41-NEXT: retq 3427; 3428; AVX-LABEL: sext_2i8_to_2i32: 3429; AVX: # %bb.0: 3430; AVX-NEXT: movzwl (%rdi), %eax 3431; AVX-NEXT: vmovd %eax, %xmm0 3432; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 3433; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 3434; AVX-NEXT: retq 3435; 3436; X86-SSE2-LABEL: sext_2i8_to_2i32: 3437; X86-SSE2: # %bb.0: 3438; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3439; X86-SSE2-NEXT: movzwl (%eax), %eax 3440; X86-SSE2-NEXT: movd %eax, %xmm0 3441; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3442; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3443; X86-SSE2-NEXT: psrad $24, %xmm0 3444; X86-SSE2-NEXT: paddd %xmm0, %xmm0 3445; X86-SSE2-NEXT: retl 3446; 3447; X86-SSE41-LABEL: sext_2i8_to_2i32: 3448; X86-SSE41: # %bb.0: 3449; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3450; X86-SSE41-NEXT: movzwl (%eax), %eax 3451; X86-SSE41-NEXT: movd %eax, %xmm0 3452; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 3453; X86-SSE41-NEXT: paddd %xmm0, %xmm0 3454; X86-SSE41-NEXT: retl 3455 %x = load <2 x i8>, <2 x i8>* %addr, align 1 3456 %y = sext <2 x i8> %x to <2 x i32> 3457 %z = add <2 x i32>%y, %y 3458 ret <2 x i32>%z 3459} 3460 3461define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { 3462; SSE2-LABEL: sext_4i17_to_4i32: 3463; SSE2: # %bb.0: 3464; SSE2-NEXT: movq (%rdi), %rax 3465; SSE2-NEXT: movl %eax, %ecx 3466; SSE2-NEXT: shll $15, %ecx 3467; SSE2-NEXT: sarl $15, %ecx 3468; SSE2-NEXT: movd %ecx, %xmm0 3469; SSE2-NEXT: movq %rax, %rcx 3470; SSE2-NEXT: shrq $17, %rcx 3471; SSE2-NEXT: shll $15, %ecx 3472; SSE2-NEXT: sarl $15, %ecx 3473; SSE2-NEXT: movd %ecx, %xmm1 
3474; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3475; SSE2-NEXT: movl 8(%rdi), %ecx 3476; SSE2-NEXT: shll $13, %ecx 3477; SSE2-NEXT: movq %rax, %rdx 3478; SSE2-NEXT: shrq $51, %rdx 3479; SSE2-NEXT: orl %ecx, %edx 3480; SSE2-NEXT: shll $15, %edx 3481; SSE2-NEXT: sarl $15, %edx 3482; SSE2-NEXT: movd %edx, %xmm1 3483; SSE2-NEXT: shrq $34, %rax 3484; SSE2-NEXT: shll $15, %eax 3485; SSE2-NEXT: sarl $15, %eax 3486; SSE2-NEXT: movd %eax, %xmm2 3487; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 3488; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3489; SSE2-NEXT: retq 3490; 3491; SSSE3-LABEL: sext_4i17_to_4i32: 3492; SSSE3: # %bb.0: 3493; SSSE3-NEXT: movq (%rdi), %rax 3494; SSSE3-NEXT: movl %eax, %ecx 3495; SSSE3-NEXT: shll $15, %ecx 3496; SSSE3-NEXT: sarl $15, %ecx 3497; SSSE3-NEXT: movd %ecx, %xmm0 3498; SSSE3-NEXT: movq %rax, %rcx 3499; SSSE3-NEXT: shrq $17, %rcx 3500; SSSE3-NEXT: shll $15, %ecx 3501; SSSE3-NEXT: sarl $15, %ecx 3502; SSSE3-NEXT: movd %ecx, %xmm1 3503; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3504; SSSE3-NEXT: movl 8(%rdi), %ecx 3505; SSSE3-NEXT: shll $13, %ecx 3506; SSSE3-NEXT: movq %rax, %rdx 3507; SSSE3-NEXT: shrq $51, %rdx 3508; SSSE3-NEXT: orl %ecx, %edx 3509; SSSE3-NEXT: shll $15, %edx 3510; SSSE3-NEXT: sarl $15, %edx 3511; SSSE3-NEXT: movd %edx, %xmm1 3512; SSSE3-NEXT: shrq $34, %rax 3513; SSSE3-NEXT: shll $15, %eax 3514; SSSE3-NEXT: sarl $15, %eax 3515; SSSE3-NEXT: movd %eax, %xmm2 3516; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 3517; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3518; SSSE3-NEXT: retq 3519; 3520; SSE41-LABEL: sext_4i17_to_4i32: 3521; SSE41: # %bb.0: 3522; SSE41-NEXT: movq (%rdi), %rax 3523; SSE41-NEXT: movq %rax, %rcx 3524; SSE41-NEXT: shrq $17, %rcx 3525; SSE41-NEXT: shll $15, %ecx 3526; SSE41-NEXT: sarl $15, %ecx 3527; SSE41-NEXT: movl %eax, %edx 3528; SSE41-NEXT: shll $15, %edx 3529; SSE41-NEXT: sarl 
$15, %edx 3530; SSE41-NEXT: movd %edx, %xmm0 3531; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 3532; SSE41-NEXT: movq %rax, %rcx 3533; SSE41-NEXT: shrq $34, %rcx 3534; SSE41-NEXT: shll $15, %ecx 3535; SSE41-NEXT: sarl $15, %ecx 3536; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 3537; SSE41-NEXT: movl 8(%rdi), %ecx 3538; SSE41-NEXT: shll $13, %ecx 3539; SSE41-NEXT: shrq $51, %rax 3540; SSE41-NEXT: orl %ecx, %eax 3541; SSE41-NEXT: shll $15, %eax 3542; SSE41-NEXT: sarl $15, %eax 3543; SSE41-NEXT: pinsrd $3, %eax, %xmm0 3544; SSE41-NEXT: retq 3545; 3546; AVX-LABEL: sext_4i17_to_4i32: 3547; AVX: # %bb.0: 3548; AVX-NEXT: movq (%rdi), %rax 3549; AVX-NEXT: movq %rax, %rcx 3550; AVX-NEXT: shrq $17, %rcx 3551; AVX-NEXT: shll $15, %ecx 3552; AVX-NEXT: sarl $15, %ecx 3553; AVX-NEXT: movl %eax, %edx 3554; AVX-NEXT: shll $15, %edx 3555; AVX-NEXT: sarl $15, %edx 3556; AVX-NEXT: vmovd %edx, %xmm0 3557; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 3558; AVX-NEXT: movq %rax, %rcx 3559; AVX-NEXT: shrq $34, %rcx 3560; AVX-NEXT: shll $15, %ecx 3561; AVX-NEXT: sarl $15, %ecx 3562; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 3563; AVX-NEXT: movl 8(%rdi), %ecx 3564; AVX-NEXT: shll $13, %ecx 3565; AVX-NEXT: shrq $51, %rax 3566; AVX-NEXT: orl %ecx, %eax 3567; AVX-NEXT: shll $15, %eax 3568; AVX-NEXT: sarl $15, %eax 3569; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 3570; AVX-NEXT: retq 3571; 3572; X86-SSE2-LABEL: sext_4i17_to_4i32: 3573; X86-SSE2: # %bb.0: 3574; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3575; X86-SSE2-NEXT: movl (%eax), %ecx 3576; X86-SSE2-NEXT: movl 4(%eax), %edx 3577; X86-SSE2-NEXT: movl 8(%eax), %eax 3578; X86-SSE2-NEXT: shldl $13, %edx, %eax 3579; X86-SSE2-NEXT: shll $15, %eax 3580; X86-SSE2-NEXT: sarl $15, %eax 3581; X86-SSE2-NEXT: movd %eax, %xmm0 3582; X86-SSE2-NEXT: movl %edx, %eax 3583; X86-SSE2-NEXT: shll $13, %eax 3584; X86-SSE2-NEXT: sarl $15, %eax 3585; X86-SSE2-NEXT: movd %eax, %xmm1 3586; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3587; X86-SSE2-NEXT: shldl 
$15, %ecx, %edx 3588; X86-SSE2-NEXT: shll $15, %ecx 3589; X86-SSE2-NEXT: sarl $15, %ecx 3590; X86-SSE2-NEXT: movd %ecx, %xmm0 3591; X86-SSE2-NEXT: shll $15, %edx 3592; X86-SSE2-NEXT: sarl $15, %edx 3593; X86-SSE2-NEXT: movd %edx, %xmm2 3594; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3595; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3596; X86-SSE2-NEXT: retl 3597; 3598; X86-SSE41-LABEL: sext_4i17_to_4i32: 3599; X86-SSE41: # %bb.0: 3600; X86-SSE41-NEXT: pushl %esi 3601; X86-SSE41-NEXT: .cfi_def_cfa_offset 8 3602; X86-SSE41-NEXT: .cfi_offset %esi, -8 3603; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3604; X86-SSE41-NEXT: movl (%eax), %ecx 3605; X86-SSE41-NEXT: movl 4(%eax), %edx 3606; X86-SSE41-NEXT: movl %edx, %esi 3607; X86-SSE41-NEXT: movl 8(%eax), %eax 3608; X86-SSE41-NEXT: shldl $13, %edx, %eax 3609; X86-SSE41-NEXT: shldl $15, %ecx, %edx 3610; X86-SSE41-NEXT: shll $15, %edx 3611; X86-SSE41-NEXT: sarl $15, %edx 3612; X86-SSE41-NEXT: shll $15, %ecx 3613; X86-SSE41-NEXT: sarl $15, %ecx 3614; X86-SSE41-NEXT: movd %ecx, %xmm0 3615; X86-SSE41-NEXT: pinsrd $1, %edx, %xmm0 3616; X86-SSE41-NEXT: shll $13, %esi 3617; X86-SSE41-NEXT: sarl $15, %esi 3618; X86-SSE41-NEXT: pinsrd $2, %esi, %xmm0 3619; X86-SSE41-NEXT: shll $15, %eax 3620; X86-SSE41-NEXT: sarl $15, %eax 3621; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0 3622; X86-SSE41-NEXT: popl %esi 3623; X86-SSE41-NEXT: .cfi_def_cfa_offset 4 3624; X86-SSE41-NEXT: retl 3625 %a = load <4 x i17>, <4 x i17>* %ptr 3626 %b = sext <4 x i17> %a to <4 x i32> 3627 ret <4 x i32> %b 3628} 3629 3630define <8 x i64> @sext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp { 3631; SSE2-LABEL: sext_8i6_to_8i64: 3632; SSE2: # %bb.0: # %entry 3633; SSE2-NEXT: movd %edi, %xmm0 3634; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 3635; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 3636; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3 3637; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0] 3638; SSE2-NEXT: 
pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 3639; SSE2-NEXT: psllq $58, %xmm0 3640; SSE2-NEXT: movdqa %xmm0, %xmm1 3641; SSE2-NEXT: psrad $31, %xmm1 3642; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 3643; SSE2-NEXT: psrad $26, %xmm0 3644; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 3645; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3646; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 3647; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5] 3648; SSE2-NEXT: psllq $58, %xmm1 3649; SSE2-NEXT: movdqa %xmm1, %xmm2 3650; SSE2-NEXT: psrad $31, %xmm2 3651; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 3652; SSE2-NEXT: psrad $26, %xmm1 3653; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 3654; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3655; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2] 3656; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5] 3657; SSE2-NEXT: psllq $58, %xmm2 3658; SSE2-NEXT: movdqa %xmm2, %xmm4 3659; SSE2-NEXT: psrad $31, %xmm4 3660; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] 3661; SSE2-NEXT: psrad $26, %xmm2 3662; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 3663; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 3664; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 3665; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5] 3666; SSE2-NEXT: psllq $58, %xmm3 3667; SSE2-NEXT: movdqa %xmm3, %xmm4 3668; SSE2-NEXT: psrad $31, %xmm4 3669; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] 3670; SSE2-NEXT: psrad $26, %xmm3 3671; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 3672; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 3673; SSE2-NEXT: retq 3674; 3675; SSSE3-LABEL: sext_8i6_to_8i64: 3676; SSSE3: # %bb.0: # %entry 3677; SSSE3-NEXT: movd %edi, %xmm0 3678; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 3679; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 3680; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3 3681; SSSE3-NEXT: 
pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0] 3682; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 3683; SSSE3-NEXT: psllq $58, %xmm0 3684; SSSE3-NEXT: movdqa %xmm0, %xmm1 3685; SSSE3-NEXT: psrad $31, %xmm1 3686; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 3687; SSSE3-NEXT: psrad $26, %xmm0 3688; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 3689; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3690; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 3691; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5] 3692; SSSE3-NEXT: psllq $58, %xmm1 3693; SSSE3-NEXT: movdqa %xmm1, %xmm2 3694; SSSE3-NEXT: psrad $31, %xmm2 3695; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 3696; SSSE3-NEXT: psrad $26, %xmm1 3697; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 3698; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3699; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2] 3700; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5] 3701; SSSE3-NEXT: psllq $58, %xmm2 3702; SSSE3-NEXT: movdqa %xmm2, %xmm4 3703; SSSE3-NEXT: psrad $31, %xmm4 3704; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] 3705; SSSE3-NEXT: psrad $26, %xmm2 3706; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 3707; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 3708; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 3709; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5] 3710; SSSE3-NEXT: psllq $58, %xmm3 3711; SSSE3-NEXT: movdqa %xmm3, %xmm4 3712; SSSE3-NEXT: psrad $31, %xmm4 3713; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] 3714; SSSE3-NEXT: psrad $26, %xmm3 3715; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 3716; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 3717; SSSE3-NEXT: retq 3718; 3719; SSE41-LABEL: sext_8i6_to_8i64: 3720; SSE41: # %bb.0: # %entry 3721; SSE41-NEXT: movd %edi, %xmm0 3722; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 3723; SSE41-NEXT: pshufd 
{{.*#+}} xmm3 = xmm0[0,0,0,0] 3724; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3 3725; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero 3726; SSE41-NEXT: psllq $58, %xmm0 3727; SSE41-NEXT: movdqa %xmm0, %xmm1 3728; SSE41-NEXT: psrad $31, %xmm1 3729; SSE41-NEXT: psrad $26, %xmm0 3730; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3731; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 3732; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 3733; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 3734; SSE41-NEXT: psllq $58, %xmm1 3735; SSE41-NEXT: movdqa %xmm1, %xmm2 3736; SSE41-NEXT: psrad $31, %xmm2 3737; SSE41-NEXT: psrad $26, %xmm1 3738; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3739; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 3740; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3] 3741; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 3742; SSE41-NEXT: psllq $58, %xmm2 3743; SSE41-NEXT: movdqa %xmm2, %xmm4 3744; SSE41-NEXT: psrad $31, %xmm4 3745; SSE41-NEXT: psrad $26, %xmm2 3746; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 3747; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] 3748; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 3749; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero 3750; SSE41-NEXT: psllq $58, %xmm3 3751; SSE41-NEXT: movdqa %xmm3, %xmm4 3752; SSE41-NEXT: psrad $31, %xmm4 3753; SSE41-NEXT: psrad $26, %xmm3 3754; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3755; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] 3756; SSE41-NEXT: retq 3757; 3758; AVX1-LABEL: sext_8i6_to_8i64: 3759; AVX1: # %bb.0: # %entry 3760; AVX1-NEXT: vmovd %edi, %xmm0 3761; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 3762; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 3763; AVX1-NEXT: vpaddw 
{{.*}}(%rip), %xmm0, %xmm0 3764; AVX1-NEXT: vpsllw $10, %xmm0, %xmm0 3765; AVX1-NEXT: vpsraw $10, %xmm0, %xmm1 3766; AVX1-NEXT: vpmovsxwq %xmm1, %xmm0 3767; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 3768; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2 3769; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3770; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 3771; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2 3772; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 3773; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1 3774; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 3775; AVX1-NEXT: retq 3776; 3777; AVX2-LABEL: sext_8i6_to_8i64: 3778; AVX2: # %bb.0: # %entry 3779; AVX2-NEXT: vmovd %edi, %xmm0 3780; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 3781; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 3782; AVX2-NEXT: vpsllw $10, %xmm0, %xmm0 3783; AVX2-NEXT: vpsraw $10, %xmm0, %xmm1 3784; AVX2-NEXT: vpmovsxwq %xmm1, %ymm0 3785; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3786; AVX2-NEXT: vpmovsxwq %xmm1, %ymm1 3787; AVX2-NEXT: retq 3788; 3789; AVX512-LABEL: sext_8i6_to_8i64: 3790; AVX512: # %bb.0: # %entry 3791; AVX512-NEXT: vmovd %edi, %xmm0 3792; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0 3793; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 3794; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 3795; AVX512-NEXT: vpsllq $58, %zmm0, %zmm0 3796; AVX512-NEXT: vpsraq $58, %zmm0, %zmm0 3797; AVX512-NEXT: retq 3798; 3799; X86-SSE2-LABEL: sext_8i6_to_8i64: 3800; X86-SSE2: # %bb.0: # %entry 3801; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3802; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 3803; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 3804; X86-SSE2-NEXT: paddw {{\.LCPI.*}}, %xmm3 3805; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0] 3806; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 
3807; X86-SSE2-NEXT: psllq $58, %xmm0 3808; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 3809; X86-SSE2-NEXT: psrad $31, %xmm1 3810; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 3811; X86-SSE2-NEXT: psrad $26, %xmm0 3812; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 3813; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3814; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 3815; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5] 3816; X86-SSE2-NEXT: psllq $58, %xmm1 3817; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 3818; X86-SSE2-NEXT: psrad $31, %xmm2 3819; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 3820; X86-SSE2-NEXT: psrad $26, %xmm1 3821; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 3822; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3823; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2] 3824; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5] 3825; X86-SSE2-NEXT: psllq $58, %xmm2 3826; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 3827; X86-SSE2-NEXT: psrad $31, %xmm4 3828; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] 3829; X86-SSE2-NEXT: psrad $26, %xmm2 3830; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 3831; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 3832; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 3833; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5] 3834; X86-SSE2-NEXT: psllq $58, %xmm3 3835; X86-SSE2-NEXT: movdqa %xmm3, %xmm4 3836; X86-SSE2-NEXT: psrad $31, %xmm4 3837; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] 3838; X86-SSE2-NEXT: psrad $26, %xmm3 3839; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 3840; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 3841; X86-SSE2-NEXT: retl 3842; 3843; X86-SSE41-LABEL: sext_8i6_to_8i64: 3844; X86-SSE41: # %bb.0: # %entry 3845; X86-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3846; X86-SSE41-NEXT: pshuflw {{.*#+}} xmm0 = 
xmm0[0,0,0,0,4,5,6,7] 3847; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 3848; X86-SSE41-NEXT: paddw {{\.LCPI.*}}, %xmm3 3849; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero 3850; X86-SSE41-NEXT: psllq $58, %xmm0 3851; X86-SSE41-NEXT: movdqa %xmm0, %xmm1 3852; X86-SSE41-NEXT: psrad $31, %xmm1 3853; X86-SSE41-NEXT: psrad $26, %xmm0 3854; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3855; X86-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 3856; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 3857; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 3858; X86-SSE41-NEXT: psllq $58, %xmm1 3859; X86-SSE41-NEXT: movdqa %xmm1, %xmm2 3860; X86-SSE41-NEXT: psrad $31, %xmm2 3861; X86-SSE41-NEXT: psrad $26, %xmm1 3862; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3863; X86-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 3864; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3] 3865; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 3866; X86-SSE41-NEXT: psllq $58, %xmm2 3867; X86-SSE41-NEXT: movdqa %xmm2, %xmm4 3868; X86-SSE41-NEXT: psrad $31, %xmm4 3869; X86-SSE41-NEXT: psrad $26, %xmm2 3870; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 3871; X86-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] 3872; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 3873; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero 3874; X86-SSE41-NEXT: psllq $58, %xmm3 3875; X86-SSE41-NEXT: movdqa %xmm3, %xmm4 3876; X86-SSE41-NEXT: psrad $31, %xmm4 3877; X86-SSE41-NEXT: psrad $26, %xmm3 3878; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3879; X86-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] 3880; X86-SSE41-NEXT: retl 3881entry: 3882 %a = trunc i32 %x to i6 3883 %b = insertelement <8 x i6> 
undef, i6 %a, i32 0 3884 %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer 3885 %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7> 3886 %e = sext <8 x i6> %d to <8 x i64> 3887 ret <8 x i64> %e 3888} 3889 3890define <8 x i32> @zext_negate_sext(<8 x i8> %x) { 3891; SSE2-LABEL: zext_negate_sext: 3892; SSE2: # %bb.0: 3893; SSE2-NEXT: pxor %xmm1, %xmm1 3894; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3895; SSE2-NEXT: psubw %xmm0, %xmm1 3896; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3897; SSE2-NEXT: psrad $16, %xmm0 3898; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 3899; SSE2-NEXT: psrad $16, %xmm1 3900; SSE2-NEXT: retq 3901; 3902; SSSE3-LABEL: zext_negate_sext: 3903; SSSE3: # %bb.0: 3904; SSSE3-NEXT: pxor %xmm1, %xmm1 3905; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3906; SSSE3-NEXT: psubw %xmm0, %xmm1 3907; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3908; SSSE3-NEXT: psrad $16, %xmm0 3909; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 3910; SSSE3-NEXT: psrad $16, %xmm1 3911; SSSE3-NEXT: retq 3912; 3913; SSE41-LABEL: zext_negate_sext: 3914; SSE41: # %bb.0: 3915; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3916; SSE41-NEXT: pxor %xmm1, %xmm1 3917; SSE41-NEXT: psubw %xmm0, %xmm1 3918; SSE41-NEXT: pmovsxwd %xmm1, %xmm0 3919; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3920; SSE41-NEXT: pmovsxwd %xmm1, %xmm1 3921; SSE41-NEXT: retq 3922; 3923; AVX1-LABEL: zext_negate_sext: 3924; AVX1: # %bb.0: 3925; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = 
xmm0[1,1,1,1] 3926; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 3927; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 3928; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 3929; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 3930; AVX1-NEXT: vpsubd %xmm0, %xmm2, %xmm0 3931; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3932; AVX1-NEXT: retq 3933; 3934; AVX2-LABEL: zext_negate_sext: 3935; AVX2: # %bb.0: 3936; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 3937; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 3938; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0 3939; AVX2-NEXT: retq 3940; 3941; AVX512-LABEL: zext_negate_sext: 3942; AVX512: # %bb.0: 3943; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 3944; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 3945; AVX512-NEXT: vpsubd %ymm0, %ymm1, %ymm0 3946; AVX512-NEXT: retq 3947; 3948; X86-SSE2-LABEL: zext_negate_sext: 3949; X86-SSE2: # %bb.0: 3950; X86-SSE2-NEXT: pxor %xmm1, %xmm1 3951; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3952; X86-SSE2-NEXT: psubw %xmm0, %xmm1 3953; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3954; X86-SSE2-NEXT: psrad $16, %xmm0 3955; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 3956; X86-SSE2-NEXT: psrad $16, %xmm1 3957; X86-SSE2-NEXT: retl 3958; 3959; X86-SSE41-LABEL: zext_negate_sext: 3960; X86-SSE41: # %bb.0: 3961; 
X86-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3962; X86-SSE41-NEXT: pxor %xmm1, %xmm1 3963; X86-SSE41-NEXT: psubw %xmm0, %xmm1 3964; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm0 3965; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3966; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm1 3967; X86-SSE41-NEXT: retl 3968 %z = zext <8 x i8> %x to <8 x i16> 3969 %neg = sub nsw <8 x i16> zeroinitializer, %z 3970 %r = sext <8 x i16> %neg to <8 x i32> 3971 ret <8 x i32> %r 3972} 3973 3974define <8 x i32> @zext_decremenet_sext(<8 x i8> %x) { 3975; SSE2-LABEL: zext_decremenet_sext: 3976; SSE2: # %bb.0: 3977; SSE2-NEXT: pxor %xmm1, %xmm1 3978; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3979; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 3980; SSE2-NEXT: paddw %xmm0, %xmm1 3981; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3982; SSE2-NEXT: psrad $16, %xmm0 3983; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 3984; SSE2-NEXT: psrad $16, %xmm1 3985; SSE2-NEXT: retq 3986; 3987; SSSE3-LABEL: zext_decremenet_sext: 3988; SSSE3: # %bb.0: 3989; SSSE3-NEXT: pxor %xmm1, %xmm1 3990; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3991; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 3992; SSSE3-NEXT: paddw %xmm0, %xmm1 3993; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3994; SSSE3-NEXT: psrad $16, %xmm0 3995; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 3996; SSSE3-NEXT: psrad $16, %xmm1 3997; SSSE3-NEXT: retq 3998; 3999; SSE41-LABEL: zext_decremenet_sext: 4000; SSE41: # %bb.0: 4001; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4002; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 4003; SSE41-NEXT: paddw %xmm0, %xmm1 4004; SSE41-NEXT: pmovsxwd %xmm1, %xmm0 4005; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 4006; SSE41-NEXT: pmovsxwd %xmm1, %xmm1 4007; SSE41-NEXT: retq 4008; 4009; AVX1-LABEL: zext_decremenet_sext: 4010; AVX1: # %bb.0: 4011; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 4012; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 4013; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 4014; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 4015; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 4016; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 4017; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4018; AVX1-NEXT: retq 4019; 4020; AVX2-LABEL: zext_decremenet_sext: 4021; AVX2: # %bb.0: 4022; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 4023; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 4024; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4025; AVX2-NEXT: retq 4026; 4027; AVX512-LABEL: zext_decremenet_sext: 4028; AVX512: # %bb.0: 4029; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 4030; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 4031; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4032; AVX512-NEXT: retq 4033; 4034; X86-SSE2-LABEL: zext_decremenet_sext: 4035; X86-SSE2: # %bb.0: 4036; X86-SSE2-NEXT: pxor %xmm1, %xmm1 4037; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 4038; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 4039; X86-SSE2-NEXT: paddw %xmm0, %xmm1 4040; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4041; X86-SSE2-NEXT: psrad $16, %xmm0 4042; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 4043; X86-SSE2-NEXT: psrad $16, %xmm1 4044; X86-SSE2-NEXT: retl 4045; 4046; X86-SSE41-LABEL: zext_decremenet_sext: 4047; X86-SSE41: # %bb.0: 4048; X86-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4049; X86-SSE41-NEXT: pcmpeqd %xmm1, %xmm1 4050; X86-SSE41-NEXT: paddw %xmm0, %xmm1 4051; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm0 4052; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 4053; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm1 4054; X86-SSE41-NEXT: retl 4055 %z = zext <8 x i8> %x to <8 x i16> 4056 %dec = add <8 x i16> %z, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 4057 %r = sext <8 x i16> %dec to <8 x i32> 4058 ret <8 x i32> %r 4059} 4060