; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things there.
11; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41 12 13define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 14; SSE2-LABEL: sext_16i8_to_8i16: 15; SSE2: # %bb.0: # %entry 16; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 17; SSE2-NEXT: psraw $8, %xmm0 18; SSE2-NEXT: retq 19; 20; SSSE3-LABEL: sext_16i8_to_8i16: 21; SSSE3: # %bb.0: # %entry 22; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 23; SSSE3-NEXT: psraw $8, %xmm0 24; SSSE3-NEXT: retq 25; 26; SSE41-LABEL: sext_16i8_to_8i16: 27; SSE41: # %bb.0: # %entry 28; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 29; SSE41-NEXT: retq 30; 31; AVX-LABEL: sext_16i8_to_8i16: 32; AVX: # %bb.0: # %entry 33; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 34; AVX-NEXT: retq 35; 36; X32-SSE41-LABEL: sext_16i8_to_8i16: 37; X32-SSE41: # %bb.0: # %entry 38; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 39; X32-SSE41-NEXT: retl 40entry: 41 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 42 %C = sext <8 x i8> %B to <8 x i16> 43 ret <8 x i16> %C 44} 45 46define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp { 47; SSE2-LABEL: sext_16i8_to_16i16: 48; SSE2: # %bb.0: # %entry 49; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 50; SSE2-NEXT: psraw $8, %xmm2 51; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 52; SSE2-NEXT: psraw $8, %xmm1 53; SSE2-NEXT: movdqa %xmm2, %xmm0 54; SSE2-NEXT: retq 55; 56; SSSE3-LABEL: sext_16i8_to_16i16: 57; SSSE3: # %bb.0: # %entry 58; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 59; SSSE3-NEXT: psraw $8, %xmm2 60; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 61; SSSE3-NEXT: psraw $8, %xmm1 62; SSSE3-NEXT: movdqa %xmm2, %xmm0 63; SSSE3-NEXT: retq 64; 65; SSE41-LABEL: sext_16i8_to_16i16: 66; SSE41: # %bb.0: # %entry 67; SSE41-NEXT: pmovsxbw %xmm0, %xmm2 68; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 69; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 70; SSE41-NEXT: movdqa %xmm2, %xmm0 71; SSE41-NEXT: retq 72; 73; AVX1-LABEL: sext_16i8_to_16i16: 74; AVX1: # %bb.0: # %entry 75; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 76; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 77; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 78; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 79; AVX1-NEXT: retq 80; 81; AVX2-LABEL: sext_16i8_to_16i16: 82; AVX2: # %bb.0: # %entry 83; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 84; AVX2-NEXT: retq 85; 86; AVX512-LABEL: sext_16i8_to_16i16: 87; AVX512: # %bb.0: # %entry 88; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0 89; AVX512-NEXT: retq 90; 91; X32-SSE41-LABEL: sext_16i8_to_16i16: 92; X32-SSE41: # %bb.0: # %entry 93; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm2 94; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 95; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm1 96; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 97; X32-SSE41-NEXT: retl 98entry: 99 %B = sext <16 x i8> %A to <16 x i16> 100 ret <16 x i16> %B 101} 102 103define <32 x i16> @sext_32i8_to_32i16(<32 x i8> %A) nounwind uwtable readnone ssp { 104; SSE2-LABEL: sext_32i8_to_32i16: 105; SSE2: # %bb.0: # %entry 106; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 107; SSE2-NEXT: psraw $8, %xmm4 108; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = 
xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 109; SSE2-NEXT: psraw $8, %xmm5 110; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 111; SSE2-NEXT: psraw $8, %xmm2 112; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 113; SSE2-NEXT: psraw $8, %xmm3 114; SSE2-NEXT: movdqa %xmm4, %xmm0 115; SSE2-NEXT: movdqa %xmm5, %xmm1 116; SSE2-NEXT: retq 117; 118; SSSE3-LABEL: sext_32i8_to_32i16: 119; SSSE3: # %bb.0: # %entry 120; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 121; SSSE3-NEXT: psraw $8, %xmm4 122; SSSE3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 123; SSSE3-NEXT: psraw $8, %xmm5 124; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 125; SSSE3-NEXT: psraw $8, %xmm2 126; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 127; SSSE3-NEXT: psraw $8, %xmm3 128; SSSE3-NEXT: movdqa %xmm4, %xmm0 129; SSSE3-NEXT: movdqa %xmm5, %xmm1 130; SSSE3-NEXT: retq 131; 132; SSE41-LABEL: sext_32i8_to_32i16: 133; SSE41: # %bb.0: # %entry 134; SSE41-NEXT: pmovsxbw %xmm0, %xmm5 135; SSE41-NEXT: pmovsxbw %xmm1, %xmm2 136; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 137; SSE41-NEXT: pmovsxbw %xmm0, %xmm4 138; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 139; 
SSE41-NEXT: pmovsxbw %xmm0, %xmm3 140; SSE41-NEXT: movdqa %xmm5, %xmm0 141; SSE41-NEXT: movdqa %xmm4, %xmm1 142; SSE41-NEXT: retq 143; 144; AVX1-LABEL: sext_32i8_to_32i16: 145; AVX1: # %bb.0: # %entry 146; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 147; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 148; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2 149; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 150; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 151; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 152; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 153; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 154; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 155; AVX1-NEXT: vmovaps %ymm2, %ymm0 156; AVX1-NEXT: retq 157; 158; AVX2-LABEL: sext_32i8_to_32i16: 159; AVX2: # %bb.0: # %entry 160; AVX2-NEXT: vpmovsxbw %xmm0, %ymm2 161; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 162; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1 163; AVX2-NEXT: vmovdqa %ymm2, %ymm0 164; AVX2-NEXT: retq 165; 166; AVX512F-LABEL: sext_32i8_to_32i16: 167; AVX512F: # %bb.0: # %entry 168; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm2 169; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 170; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1 171; AVX512F-NEXT: vmovdqa %ymm2, %ymm0 172; AVX512F-NEXT: retq 173; 174; AVX512BW-LABEL: sext_32i8_to_32i16: 175; AVX512BW: # %bb.0: # %entry 176; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 177; AVX512BW-NEXT: retq 178; 179; X32-SSE41-LABEL: sext_32i8_to_32i16: 180; X32-SSE41: # %bb.0: # %entry 181; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm5 182; X32-SSE41-NEXT: pmovsxbw %xmm1, %xmm2 183; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 184; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm4 185; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 186; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm3 187; X32-SSE41-NEXT: movdqa %xmm5, %xmm0 188; X32-SSE41-NEXT: movdqa %xmm4, %xmm1 189; X32-SSE41-NEXT: retl 190entry: 191 %B = sext <32 x i8> %A to <32 x i16> 192 ret <32 x i16> %B 193} 194 195define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 196; 
SSE2-LABEL: sext_16i8_to_4i32: 197; SSE2: # %bb.0: # %entry 198; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 199; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 200; SSE2-NEXT: psrad $24, %xmm0 201; SSE2-NEXT: retq 202; 203; SSSE3-LABEL: sext_16i8_to_4i32: 204; SSSE3: # %bb.0: # %entry 205; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 206; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 207; SSSE3-NEXT: psrad $24, %xmm0 208; SSSE3-NEXT: retq 209; 210; SSE41-LABEL: sext_16i8_to_4i32: 211; SSE41: # %bb.0: # %entry 212; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 213; SSE41-NEXT: retq 214; 215; AVX-LABEL: sext_16i8_to_4i32: 216; AVX: # %bb.0: # %entry 217; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 218; AVX-NEXT: retq 219; 220; X32-SSE41-LABEL: sext_16i8_to_4i32: 221; X32-SSE41: # %bb.0: # %entry 222; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 223; X32-SSE41-NEXT: retl 224entry: 225 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 226 %C = sext <4 x i8> %B to <4 x i32> 227 ret <4 x i32> %C 228} 229 230define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 231; SSE2-LABEL: sext_16i8_to_8i32: 232; SSE2: # %bb.0: # %entry 233; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 234; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 235; SSE2-NEXT: psrad $24, %xmm2 236; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 237; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 238; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 239; SSE2-NEXT: psrad $24, %xmm1 240; SSE2-NEXT: movdqa %xmm2, %xmm0 241; SSE2-NEXT: retq 242; 243; SSSE3-LABEL: sext_16i8_to_8i32: 244; SSSE3: # %bb.0: # %entry 245; SSSE3-NEXT: movdqa 
%xmm0, %xmm1 246; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 247; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 248; SSSE3-NEXT: psrad $24, %xmm0 249; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,4,u,u,u,5,u,u,u,6,u,u,u,7] 250; SSSE3-NEXT: psrad $24, %xmm1 251; SSSE3-NEXT: retq 252; 253; SSE41-LABEL: sext_16i8_to_8i32: 254; SSE41: # %bb.0: # %entry 255; SSE41-NEXT: pmovsxbd %xmm0, %xmm2 256; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 257; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 258; SSE41-NEXT: movdqa %xmm2, %xmm0 259; SSE41-NEXT: retq 260; 261; AVX1-LABEL: sext_16i8_to_8i32: 262; AVX1: # %bb.0: # %entry 263; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 264; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 265; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 266; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 267; AVX1-NEXT: retq 268; 269; AVX2-LABEL: sext_16i8_to_8i32: 270; AVX2: # %bb.0: # %entry 271; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 272; AVX2-NEXT: retq 273; 274; AVX512-LABEL: sext_16i8_to_8i32: 275; AVX512: # %bb.0: # %entry 276; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0 277; AVX512-NEXT: retq 278; 279; X32-SSE41-LABEL: sext_16i8_to_8i32: 280; X32-SSE41: # %bb.0: # %entry 281; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm2 282; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 283; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm1 284; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 285; X32-SSE41-NEXT: retl 286entry: 287 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 288 %C = sext <8 x i8> %B to <8 x i32> 289 ret <8 x i32> %C 290} 291 292define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp { 293; SSE2-LABEL: sext_16i8_to_16i32: 294; SSE2: # %bb.0: # %entry 295; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 296; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = 
xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 297; SSE2-NEXT: psrad $24, %xmm4 298; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 299; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 300; SSE2-NEXT: psrad $24, %xmm2 301; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 302; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 303; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 304; SSE2-NEXT: psrad $24, %xmm1 305; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 306; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 307; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] 308; SSE2-NEXT: psrad $24, %xmm3 309; SSE2-NEXT: movdqa %xmm4, %xmm0 310; SSE2-NEXT: retq 311; 312; SSSE3-LABEL: sext_16i8_to_16i32: 313; SSSE3: # %bb.0: # %entry 314; SSSE3-NEXT: movdqa %xmm0, %xmm3 315; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 316; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 317; SSSE3-NEXT: psrad $24, %xmm0 318; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15] 319; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 320; SSSE3-NEXT: psrad $24, %xmm2 321; SSSE3-NEXT: movdqa %xmm3, %xmm1 322; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,4,u,u,u,5,u,u,u,6,u,u,u,7] 323; SSSE3-NEXT: psrad $24, %xmm1 324; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[u,u,u,12,u,u,u,13,u,u,u,14,u,u,u,15] 325; SSSE3-NEXT: psrad $24, %xmm3 326; SSSE3-NEXT: retq 327; 328; SSE41-LABEL: sext_16i8_to_16i32: 329; SSE41: # %bb.0: # %entry 330; SSE41-NEXT: pmovsxbd %xmm0, %xmm4 331; 
SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 332; SSE41-NEXT: pmovsxbd %xmm1, %xmm1 333; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 334; SSE41-NEXT: pmovsxbd %xmm2, %xmm2 335; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 336; SSE41-NEXT: pmovsxbd %xmm0, %xmm3 337; SSE41-NEXT: movdqa %xmm4, %xmm0 338; SSE41-NEXT: retq 339; 340; AVX1-LABEL: sext_16i8_to_16i32: 341; AVX1: # %bb.0: # %entry 342; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 343; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 344; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2 345; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 346; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 347; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1 348; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 349; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 350; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 351; AVX1-NEXT: vmovaps %ymm2, %ymm0 352; AVX1-NEXT: retq 353; 354; AVX2-LABEL: sext_16i8_to_16i32: 355; AVX2: # %bb.0: # %entry 356; AVX2-NEXT: vpmovsxbd %xmm0, %ymm2 357; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 358; AVX2-NEXT: vpmovsxbd %xmm0, %ymm1 359; AVX2-NEXT: vmovdqa %ymm2, %ymm0 360; AVX2-NEXT: retq 361; 362; AVX512-LABEL: sext_16i8_to_16i32: 363; AVX512: # %bb.0: # %entry 364; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 365; AVX512-NEXT: retq 366; 367; X32-SSE41-LABEL: sext_16i8_to_16i32: 368; X32-SSE41: # %bb.0: # %entry 369; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm4 370; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 371; X32-SSE41-NEXT: pmovsxbd %xmm1, %xmm1 372; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 373; X32-SSE41-NEXT: pmovsxbd %xmm2, %xmm2 374; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 375; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm3 376; X32-SSE41-NEXT: movdqa %xmm4, %xmm0 377; X32-SSE41-NEXT: retl 378entry: 379 %B = sext <16 x i8> %A to <16 x i32> 380 ret <16 x i32> %B 381} 382 383define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 384; SSE2-LABEL: sext_16i8_to_2i64: 385; SSE2: # 
%bb.0: # %entry 386; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 387; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 388; SSE2-NEXT: movdqa %xmm0, %xmm1 389; SSE2-NEXT: psrad $31, %xmm1 390; SSE2-NEXT: psrad $24, %xmm0 391; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 392; SSE2-NEXT: retq 393; 394; SSSE3-LABEL: sext_16i8_to_2i64: 395; SSSE3: # %bb.0: # %entry 396; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 397; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 398; SSSE3-NEXT: movdqa %xmm0, %xmm1 399; SSSE3-NEXT: psrad $31, %xmm1 400; SSSE3-NEXT: psrad $24, %xmm0 401; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 402; SSSE3-NEXT: retq 403; 404; SSE41-LABEL: sext_16i8_to_2i64: 405; SSE41: # %bb.0: # %entry 406; SSE41-NEXT: pmovsxbq %xmm0, %xmm0 407; SSE41-NEXT: retq 408; 409; AVX-LABEL: sext_16i8_to_2i64: 410; AVX: # %bb.0: # %entry 411; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 412; AVX-NEXT: retq 413; 414; X32-SSE41-LABEL: sext_16i8_to_2i64: 415; X32-SSE41: # %bb.0: # %entry 416; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm0 417; X32-SSE41-NEXT: retl 418entry: 419 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 420 %C = sext <2 x i8> %B to <2 x i64> 421 ret <2 x i64> %C 422} 423 424define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 425; SSE2-LABEL: sext_16i8_to_4i64: 426; SSE2: # %bb.0: # %entry 427; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 428; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 429; SSE2-NEXT: movdqa %xmm2, %xmm1 430; SSE2-NEXT: psrad $31, %xmm1 431; SSE2-NEXT: psrad $24, %xmm2 432; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 433; SSE2-NEXT: psrld $16, %xmm0 434; SSE2-NEXT: 
punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 435; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 436; SSE2-NEXT: movdqa %xmm1, %xmm0 437; SSE2-NEXT: psrad $31, %xmm0 438; SSE2-NEXT: psrad $24, %xmm1 439; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 440; SSE2-NEXT: movdqa %xmm2, %xmm0 441; SSE2-NEXT: retq 442; 443; SSSE3-LABEL: sext_16i8_to_4i64: 444; SSSE3: # %bb.0: # %entry 445; SSSE3-NEXT: movdqa %xmm0, %xmm1 446; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 447; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 448; SSSE3-NEXT: movdqa %xmm0, %xmm2 449; SSSE3-NEXT: psrad $31, %xmm2 450; SSSE3-NEXT: psrad $24, %xmm0 451; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 452; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,2,u,u,u,3,u,u,u,u,u,u,u,u] 453; SSSE3-NEXT: movdqa %xmm1, %xmm2 454; SSSE3-NEXT: psrad $31, %xmm2 455; SSSE3-NEXT: psrad $24, %xmm1 456; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 457; SSSE3-NEXT: retq 458; 459; SSE41-LABEL: sext_16i8_to_4i64: 460; SSE41: # %bb.0: # %entry 461; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 462; SSE41-NEXT: psrld $16, %xmm0 463; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 464; SSE41-NEXT: movdqa %xmm2, %xmm0 465; SSE41-NEXT: retq 466; 467; AVX1-LABEL: sext_16i8_to_4i64: 468; AVX1: # %bb.0: # %entry 469; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 470; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 471; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 472; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 473; AVX1-NEXT: retq 474; 475; AVX2-LABEL: sext_16i8_to_4i64: 476; AVX2: # %bb.0: # %entry 477; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0 478; AVX2-NEXT: retq 479; 480; AVX512-LABEL: sext_16i8_to_4i64: 481; AVX512: # %bb.0: # %entry 482; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0 483; AVX512-NEXT: retq 484; 485; X32-SSE41-LABEL: sext_16i8_to_4i64: 486; X32-SSE41: # %bb.0: # %entry 487; X32-SSE41-NEXT: 
pmovsxbq %xmm0, %xmm2 488; X32-SSE41-NEXT: psrld $16, %xmm0 489; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm1 490; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 491; X32-SSE41-NEXT: retl 492entry: 493 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 494 %C = sext <4 x i8> %B to <4 x i64> 495 ret <4 x i64> %C 496} 497 498define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp { 499; SSE2-LABEL: sext_16i8_to_8i64: 500; SSE2: # %bb.0: # %entry 501; SSE2-NEXT: movdqa %xmm0, %xmm1 502; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 503; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 504; SSE2-NEXT: movdqa %xmm0, %xmm2 505; SSE2-NEXT: psrad $31, %xmm2 506; SSE2-NEXT: psrad $24, %xmm0 507; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 508; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 509; SSE2-NEXT: psrld $16, %xmm1 510; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 511; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 512; SSE2-NEXT: movdqa %xmm1, %xmm2 513; SSE2-NEXT: psrad $31, %xmm2 514; SSE2-NEXT: psrad $24, %xmm1 515; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 516; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] 517; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3] 518; SSE2-NEXT: movdqa %xmm2, %xmm4 519; SSE2-NEXT: psrad $31, %xmm4 520; SSE2-NEXT: psrad $24, %xmm2 521; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 522; SSE2-NEXT: psrld $16, %xmm3 523; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 524; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3] 525; SSE2-NEXT: movdqa %xmm3, %xmm4 526; SSE2-NEXT: psrad $31, %xmm4 527; SSE2-NEXT: psrad $24, %xmm3 528; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = 
xmm3[0],xmm4[0],xmm3[1],xmm4[1] 529; SSE2-NEXT: retq 530; 531; SSSE3-LABEL: sext_16i8_to_8i64: 532; SSSE3: # %bb.0: # %entry 533; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <u,u,u,2,u,u,u,3,u,u,u,u,u,u,u,u> 534; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 535; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] 536; SSSE3-NEXT: movdqa %xmm0, %xmm1 537; SSSE3-NEXT: pshufb %xmm2, %xmm1 538; SSSE3-NEXT: movdqa %xmm1, %xmm0 539; SSSE3-NEXT: psrad $31, %xmm0 540; SSSE3-NEXT: psrad $24, %xmm1 541; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 542; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 543; SSSE3-NEXT: movdqa %xmm0, %xmm4 544; SSSE3-NEXT: psrad $31, %xmm4 545; SSSE3-NEXT: psrad $24, %xmm0 546; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 547; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 548; SSSE3-NEXT: pshufb %xmm2, %xmm3 549; SSSE3-NEXT: movdqa %xmm3, %xmm2 550; SSSE3-NEXT: psrad $31, %xmm2 551; SSSE3-NEXT: psrad $24, %xmm3 552; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 553; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 554; SSSE3-NEXT: movdqa %xmm2, %xmm4 555; SSSE3-NEXT: psrad $31, %xmm4 556; SSSE3-NEXT: psrad $24, %xmm2 557; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 558; SSSE3-NEXT: retq 559; 560; SSE41-LABEL: sext_16i8_to_8i64: 561; SSE41: # %bb.0: # %entry 562; SSE41-NEXT: pmovsxbq %xmm0, %xmm4 563; SSE41-NEXT: movdqa %xmm0, %xmm1 564; SSE41-NEXT: psrld $16, %xmm1 565; SSE41-NEXT: pmovsxbq %xmm1, %xmm1 566; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 567; SSE41-NEXT: pmovsxbq %xmm2, %xmm2 568; SSE41-NEXT: psrlq 
$48, %xmm0 569; SSE41-NEXT: pmovsxbq %xmm0, %xmm3 570; SSE41-NEXT: movdqa %xmm4, %xmm0 571; SSE41-NEXT: retq 572; 573; AVX1-LABEL: sext_16i8_to_8i64: 574; AVX1: # %bb.0: # %entry 575; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 576; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 577; AVX1-NEXT: vpmovsxbq %xmm2, %xmm2 578; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 579; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 580; AVX1-NEXT: vpmovsxbq %xmm1, %xmm1 581; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 582; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 583; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 584; AVX1-NEXT: vmovaps %ymm2, %ymm0 585; AVX1-NEXT: retq 586; 587; AVX2-LABEL: sext_16i8_to_8i64: 588; AVX2: # %bb.0: # %entry 589; AVX2-NEXT: vpmovsxbq %xmm0, %ymm2 590; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 591; AVX2-NEXT: vpmovsxbq %xmm0, %ymm1 592; AVX2-NEXT: vmovdqa %ymm2, %ymm0 593; AVX2-NEXT: retq 594; 595; AVX512-LABEL: sext_16i8_to_8i64: 596; AVX512: # %bb.0: # %entry 597; AVX512-NEXT: vpmovsxbq %xmm0, %zmm0 598; AVX512-NEXT: retq 599; 600; X32-SSE41-LABEL: sext_16i8_to_8i64: 601; X32-SSE41: # %bb.0: # %entry 602; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm4 603; X32-SSE41-NEXT: movdqa %xmm0, %xmm1 604; X32-SSE41-NEXT: psrld $16, %xmm1 605; X32-SSE41-NEXT: pmovsxbq %xmm1, %xmm1 606; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 607; X32-SSE41-NEXT: pmovsxbq %xmm2, %xmm2 608; X32-SSE41-NEXT: psrlq $48, %xmm0 609; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm3 610; X32-SSE41-NEXT: movdqa %xmm4, %xmm0 611; X32-SSE41-NEXT: retl 612entry: 613 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 614 %C = sext <8 x i8> %B to <8 x i64> 615 ret <8 x i64> %C 616} 617 618define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 619; SSE2-LABEL: sext_8i16_to_4i32: 620; SSE2: # %bb.0: # %entry 621; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 622; SSE2-NEXT: psrad $16, %xmm0 623; SSE2-NEXT: retq 624; 
625; SSSE3-LABEL: sext_8i16_to_4i32: 626; SSSE3: # %bb.0: # %entry 627; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 628; SSSE3-NEXT: psrad $16, %xmm0 629; SSSE3-NEXT: retq 630; 631; SSE41-LABEL: sext_8i16_to_4i32: 632; SSE41: # %bb.0: # %entry 633; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 634; SSE41-NEXT: retq 635; 636; AVX-LABEL: sext_8i16_to_4i32: 637; AVX: # %bb.0: # %entry 638; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 639; AVX-NEXT: retq 640; 641; X32-SSE41-LABEL: sext_8i16_to_4i32: 642; X32-SSE41: # %bb.0: # %entry 643; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm0 644; X32-SSE41-NEXT: retl 645entry: 646 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 647 %C = sext <4 x i16> %B to <4 x i32> 648 ret <4 x i32> %C 649} 650 651define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 652; SSE2-LABEL: sext_8i16_to_8i32: 653; SSE2: # %bb.0: # %entry 654; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 655; SSE2-NEXT: psrad $16, %xmm2 656; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 657; SSE2-NEXT: psrad $16, %xmm1 658; SSE2-NEXT: movdqa %xmm2, %xmm0 659; SSE2-NEXT: retq 660; 661; SSSE3-LABEL: sext_8i16_to_8i32: 662; SSSE3: # %bb.0: # %entry 663; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 664; SSSE3-NEXT: psrad $16, %xmm2 665; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 666; SSSE3-NEXT: psrad $16, %xmm1 667; SSSE3-NEXT: movdqa %xmm2, %xmm0 668; SSSE3-NEXT: retq 669; 670; SSE41-LABEL: sext_8i16_to_8i32: 671; SSE41: # %bb.0: # %entry 672; SSE41-NEXT: pmovsxwd %xmm0, %xmm2 673; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 674; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 675; SSE41-NEXT: movdqa %xmm2, %xmm0 676; SSE41-NEXT: retq 677; 678; AVX1-LABEL: sext_8i16_to_8i32: 679; AVX1: # %bb.0: # %entry 
680; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 681; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 682; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 683; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 684; AVX1-NEXT: retq 685; 686; AVX2-LABEL: sext_8i16_to_8i32: 687; AVX2: # %bb.0: # %entry 688; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 689; AVX2-NEXT: retq 690; 691; AVX512-LABEL: sext_8i16_to_8i32: 692; AVX512: # %bb.0: # %entry 693; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 694; AVX512-NEXT: retq 695; 696; X32-SSE41-LABEL: sext_8i16_to_8i32: 697; X32-SSE41: # %bb.0: # %entry 698; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm2 699; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 700; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm1 701; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 702; X32-SSE41-NEXT: retl 703entry: 704 %B = sext <8 x i16> %A to <8 x i32> 705 ret <8 x i32> %B 706} 707 708define <16 x i32> @sext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp { 709; SSE2-LABEL: sext_16i16_to_16i32: 710; SSE2: # %bb.0: # %entry 711; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 712; SSE2-NEXT: psrad $16, %xmm4 713; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 714; SSE2-NEXT: psrad $16, %xmm5 715; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 716; SSE2-NEXT: psrad $16, %xmm2 717; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 718; SSE2-NEXT: psrad $16, %xmm3 719; SSE2-NEXT: movdqa %xmm4, %xmm0 720; SSE2-NEXT: movdqa %xmm5, %xmm1 721; SSE2-NEXT: retq 722; 723; SSSE3-LABEL: sext_16i16_to_16i32: 724; SSSE3: # %bb.0: # %entry 725; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 726; SSSE3-NEXT: psrad $16, %xmm4 727; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 728; SSSE3-NEXT: psrad $16, 
%xmm5 729; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 730; SSSE3-NEXT: psrad $16, %xmm2 731; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 732; SSSE3-NEXT: psrad $16, %xmm3 733; SSSE3-NEXT: movdqa %xmm4, %xmm0 734; SSSE3-NEXT: movdqa %xmm5, %xmm1 735; SSSE3-NEXT: retq 736; 737; SSE41-LABEL: sext_16i16_to_16i32: 738; SSE41: # %bb.0: # %entry 739; SSE41-NEXT: pmovsxwd %xmm0, %xmm5 740; SSE41-NEXT: pmovsxwd %xmm1, %xmm2 741; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 742; SSE41-NEXT: pmovsxwd %xmm0, %xmm4 743; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 744; SSE41-NEXT: pmovsxwd %xmm0, %xmm3 745; SSE41-NEXT: movdqa %xmm5, %xmm0 746; SSE41-NEXT: movdqa %xmm4, %xmm1 747; SSE41-NEXT: retq 748; 749; AVX1-LABEL: sext_16i16_to_16i32: 750; AVX1: # %bb.0: # %entry 751; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 752; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 753; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2 754; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 755; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 756; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 757; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 758; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 759; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 760; AVX1-NEXT: vmovaps %ymm2, %ymm0 761; AVX1-NEXT: retq 762; 763; AVX2-LABEL: sext_16i16_to_16i32: 764; AVX2: # %bb.0: # %entry 765; AVX2-NEXT: vpmovsxwd %xmm0, %ymm2 766; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 767; AVX2-NEXT: vpmovsxwd %xmm0, %ymm1 768; AVX2-NEXT: vmovdqa %ymm2, %ymm0 769; AVX2-NEXT: retq 770; 771; AVX512-LABEL: sext_16i16_to_16i32: 772; AVX512: # %bb.0: # %entry 773; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 774; AVX512-NEXT: retq 775; 776; X32-SSE41-LABEL: sext_16i16_to_16i32: 777; X32-SSE41: # %bb.0: # %entry 778; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm5 779; X32-SSE41-NEXT: pmovsxwd %xmm1, %xmm2 780; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 781; 
X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm4 782; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 783; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm3 784; X32-SSE41-NEXT: movdqa %xmm5, %xmm0 785; X32-SSE41-NEXT: movdqa %xmm4, %xmm1 786; X32-SSE41-NEXT: retl 787entry: 788 %B = sext <16 x i16> %A to <16 x i32> 789 ret <16 x i32> %B 790} 791 792define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 793; SSE2-LABEL: sext_8i16_to_2i64: 794; SSE2: # %bb.0: # %entry 795; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 796; SSE2-NEXT: movdqa %xmm0, %xmm1 797; SSE2-NEXT: psrad $31, %xmm1 798; SSE2-NEXT: psrad $16, %xmm0 799; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 800; SSE2-NEXT: retq 801; 802; SSSE3-LABEL: sext_8i16_to_2i64: 803; SSSE3: # %bb.0: # %entry 804; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 805; SSSE3-NEXT: movdqa %xmm0, %xmm1 806; SSSE3-NEXT: psrad $31, %xmm1 807; SSSE3-NEXT: psrad $16, %xmm0 808; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 809; SSSE3-NEXT: retq 810; 811; SSE41-LABEL: sext_8i16_to_2i64: 812; SSE41: # %bb.0: # %entry 813; SSE41-NEXT: pmovsxwq %xmm0, %xmm0 814; SSE41-NEXT: retq 815; 816; AVX-LABEL: sext_8i16_to_2i64: 817; AVX: # %bb.0: # %entry 818; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 819; AVX-NEXT: retq 820; 821; X32-SSE41-LABEL: sext_8i16_to_2i64: 822; X32-SSE41: # %bb.0: # %entry 823; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm0 824; X32-SSE41-NEXT: retl 825entry: 826 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 827 %C = sext <2 x i16> %B to <2 x i64> 828 ret <2 x i64> %C 829} 830 831define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 832; SSE2-LABEL: sext_8i16_to_4i64: 833; SSE2: # %bb.0: # %entry 834; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 835; SSE2-NEXT: movdqa %xmm2, %xmm1 836; SSE2-NEXT: psrad $31, %xmm1 837; SSE2-NEXT: psrad $16, 
%xmm2 838; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 839; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] 840; SSE2-NEXT: movdqa %xmm1, %xmm0 841; SSE2-NEXT: psrad $31, %xmm0 842; SSE2-NEXT: psrad $16, %xmm1 843; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 844; SSE2-NEXT: movdqa %xmm2, %xmm0 845; SSE2-NEXT: retq 846; 847; SSSE3-LABEL: sext_8i16_to_4i64: 848; SSSE3: # %bb.0: # %entry 849; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 850; SSSE3-NEXT: movdqa %xmm2, %xmm1 851; SSSE3-NEXT: psrad $31, %xmm1 852; SSSE3-NEXT: psrad $16, %xmm2 853; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 854; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] 855; SSSE3-NEXT: movdqa %xmm1, %xmm0 856; SSSE3-NEXT: psrad $31, %xmm0 857; SSSE3-NEXT: psrad $16, %xmm1 858; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 859; SSSE3-NEXT: movdqa %xmm2, %xmm0 860; SSSE3-NEXT: retq 861; 862; SSE41-LABEL: sext_8i16_to_4i64: 863; SSE41: # %bb.0: # %entry 864; SSE41-NEXT: pmovsxwq %xmm0, %xmm2 865; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 866; SSE41-NEXT: pmovsxwq %xmm0, %xmm1 867; SSE41-NEXT: movdqa %xmm2, %xmm0 868; SSE41-NEXT: retq 869; 870; AVX1-LABEL: sext_8i16_to_4i64: 871; AVX1: # %bb.0: # %entry 872; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 873; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 874; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 875; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 876; AVX1-NEXT: retq 877; 878; AVX2-LABEL: sext_8i16_to_4i64: 879; AVX2: # %bb.0: # %entry 880; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 881; AVX2-NEXT: retq 882; 883; AVX512-LABEL: sext_8i16_to_4i64: 884; AVX512: # %bb.0: # %entry 885; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0 886; AVX512-NEXT: retq 887; 888; X32-SSE41-LABEL: sext_8i16_to_4i64: 889; X32-SSE41: # %bb.0: # %entry 890; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm2 891; X32-SSE41-NEXT: pshufd 
{{.*#+}} xmm0 = xmm0[1,1,2,3] 892; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm1 893; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 894; X32-SSE41-NEXT: retl 895entry: 896 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 897 %C = sext <4 x i16> %B to <4 x i64> 898 ret <4 x i64> %C 899} 900 901define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 902; SSE2-LABEL: sext_8i16_to_8i64: 903; SSE2: # %bb.0: # %entry 904; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 905; SSE2-NEXT: movdqa %xmm4, %xmm1 906; SSE2-NEXT: psrad $31, %xmm1 907; SSE2-NEXT: psrad $16, %xmm4 908; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 909; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 910; SSE2-NEXT: movdqa %xmm2, %xmm1 911; SSE2-NEXT: psrad $31, %xmm1 912; SSE2-NEXT: psrad $16, %xmm2 913; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 914; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] 915; SSE2-NEXT: movdqa %xmm1, %xmm3 916; SSE2-NEXT: psrad $31, %xmm3 917; SSE2-NEXT: psrad $16, %xmm1 918; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 919; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 920; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7] 921; SSE2-NEXT: movdqa %xmm3, %xmm0 922; SSE2-NEXT: psrad $31, %xmm0 923; SSE2-NEXT: psrad $16, %xmm3 924; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 925; SSE2-NEXT: movdqa %xmm4, %xmm0 926; SSE2-NEXT: retq 927; 928; SSSE3-LABEL: sext_8i16_to_8i64: 929; SSSE3: # %bb.0: # %entry 930; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 931; SSSE3-NEXT: movdqa %xmm4, %xmm1 932; SSSE3-NEXT: psrad $31, %xmm1 933; SSSE3-NEXT: psrad $16, %xmm4 934; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 935; SSSE3-NEXT: punpckhwd 
{{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 936; SSSE3-NEXT: movdqa %xmm2, %xmm1 937; SSSE3-NEXT: psrad $31, %xmm1 938; SSSE3-NEXT: psrad $16, %xmm2 939; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 940; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] 941; SSSE3-NEXT: movdqa %xmm1, %xmm3 942; SSSE3-NEXT: psrad $31, %xmm3 943; SSSE3-NEXT: psrad $16, %xmm1 944; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 945; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 946; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7] 947; SSSE3-NEXT: movdqa %xmm3, %xmm0 948; SSSE3-NEXT: psrad $31, %xmm0 949; SSSE3-NEXT: psrad $16, %xmm3 950; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 951; SSSE3-NEXT: movdqa %xmm4, %xmm0 952; SSSE3-NEXT: retq 953; 954; SSE41-LABEL: sext_8i16_to_8i64: 955; SSE41: # %bb.0: # %entry 956; SSE41-NEXT: pmovsxwq %xmm0, %xmm4 957; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 958; SSE41-NEXT: pmovsxwq %xmm1, %xmm1 959; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 960; SSE41-NEXT: pmovsxwq %xmm2, %xmm2 961; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 962; SSE41-NEXT: pmovsxwq %xmm0, %xmm3 963; SSE41-NEXT: movdqa %xmm4, %xmm0 964; SSE41-NEXT: retq 965; 966; AVX1-LABEL: sext_8i16_to_8i64: 967; AVX1: # %bb.0: # %entry 968; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 969; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 970; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2 971; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 972; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 973; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1 974; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 975; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 976; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 977; AVX1-NEXT: vmovaps %ymm2, %ymm0 978; AVX1-NEXT: retq 979; 980; AVX2-LABEL: sext_8i16_to_8i64: 981; AVX2: # %bb.0: # %entry 982; AVX2-NEXT: vpmovsxwq %xmm0, %ymm2 983; AVX2-NEXT: vpshufd {{.*#+}} 
xmm0 = xmm0[2,3,0,1] 984; AVX2-NEXT: vpmovsxwq %xmm0, %ymm1 985; AVX2-NEXT: vmovdqa %ymm2, %ymm0 986; AVX2-NEXT: retq 987; 988; AVX512-LABEL: sext_8i16_to_8i64: 989; AVX512: # %bb.0: # %entry 990; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0 991; AVX512-NEXT: retq 992; 993; X32-SSE41-LABEL: sext_8i16_to_8i64: 994; X32-SSE41: # %bb.0: # %entry 995; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm4 996; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 997; X32-SSE41-NEXT: pmovsxwq %xmm1, %xmm1 998; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 999; X32-SSE41-NEXT: pmovsxwq %xmm2, %xmm2 1000; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 1001; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm3 1002; X32-SSE41-NEXT: movdqa %xmm4, %xmm0 1003; X32-SSE41-NEXT: retl 1004entry: 1005 %B = sext <8 x i16> %A to <8 x i64> 1006 ret <8 x i64> %B 1007} 1008 1009define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1010; SSE2-LABEL: sext_4i32_to_2i64: 1011; SSE2: # %bb.0: # %entry 1012; SSE2-NEXT: movdqa %xmm0, %xmm1 1013; SSE2-NEXT: psrad $31, %xmm1 1014; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1015; SSE2-NEXT: retq 1016; 1017; SSSE3-LABEL: sext_4i32_to_2i64: 1018; SSSE3: # %bb.0: # %entry 1019; SSSE3-NEXT: movdqa %xmm0, %xmm1 1020; SSSE3-NEXT: psrad $31, %xmm1 1021; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1022; SSSE3-NEXT: retq 1023; 1024; SSE41-LABEL: sext_4i32_to_2i64: 1025; SSE41: # %bb.0: # %entry 1026; SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1027; SSE41-NEXT: retq 1028; 1029; AVX-LABEL: sext_4i32_to_2i64: 1030; AVX: # %bb.0: # %entry 1031; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 1032; AVX-NEXT: retq 1033; 1034; X32-SSE41-LABEL: sext_4i32_to_2i64: 1035; X32-SSE41: # %bb.0: # %entry 1036; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1037; X32-SSE41-NEXT: retl 1038entry: 1039 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 1040 %C = sext <2 x i32> %B to <2 x i64> 1041 ret <2 x i64> %C 1042} 1043 
1044define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1045; SSE2-LABEL: sext_4i32_to_4i64: 1046; SSE2: # %bb.0: # %entry 1047; SSE2-NEXT: movdqa %xmm0, %xmm2 1048; SSE2-NEXT: psrad $31, %xmm2 1049; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1050; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1051; SSE2-NEXT: movdqa %xmm1, %xmm2 1052; SSE2-NEXT: psrad $31, %xmm2 1053; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1054; SSE2-NEXT: retq 1055; 1056; SSSE3-LABEL: sext_4i32_to_4i64: 1057; SSSE3: # %bb.0: # %entry 1058; SSSE3-NEXT: movdqa %xmm0, %xmm2 1059; SSSE3-NEXT: psrad $31, %xmm2 1060; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1061; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1062; SSSE3-NEXT: movdqa %xmm1, %xmm2 1063; SSSE3-NEXT: psrad $31, %xmm2 1064; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1065; SSSE3-NEXT: retq 1066; 1067; SSE41-LABEL: sext_4i32_to_4i64: 1068; SSE41: # %bb.0: # %entry 1069; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 1070; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1071; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1072; SSE41-NEXT: movdqa %xmm2, %xmm0 1073; SSE41-NEXT: retq 1074; 1075; AVX1-LABEL: sext_4i32_to_4i64: 1076; AVX1: # %bb.0: # %entry 1077; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1078; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1079; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1080; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1081; AVX1-NEXT: retq 1082; 1083; AVX2-LABEL: sext_4i32_to_4i64: 1084; AVX2: # %bb.0: # %entry 1085; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 1086; AVX2-NEXT: retq 1087; 1088; AVX512-LABEL: sext_4i32_to_4i64: 1089; AVX512: # %bb.0: # %entry 1090; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 1091; AVX512-NEXT: retq 1092; 1093; X32-SSE41-LABEL: sext_4i32_to_4i64: 1094; X32-SSE41: # %bb.0: # %entry 1095; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 1096; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1097; 
X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1098; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 1099; X32-SSE41-NEXT: retl 1100entry: 1101 %B = sext <4 x i32> %A to <4 x i64> 1102 ret <4 x i64> %B 1103} 1104 1105define <8 x i64> @sext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 1106; SSE2-LABEL: sext_8i32_to_8i64: 1107; SSE2: # %bb.0: # %entry 1108; SSE2-NEXT: movdqa %xmm1, %xmm2 1109; SSE2-NEXT: movdqa %xmm0, %xmm3 1110; SSE2-NEXT: psrad $31, %xmm3 1111; SSE2-NEXT: movdqa %xmm1, %xmm4 1112; SSE2-NEXT: psrad $31, %xmm4 1113; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1114; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1115; SSE2-NEXT: movdqa %xmm1, %xmm3 1116; SSE2-NEXT: psrad $31, %xmm3 1117; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1118; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1] 1119; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1120; SSE2-NEXT: movdqa %xmm3, %xmm4 1121; SSE2-NEXT: psrad $31, %xmm4 1122; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1123; SSE2-NEXT: retq 1124; 1125; SSSE3-LABEL: sext_8i32_to_8i64: 1126; SSSE3: # %bb.0: # %entry 1127; SSSE3-NEXT: movdqa %xmm1, %xmm2 1128; SSSE3-NEXT: movdqa %xmm0, %xmm3 1129; SSSE3-NEXT: psrad $31, %xmm3 1130; SSSE3-NEXT: movdqa %xmm1, %xmm4 1131; SSSE3-NEXT: psrad $31, %xmm4 1132; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1133; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1134; SSSE3-NEXT: movdqa %xmm1, %xmm3 1135; SSSE3-NEXT: psrad $31, %xmm3 1136; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1137; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1] 1138; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1139; SSSE3-NEXT: movdqa %xmm3, %xmm4 1140; SSSE3-NEXT: psrad $31, %xmm4 1141; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1142; SSSE3-NEXT: retq 1143; 1144; SSE41-LABEL: sext_8i32_to_8i64: 1145; SSE41: # 
%bb.0: # %entry 1146; SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1147; SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1148; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1149; SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1150; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1151; SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1152; SSE41-NEXT: movdqa %xmm5, %xmm0 1153; SSE41-NEXT: movdqa %xmm4, %xmm1 1154; SSE41-NEXT: retq 1155; 1156; AVX1-LABEL: sext_8i32_to_8i64: 1157; AVX1: # %bb.0: # %entry 1158; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1159; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 1160; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 1161; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 1162; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1163; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1164; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1165; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1166; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1167; AVX1-NEXT: vmovaps %ymm2, %ymm0 1168; AVX1-NEXT: retq 1169; 1170; AVX2-LABEL: sext_8i32_to_8i64: 1171; AVX2: # %bb.0: # %entry 1172; AVX2-NEXT: vpmovsxdq %xmm0, %ymm2 1173; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1174; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1 1175; AVX2-NEXT: vmovdqa %ymm2, %ymm0 1176; AVX2-NEXT: retq 1177; 1178; AVX512-LABEL: sext_8i32_to_8i64: 1179; AVX512: # %bb.0: # %entry 1180; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0 1181; AVX512-NEXT: retq 1182; 1183; X32-SSE41-LABEL: sext_8i32_to_8i64: 1184; X32-SSE41: # %bb.0: # %entry 1185; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1186; X32-SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1187; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1188; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1189; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1190; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1191; X32-SSE41-NEXT: movdqa %xmm5, %xmm0 1192; X32-SSE41-NEXT: movdqa %xmm4, %xmm1 1193; X32-SSE41-NEXT: retl 1194entry: 1195 %B = sext <8 x i32> %A to <8 x i64> 1196 ret <8 x i64> %B 1197} 1198 1199define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { 1200; SSE-LABEL: 
load_sext_2i1_to_2i64: 1201; SSE: # %bb.0: # %entry 1202; SSE-NEXT: movzbl (%rdi), %eax 1203; SSE-NEXT: movq %rax, %rcx 1204; SSE-NEXT: shlq $62, %rcx 1205; SSE-NEXT: sarq $63, %rcx 1206; SSE-NEXT: movq %rcx, %xmm1 1207; SSE-NEXT: shlq $63, %rax 1208; SSE-NEXT: sarq $63, %rax 1209; SSE-NEXT: movq %rax, %xmm0 1210; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1211; SSE-NEXT: retq 1212; 1213; AVX1-LABEL: load_sext_2i1_to_2i64: 1214; AVX1: # %bb.0: # %entry 1215; AVX1-NEXT: movzbl (%rdi), %eax 1216; AVX1-NEXT: movq %rax, %rcx 1217; AVX1-NEXT: shlq $62, %rcx 1218; AVX1-NEXT: sarq $63, %rcx 1219; AVX1-NEXT: vmovq %rcx, %xmm0 1220; AVX1-NEXT: shlq $63, %rax 1221; AVX1-NEXT: sarq $63, %rax 1222; AVX1-NEXT: vmovq %rax, %xmm1 1223; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1224; AVX1-NEXT: retq 1225; 1226; AVX2-LABEL: load_sext_2i1_to_2i64: 1227; AVX2: # %bb.0: # %entry 1228; AVX2-NEXT: movzbl (%rdi), %eax 1229; AVX2-NEXT: movq %rax, %rcx 1230; AVX2-NEXT: shlq $62, %rcx 1231; AVX2-NEXT: sarq $63, %rcx 1232; AVX2-NEXT: vmovq %rcx, %xmm0 1233; AVX2-NEXT: shlq $63, %rax 1234; AVX2-NEXT: sarq $63, %rax 1235; AVX2-NEXT: vmovq %rax, %xmm1 1236; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1237; AVX2-NEXT: retq 1238; 1239; AVX512F-LABEL: load_sext_2i1_to_2i64: 1240; AVX512F: # %bb.0: # %entry 1241; AVX512F-NEXT: movzbl (%rdi), %eax 1242; AVX512F-NEXT: kmovw %eax, %k1 1243; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1244; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1245; AVX512F-NEXT: vzeroupper 1246; AVX512F-NEXT: retq 1247; 1248; AVX512BW-LABEL: load_sext_2i1_to_2i64: 1249; AVX512BW: # %bb.0: # %entry 1250; AVX512BW-NEXT: movzbl (%rdi), %eax 1251; AVX512BW-NEXT: kmovd %eax, %k1 1252; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1253; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1254; AVX512BW-NEXT: vzeroupper 1255; AVX512BW-NEXT: retq 1256; 1257; X32-SSE41-LABEL: load_sext_2i1_to_2i64: 
1258; X32-SSE41: # %bb.0: # %entry 1259; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1260; X32-SSE41-NEXT: movzbl (%eax), %eax 1261; X32-SSE41-NEXT: movl %eax, %ecx 1262; X32-SSE41-NEXT: shll $31, %ecx 1263; X32-SSE41-NEXT: sarl $31, %ecx 1264; X32-SSE41-NEXT: movd %ecx, %xmm0 1265; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1266; X32-SSE41-NEXT: shll $30, %eax 1267; X32-SSE41-NEXT: sarl $31, %eax 1268; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0 1269; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1270; X32-SSE41-NEXT: retl 1271entry: 1272 %X = load <2 x i1>, <2 x i1>* %ptr 1273 %Y = sext <2 x i1> %X to <2 x i64> 1274 ret <2 x i64> %Y 1275} 1276 1277define <2 x i64> @load_sext_2i8_to_2i64(<2 x i8> *%ptr) { 1278; SSE2-LABEL: load_sext_2i8_to_2i64: 1279; SSE2: # %bb.0: # %entry 1280; SSE2-NEXT: movzwl (%rdi), %eax 1281; SSE2-NEXT: movd %eax, %xmm0 1282; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1283; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1284; SSE2-NEXT: movdqa %xmm0, %xmm1 1285; SSE2-NEXT: psrad $31, %xmm1 1286; SSE2-NEXT: psrad $24, %xmm0 1287; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1288; SSE2-NEXT: retq 1289; 1290; SSSE3-LABEL: load_sext_2i8_to_2i64: 1291; SSSE3: # %bb.0: # %entry 1292; SSSE3-NEXT: movzwl (%rdi), %eax 1293; SSSE3-NEXT: movd %eax, %xmm0 1294; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,0,u,u,u,1,u,u,u,u,u,u,u,u] 1295; SSSE3-NEXT: movdqa %xmm0, %xmm1 1296; SSSE3-NEXT: psrad $31, %xmm1 1297; SSSE3-NEXT: psrad $24, %xmm0 1298; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1299; SSSE3-NEXT: retq 1300; 1301; SSE41-LABEL: load_sext_2i8_to_2i64: 1302; SSE41: # %bb.0: # %entry 1303; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1304; SSE41-NEXT: retq 1305; 1306; AVX-LABEL: load_sext_2i8_to_2i64: 1307; AVX: # %bb.0: # %entry 1308; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 1309; AVX-NEXT: retq 1310; 1311; X32-SSE41-LABEL: load_sext_2i8_to_2i64: 1312; X32-SSE41: # %bb.0: # %entry 1313; 
X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1314; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1315; X32-SSE41-NEXT: retl 1316entry: 1317 %X = load <2 x i8>, <2 x i8>* %ptr 1318 %Y = sext <2 x i8> %X to <2 x i64> 1319 ret <2 x i64> %Y 1320} 1321 1322define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { 1323; SSE2-LABEL: load_sext_4i1_to_4i32: 1324; SSE2: # %bb.0: # %entry 1325; SSE2-NEXT: movzbl (%rdi), %eax 1326; SSE2-NEXT: movq %rax, %rcx 1327; SSE2-NEXT: shlq $60, %rcx 1328; SSE2-NEXT: sarq $63, %rcx 1329; SSE2-NEXT: movd %ecx, %xmm0 1330; SSE2-NEXT: movq %rax, %rcx 1331; SSE2-NEXT: shlq $61, %rcx 1332; SSE2-NEXT: sarq $63, %rcx 1333; SSE2-NEXT: movd %ecx, %xmm1 1334; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1335; SSE2-NEXT: movq %rax, %rcx 1336; SSE2-NEXT: shlq $62, %rcx 1337; SSE2-NEXT: sarq $63, %rcx 1338; SSE2-NEXT: movd %ecx, %xmm2 1339; SSE2-NEXT: shlq $63, %rax 1340; SSE2-NEXT: sarq $63, %rax 1341; SSE2-NEXT: movd %eax, %xmm0 1342; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1343; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1344; SSE2-NEXT: retq 1345; 1346; SSSE3-LABEL: load_sext_4i1_to_4i32: 1347; SSSE3: # %bb.0: # %entry 1348; SSSE3-NEXT: movzbl (%rdi), %eax 1349; SSSE3-NEXT: movq %rax, %rcx 1350; SSSE3-NEXT: shlq $60, %rcx 1351; SSSE3-NEXT: sarq $63, %rcx 1352; SSSE3-NEXT: movd %ecx, %xmm0 1353; SSSE3-NEXT: movq %rax, %rcx 1354; SSSE3-NEXT: shlq $61, %rcx 1355; SSSE3-NEXT: sarq $63, %rcx 1356; SSSE3-NEXT: movd %ecx, %xmm1 1357; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1358; SSSE3-NEXT: movq %rax, %rcx 1359; SSSE3-NEXT: shlq $62, %rcx 1360; SSSE3-NEXT: sarq $63, %rcx 1361; SSSE3-NEXT: movd %ecx, %xmm2 1362; SSSE3-NEXT: shlq $63, %rax 1363; SSSE3-NEXT: sarq $63, %rax 1364; SSSE3-NEXT: movd %eax, %xmm0 1365; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1366; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1367; SSSE3-NEXT: retq 1368; 
1369; SSE41-LABEL: load_sext_4i1_to_4i32: 1370; SSE41: # %bb.0: # %entry 1371; SSE41-NEXT: movzbl (%rdi), %eax 1372; SSE41-NEXT: movq %rax, %rcx 1373; SSE41-NEXT: shlq $62, %rcx 1374; SSE41-NEXT: sarq $63, %rcx 1375; SSE41-NEXT: movq %rax, %rdx 1376; SSE41-NEXT: shlq $63, %rdx 1377; SSE41-NEXT: sarq $63, %rdx 1378; SSE41-NEXT: movd %edx, %xmm0 1379; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1380; SSE41-NEXT: movq %rax, %rcx 1381; SSE41-NEXT: shlq $61, %rcx 1382; SSE41-NEXT: sarq $63, %rcx 1383; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 1384; SSE41-NEXT: shlq $60, %rax 1385; SSE41-NEXT: sarq $63, %rax 1386; SSE41-NEXT: pinsrd $3, %eax, %xmm0 1387; SSE41-NEXT: retq 1388; 1389; AVX1-LABEL: load_sext_4i1_to_4i32: 1390; AVX1: # %bb.0: # %entry 1391; AVX1-NEXT: movzbl (%rdi), %eax 1392; AVX1-NEXT: movq %rax, %rcx 1393; AVX1-NEXT: shlq $62, %rcx 1394; AVX1-NEXT: sarq $63, %rcx 1395; AVX1-NEXT: movq %rax, %rdx 1396; AVX1-NEXT: shlq $63, %rdx 1397; AVX1-NEXT: sarq $63, %rdx 1398; AVX1-NEXT: vmovd %edx, %xmm0 1399; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1400; AVX1-NEXT: movq %rax, %rcx 1401; AVX1-NEXT: shlq $61, %rcx 1402; AVX1-NEXT: sarq $63, %rcx 1403; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1404; AVX1-NEXT: shlq $60, %rax 1405; AVX1-NEXT: sarq $63, %rax 1406; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1407; AVX1-NEXT: retq 1408; 1409; AVX2-LABEL: load_sext_4i1_to_4i32: 1410; AVX2: # %bb.0: # %entry 1411; AVX2-NEXT: movzbl (%rdi), %eax 1412; AVX2-NEXT: movq %rax, %rcx 1413; AVX2-NEXT: shlq $62, %rcx 1414; AVX2-NEXT: sarq $63, %rcx 1415; AVX2-NEXT: movq %rax, %rdx 1416; AVX2-NEXT: shlq $63, %rdx 1417; AVX2-NEXT: sarq $63, %rdx 1418; AVX2-NEXT: vmovd %edx, %xmm0 1419; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1420; AVX2-NEXT: movq %rax, %rcx 1421; AVX2-NEXT: shlq $61, %rcx 1422; AVX2-NEXT: sarq $63, %rcx 1423; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1424; AVX2-NEXT: shlq $60, %rax 1425; AVX2-NEXT: sarq $63, %rax 1426; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1427; AVX2-NEXT: retq 
1428; 1429; AVX512F-LABEL: load_sext_4i1_to_4i32: 1430; AVX512F: # %bb.0: # %entry 1431; AVX512F-NEXT: movzbl (%rdi), %eax 1432; AVX512F-NEXT: kmovw %eax, %k1 1433; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1434; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1435; AVX512F-NEXT: vzeroupper 1436; AVX512F-NEXT: retq 1437; 1438; AVX512BW-LABEL: load_sext_4i1_to_4i32: 1439; AVX512BW: # %bb.0: # %entry 1440; AVX512BW-NEXT: movzbl (%rdi), %eax 1441; AVX512BW-NEXT: kmovd %eax, %k1 1442; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1443; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1444; AVX512BW-NEXT: vzeroupper 1445; AVX512BW-NEXT: retq 1446; 1447; X32-SSE41-LABEL: load_sext_4i1_to_4i32: 1448; X32-SSE41: # %bb.0: # %entry 1449; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1450; X32-SSE41-NEXT: movl (%eax), %eax 1451; X32-SSE41-NEXT: movl %eax, %ecx 1452; X32-SSE41-NEXT: shll $30, %ecx 1453; X32-SSE41-NEXT: sarl $31, %ecx 1454; X32-SSE41-NEXT: movl %eax, %edx 1455; X32-SSE41-NEXT: shll $31, %edx 1456; X32-SSE41-NEXT: sarl $31, %edx 1457; X32-SSE41-NEXT: movd %edx, %xmm0 1458; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1459; X32-SSE41-NEXT: movl %eax, %ecx 1460; X32-SSE41-NEXT: shll $29, %ecx 1461; X32-SSE41-NEXT: sarl $31, %ecx 1462; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0 1463; X32-SSE41-NEXT: shll $28, %eax 1464; X32-SSE41-NEXT: sarl $31, %eax 1465; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1466; X32-SSE41-NEXT: retl 1467entry: 1468 %X = load <4 x i1>, <4 x i1>* %ptr 1469 %Y = sext <4 x i1> %X to <4 x i32> 1470 ret <4 x i32> %Y 1471} 1472 1473define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) { 1474; SSE2-LABEL: load_sext_4i8_to_4i32: 1475; SSE2: # %bb.0: # %entry 1476; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1477; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1478; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1479; SSE2-NEXT: psrad $24, %xmm0 1480; 
SSE2-NEXT: retq 1481; 1482; SSSE3-LABEL: load_sext_4i8_to_4i32: 1483; SSSE3: # %bb.0: # %entry 1484; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1485; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1486; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1487; SSSE3-NEXT: psrad $24, %xmm0 1488; SSSE3-NEXT: retq 1489; 1490; SSE41-LABEL: load_sext_4i8_to_4i32: 1491; SSE41: # %bb.0: # %entry 1492; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 1493; SSE41-NEXT: retq 1494; 1495; AVX-LABEL: load_sext_4i8_to_4i32: 1496; AVX: # %bb.0: # %entry 1497; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 1498; AVX-NEXT: retq 1499; 1500; X32-SSE41-LABEL: load_sext_4i8_to_4i32: 1501; X32-SSE41: # %bb.0: # %entry 1502; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1503; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 1504; X32-SSE41-NEXT: retl 1505entry: 1506 %X = load <4 x i8>, <4 x i8>* %ptr 1507 %Y = sext <4 x i8> %X to <4 x i32> 1508 ret <4 x i32> %Y 1509} 1510 1511define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { 1512; SSE2-LABEL: load_sext_4i1_to_4i64: 1513; SSE2: # %bb.0: # %entry 1514; SSE2-NEXT: movl (%rdi), %eax 1515; SSE2-NEXT: movl %eax, %ecx 1516; SSE2-NEXT: shrl $3, %ecx 1517; SSE2-NEXT: movd %ecx, %xmm0 1518; SSE2-NEXT: movl %eax, %ecx 1519; SSE2-NEXT: shrl $2, %ecx 1520; SSE2-NEXT: movd %ecx, %xmm1 1521; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1522; SSE2-NEXT: movd %eax, %xmm2 1523; SSE2-NEXT: shrl %eax 1524; SSE2-NEXT: movd %eax, %xmm0 1525; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1526; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] 1527; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 1528; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] 1529; SSE2-NEXT: psllq $63, %xmm0 1530; SSE2-NEXT: psrad $31, %xmm0 1531; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1532; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] 1533; SSE2-NEXT: psllq $63, %xmm1 1534; SSE2-NEXT: psrad $31, %xmm1 1535; SSE2-NEXT: 
pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1536; SSE2-NEXT: retq 1537; 1538; SSSE3-LABEL: load_sext_4i1_to_4i64: 1539; SSSE3: # %bb.0: # %entry 1540; SSSE3-NEXT: movl (%rdi), %eax 1541; SSSE3-NEXT: movl %eax, %ecx 1542; SSSE3-NEXT: shrl $3, %ecx 1543; SSSE3-NEXT: movd %ecx, %xmm0 1544; SSSE3-NEXT: movl %eax, %ecx 1545; SSSE3-NEXT: shrl $2, %ecx 1546; SSSE3-NEXT: movd %ecx, %xmm1 1547; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1548; SSSE3-NEXT: movd %eax, %xmm2 1549; SSSE3-NEXT: shrl %eax 1550; SSSE3-NEXT: movd %eax, %xmm0 1551; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1552; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] 1553; SSSE3-NEXT: pand {{.*}}(%rip), %xmm2 1554; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] 1555; SSSE3-NEXT: psllq $63, %xmm0 1556; SSSE3-NEXT: psrad $31, %xmm0 1557; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1558; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] 1559; SSSE3-NEXT: psllq $63, %xmm1 1560; SSSE3-NEXT: psrad $31, %xmm1 1561; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1562; SSSE3-NEXT: retq 1563; 1564; SSE41-LABEL: load_sext_4i1_to_4i64: 1565; SSE41: # %bb.0: # %entry 1566; SSE41-NEXT: movl (%rdi), %eax 1567; SSE41-NEXT: movl %eax, %ecx 1568; SSE41-NEXT: shrl %ecx 1569; SSE41-NEXT: movd %eax, %xmm1 1570; SSE41-NEXT: pinsrd $1, %ecx, %xmm1 1571; SSE41-NEXT: movl %eax, %ecx 1572; SSE41-NEXT: shrl $2, %ecx 1573; SSE41-NEXT: pinsrd $2, %ecx, %xmm1 1574; SSE41-NEXT: shrl $3, %eax 1575; SSE41-NEXT: pinsrd $3, %eax, %xmm1 1576; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 1577; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1578; SSE41-NEXT: psllq $63, %xmm0 1579; SSE41-NEXT: psrad $31, %xmm0 1580; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1581; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1582; SSE41-NEXT: psllq $63, %xmm1 1583; SSE41-NEXT: psrad $31, %xmm1 1584; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1585; SSE41-NEXT: retq 1586; 1587; 
AVX1-LABEL: load_sext_4i1_to_4i64: 1588; AVX1: # %bb.0: # %entry 1589; AVX1-NEXT: movzbl (%rdi), %eax 1590; AVX1-NEXT: movq %rax, %rcx 1591; AVX1-NEXT: shlq $62, %rcx 1592; AVX1-NEXT: sarq $63, %rcx 1593; AVX1-NEXT: movq %rax, %rdx 1594; AVX1-NEXT: shlq $63, %rdx 1595; AVX1-NEXT: sarq $63, %rdx 1596; AVX1-NEXT: vmovd %edx, %xmm0 1597; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1598; AVX1-NEXT: movq %rax, %rcx 1599; AVX1-NEXT: shlq $61, %rcx 1600; AVX1-NEXT: sarq $63, %rcx 1601; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1602; AVX1-NEXT: shlq $60, %rax 1603; AVX1-NEXT: sarq $63, %rax 1604; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1605; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1606; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1607; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1608; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1609; AVX1-NEXT: retq 1610; 1611; AVX2-LABEL: load_sext_4i1_to_4i64: 1612; AVX2: # %bb.0: # %entry 1613; AVX2-NEXT: movzbl (%rdi), %eax 1614; AVX2-NEXT: movq %rax, %rcx 1615; AVX2-NEXT: shlq $60, %rcx 1616; AVX2-NEXT: sarq $63, %rcx 1617; AVX2-NEXT: vmovq %rcx, %xmm0 1618; AVX2-NEXT: movq %rax, %rcx 1619; AVX2-NEXT: shlq $61, %rcx 1620; AVX2-NEXT: sarq $63, %rcx 1621; AVX2-NEXT: vmovq %rcx, %xmm1 1622; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1623; AVX2-NEXT: movq %rax, %rcx 1624; AVX2-NEXT: shlq $62, %rcx 1625; AVX2-NEXT: sarq $63, %rcx 1626; AVX2-NEXT: vmovq %rcx, %xmm1 1627; AVX2-NEXT: shlq $63, %rax 1628; AVX2-NEXT: sarq $63, %rax 1629; AVX2-NEXT: vmovq %rax, %xmm2 1630; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1631; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1632; AVX2-NEXT: retq 1633; 1634; AVX512F-LABEL: load_sext_4i1_to_4i64: 1635; AVX512F: # %bb.0: # %entry 1636; AVX512F-NEXT: movzbl (%rdi), %eax 1637; AVX512F-NEXT: kmovw %eax, %k1 1638; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1639; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1640; AVX512F-NEXT: retq 1641; 1642; AVX512BW-LABEL: 
load_sext_4i1_to_4i64: 1643; AVX512BW: # %bb.0: # %entry 1644; AVX512BW-NEXT: movzbl (%rdi), %eax 1645; AVX512BW-NEXT: kmovd %eax, %k1 1646; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1647; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1648; AVX512BW-NEXT: retq 1649; 1650; X32-SSE41-LABEL: load_sext_4i1_to_4i64: 1651; X32-SSE41: # %bb.0: # %entry 1652; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1653; X32-SSE41-NEXT: movzbl (%eax), %eax 1654; X32-SSE41-NEXT: movl %eax, %ecx 1655; X32-SSE41-NEXT: shrl %ecx 1656; X32-SSE41-NEXT: movd %eax, %xmm1 1657; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm1 1658; X32-SSE41-NEXT: movl %eax, %ecx 1659; X32-SSE41-NEXT: shrl $2, %ecx 1660; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm1 1661; X32-SSE41-NEXT: shrl $3, %eax 1662; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm1 1663; X32-SSE41-NEXT: pand {{\.LCPI.*}}, %xmm1 1664; X32-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1665; X32-SSE41-NEXT: psllq $63, %xmm0 1666; X32-SSE41-NEXT: psrad $31, %xmm0 1667; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1668; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1669; X32-SSE41-NEXT: psllq $63, %xmm1 1670; X32-SSE41-NEXT: psrad $31, %xmm1 1671; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1672; X32-SSE41-NEXT: retl 1673entry: 1674 %X = load <4 x i1>, <4 x i1>* %ptr 1675 %Y = sext <4 x i1> %X to <4 x i64> 1676 ret <4 x i64> %Y 1677} 1678 1679define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { 1680; SSE2-LABEL: load_sext_4i8_to_4i64: 1681; SSE2: # %bb.0: # %entry 1682; SSE2-NEXT: movsbq 1(%rdi), %rax 1683; SSE2-NEXT: movq %rax, %xmm1 1684; SSE2-NEXT: movsbq (%rdi), %rax 1685; SSE2-NEXT: movq %rax, %xmm0 1686; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1687; SSE2-NEXT: movsbq 3(%rdi), %rax 1688; SSE2-NEXT: movq %rax, %xmm2 1689; SSE2-NEXT: movsbq 2(%rdi), %rax 1690; SSE2-NEXT: movq %rax, %xmm1 1691; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1692; SSE2-NEXT: retq 1693; 
1694; SSSE3-LABEL: load_sext_4i8_to_4i64: 1695; SSSE3: # %bb.0: # %entry 1696; SSSE3-NEXT: movsbq 1(%rdi), %rax 1697; SSSE3-NEXT: movq %rax, %xmm1 1698; SSSE3-NEXT: movsbq (%rdi), %rax 1699; SSSE3-NEXT: movq %rax, %xmm0 1700; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1701; SSSE3-NEXT: movsbq 3(%rdi), %rax 1702; SSSE3-NEXT: movq %rax, %xmm2 1703; SSSE3-NEXT: movsbq 2(%rdi), %rax 1704; SSSE3-NEXT: movq %rax, %xmm1 1705; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1706; SSSE3-NEXT: retq 1707; 1708; SSE41-LABEL: load_sext_4i8_to_4i64: 1709; SSE41: # %bb.0: # %entry 1710; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1711; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 1712; SSE41-NEXT: retq 1713; 1714; AVX1-LABEL: load_sext_4i8_to_4i64: 1715; AVX1: # %bb.0: # %entry 1716; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0 1717; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1718; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1719; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1720; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1721; AVX1-NEXT: retq 1722; 1723; AVX2-LABEL: load_sext_4i8_to_4i64: 1724; AVX2: # %bb.0: # %entry 1725; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 1726; AVX2-NEXT: retq 1727; 1728; AVX512-LABEL: load_sext_4i8_to_4i64: 1729; AVX512: # %bb.0: # %entry 1730; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 1731; AVX512-NEXT: retq 1732; 1733; X32-SSE41-LABEL: load_sext_4i8_to_4i64: 1734; X32-SSE41: # %bb.0: # %entry 1735; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1736; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1737; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 1738; X32-SSE41-NEXT: retl 1739entry: 1740 %X = load <4 x i8>, <4 x i8>* %ptr 1741 %Y = sext <4 x i8> %X to <4 x i64> 1742 ret <4 x i64> %Y 1743} 1744 1745define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) { 1746; SSE2-LABEL: load_sext_4i8_to_4i64_extract: 1747; SSE2: # %bb.0: 1748; SSE2-NEXT: movsbq 3(%rdi), %rax 1749; SSE2-NEXT: movq %rax, %xmm1 1750; SSE2-NEXT: movsbq 2(%rdi), %rax 1751; SSE2-NEXT: movq %rax, %xmm0 1752; SSE2-NEXT: 
punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1753; SSE2-NEXT: retq 1754; 1755; SSSE3-LABEL: load_sext_4i8_to_4i64_extract: 1756; SSSE3: # %bb.0: 1757; SSSE3-NEXT: movsbq 3(%rdi), %rax 1758; SSSE3-NEXT: movq %rax, %xmm1 1759; SSSE3-NEXT: movsbq 2(%rdi), %rax 1760; SSSE3-NEXT: movq %rax, %xmm0 1761; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1762; SSSE3-NEXT: retq 1763; 1764; SSE41-LABEL: load_sext_4i8_to_4i64_extract: 1765; SSE41: # %bb.0: 1766; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0 1767; SSE41-NEXT: retq 1768; 1769; AVX1-LABEL: load_sext_4i8_to_4i64_extract: 1770; AVX1: # %bb.0: 1771; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0 1772; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1773; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1774; AVX1-NEXT: retq 1775; 1776; AVX2-LABEL: load_sext_4i8_to_4i64_extract: 1777; AVX2: # %bb.0: 1778; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 1779; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1780; AVX2-NEXT: vzeroupper 1781; AVX2-NEXT: retq 1782; 1783; AVX512-LABEL: load_sext_4i8_to_4i64_extract: 1784; AVX512: # %bb.0: 1785; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 1786; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 1787; AVX512-NEXT: vzeroupper 1788; AVX512-NEXT: retq 1789; 1790; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract: 1791; X32-SSE41: # %bb.0: 1792; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1793; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0 1794; X32-SSE41-NEXT: retl 1795 %ld = load <4 x i8>, <4 x i8>* %ptr 1796 %sext = sext <4 x i8> %ld to <4 x i64> 1797 %extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 1798 ret <2 x i64> %extract 1799} 1800 1801define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { 1802; SSE2-LABEL: load_sext_8i1_to_8i16: 1803; SSE2: # %bb.0: # %entry 1804; SSE2-NEXT: movsbq (%rdi), %rax 1805; SSE2-NEXT: movq %rax, %rcx 1806; SSE2-NEXT: shrq $7, %rcx 1807; SSE2-NEXT: movd %ecx, %xmm0 1808; SSE2-NEXT: movq %rax, %rcx 1809; SSE2-NEXT: shlq $57, %rcx 1810; SSE2-NEXT: sarq $63, %rcx 1811; SSE2-NEXT: 
movd %ecx, %xmm2 1812; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1813; SSE2-NEXT: movq %rax, %rcx 1814; SSE2-NEXT: shlq $58, %rcx 1815; SSE2-NEXT: sarq $63, %rcx 1816; SSE2-NEXT: movd %ecx, %xmm0 1817; SSE2-NEXT: movq %rax, %rcx 1818; SSE2-NEXT: shlq $59, %rcx 1819; SSE2-NEXT: sarq $63, %rcx 1820; SSE2-NEXT: movd %ecx, %xmm1 1821; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1822; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1823; SSE2-NEXT: movq %rax, %rcx 1824; SSE2-NEXT: shlq $60, %rcx 1825; SSE2-NEXT: sarq $63, %rcx 1826; SSE2-NEXT: movd %ecx, %xmm0 1827; SSE2-NEXT: movq %rax, %rcx 1828; SSE2-NEXT: shlq $61, %rcx 1829; SSE2-NEXT: sarq $63, %rcx 1830; SSE2-NEXT: movd %ecx, %xmm2 1831; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1832; SSE2-NEXT: movq %rax, %rcx 1833; SSE2-NEXT: shlq $62, %rcx 1834; SSE2-NEXT: sarq $63, %rcx 1835; SSE2-NEXT: movd %ecx, %xmm3 1836; SSE2-NEXT: shlq $63, %rax 1837; SSE2-NEXT: sarq $63, %rax 1838; SSE2-NEXT: movd %eax, %xmm0 1839; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1840; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1841; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1842; SSE2-NEXT: retq 1843; 1844; SSSE3-LABEL: load_sext_8i1_to_8i16: 1845; SSSE3: # %bb.0: # %entry 1846; SSSE3-NEXT: movsbq (%rdi), %rax 1847; SSSE3-NEXT: movq %rax, %rcx 1848; SSSE3-NEXT: shrq $7, %rcx 1849; SSSE3-NEXT: movd %ecx, %xmm0 1850; SSSE3-NEXT: movq %rax, %rcx 1851; SSSE3-NEXT: shlq $57, %rcx 1852; SSSE3-NEXT: sarq $63, %rcx 1853; SSSE3-NEXT: movd %ecx, %xmm2 1854; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1855; SSSE3-NEXT: movq %rax, %rcx 1856; SSSE3-NEXT: shlq $58, %rcx 1857; SSSE3-NEXT: sarq $63, %rcx 1858; 
SSSE3-NEXT: movd %ecx, %xmm0 1859; SSSE3-NEXT: movq %rax, %rcx 1860; SSSE3-NEXT: shlq $59, %rcx 1861; SSSE3-NEXT: sarq $63, %rcx 1862; SSSE3-NEXT: movd %ecx, %xmm1 1863; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1864; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1865; SSSE3-NEXT: movq %rax, %rcx 1866; SSSE3-NEXT: shlq $60, %rcx 1867; SSSE3-NEXT: sarq $63, %rcx 1868; SSSE3-NEXT: movd %ecx, %xmm0 1869; SSSE3-NEXT: movq %rax, %rcx 1870; SSSE3-NEXT: shlq $61, %rcx 1871; SSSE3-NEXT: sarq $63, %rcx 1872; SSSE3-NEXT: movd %ecx, %xmm2 1873; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1874; SSSE3-NEXT: movq %rax, %rcx 1875; SSSE3-NEXT: shlq $62, %rcx 1876; SSSE3-NEXT: sarq $63, %rcx 1877; SSSE3-NEXT: movd %ecx, %xmm3 1878; SSSE3-NEXT: shlq $63, %rax 1879; SSSE3-NEXT: sarq $63, %rax 1880; SSSE3-NEXT: movd %eax, %xmm0 1881; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1882; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1883; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1884; SSSE3-NEXT: retq 1885; 1886; SSE41-LABEL: load_sext_8i1_to_8i16: 1887; SSE41: # %bb.0: # %entry 1888; SSE41-NEXT: movsbq (%rdi), %rax 1889; SSE41-NEXT: movq %rax, %rcx 1890; SSE41-NEXT: shlq $62, %rcx 1891; SSE41-NEXT: sarq $63, %rcx 1892; SSE41-NEXT: movq %rax, %rdx 1893; SSE41-NEXT: shlq $63, %rdx 1894; SSE41-NEXT: sarq $63, %rdx 1895; SSE41-NEXT: movd %edx, %xmm0 1896; SSE41-NEXT: pinsrw $1, %ecx, %xmm0 1897; SSE41-NEXT: movq %rax, %rcx 1898; SSE41-NEXT: shlq $61, %rcx 1899; SSE41-NEXT: sarq $63, %rcx 1900; SSE41-NEXT: pinsrw $2, %ecx, %xmm0 1901; SSE41-NEXT: movq %rax, %rcx 1902; SSE41-NEXT: shlq $60, %rcx 1903; SSE41-NEXT: sarq $63, %rcx 1904; SSE41-NEXT: pinsrw $3, %ecx, %xmm0 1905; SSE41-NEXT: movq %rax, %rcx 1906; SSE41-NEXT: shlq $59, %rcx 1907; SSE41-NEXT: sarq $63, 
%rcx 1908; SSE41-NEXT: pinsrw $4, %ecx, %xmm0 1909; SSE41-NEXT: movq %rax, %rcx 1910; SSE41-NEXT: shlq $58, %rcx 1911; SSE41-NEXT: sarq $63, %rcx 1912; SSE41-NEXT: pinsrw $5, %ecx, %xmm0 1913; SSE41-NEXT: movq %rax, %rcx 1914; SSE41-NEXT: shlq $57, %rcx 1915; SSE41-NEXT: sarq $63, %rcx 1916; SSE41-NEXT: pinsrw $6, %ecx, %xmm0 1917; SSE41-NEXT: shrq $7, %rax 1918; SSE41-NEXT: pinsrw $7, %eax, %xmm0 1919; SSE41-NEXT: retq 1920; 1921; AVX1-LABEL: load_sext_8i1_to_8i16: 1922; AVX1: # %bb.0: # %entry 1923; AVX1-NEXT: movsbq (%rdi), %rax 1924; AVX1-NEXT: movq %rax, %rcx 1925; AVX1-NEXT: shlq $62, %rcx 1926; AVX1-NEXT: sarq $63, %rcx 1927; AVX1-NEXT: movq %rax, %rdx 1928; AVX1-NEXT: shlq $63, %rdx 1929; AVX1-NEXT: sarq $63, %rdx 1930; AVX1-NEXT: vmovd %edx, %xmm0 1931; AVX1-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 1932; AVX1-NEXT: movq %rax, %rcx 1933; AVX1-NEXT: shlq $61, %rcx 1934; AVX1-NEXT: sarq $63, %rcx 1935; AVX1-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 1936; AVX1-NEXT: movq %rax, %rcx 1937; AVX1-NEXT: shlq $60, %rcx 1938; AVX1-NEXT: sarq $63, %rcx 1939; AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 1940; AVX1-NEXT: movq %rax, %rcx 1941; AVX1-NEXT: shlq $59, %rcx 1942; AVX1-NEXT: sarq $63, %rcx 1943; AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 1944; AVX1-NEXT: movq %rax, %rcx 1945; AVX1-NEXT: shlq $58, %rcx 1946; AVX1-NEXT: sarq $63, %rcx 1947; AVX1-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 1948; AVX1-NEXT: movq %rax, %rcx 1949; AVX1-NEXT: shlq $57, %rcx 1950; AVX1-NEXT: sarq $63, %rcx 1951; AVX1-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1952; AVX1-NEXT: shrq $7, %rax 1953; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1954; AVX1-NEXT: retq 1955; 1956; AVX2-LABEL: load_sext_8i1_to_8i16: 1957; AVX2: # %bb.0: # %entry 1958; AVX2-NEXT: movsbq (%rdi), %rax 1959; AVX2-NEXT: movq %rax, %rcx 1960; AVX2-NEXT: shlq $62, %rcx 1961; AVX2-NEXT: sarq $63, %rcx 1962; AVX2-NEXT: movq %rax, %rdx 1963; AVX2-NEXT: shlq $63, %rdx 1964; AVX2-NEXT: sarq $63, %rdx 1965; AVX2-NEXT: vmovd %edx, %xmm0 1966; 
AVX2-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 1967; AVX2-NEXT: movq %rax, %rcx 1968; AVX2-NEXT: shlq $61, %rcx 1969; AVX2-NEXT: sarq $63, %rcx 1970; AVX2-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 1971; AVX2-NEXT: movq %rax, %rcx 1972; AVX2-NEXT: shlq $60, %rcx 1973; AVX2-NEXT: sarq $63, %rcx 1974; AVX2-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 1975; AVX2-NEXT: movq %rax, %rcx 1976; AVX2-NEXT: shlq $59, %rcx 1977; AVX2-NEXT: sarq $63, %rcx 1978; AVX2-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 1979; AVX2-NEXT: movq %rax, %rcx 1980; AVX2-NEXT: shlq $58, %rcx 1981; AVX2-NEXT: sarq $63, %rcx 1982; AVX2-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 1983; AVX2-NEXT: movq %rax, %rcx 1984; AVX2-NEXT: shlq $57, %rcx 1985; AVX2-NEXT: sarq $63, %rcx 1986; AVX2-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1987; AVX2-NEXT: shrq $7, %rax 1988; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1989; AVX2-NEXT: retq 1990; 1991; AVX512F-LABEL: load_sext_8i1_to_8i16: 1992; AVX512F: # %bb.0: # %entry 1993; AVX512F-NEXT: movzbl (%rdi), %eax 1994; AVX512F-NEXT: kmovw %eax, %k1 1995; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1996; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 1997; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1998; AVX512F-NEXT: vzeroupper 1999; AVX512F-NEXT: retq 2000; 2001; AVX512BW-LABEL: load_sext_8i1_to_8i16: 2002; AVX512BW: # %bb.0: # %entry 2003; AVX512BW-NEXT: movzbl (%rdi), %eax 2004; AVX512BW-NEXT: kmovd %eax, %k0 2005; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2006; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2007; AVX512BW-NEXT: vzeroupper 2008; AVX512BW-NEXT: retq 2009; 2010; X32-SSE41-LABEL: load_sext_8i1_to_8i16: 2011; X32-SSE41: # %bb.0: # %entry 2012; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2013; X32-SSE41-NEXT: movsbl (%eax), %eax 2014; X32-SSE41-NEXT: movl %eax, %ecx 2015; X32-SSE41-NEXT: shll $30, %ecx 2016; X32-SSE41-NEXT: sarl $31, %ecx 2017; X32-SSE41-NEXT: movl %eax, %edx 2018; X32-SSE41-NEXT: shll $31, %edx 2019; X32-SSE41-NEXT: sarl $31, %edx 2020; 
X32-SSE41-NEXT: movd %edx, %xmm0 2021; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm0 2022; X32-SSE41-NEXT: movl %eax, %ecx 2023; X32-SSE41-NEXT: shll $29, %ecx 2024; X32-SSE41-NEXT: sarl $31, %ecx 2025; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm0 2026; X32-SSE41-NEXT: movl %eax, %ecx 2027; X32-SSE41-NEXT: shll $28, %ecx 2028; X32-SSE41-NEXT: sarl $31, %ecx 2029; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm0 2030; X32-SSE41-NEXT: movl %eax, %ecx 2031; X32-SSE41-NEXT: shll $27, %ecx 2032; X32-SSE41-NEXT: sarl $31, %ecx 2033; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm0 2034; X32-SSE41-NEXT: movl %eax, %ecx 2035; X32-SSE41-NEXT: shll $26, %ecx 2036; X32-SSE41-NEXT: sarl $31, %ecx 2037; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm0 2038; X32-SSE41-NEXT: movl %eax, %ecx 2039; X32-SSE41-NEXT: shll $25, %ecx 2040; X32-SSE41-NEXT: sarl $31, %ecx 2041; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm0 2042; X32-SSE41-NEXT: shrl $7, %eax 2043; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm0 2044; X32-SSE41-NEXT: retl 2045entry: 2046 %X = load <8 x i1>, <8 x i1>* %ptr 2047 %Y = sext <8 x i1> %X to <8 x i16> 2048 ret <8 x i16> %Y 2049} 2050 2051define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) { 2052; SSE2-LABEL: load_sext_8i8_to_8i16: 2053; SSE2: # %bb.0: # %entry 2054; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2055; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2056; SSE2-NEXT: psraw $8, %xmm0 2057; SSE2-NEXT: retq 2058; 2059; SSSE3-LABEL: load_sext_8i8_to_8i16: 2060; SSSE3: # %bb.0: # %entry 2061; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2062; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2063; SSSE3-NEXT: psraw $8, %xmm0 2064; SSSE3-NEXT: retq 2065; 2066; SSE41-LABEL: load_sext_8i8_to_8i16: 2067; SSE41: # %bb.0: # %entry 2068; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 2069; SSE41-NEXT: retq 2070; 2071; AVX-LABEL: load_sext_8i8_to_8i16: 2072; AVX: # %bb.0: # %entry 2073; AVX-NEXT: vpmovsxbw (%rdi), %xmm0 2074; AVX-NEXT: retq 2075; 2076; X32-SSE41-LABEL: 
load_sext_8i8_to_8i16: 2077; X32-SSE41: # %bb.0: # %entry 2078; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2079; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 2080; X32-SSE41-NEXT: retl 2081entry: 2082 %X = load <8 x i8>, <8 x i8>* %ptr 2083 %Y = sext <8 x i8> %X to <8 x i16> 2084 ret <8 x i16> %Y 2085} 2086 2087define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) { 2088; SSE2-LABEL: load_sext_8i8_to_8i64: 2089; SSE2: # %bb.0: # %entry 2090; SSE2-NEXT: movsbq 1(%rdi), %rax 2091; SSE2-NEXT: movq %rax, %xmm1 2092; SSE2-NEXT: movsbq (%rdi), %rax 2093; SSE2-NEXT: movq %rax, %xmm0 2094; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2095; SSE2-NEXT: movsbq 3(%rdi), %rax 2096; SSE2-NEXT: movq %rax, %xmm2 2097; SSE2-NEXT: movsbq 2(%rdi), %rax 2098; SSE2-NEXT: movq %rax, %xmm1 2099; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2100; SSE2-NEXT: movsbq 5(%rdi), %rax 2101; SSE2-NEXT: movq %rax, %xmm3 2102; SSE2-NEXT: movsbq 4(%rdi), %rax 2103; SSE2-NEXT: movq %rax, %xmm2 2104; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2105; SSE2-NEXT: movsbq 7(%rdi), %rax 2106; SSE2-NEXT: movq %rax, %xmm4 2107; SSE2-NEXT: movsbq 6(%rdi), %rax 2108; SSE2-NEXT: movq %rax, %xmm3 2109; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] 2110; SSE2-NEXT: retq 2111; 2112; SSSE3-LABEL: load_sext_8i8_to_8i64: 2113; SSSE3: # %bb.0: # %entry 2114; SSSE3-NEXT: movsbq 1(%rdi), %rax 2115; SSSE3-NEXT: movq %rax, %xmm1 2116; SSSE3-NEXT: movsbq (%rdi), %rax 2117; SSSE3-NEXT: movq %rax, %xmm0 2118; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2119; SSSE3-NEXT: movsbq 3(%rdi), %rax 2120; SSSE3-NEXT: movq %rax, %xmm2 2121; SSSE3-NEXT: movsbq 2(%rdi), %rax 2122; SSSE3-NEXT: movq %rax, %xmm1 2123; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2124; SSSE3-NEXT: movsbq 5(%rdi), %rax 2125; SSSE3-NEXT: movq %rax, %xmm3 2126; SSSE3-NEXT: movsbq 4(%rdi), %rax 2127; SSSE3-NEXT: movq %rax, %xmm2 2128; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2129; SSSE3-NEXT: 
movsbq 7(%rdi), %rax 2130; SSSE3-NEXT: movq %rax, %xmm4 2131; SSSE3-NEXT: movsbq 6(%rdi), %rax 2132; SSSE3-NEXT: movq %rax, %xmm3 2133; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] 2134; SSSE3-NEXT: retq 2135; 2136; SSE41-LABEL: load_sext_8i8_to_8i64: 2137; SSE41: # %bb.0: # %entry 2138; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 2139; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 2140; SSE41-NEXT: pmovsxbq 4(%rdi), %xmm2 2141; SSE41-NEXT: pmovsxbq 6(%rdi), %xmm3 2142; SSE41-NEXT: retq 2143; 2144; AVX1-LABEL: load_sext_8i8_to_8i64: 2145; AVX1: # %bb.0: # %entry 2146; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0 2147; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 2148; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 2149; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 2150; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2151; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm1 2152; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2 2153; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 2154; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1 2155; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2156; AVX1-NEXT: retq 2157; 2158; AVX2-LABEL: load_sext_8i8_to_8i64: 2159; AVX2: # %bb.0: # %entry 2160; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2161; AVX2-NEXT: vpmovsxbq 4(%rdi), %ymm1 2162; AVX2-NEXT: retq 2163; 2164; AVX512-LABEL: load_sext_8i8_to_8i64: 2165; AVX512: # %bb.0: # %entry 2166; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0 2167; AVX512-NEXT: retq 2168; 2169; X32-SSE41-LABEL: load_sext_8i8_to_8i64: 2170; X32-SSE41: # %bb.0: # %entry 2171; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2172; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 2173; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 2174; X32-SSE41-NEXT: pmovsxbq 4(%eax), %xmm2 2175; X32-SSE41-NEXT: pmovsxbq 6(%eax), %xmm3 2176; X32-SSE41-NEXT: retl 2177entry: 2178 %X = load <8 x i8>, <8 x i8>* %ptr 2179 %Y = sext <8 x i8> %X to <8 x i64> 2180 ret <8 x i64> %Y 2181} 2182 2183define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { 2184; SSE2-LABEL: load_sext_8i1_to_8i32: 2185; SSE2: # %bb.0: # %entry 2186; SSE2-NEXT: movzbl 
(%rdi), %eax 2187; SSE2-NEXT: movl %eax, %ecx 2188; SSE2-NEXT: shrl $7, %ecx 2189; SSE2-NEXT: movd %ecx, %xmm0 2190; SSE2-NEXT: movl %eax, %ecx 2191; SSE2-NEXT: shrl $6, %ecx 2192; SSE2-NEXT: andl $1, %ecx 2193; SSE2-NEXT: movd %ecx, %xmm1 2194; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2195; SSE2-NEXT: movl %eax, %ecx 2196; SSE2-NEXT: shrl $5, %ecx 2197; SSE2-NEXT: andl $1, %ecx 2198; SSE2-NEXT: movd %ecx, %xmm0 2199; SSE2-NEXT: movl %eax, %ecx 2200; SSE2-NEXT: shrl $4, %ecx 2201; SSE2-NEXT: andl $1, %ecx 2202; SSE2-NEXT: movd %ecx, %xmm2 2203; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2204; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2205; SSE2-NEXT: movl %eax, %ecx 2206; SSE2-NEXT: shrl $3, %ecx 2207; SSE2-NEXT: andl $1, %ecx 2208; SSE2-NEXT: movd %ecx, %xmm0 2209; SSE2-NEXT: movl %eax, %ecx 2210; SSE2-NEXT: shrl $2, %ecx 2211; SSE2-NEXT: andl $1, %ecx 2212; SSE2-NEXT: movd %ecx, %xmm3 2213; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] 2214; SSE2-NEXT: movl %eax, %ecx 2215; SSE2-NEXT: andl $1, %ecx 2216; SSE2-NEXT: movd %ecx, %xmm1 2217; SSE2-NEXT: shrl %eax 2218; SSE2-NEXT: andl $1, %eax 2219; SSE2-NEXT: movd %eax, %xmm0 2220; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2221; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 2222; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2223; SSE2-NEXT: movdqa %xmm1, %xmm0 2224; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2225; SSE2-NEXT: pslld $31, %xmm0 2226; SSE2-NEXT: psrad $31, %xmm0 2227; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2228; SSE2-NEXT: pslld $31, %xmm1 2229; SSE2-NEXT: psrad $31, %xmm1 2230; SSE2-NEXT: retq 2231; 2232; SSSE3-LABEL: load_sext_8i1_to_8i32: 
2233; SSSE3: # %bb.0: # %entry 2234; SSSE3-NEXT: movzbl (%rdi), %eax 2235; SSSE3-NEXT: movl %eax, %ecx 2236; SSSE3-NEXT: shrl $7, %ecx 2237; SSSE3-NEXT: movd %ecx, %xmm0 2238; SSSE3-NEXT: movl %eax, %ecx 2239; SSSE3-NEXT: shrl $6, %ecx 2240; SSSE3-NEXT: andl $1, %ecx 2241; SSSE3-NEXT: movd %ecx, %xmm1 2242; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2243; SSSE3-NEXT: movl %eax, %ecx 2244; SSSE3-NEXT: shrl $5, %ecx 2245; SSSE3-NEXT: andl $1, %ecx 2246; SSSE3-NEXT: movd %ecx, %xmm0 2247; SSSE3-NEXT: movl %eax, %ecx 2248; SSSE3-NEXT: shrl $4, %ecx 2249; SSSE3-NEXT: andl $1, %ecx 2250; SSSE3-NEXT: movd %ecx, %xmm2 2251; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2252; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2253; SSSE3-NEXT: movl %eax, %ecx 2254; SSSE3-NEXT: shrl $3, %ecx 2255; SSSE3-NEXT: andl $1, %ecx 2256; SSSE3-NEXT: movd %ecx, %xmm0 2257; SSSE3-NEXT: movl %eax, %ecx 2258; SSSE3-NEXT: shrl $2, %ecx 2259; SSSE3-NEXT: andl $1, %ecx 2260; SSSE3-NEXT: movd %ecx, %xmm3 2261; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] 2262; SSSE3-NEXT: movl %eax, %ecx 2263; SSSE3-NEXT: andl $1, %ecx 2264; SSSE3-NEXT: movd %ecx, %xmm1 2265; SSSE3-NEXT: shrl %eax 2266; SSSE3-NEXT: andl $1, %eax 2267; SSSE3-NEXT: movd %eax, %xmm0 2268; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2269; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 2270; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2271; SSSE3-NEXT: movdqa %xmm1, %xmm0 2272; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2273; SSSE3-NEXT: pslld $31, %xmm0 2274; SSSE3-NEXT: psrad $31, %xmm0 2275; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2276; SSSE3-NEXT: pslld $31, %xmm1 2277; 
SSSE3-NEXT: psrad $31, %xmm1 2278; SSSE3-NEXT: retq 2279; 2280; SSE41-LABEL: load_sext_8i1_to_8i32: 2281; SSE41: # %bb.0: # %entry 2282; SSE41-NEXT: movzbl (%rdi), %eax 2283; SSE41-NEXT: movl %eax, %ecx 2284; SSE41-NEXT: shrl %ecx 2285; SSE41-NEXT: andl $1, %ecx 2286; SSE41-NEXT: movl %eax, %edx 2287; SSE41-NEXT: andl $1, %edx 2288; SSE41-NEXT: movd %edx, %xmm1 2289; SSE41-NEXT: pinsrw $1, %ecx, %xmm1 2290; SSE41-NEXT: movl %eax, %ecx 2291; SSE41-NEXT: shrl $2, %ecx 2292; SSE41-NEXT: andl $1, %ecx 2293; SSE41-NEXT: pinsrw $2, %ecx, %xmm1 2294; SSE41-NEXT: movl %eax, %ecx 2295; SSE41-NEXT: shrl $3, %ecx 2296; SSE41-NEXT: andl $1, %ecx 2297; SSE41-NEXT: pinsrw $3, %ecx, %xmm1 2298; SSE41-NEXT: movl %eax, %ecx 2299; SSE41-NEXT: shrl $4, %ecx 2300; SSE41-NEXT: andl $1, %ecx 2301; SSE41-NEXT: pinsrw $4, %ecx, %xmm1 2302; SSE41-NEXT: movl %eax, %ecx 2303; SSE41-NEXT: shrl $5, %ecx 2304; SSE41-NEXT: andl $1, %ecx 2305; SSE41-NEXT: pinsrw $5, %ecx, %xmm1 2306; SSE41-NEXT: movl %eax, %ecx 2307; SSE41-NEXT: shrl $6, %ecx 2308; SSE41-NEXT: andl $1, %ecx 2309; SSE41-NEXT: pinsrw $6, %ecx, %xmm1 2310; SSE41-NEXT: shrl $7, %eax 2311; SSE41-NEXT: pinsrw $7, %eax, %xmm1 2312; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 2313; SSE41-NEXT: pslld $31, %xmm0 2314; SSE41-NEXT: psrad $31, %xmm0 2315; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2316; SSE41-NEXT: pslld $31, %xmm1 2317; SSE41-NEXT: psrad $31, %xmm1 2318; SSE41-NEXT: retq 2319; 2320; AVX1-LABEL: load_sext_8i1_to_8i32: 2321; AVX1: # %bb.0: # %entry 2322; AVX1-NEXT: movsbq (%rdi), %rax 2323; AVX1-NEXT: movq %rax, %rcx 2324; AVX1-NEXT: shlq $58, %rcx 2325; AVX1-NEXT: sarq $63, %rcx 2326; AVX1-NEXT: movq %rax, %rdx 2327; AVX1-NEXT: shlq $59, %rdx 2328; AVX1-NEXT: sarq $63, %rdx 2329; AVX1-NEXT: vmovd %edx, %xmm0 2330; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 2331; AVX1-NEXT: movq %rax, %rcx 2332; AVX1-NEXT: shlq $57, %rcx 
2333; AVX1-NEXT: sarq $63, %rcx 2334; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2335; AVX1-NEXT: movq %rax, %rcx 2336; AVX1-NEXT: shrq $7, %rcx 2337; AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 2338; AVX1-NEXT: movq %rax, %rcx 2339; AVX1-NEXT: shlq $62, %rcx 2340; AVX1-NEXT: sarq $63, %rcx 2341; AVX1-NEXT: movq %rax, %rdx 2342; AVX1-NEXT: shlq $63, %rdx 2343; AVX1-NEXT: sarq $63, %rdx 2344; AVX1-NEXT: vmovd %edx, %xmm1 2345; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 2346; AVX1-NEXT: movq %rax, %rcx 2347; AVX1-NEXT: shlq $61, %rcx 2348; AVX1-NEXT: sarq $63, %rcx 2349; AVX1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 2350; AVX1-NEXT: shlq $60, %rax 2351; AVX1-NEXT: sarq $63, %rax 2352; AVX1-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 2353; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2354; AVX1-NEXT: retq 2355; 2356; AVX2-LABEL: load_sext_8i1_to_8i32: 2357; AVX2: # %bb.0: # %entry 2358; AVX2-NEXT: movsbq (%rdi), %rax 2359; AVX2-NEXT: movq %rax, %rcx 2360; AVX2-NEXT: shlq $58, %rcx 2361; AVX2-NEXT: sarq $63, %rcx 2362; AVX2-NEXT: movq %rax, %rdx 2363; AVX2-NEXT: shlq $59, %rdx 2364; AVX2-NEXT: sarq $63, %rdx 2365; AVX2-NEXT: vmovd %edx, %xmm0 2366; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 2367; AVX2-NEXT: movq %rax, %rcx 2368; AVX2-NEXT: shlq $57, %rcx 2369; AVX2-NEXT: sarq $63, %rcx 2370; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2371; AVX2-NEXT: movq %rax, %rcx 2372; AVX2-NEXT: shrq $7, %rcx 2373; AVX2-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 2374; AVX2-NEXT: movq %rax, %rcx 2375; AVX2-NEXT: shlq $62, %rcx 2376; AVX2-NEXT: sarq $63, %rcx 2377; AVX2-NEXT: movq %rax, %rdx 2378; AVX2-NEXT: shlq $63, %rdx 2379; AVX2-NEXT: sarq $63, %rdx 2380; AVX2-NEXT: vmovd %edx, %xmm1 2381; AVX2-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 2382; AVX2-NEXT: movq %rax, %rcx 2383; AVX2-NEXT: shlq $61, %rcx 2384; AVX2-NEXT: sarq $63, %rcx 2385; AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 2386; AVX2-NEXT: shlq $60, %rax 2387; AVX2-NEXT: sarq $63, %rax 2388; AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 2389; AVX2-NEXT: 
vinserti128 $1, %xmm0, %ymm1, %ymm0 2390; AVX2-NEXT: retq 2391; 2392; AVX512F-LABEL: load_sext_8i1_to_8i32: 2393; AVX512F: # %bb.0: # %entry 2394; AVX512F-NEXT: movzbl (%rdi), %eax 2395; AVX512F-NEXT: kmovw %eax, %k1 2396; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2397; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2398; AVX512F-NEXT: retq 2399; 2400; AVX512BW-LABEL: load_sext_8i1_to_8i32: 2401; AVX512BW: # %bb.0: # %entry 2402; AVX512BW-NEXT: movzbl (%rdi), %eax 2403; AVX512BW-NEXT: kmovd %eax, %k1 2404; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2405; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2406; AVX512BW-NEXT: retq 2407; 2408; X32-SSE41-LABEL: load_sext_8i1_to_8i32: 2409; X32-SSE41: # %bb.0: # %entry 2410; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2411; X32-SSE41-NEXT: movzbl (%eax), %eax 2412; X32-SSE41-NEXT: movl %eax, %ecx 2413; X32-SSE41-NEXT: shrl %ecx 2414; X32-SSE41-NEXT: andl $1, %ecx 2415; X32-SSE41-NEXT: movl %eax, %edx 2416; X32-SSE41-NEXT: andl $1, %edx 2417; X32-SSE41-NEXT: movd %edx, %xmm1 2418; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm1 2419; X32-SSE41-NEXT: movl %eax, %ecx 2420; X32-SSE41-NEXT: shrl $2, %ecx 2421; X32-SSE41-NEXT: andl $1, %ecx 2422; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm1 2423; X32-SSE41-NEXT: movl %eax, %ecx 2424; X32-SSE41-NEXT: shrl $3, %ecx 2425; X32-SSE41-NEXT: andl $1, %ecx 2426; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm1 2427; X32-SSE41-NEXT: movl %eax, %ecx 2428; X32-SSE41-NEXT: shrl $4, %ecx 2429; X32-SSE41-NEXT: andl $1, %ecx 2430; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm1 2431; X32-SSE41-NEXT: movl %eax, %ecx 2432; X32-SSE41-NEXT: shrl $5, %ecx 2433; X32-SSE41-NEXT: andl $1, %ecx 2434; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm1 2435; X32-SSE41-NEXT: movl %eax, %ecx 2436; X32-SSE41-NEXT: shrl $6, %ecx 2437; X32-SSE41-NEXT: andl $1, %ecx 2438; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm1 2439; X32-SSE41-NEXT: shrl $7, %eax 2440; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm1 2441; 
X32-SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 2442; X32-SSE41-NEXT: pslld $31, %xmm0 2443; X32-SSE41-NEXT: psrad $31, %xmm0 2444; X32-SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2445; X32-SSE41-NEXT: pslld $31, %xmm1 2446; X32-SSE41-NEXT: psrad $31, %xmm1 2447; X32-SSE41-NEXT: retl 2448entry: 2449 %X = load <8 x i1>, <8 x i1>* %ptr 2450 %Y = sext <8 x i1> %X to <8 x i32> 2451 ret <8 x i32> %Y 2452} 2453 2454define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) { 2455; SSE2-LABEL: load_sext_8i8_to_8i32: 2456; SSE2: # %bb.0: # %entry 2457; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2458; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2459; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2460; SSE2-NEXT: psrad $24, %xmm0 2461; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2462; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2463; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 2464; SSE2-NEXT: psrad $24, %xmm1 2465; SSE2-NEXT: retq 2466; 2467; SSSE3-LABEL: load_sext_8i8_to_8i32: 2468; SSSE3: # %bb.0: # %entry 2469; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2470; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2471; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2472; SSSE3-NEXT: psrad $24, %xmm0 2473; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2474; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2475; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 2476; SSSE3-NEXT: psrad $24, %xmm1 2477; SSSE3-NEXT: retq 2478; 2479; SSE41-LABEL: load_sext_8i8_to_8i32: 2480; SSE41: # %bb.0: # %entry 2481; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 2482; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 2483; SSE41-NEXT: retq 2484; 2485; AVX1-LABEL: load_sext_8i8_to_8i32: 2486; AVX1: # %bb.0: # %entry 
2487; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0 2488; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 2489; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 2490; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 2491; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2492; AVX1-NEXT: retq 2493; 2494; AVX2-LABEL: load_sext_8i8_to_8i32: 2495; AVX2: # %bb.0: # %entry 2496; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 2497; AVX2-NEXT: retq 2498; 2499; AVX512-LABEL: load_sext_8i8_to_8i32: 2500; AVX512: # %bb.0: # %entry 2501; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0 2502; AVX512-NEXT: retq 2503; 2504; X32-SSE41-LABEL: load_sext_8i8_to_8i32: 2505; X32-SSE41: # %bb.0: # %entry 2506; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2507; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 2508; X32-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1 2509; X32-SSE41-NEXT: retl 2510entry: 2511 %X = load <8 x i8>, <8 x i8>* %ptr 2512 %Y = sext <8 x i8> %X to <8 x i32> 2513 ret <8 x i32> %Y 2514} 2515 2516define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone { 2517; SSE2-LABEL: load_sext_16i1_to_16i8: 2518; SSE2: # %bb.0: # %entry 2519; SSE2-NEXT: pushq %rbp 2520; SSE2-NEXT: pushq %r15 2521; SSE2-NEXT: pushq %r14 2522; SSE2-NEXT: pushq %r13 2523; SSE2-NEXT: pushq %r12 2524; SSE2-NEXT: pushq %rbx 2525; SSE2-NEXT: movswq (%rdi), %rax 2526; SSE2-NEXT: movq %rax, %r8 2527; SSE2-NEXT: movq %rax, %r9 2528; SSE2-NEXT: movq %rax, %r10 2529; SSE2-NEXT: movq %rax, %r11 2530; SSE2-NEXT: movq %rax, %r14 2531; SSE2-NEXT: movq %rax, %r15 2532; SSE2-NEXT: movq %rax, %r12 2533; SSE2-NEXT: movq %rax, %r13 2534; SSE2-NEXT: movq %rax, %rbx 2535; SSE2-NEXT: movq %rax, %rcx 2536; SSE2-NEXT: movq %rax, %rdx 2537; SSE2-NEXT: movq %rax, %rsi 2538; SSE2-NEXT: movq %rax, %rdi 2539; SSE2-NEXT: movq %rax, %rbp 2540; SSE2-NEXT: shrq $15, %rbp 2541; SSE2-NEXT: movd %ebp, %xmm0 2542; SSE2-NEXT: movq %rax, %rbp 2543; SSE2-NEXT: movsbq %al, %rax 2544; SSE2-NEXT: shlq $49, %r8 2545; SSE2-NEXT: sarq $63, %r8 2546; SSE2-NEXT: movd %r8d, %xmm1 2547; SSE2-NEXT: shlq $50, %r9 
2548; SSE2-NEXT: sarq $63, %r9 2549; SSE2-NEXT: movd %r9d, %xmm2 2550; SSE2-NEXT: shlq $51, %r10 2551; SSE2-NEXT: sarq $63, %r10 2552; SSE2-NEXT: movd %r10d, %xmm3 2553; SSE2-NEXT: shlq $52, %r11 2554; SSE2-NEXT: sarq $63, %r11 2555; SSE2-NEXT: movd %r11d, %xmm4 2556; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2557; SSE2-NEXT: shlq $53, %r14 2558; SSE2-NEXT: sarq $63, %r14 2559; SSE2-NEXT: movd %r14d, %xmm0 2560; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2561; SSE2-NEXT: shlq $54, %r15 2562; SSE2-NEXT: sarq $63, %r15 2563; SSE2-NEXT: movd %r15d, %xmm2 2564; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] 2565; SSE2-NEXT: shlq $55, %r12 2566; SSE2-NEXT: sarq $63, %r12 2567; SSE2-NEXT: movd %r12d, %xmm1 2568; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 2569; SSE2-NEXT: shlq $60, %r13 2570; SSE2-NEXT: sarq $63, %r13 2571; SSE2-NEXT: movd %r13d, %xmm4 2572; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2573; SSE2-NEXT: shlq $61, %rbx 2574; SSE2-NEXT: sarq $63, %rbx 2575; SSE2-NEXT: movd %ebx, %xmm2 2576; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2577; SSE2-NEXT: shlq $62, %rcx 2578; SSE2-NEXT: sarq $63, %rcx 2579; SSE2-NEXT: movd %ecx, %xmm5 2580; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 2581; SSE2-NEXT: shlq $63, %rdx 2582; SSE2-NEXT: sarq $63, %rdx 2583; SSE2-NEXT: movd %edx, %xmm0 2584; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 2585; SSE2-NEXT: shlq $58, %rsi 2586; SSE2-NEXT: sarq $63, %rsi 2587; SSE2-NEXT: movd %esi, %xmm3 2588; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 2589; SSE2-NEXT: shlq $59, %rdi 2590; SSE2-NEXT: sarq $63, %rdi 2591; SSE2-NEXT: movd %edi, %xmm4 2592; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2593; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 2594; SSE2-NEXT: shlq $57, %rbp 2595; SSE2-NEXT: sarq $63, %rbp 2596; SSE2-NEXT: movd %ebp, %xmm2 2597; SSE2-NEXT: shrq $7, %rax 2598; SSE2-NEXT: movd %eax, %xmm3 2599; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] 2600; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 2601; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 2602; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2603; SSE2-NEXT: popq %rbx 2604; SSE2-NEXT: popq %r12 2605; SSE2-NEXT: popq %r13 2606; SSE2-NEXT: popq %r14 2607; SSE2-NEXT: popq %r15 2608; SSE2-NEXT: popq %rbp 2609; SSE2-NEXT: retq 2610; 2611; SSSE3-LABEL: load_sext_16i1_to_16i8: 2612; SSSE3: # %bb.0: # %entry 2613; SSSE3-NEXT: pushq %rbp 2614; SSSE3-NEXT: pushq %r15 2615; SSSE3-NEXT: pushq %r14 2616; SSSE3-NEXT: pushq %r13 2617; SSSE3-NEXT: pushq %r12 2618; SSSE3-NEXT: pushq %rbx 2619; SSSE3-NEXT: movswq (%rdi), %rax 2620; SSSE3-NEXT: movq %rax, %r8 2621; SSSE3-NEXT: movq %rax, %r9 2622; SSSE3-NEXT: movq %rax, %r10 2623; SSSE3-NEXT: movq %rax, %r11 2624; SSSE3-NEXT: movq %rax, %r14 2625; 
SSSE3-NEXT: movq %rax, %r15 2626; SSSE3-NEXT: movq %rax, %r12 2627; SSSE3-NEXT: movq %rax, %r13 2628; SSSE3-NEXT: movq %rax, %rbx 2629; SSSE3-NEXT: movq %rax, %rcx 2630; SSSE3-NEXT: movq %rax, %rdx 2631; SSSE3-NEXT: movq %rax, %rsi 2632; SSSE3-NEXT: movq %rax, %rdi 2633; SSSE3-NEXT: movq %rax, %rbp 2634; SSSE3-NEXT: shrq $15, %rbp 2635; SSSE3-NEXT: movd %ebp, %xmm0 2636; SSSE3-NEXT: movq %rax, %rbp 2637; SSSE3-NEXT: movsbq %al, %rax 2638; SSSE3-NEXT: shlq $49, %r8 2639; SSSE3-NEXT: sarq $63, %r8 2640; SSSE3-NEXT: movd %r8d, %xmm1 2641; SSSE3-NEXT: shlq $50, %r9 2642; SSSE3-NEXT: sarq $63, %r9 2643; SSSE3-NEXT: movd %r9d, %xmm2 2644; SSSE3-NEXT: shlq $51, %r10 2645; SSSE3-NEXT: sarq $63, %r10 2646; SSSE3-NEXT: movd %r10d, %xmm3 2647; SSSE3-NEXT: shlq $52, %r11 2648; SSSE3-NEXT: sarq $63, %r11 2649; SSSE3-NEXT: movd %r11d, %xmm4 2650; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2651; SSSE3-NEXT: shlq $53, %r14 2652; SSSE3-NEXT: sarq $63, %r14 2653; SSSE3-NEXT: movd %r14d, %xmm0 2654; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2655; SSSE3-NEXT: shlq $54, %r15 2656; SSSE3-NEXT: sarq $63, %r15 2657; SSSE3-NEXT: movd %r15d, %xmm2 2658; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] 2659; SSSE3-NEXT: shlq $55, %r12 2660; SSSE3-NEXT: sarq $63, %r12 2661; SSSE3-NEXT: movd %r12d, %xmm1 2662; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 2663; SSSE3-NEXT: shlq $60, %r13 2664; SSSE3-NEXT: sarq $63, %r13 2665; SSSE3-NEXT: movd %r13d, %xmm4 2666; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2667; SSSE3-NEXT: shlq $61, %rbx 2668; SSSE3-NEXT: sarq $63, %rbx 2669; SSSE3-NEXT: movd %ebx, %xmm2 2670; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2671; SSSE3-NEXT: shlq $62, %rcx 2672; SSSE3-NEXT: sarq $63, %rcx 2673; SSSE3-NEXT: movd %ecx, %xmm5 2674; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 2675; SSSE3-NEXT: shlq $63, %rdx 2676; SSSE3-NEXT: sarq $63, %rdx 2677; SSSE3-NEXT: movd %edx, %xmm0 2678; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 2679; SSSE3-NEXT: shlq $58, %rsi 2680; SSSE3-NEXT: sarq $63, %rsi 2681; SSSE3-NEXT: movd %esi, %xmm3 2682; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 2683; SSSE3-NEXT: shlq $59, %rdi 2684; SSSE3-NEXT: sarq $63, %rdi 2685; SSSE3-NEXT: movd %edi, %xmm4 2686; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2687; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 2688; SSSE3-NEXT: shlq $57, %rbp 2689; SSSE3-NEXT: sarq $63, %rbp 2690; SSSE3-NEXT: movd %ebp, %xmm2 2691; SSSE3-NEXT: shrq $7, %rax 2692; SSSE3-NEXT: movd %eax, %xmm3 2693; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] 2694; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 2695; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 2696; SSSE3-NEXT: punpcklqdq 
{{.*#+}} xmm0 = xmm0[0],xmm1[0] 2697; SSSE3-NEXT: popq %rbx 2698; SSSE3-NEXT: popq %r12 2699; SSSE3-NEXT: popq %r13 2700; SSSE3-NEXT: popq %r14 2701; SSSE3-NEXT: popq %r15 2702; SSSE3-NEXT: popq %rbp 2703; SSSE3-NEXT: retq 2704; 2705; SSE41-LABEL: load_sext_16i1_to_16i8: 2706; SSE41: # %bb.0: # %entry 2707; SSE41-NEXT: movswq (%rdi), %rax 2708; SSE41-NEXT: movq %rax, %rcx 2709; SSE41-NEXT: shlq $62, %rcx 2710; SSE41-NEXT: sarq $63, %rcx 2711; SSE41-NEXT: movq %rax, %rdx 2712; SSE41-NEXT: shlq $63, %rdx 2713; SSE41-NEXT: sarq $63, %rdx 2714; SSE41-NEXT: movd %edx, %xmm0 2715; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 2716; SSE41-NEXT: movq %rax, %rcx 2717; SSE41-NEXT: shlq $61, %rcx 2718; SSE41-NEXT: sarq $63, %rcx 2719; SSE41-NEXT: pinsrb $2, %ecx, %xmm0 2720; SSE41-NEXT: movq %rax, %rcx 2721; SSE41-NEXT: shlq $60, %rcx 2722; SSE41-NEXT: sarq $63, %rcx 2723; SSE41-NEXT: pinsrb $3, %ecx, %xmm0 2724; SSE41-NEXT: movq %rax, %rcx 2725; SSE41-NEXT: shlq $59, %rcx 2726; SSE41-NEXT: sarq $63, %rcx 2727; SSE41-NEXT: pinsrb $4, %ecx, %xmm0 2728; SSE41-NEXT: movq %rax, %rcx 2729; SSE41-NEXT: shlq $58, %rcx 2730; SSE41-NEXT: sarq $63, %rcx 2731; SSE41-NEXT: pinsrb $5, %ecx, %xmm0 2732; SSE41-NEXT: movq %rax, %rcx 2733; SSE41-NEXT: shlq $57, %rcx 2734; SSE41-NEXT: sarq $63, %rcx 2735; SSE41-NEXT: pinsrb $6, %ecx, %xmm0 2736; SSE41-NEXT: movsbq %al, %rcx 2737; SSE41-NEXT: shrq $7, %rcx 2738; SSE41-NEXT: pinsrb $7, %ecx, %xmm0 2739; SSE41-NEXT: movq %rax, %rcx 2740; SSE41-NEXT: shlq $55, %rcx 2741; SSE41-NEXT: sarq $63, %rcx 2742; SSE41-NEXT: pinsrb $8, %ecx, %xmm0 2743; SSE41-NEXT: movq %rax, %rcx 2744; SSE41-NEXT: shlq $54, %rcx 2745; SSE41-NEXT: sarq $63, %rcx 2746; SSE41-NEXT: pinsrb $9, %ecx, %xmm0 2747; SSE41-NEXT: movq %rax, %rcx 2748; SSE41-NEXT: shlq $53, %rcx 2749; SSE41-NEXT: sarq $63, %rcx 2750; SSE41-NEXT: pinsrb $10, %ecx, %xmm0 2751; SSE41-NEXT: movq %rax, %rcx 2752; SSE41-NEXT: shlq $52, %rcx 2753; SSE41-NEXT: sarq $63, %rcx 2754; SSE41-NEXT: pinsrb $11, %ecx, %xmm0 
2755; SSE41-NEXT: movq %rax, %rcx 2756; SSE41-NEXT: shlq $51, %rcx 2757; SSE41-NEXT: sarq $63, %rcx 2758; SSE41-NEXT: pinsrb $12, %ecx, %xmm0 2759; SSE41-NEXT: movq %rax, %rcx 2760; SSE41-NEXT: shlq $50, %rcx 2761; SSE41-NEXT: sarq $63, %rcx 2762; SSE41-NEXT: pinsrb $13, %ecx, %xmm0 2763; SSE41-NEXT: movq %rax, %rcx 2764; SSE41-NEXT: shlq $49, %rcx 2765; SSE41-NEXT: sarq $63, %rcx 2766; SSE41-NEXT: pinsrb $14, %ecx, %xmm0 2767; SSE41-NEXT: shrq $15, %rax 2768; SSE41-NEXT: pinsrb $15, %eax, %xmm0 2769; SSE41-NEXT: retq 2770; 2771; AVX1-LABEL: load_sext_16i1_to_16i8: 2772; AVX1: # %bb.0: # %entry 2773; AVX1-NEXT: movswq (%rdi), %rax 2774; AVX1-NEXT: movq %rax, %rcx 2775; AVX1-NEXT: shlq $62, %rcx 2776; AVX1-NEXT: sarq $63, %rcx 2777; AVX1-NEXT: movq %rax, %rdx 2778; AVX1-NEXT: shlq $63, %rdx 2779; AVX1-NEXT: sarq $63, %rdx 2780; AVX1-NEXT: vmovd %edx, %xmm0 2781; AVX1-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 2782; AVX1-NEXT: movq %rax, %rcx 2783; AVX1-NEXT: shlq $61, %rcx 2784; AVX1-NEXT: sarq $63, %rcx 2785; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 2786; AVX1-NEXT: movq %rax, %rcx 2787; AVX1-NEXT: shlq $60, %rcx 2788; AVX1-NEXT: sarq $63, %rcx 2789; AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 2790; AVX1-NEXT: movq %rax, %rcx 2791; AVX1-NEXT: shlq $59, %rcx 2792; AVX1-NEXT: sarq $63, %rcx 2793; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 2794; AVX1-NEXT: movq %rax, %rcx 2795; AVX1-NEXT: shlq $58, %rcx 2796; AVX1-NEXT: sarq $63, %rcx 2797; AVX1-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 2798; AVX1-NEXT: movq %rax, %rcx 2799; AVX1-NEXT: shlq $57, %rcx 2800; AVX1-NEXT: sarq $63, %rcx 2801; AVX1-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 2802; AVX1-NEXT: movsbq %al, %rcx 2803; AVX1-NEXT: shrq $7, %rcx 2804; AVX1-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 2805; AVX1-NEXT: movq %rax, %rcx 2806; AVX1-NEXT: shlq $55, %rcx 2807; AVX1-NEXT: sarq $63, %rcx 2808; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 2809; AVX1-NEXT: movq %rax, %rcx 2810; AVX1-NEXT: shlq $54, %rcx 2811; AVX1-NEXT: sarq $63, %rcx 
2812; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 2813; AVX1-NEXT: movq %rax, %rcx 2814; AVX1-NEXT: shlq $53, %rcx 2815; AVX1-NEXT: sarq $63, %rcx 2816; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 2817; AVX1-NEXT: movq %rax, %rcx 2818; AVX1-NEXT: shlq $52, %rcx 2819; AVX1-NEXT: sarq $63, %rcx 2820; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 2821; AVX1-NEXT: movq %rax, %rcx 2822; AVX1-NEXT: shlq $51, %rcx 2823; AVX1-NEXT: sarq $63, %rcx 2824; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 2825; AVX1-NEXT: movq %rax, %rcx 2826; AVX1-NEXT: shlq $50, %rcx 2827; AVX1-NEXT: sarq $63, %rcx 2828; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 2829; AVX1-NEXT: movq %rax, %rcx 2830; AVX1-NEXT: shlq $49, %rcx 2831; AVX1-NEXT: sarq $63, %rcx 2832; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 2833; AVX1-NEXT: shrq $15, %rax 2834; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 2835; AVX1-NEXT: retq 2836; 2837; AVX2-LABEL: load_sext_16i1_to_16i8: 2838; AVX2: # %bb.0: # %entry 2839; AVX2-NEXT: movswq (%rdi), %rax 2840; AVX2-NEXT: movq %rax, %rcx 2841; AVX2-NEXT: shlq $62, %rcx 2842; AVX2-NEXT: sarq $63, %rcx 2843; AVX2-NEXT: movq %rax, %rdx 2844; AVX2-NEXT: shlq $63, %rdx 2845; AVX2-NEXT: sarq $63, %rdx 2846; AVX2-NEXT: vmovd %edx, %xmm0 2847; AVX2-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 2848; AVX2-NEXT: movq %rax, %rcx 2849; AVX2-NEXT: shlq $61, %rcx 2850; AVX2-NEXT: sarq $63, %rcx 2851; AVX2-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 2852; AVX2-NEXT: movq %rax, %rcx 2853; AVX2-NEXT: shlq $60, %rcx 2854; AVX2-NEXT: sarq $63, %rcx 2855; AVX2-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 2856; AVX2-NEXT: movq %rax, %rcx 2857; AVX2-NEXT: shlq $59, %rcx 2858; AVX2-NEXT: sarq $63, %rcx 2859; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 2860; AVX2-NEXT: movq %rax, %rcx 2861; AVX2-NEXT: shlq $58, %rcx 2862; AVX2-NEXT: sarq $63, %rcx 2863; AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 2864; AVX2-NEXT: movq %rax, %rcx 2865; AVX2-NEXT: shlq $57, %rcx 2866; AVX2-NEXT: sarq $63, %rcx 2867; AVX2-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 
2868; AVX2-NEXT: movsbq %al, %rcx 2869; AVX2-NEXT: shrq $7, %rcx 2870; AVX2-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 2871; AVX2-NEXT: movq %rax, %rcx 2872; AVX2-NEXT: shlq $55, %rcx 2873; AVX2-NEXT: sarq $63, %rcx 2874; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 2875; AVX2-NEXT: movq %rax, %rcx 2876; AVX2-NEXT: shlq $54, %rcx 2877; AVX2-NEXT: sarq $63, %rcx 2878; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 2879; AVX2-NEXT: movq %rax, %rcx 2880; AVX2-NEXT: shlq $53, %rcx 2881; AVX2-NEXT: sarq $63, %rcx 2882; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 2883; AVX2-NEXT: movq %rax, %rcx 2884; AVX2-NEXT: shlq $52, %rcx 2885; AVX2-NEXT: sarq $63, %rcx 2886; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 2887; AVX2-NEXT: movq %rax, %rcx 2888; AVX2-NEXT: shlq $51, %rcx 2889; AVX2-NEXT: sarq $63, %rcx 2890; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 2891; AVX2-NEXT: movq %rax, %rcx 2892; AVX2-NEXT: shlq $50, %rcx 2893; AVX2-NEXT: sarq $63, %rcx 2894; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 2895; AVX2-NEXT: movq %rax, %rcx 2896; AVX2-NEXT: shlq $49, %rcx 2897; AVX2-NEXT: sarq $63, %rcx 2898; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 2899; AVX2-NEXT: shrq $15, %rax 2900; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 2901; AVX2-NEXT: retq 2902; 2903; AVX512F-LABEL: load_sext_16i1_to_16i8: 2904; AVX512F: # %bb.0: # %entry 2905; AVX512F-NEXT: kmovw (%rdi), %k1 2906; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2907; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2908; AVX512F-NEXT: vzeroupper 2909; AVX512F-NEXT: retq 2910; 2911; AVX512BW-LABEL: load_sext_16i1_to_16i8: 2912; AVX512BW: # %bb.0: # %entry 2913; AVX512BW-NEXT: kmovw (%rdi), %k0 2914; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2915; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2916; AVX512BW-NEXT: vzeroupper 2917; AVX512BW-NEXT: retq 2918; 2919; X32-SSE41-LABEL: load_sext_16i1_to_16i8: 2920; X32-SSE41: # %bb.0: # %entry 2921; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2922; X32-SSE41-NEXT: movswl (%eax), %eax 2923; 
X32-SSE41-NEXT: movl %eax, %ecx 2924; X32-SSE41-NEXT: shll $30, %ecx 2925; X32-SSE41-NEXT: sarl $31, %ecx 2926; X32-SSE41-NEXT: movl %eax, %edx 2927; X32-SSE41-NEXT: shll $31, %edx 2928; X32-SSE41-NEXT: sarl $31, %edx 2929; X32-SSE41-NEXT: movd %edx, %xmm0 2930; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm0 2931; X32-SSE41-NEXT: movl %eax, %ecx 2932; X32-SSE41-NEXT: shll $29, %ecx 2933; X32-SSE41-NEXT: sarl $31, %ecx 2934; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm0 2935; X32-SSE41-NEXT: movl %eax, %ecx 2936; X32-SSE41-NEXT: shll $28, %ecx 2937; X32-SSE41-NEXT: sarl $31, %ecx 2938; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm0 2939; X32-SSE41-NEXT: movl %eax, %ecx 2940; X32-SSE41-NEXT: shll $27, %ecx 2941; X32-SSE41-NEXT: sarl $31, %ecx 2942; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm0 2943; X32-SSE41-NEXT: movl %eax, %ecx 2944; X32-SSE41-NEXT: shll $26, %ecx 2945; X32-SSE41-NEXT: sarl $31, %ecx 2946; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm0 2947; X32-SSE41-NEXT: movl %eax, %ecx 2948; X32-SSE41-NEXT: shll $25, %ecx 2949; X32-SSE41-NEXT: sarl $31, %ecx 2950; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm0 2951; X32-SSE41-NEXT: movsbl %al, %ecx 2952; X32-SSE41-NEXT: shrl $7, %ecx 2953; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm0 2954; X32-SSE41-NEXT: movl %eax, %ecx 2955; X32-SSE41-NEXT: shll $23, %ecx 2956; X32-SSE41-NEXT: sarl $31, %ecx 2957; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm0 2958; X32-SSE41-NEXT: movl %eax, %ecx 2959; X32-SSE41-NEXT: shll $22, %ecx 2960; X32-SSE41-NEXT: sarl $31, %ecx 2961; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm0 2962; X32-SSE41-NEXT: movl %eax, %ecx 2963; X32-SSE41-NEXT: shll $21, %ecx 2964; X32-SSE41-NEXT: sarl $31, %ecx 2965; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm0 2966; X32-SSE41-NEXT: movl %eax, %ecx 2967; X32-SSE41-NEXT: shll $20, %ecx 2968; X32-SSE41-NEXT: sarl $31, %ecx 2969; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm0 2970; X32-SSE41-NEXT: movl %eax, %ecx 2971; X32-SSE41-NEXT: shll $19, %ecx 2972; X32-SSE41-NEXT: sarl $31, %ecx 2973; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm0 
2974; X32-SSE41-NEXT: movl %eax, %ecx 2975; X32-SSE41-NEXT: shll $18, %ecx 2976; X32-SSE41-NEXT: sarl $31, %ecx 2977; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm0 2978; X32-SSE41-NEXT: movl %eax, %ecx 2979; X32-SSE41-NEXT: shll $17, %ecx 2980; X32-SSE41-NEXT: sarl $31, %ecx 2981; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm0 2982; X32-SSE41-NEXT: shrl $15, %eax 2983; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm0 2984; X32-SSE41-NEXT: retl 2985entry: 2986 %X = load <16 x i1>, <16 x i1>* %ptr 2987 %Y = sext <16 x i1> %X to <16 x i8> 2988 ret <16 x i8> %Y 2989} 2990 2991define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { 2992; SSE2-LABEL: load_sext_16i1_to_16i16: 2993; SSE2: # %bb.0: # %entry 2994; SSE2-NEXT: movzwl (%rdi), %eax 2995; SSE2-NEXT: movl %eax, %ecx 2996; SSE2-NEXT: shrl $15, %ecx 2997; SSE2-NEXT: movd %ecx, %xmm0 2998; SSE2-NEXT: movl %eax, %ecx 2999; SSE2-NEXT: shrl $14, %ecx 3000; SSE2-NEXT: andl $1, %ecx 3001; SSE2-NEXT: movd %ecx, %xmm1 3002; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3003; SSE2-NEXT: movl %eax, %ecx 3004; SSE2-NEXT: shrl $13, %ecx 3005; SSE2-NEXT: andl $1, %ecx 3006; SSE2-NEXT: movd %ecx, %xmm0 3007; SSE2-NEXT: movl %eax, %ecx 3008; SSE2-NEXT: shrl $12, %ecx 3009; SSE2-NEXT: andl $1, %ecx 3010; SSE2-NEXT: movd %ecx, %xmm2 3011; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3012; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3013; SSE2-NEXT: movl %eax, %ecx 3014; SSE2-NEXT: shrl $11, %ecx 3015; SSE2-NEXT: andl $1, %ecx 3016; SSE2-NEXT: movd %ecx, %xmm0 3017; SSE2-NEXT: movl %eax, %ecx 3018; SSE2-NEXT: shrl $10, %ecx 3019; SSE2-NEXT: andl $1, %ecx 3020; SSE2-NEXT: movd %ecx, %xmm1 3021; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3022; SSE2-NEXT: movl %eax, %ecx 3023; SSE2-NEXT: shrl $9, %ecx 3024; SSE2-NEXT: andl $1, %ecx 3025; SSE2-NEXT: movd %ecx, %xmm3 3026; SSE2-NEXT: movl %eax, %ecx 3027; SSE2-NEXT: shrl $8, %ecx 3028; SSE2-NEXT: andl $1, %ecx 3029; SSE2-NEXT: movd %ecx, %xmm0 3030; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 3031; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3032; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3033; SSE2-NEXT: movl %eax, %ecx 3034; SSE2-NEXT: shrl $7, %ecx 3035; SSE2-NEXT: andl $1, %ecx 3036; SSE2-NEXT: movd %ecx, %xmm1 3037; SSE2-NEXT: movl %eax, %ecx 3038; SSE2-NEXT: shrl $6, %ecx 3039; SSE2-NEXT: andl $1, %ecx 3040; SSE2-NEXT: movd %ecx, %xmm2 3041; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3042; SSE2-NEXT: movl %eax, %ecx 3043; SSE2-NEXT: shrl $5, %ecx 3044; SSE2-NEXT: andl $1, %ecx 3045; SSE2-NEXT: movd %ecx, %xmm1 3046; SSE2-NEXT: movl %eax, %ecx 3047; SSE2-NEXT: shrl $4, %ecx 3048; SSE2-NEXT: andl $1, %ecx 3049; SSE2-NEXT: movd %ecx, %xmm3 3050; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 3051; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3052; SSE2-NEXT: movl %eax, %ecx 3053; SSE2-NEXT: shrl $3, %ecx 3054; SSE2-NEXT: andl $1, %ecx 3055; SSE2-NEXT: movd %ecx, %xmm1 3056; SSE2-NEXT: movl %eax, %ecx 3057; SSE2-NEXT: shrl $2, %ecx 3058; SSE2-NEXT: andl $1, %ecx 3059; SSE2-NEXT: movd %ecx, %xmm2 3060; SSE2-NEXT: punpcklbw {{.*#+}} 
xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3061; SSE2-NEXT: movl %eax, %ecx 3062; SSE2-NEXT: andl $1, %ecx 3063; SSE2-NEXT: movd %ecx, %xmm1 3064; SSE2-NEXT: shrl %eax 3065; SSE2-NEXT: andl $1, %eax 3066; SSE2-NEXT: movd %eax, %xmm4 3067; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3068; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3069; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 3070; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 3071; SSE2-NEXT: movdqa %xmm1, %xmm0 3072; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3073; SSE2-NEXT: psllw $15, %xmm0 3074; SSE2-NEXT: psraw $15, %xmm0 3075; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 3076; SSE2-NEXT: psllw $15, %xmm1 3077; SSE2-NEXT: psraw $15, %xmm1 3078; SSE2-NEXT: retq 3079; 3080; SSSE3-LABEL: load_sext_16i1_to_16i16: 3081; SSSE3: # %bb.0: # %entry 3082; SSSE3-NEXT: movzwl (%rdi), %eax 3083; SSSE3-NEXT: movl %eax, %ecx 3084; SSSE3-NEXT: shrl $15, %ecx 3085; SSSE3-NEXT: movd %ecx, %xmm0 3086; SSSE3-NEXT: movl %eax, %ecx 3087; SSSE3-NEXT: shrl $14, %ecx 3088; SSSE3-NEXT: andl $1, %ecx 3089; SSSE3-NEXT: movd %ecx, %xmm1 3090; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3091; SSSE3-NEXT: movl %eax, %ecx 3092; SSSE3-NEXT: shrl $13, %ecx 3093; SSSE3-NEXT: andl $1, %ecx 3094; SSSE3-NEXT: movd %ecx, %xmm0 3095; SSSE3-NEXT: movl %eax, %ecx 3096; SSSE3-NEXT: shrl $12, %ecx 3097; SSSE3-NEXT: andl $1, %ecx 3098; SSSE3-NEXT: movd %ecx, %xmm2 3099; 
SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3100; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3101; SSSE3-NEXT: movl %eax, %ecx 3102; SSSE3-NEXT: shrl $11, %ecx 3103; SSSE3-NEXT: andl $1, %ecx 3104; SSSE3-NEXT: movd %ecx, %xmm0 3105; SSSE3-NEXT: movl %eax, %ecx 3106; SSSE3-NEXT: shrl $10, %ecx 3107; SSSE3-NEXT: andl $1, %ecx 3108; SSSE3-NEXT: movd %ecx, %xmm1 3109; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3110; SSSE3-NEXT: movl %eax, %ecx 3111; SSSE3-NEXT: shrl $9, %ecx 3112; SSSE3-NEXT: andl $1, %ecx 3113; SSSE3-NEXT: movd %ecx, %xmm3 3114; SSSE3-NEXT: movl %eax, %ecx 3115; SSSE3-NEXT: shrl $8, %ecx 3116; SSSE3-NEXT: andl $1, %ecx 3117; SSSE3-NEXT: movd %ecx, %xmm0 3118; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 3119; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3120; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3121; SSSE3-NEXT: movl %eax, %ecx 3122; SSSE3-NEXT: shrl $7, %ecx 3123; SSSE3-NEXT: andl $1, %ecx 3124; SSSE3-NEXT: movd %ecx, %xmm1 3125; SSSE3-NEXT: movl %eax, %ecx 3126; SSSE3-NEXT: shrl $6, %ecx 3127; SSSE3-NEXT: andl $1, %ecx 3128; SSSE3-NEXT: movd %ecx, %xmm2 3129; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3130; SSSE3-NEXT: movl %eax, %ecx 3131; SSSE3-NEXT: shrl $5, %ecx 3132; SSSE3-NEXT: andl $1, %ecx 3133; SSSE3-NEXT: movd %ecx, %xmm1 3134; SSSE3-NEXT: movl %eax, %ecx 3135; SSSE3-NEXT: shrl $4, %ecx 3136; SSSE3-NEXT: andl $1, 
%ecx 3137; SSSE3-NEXT: movd %ecx, %xmm3 3138; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 3139; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3140; SSSE3-NEXT: movl %eax, %ecx 3141; SSSE3-NEXT: shrl $3, %ecx 3142; SSSE3-NEXT: andl $1, %ecx 3143; SSSE3-NEXT: movd %ecx, %xmm1 3144; SSSE3-NEXT: movl %eax, %ecx 3145; SSSE3-NEXT: shrl $2, %ecx 3146; SSSE3-NEXT: andl $1, %ecx 3147; SSSE3-NEXT: movd %ecx, %xmm2 3148; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3149; SSSE3-NEXT: movl %eax, %ecx 3150; SSSE3-NEXT: andl $1, %ecx 3151; SSSE3-NEXT: movd %ecx, %xmm1 3152; SSSE3-NEXT: shrl %eax 3153; SSSE3-NEXT: andl $1, %eax 3154; SSSE3-NEXT: movd %eax, %xmm4 3155; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3156; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3157; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 3158; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 3159; SSSE3-NEXT: movdqa %xmm1, %xmm0 3160; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3161; SSSE3-NEXT: psllw $15, %xmm0 3162; SSSE3-NEXT: psraw $15, %xmm0 3163; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 3164; SSSE3-NEXT: psllw $15, %xmm1 3165; SSSE3-NEXT: psraw $15, %xmm1 3166; SSSE3-NEXT: retq 3167; 3168; SSE41-LABEL: load_sext_16i1_to_16i16: 3169; SSE41: # %bb.0: # %entry 3170; SSE41-NEXT: movzwl (%rdi), %eax 3171; SSE41-NEXT: movl %eax, %ecx 3172; 
SSE41-NEXT: shrl %ecx 3173; SSE41-NEXT: andl $1, %ecx 3174; SSE41-NEXT: movl %eax, %edx 3175; SSE41-NEXT: andl $1, %edx 3176; SSE41-NEXT: movd %edx, %xmm1 3177; SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3178; SSE41-NEXT: movl %eax, %ecx 3179; SSE41-NEXT: shrl $2, %ecx 3180; SSE41-NEXT: andl $1, %ecx 3181; SSE41-NEXT: pinsrb $2, %ecx, %xmm1 3182; SSE41-NEXT: movl %eax, %ecx 3183; SSE41-NEXT: shrl $3, %ecx 3184; SSE41-NEXT: andl $1, %ecx 3185; SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3186; SSE41-NEXT: movl %eax, %ecx 3187; SSE41-NEXT: shrl $4, %ecx 3188; SSE41-NEXT: andl $1, %ecx 3189; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3190; SSE41-NEXT: movl %eax, %ecx 3191; SSE41-NEXT: shrl $5, %ecx 3192; SSE41-NEXT: andl $1, %ecx 3193; SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3194; SSE41-NEXT: movl %eax, %ecx 3195; SSE41-NEXT: shrl $6, %ecx 3196; SSE41-NEXT: andl $1, %ecx 3197; SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3198; SSE41-NEXT: movl %eax, %ecx 3199; SSE41-NEXT: shrl $7, %ecx 3200; SSE41-NEXT: andl $1, %ecx 3201; SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3202; SSE41-NEXT: movl %eax, %ecx 3203; SSE41-NEXT: shrl $8, %ecx 3204; SSE41-NEXT: andl $1, %ecx 3205; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3206; SSE41-NEXT: movl %eax, %ecx 3207; SSE41-NEXT: shrl $9, %ecx 3208; SSE41-NEXT: andl $1, %ecx 3209; SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3210; SSE41-NEXT: movl %eax, %ecx 3211; SSE41-NEXT: shrl $10, %ecx 3212; SSE41-NEXT: andl $1, %ecx 3213; SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3214; SSE41-NEXT: movl %eax, %ecx 3215; SSE41-NEXT: shrl $11, %ecx 3216; SSE41-NEXT: andl $1, %ecx 3217; SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3218; SSE41-NEXT: movl %eax, %ecx 3219; SSE41-NEXT: shrl $12, %ecx 3220; SSE41-NEXT: andl $1, %ecx 3221; SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3222; SSE41-NEXT: movl %eax, %ecx 3223; SSE41-NEXT: shrl $13, %ecx 3224; SSE41-NEXT: andl $1, %ecx 3225; SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3226; SSE41-NEXT: movl %eax, %ecx 3227; SSE41-NEXT: shrl $14, %ecx 3228; SSE41-NEXT: andl $1, %ecx 3229; SSE41-NEXT: pinsrb 
$14, %ecx, %xmm1 3230; SSE41-NEXT: shrl $15, %eax 3231; SSE41-NEXT: pinsrb $15, %eax, %xmm1 3232; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 3233; SSE41-NEXT: psllw $15, %xmm0 3234; SSE41-NEXT: psraw $15, %xmm0 3235; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 3236; SSE41-NEXT: psllw $15, %xmm1 3237; SSE41-NEXT: psraw $15, %xmm1 3238; SSE41-NEXT: retq 3239; 3240; AVX1-LABEL: load_sext_16i1_to_16i16: 3241; AVX1: # %bb.0: # %entry 3242; AVX1-NEXT: pushq %rbp 3243; AVX1-NEXT: .cfi_def_cfa_offset 16 3244; AVX1-NEXT: pushq %r15 3245; AVX1-NEXT: .cfi_def_cfa_offset 24 3246; AVX1-NEXT: pushq %r14 3247; AVX1-NEXT: .cfi_def_cfa_offset 32 3248; AVX1-NEXT: pushq %r13 3249; AVX1-NEXT: .cfi_def_cfa_offset 40 3250; AVX1-NEXT: pushq %r12 3251; AVX1-NEXT: .cfi_def_cfa_offset 48 3252; AVX1-NEXT: pushq %rbx 3253; AVX1-NEXT: .cfi_def_cfa_offset 56 3254; AVX1-NEXT: .cfi_offset %rbx, -56 3255; AVX1-NEXT: .cfi_offset %r12, -48 3256; AVX1-NEXT: .cfi_offset %r13, -40 3257; AVX1-NEXT: .cfi_offset %r14, -32 3258; AVX1-NEXT: .cfi_offset %r15, -24 3259; AVX1-NEXT: .cfi_offset %rbp, -16 3260; AVX1-NEXT: movswq (%rdi), %rax 3261; AVX1-NEXT: movq %rax, %rcx 3262; AVX1-NEXT: shlq $55, %rcx 3263; AVX1-NEXT: sarq $63, %rcx 3264; AVX1-NEXT: vmovd %ecx, %xmm0 3265; AVX1-NEXT: movq %rax, %r8 3266; AVX1-NEXT: movq %rax, %r10 3267; AVX1-NEXT: movq %rax, %r11 3268; AVX1-NEXT: movq %rax, %r14 3269; AVX1-NEXT: movq %rax, %r15 3270; AVX1-NEXT: movq %rax, %r9 3271; AVX1-NEXT: movq %rax, %r12 3272; AVX1-NEXT: movq %rax, %r13 3273; AVX1-NEXT: movq %rax, %rbx 3274; AVX1-NEXT: movq %rax, %rdi 3275; AVX1-NEXT: movq %rax, %rcx 3276; AVX1-NEXT: movq %rax, %rdx 3277; AVX1-NEXT: movq %rax, %rsi 3278; AVX1-NEXT: movsbq %al, %rbp 3279; AVX1-NEXT: shlq $54, %rax 3280; AVX1-NEXT: sarq $63, %rax 3281; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 3282; AVX1-NEXT: shlq $53, %r8 3283; 
AVX1-NEXT: sarq $63, %r8 3284; AVX1-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0 3285; AVX1-NEXT: shlq $52, %r10 3286; AVX1-NEXT: sarq $63, %r10 3287; AVX1-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0 3288; AVX1-NEXT: shlq $51, %r11 3289; AVX1-NEXT: sarq $63, %r11 3290; AVX1-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0 3291; AVX1-NEXT: shlq $50, %r14 3292; AVX1-NEXT: sarq $63, %r14 3293; AVX1-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 3294; AVX1-NEXT: shlq $49, %r15 3295; AVX1-NEXT: sarq $63, %r15 3296; AVX1-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0 3297; AVX1-NEXT: shrq $15, %r9 3298; AVX1-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0 3299; AVX1-NEXT: shlq $63, %r13 3300; AVX1-NEXT: sarq $63, %r13 3301; AVX1-NEXT: vmovd %r13d, %xmm1 3302; AVX1-NEXT: shlq $62, %r12 3303; AVX1-NEXT: sarq $63, %r12 3304; AVX1-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1 3305; AVX1-NEXT: shlq $61, %rbx 3306; AVX1-NEXT: sarq $63, %rbx 3307; AVX1-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1 3308; AVX1-NEXT: shlq $60, %rdi 3309; AVX1-NEXT: sarq $63, %rdi 3310; AVX1-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1 3311; AVX1-NEXT: shlq $59, %rcx 3312; AVX1-NEXT: sarq $63, %rcx 3313; AVX1-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 3314; AVX1-NEXT: shlq $58, %rdx 3315; AVX1-NEXT: sarq $63, %rdx 3316; AVX1-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1 3317; AVX1-NEXT: shlq $57, %rsi 3318; AVX1-NEXT: sarq $63, %rsi 3319; AVX1-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1 3320; AVX1-NEXT: shrq $7, %rbp 3321; AVX1-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 3322; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3323; AVX1-NEXT: popq %rbx 3324; AVX1-NEXT: .cfi_def_cfa_offset 48 3325; AVX1-NEXT: popq %r12 3326; AVX1-NEXT: .cfi_def_cfa_offset 40 3327; AVX1-NEXT: popq %r13 3328; AVX1-NEXT: .cfi_def_cfa_offset 32 3329; AVX1-NEXT: popq %r14 3330; AVX1-NEXT: .cfi_def_cfa_offset 24 3331; AVX1-NEXT: popq %r15 3332; AVX1-NEXT: .cfi_def_cfa_offset 16 3333; AVX1-NEXT: popq %rbp 3334; AVX1-NEXT: .cfi_def_cfa_offset 8 3335; AVX1-NEXT: retq 3336; 3337; AVX2-LABEL: load_sext_16i1_to_16i16: 3338; AVX2: # 
%bb.0: # %entry 3339; AVX2-NEXT: pushq %rbp 3340; AVX2-NEXT: .cfi_def_cfa_offset 16 3341; AVX2-NEXT: pushq %r15 3342; AVX2-NEXT: .cfi_def_cfa_offset 24 3343; AVX2-NEXT: pushq %r14 3344; AVX2-NEXT: .cfi_def_cfa_offset 32 3345; AVX2-NEXT: pushq %r13 3346; AVX2-NEXT: .cfi_def_cfa_offset 40 3347; AVX2-NEXT: pushq %r12 3348; AVX2-NEXT: .cfi_def_cfa_offset 48 3349; AVX2-NEXT: pushq %rbx 3350; AVX2-NEXT: .cfi_def_cfa_offset 56 3351; AVX2-NEXT: .cfi_offset %rbx, -56 3352; AVX2-NEXT: .cfi_offset %r12, -48 3353; AVX2-NEXT: .cfi_offset %r13, -40 3354; AVX2-NEXT: .cfi_offset %r14, -32 3355; AVX2-NEXT: .cfi_offset %r15, -24 3356; AVX2-NEXT: .cfi_offset %rbp, -16 3357; AVX2-NEXT: movswq (%rdi), %rax 3358; AVX2-NEXT: movq %rax, %rcx 3359; AVX2-NEXT: shlq $55, %rcx 3360; AVX2-NEXT: sarq $63, %rcx 3361; AVX2-NEXT: vmovd %ecx, %xmm0 3362; AVX2-NEXT: movq %rax, %r8 3363; AVX2-NEXT: movq %rax, %r10 3364; AVX2-NEXT: movq %rax, %r11 3365; AVX2-NEXT: movq %rax, %r14 3366; AVX2-NEXT: movq %rax, %r15 3367; AVX2-NEXT: movq %rax, %r9 3368; AVX2-NEXT: movq %rax, %r12 3369; AVX2-NEXT: movq %rax, %r13 3370; AVX2-NEXT: movq %rax, %rbx 3371; AVX2-NEXT: movq %rax, %rdi 3372; AVX2-NEXT: movq %rax, %rcx 3373; AVX2-NEXT: movq %rax, %rdx 3374; AVX2-NEXT: movq %rax, %rsi 3375; AVX2-NEXT: movsbq %al, %rbp 3376; AVX2-NEXT: shlq $54, %rax 3377; AVX2-NEXT: sarq $63, %rax 3378; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 3379; AVX2-NEXT: shlq $53, %r8 3380; AVX2-NEXT: sarq $63, %r8 3381; AVX2-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0 3382; AVX2-NEXT: shlq $52, %r10 3383; AVX2-NEXT: sarq $63, %r10 3384; AVX2-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0 3385; AVX2-NEXT: shlq $51, %r11 3386; AVX2-NEXT: sarq $63, %r11 3387; AVX2-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0 3388; AVX2-NEXT: shlq $50, %r14 3389; AVX2-NEXT: sarq $63, %r14 3390; AVX2-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 3391; AVX2-NEXT: shlq $49, %r15 3392; AVX2-NEXT: sarq $63, %r15 3393; AVX2-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0 3394; AVX2-NEXT: shrq $15, %r9 3395; 
AVX2-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0 3396; AVX2-NEXT: shlq $63, %r13 3397; AVX2-NEXT: sarq $63, %r13 3398; AVX2-NEXT: vmovd %r13d, %xmm1 3399; AVX2-NEXT: shlq $62, %r12 3400; AVX2-NEXT: sarq $63, %r12 3401; AVX2-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1 3402; AVX2-NEXT: shlq $61, %rbx 3403; AVX2-NEXT: sarq $63, %rbx 3404; AVX2-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1 3405; AVX2-NEXT: shlq $60, %rdi 3406; AVX2-NEXT: sarq $63, %rdi 3407; AVX2-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1 3408; AVX2-NEXT: shlq $59, %rcx 3409; AVX2-NEXT: sarq $63, %rcx 3410; AVX2-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 3411; AVX2-NEXT: shlq $58, %rdx 3412; AVX2-NEXT: sarq $63, %rdx 3413; AVX2-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1 3414; AVX2-NEXT: shlq $57, %rsi 3415; AVX2-NEXT: sarq $63, %rsi 3416; AVX2-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1 3417; AVX2-NEXT: shrq $7, %rbp 3418; AVX2-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 3419; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 3420; AVX2-NEXT: popq %rbx 3421; AVX2-NEXT: .cfi_def_cfa_offset 48 3422; AVX2-NEXT: popq %r12 3423; AVX2-NEXT: .cfi_def_cfa_offset 40 3424; AVX2-NEXT: popq %r13 3425; AVX2-NEXT: .cfi_def_cfa_offset 32 3426; AVX2-NEXT: popq %r14 3427; AVX2-NEXT: .cfi_def_cfa_offset 24 3428; AVX2-NEXT: popq %r15 3429; AVX2-NEXT: .cfi_def_cfa_offset 16 3430; AVX2-NEXT: popq %rbp 3431; AVX2-NEXT: .cfi_def_cfa_offset 8 3432; AVX2-NEXT: retq 3433; 3434; AVX512F-LABEL: load_sext_16i1_to_16i16: 3435; AVX512F: # %bb.0: # %entry 3436; AVX512F-NEXT: kmovw (%rdi), %k1 3437; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 3438; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 3439; AVX512F-NEXT: retq 3440; 3441; AVX512BW-LABEL: load_sext_16i1_to_16i16: 3442; AVX512BW: # %bb.0: # %entry 3443; AVX512BW-NEXT: kmovw (%rdi), %k0 3444; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 3445; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3446; AVX512BW-NEXT: retq 3447; 3448; X32-SSE41-LABEL: load_sext_16i1_to_16i16: 3449; X32-SSE41: # %bb.0: # %entry 3450; X32-SSE41-NEXT: movl 
{{[0-9]+}}(%esp), %eax 3451; X32-SSE41-NEXT: movzwl (%eax), %eax 3452; X32-SSE41-NEXT: movl %eax, %ecx 3453; X32-SSE41-NEXT: shrl %ecx 3454; X32-SSE41-NEXT: andl $1, %ecx 3455; X32-SSE41-NEXT: movl %eax, %edx 3456; X32-SSE41-NEXT: andl $1, %edx 3457; X32-SSE41-NEXT: movd %edx, %xmm1 3458; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3459; X32-SSE41-NEXT: movl %eax, %ecx 3460; X32-SSE41-NEXT: shrl $2, %ecx 3461; X32-SSE41-NEXT: andl $1, %ecx 3462; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1 3463; X32-SSE41-NEXT: movl %eax, %ecx 3464; X32-SSE41-NEXT: shrl $3, %ecx 3465; X32-SSE41-NEXT: andl $1, %ecx 3466; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3467; X32-SSE41-NEXT: movl %eax, %ecx 3468; X32-SSE41-NEXT: shrl $4, %ecx 3469; X32-SSE41-NEXT: andl $1, %ecx 3470; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3471; X32-SSE41-NEXT: movl %eax, %ecx 3472; X32-SSE41-NEXT: shrl $5, %ecx 3473; X32-SSE41-NEXT: andl $1, %ecx 3474; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3475; X32-SSE41-NEXT: movl %eax, %ecx 3476; X32-SSE41-NEXT: shrl $6, %ecx 3477; X32-SSE41-NEXT: andl $1, %ecx 3478; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3479; X32-SSE41-NEXT: movl %eax, %ecx 3480; X32-SSE41-NEXT: shrl $7, %ecx 3481; X32-SSE41-NEXT: andl $1, %ecx 3482; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3483; X32-SSE41-NEXT: movl %eax, %ecx 3484; X32-SSE41-NEXT: shrl $8, %ecx 3485; X32-SSE41-NEXT: andl $1, %ecx 3486; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3487; X32-SSE41-NEXT: movl %eax, %ecx 3488; X32-SSE41-NEXT: shrl $9, %ecx 3489; X32-SSE41-NEXT: andl $1, %ecx 3490; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3491; X32-SSE41-NEXT: movl %eax, %ecx 3492; X32-SSE41-NEXT: shrl $10, %ecx 3493; X32-SSE41-NEXT: andl $1, %ecx 3494; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3495; X32-SSE41-NEXT: movl %eax, %ecx 3496; X32-SSE41-NEXT: shrl $11, %ecx 3497; X32-SSE41-NEXT: andl $1, %ecx 3498; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3499; X32-SSE41-NEXT: movl %eax, %ecx 3500; X32-SSE41-NEXT: shrl $12, %ecx 3501; X32-SSE41-NEXT: andl $1, %ecx 3502; 
X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3503; X32-SSE41-NEXT: movl %eax, %ecx 3504; X32-SSE41-NEXT: shrl $13, %ecx 3505; X32-SSE41-NEXT: andl $1, %ecx 3506; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3507; X32-SSE41-NEXT: movl %eax, %ecx 3508; X32-SSE41-NEXT: shrl $14, %ecx 3509; X32-SSE41-NEXT: andl $1, %ecx 3510; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3511; X32-SSE41-NEXT: shrl $15, %eax 3512; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm1 3513; X32-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 3514; X32-SSE41-NEXT: psllw $15, %xmm0 3515; X32-SSE41-NEXT: psraw $15, %xmm0 3516; X32-SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 3517; X32-SSE41-NEXT: psllw $15, %xmm1 3518; X32-SSE41-NEXT: psraw $15, %xmm1 3519; X32-SSE41-NEXT: retl 3520entry: 3521 %X = load <16 x i1>, <16 x i1>* %ptr 3522 %Y = sext <16 x i1> %X to <16 x i16> 3523 ret <16 x i16> %Y 3524} 3525 3526define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { 3527; SSE2-LABEL: load_sext_32i1_to_32i8: 3528; SSE2: # %bb.0: # %entry 3529; SSE2-NEXT: pushq %rbp 3530; SSE2-NEXT: pushq %r15 3531; SSE2-NEXT: pushq %r14 3532; SSE2-NEXT: pushq %r13 3533; SSE2-NEXT: pushq %r12 3534; SSE2-NEXT: pushq %rbx 3535; SSE2-NEXT: movswq (%rdi), %rax 3536; SSE2-NEXT: movq %rax, %r10 3537; SSE2-NEXT: movq %rax, %r8 3538; SSE2-NEXT: movq %rax, %r9 3539; SSE2-NEXT: movq %rax, %r11 3540; SSE2-NEXT: movq %rax, %r14 3541; SSE2-NEXT: movq %rax, %r15 3542; SSE2-NEXT: movq %rax, %r12 3543; SSE2-NEXT: movq %rax, %r13 3544; SSE2-NEXT: movq %rax, %rdx 3545; SSE2-NEXT: movq %rax, %rsi 3546; SSE2-NEXT: movq %rax, %rcx 3547; SSE2-NEXT: movq %rax, %rbp 3548; SSE2-NEXT: movq %rax, %rbx 3549; SSE2-NEXT: shrq $15, %rbx 3550; SSE2-NEXT: movd %ebx, %xmm0 3551; SSE2-NEXT: movq %rax, %rbx 3552; SSE2-NEXT: shlq $49, %r10 3553; SSE2-NEXT: sarq $63, %r10 3554; SSE2-NEXT: movd %r10d, %xmm15 3555; SSE2-NEXT: 
movq %rax, %r10 3556; SSE2-NEXT: movsbq %al, %rax 3557; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] 3558; SSE2-NEXT: shlq $50, %r8 3559; SSE2-NEXT: sarq $63, %r8 3560; SSE2-NEXT: movd %r8d, %xmm8 3561; SSE2-NEXT: shlq $51, %r9 3562; SSE2-NEXT: sarq $63, %r9 3563; SSE2-NEXT: movd %r9d, %xmm3 3564; SSE2-NEXT: shlq $52, %r11 3565; SSE2-NEXT: sarq $63, %r11 3566; SSE2-NEXT: movd %r11d, %xmm9 3567; SSE2-NEXT: shlq $53, %r14 3568; SSE2-NEXT: sarq $63, %r14 3569; SSE2-NEXT: movd %r14d, %xmm6 3570; SSE2-NEXT: shlq $54, %r15 3571; SSE2-NEXT: sarq $63, %r15 3572; SSE2-NEXT: movd %r15d, %xmm10 3573; SSE2-NEXT: shlq $55, %r12 3574; SSE2-NEXT: sarq $63, %r12 3575; SSE2-NEXT: movd %r12d, %xmm2 3576; SSE2-NEXT: shlq $60, %r13 3577; SSE2-NEXT: sarq $63, %r13 3578; SSE2-NEXT: movd %r13d, %xmm11 3579; SSE2-NEXT: shlq $61, %rdx 3580; SSE2-NEXT: sarq $63, %rdx 3581; SSE2-NEXT: movd %edx, %xmm5 3582; SSE2-NEXT: shlq $62, %rsi 3583; SSE2-NEXT: sarq $63, %rsi 3584; SSE2-NEXT: movd %esi, %xmm12 3585; SSE2-NEXT: shlq $63, %rcx 3586; SSE2-NEXT: sarq $63, %rcx 3587; SSE2-NEXT: movd %ecx, %xmm0 3588; SSE2-NEXT: shlq $58, %rbp 3589; SSE2-NEXT: sarq $63, %rbp 3590; SSE2-NEXT: movd %ebp, %xmm13 3591; SSE2-NEXT: shlq $59, %rbx 3592; SSE2-NEXT: sarq $63, %rbx 3593; SSE2-NEXT: movd %ebx, %xmm7 3594; SSE2-NEXT: shlq $57, %r10 3595; SSE2-NEXT: sarq $63, %r10 3596; SSE2-NEXT: movd %r10d, %xmm4 3597; SSE2-NEXT: shrq $7, %rax 3598; SSE2-NEXT: movd %eax, %xmm14 3599; SSE2-NEXT: movswq 2(%rdi), %rsi 3600; SSE2-NEXT: movq %rsi, %r8 3601; SSE2-NEXT: movq %rsi, %r9 3602; SSE2-NEXT: movq %rsi, %r10 3603; SSE2-NEXT: movq %rsi, %r11 3604; SSE2-NEXT: movq %rsi, %r14 3605; SSE2-NEXT: movq %rsi, %r15 3606; SSE2-NEXT: movq %rsi, %r12 3607; SSE2-NEXT: movq %rsi, %r13 3608; SSE2-NEXT: movq %rsi, %rbx 3609; SSE2-NEXT: movq %rsi, %rax 3610; SSE2-NEXT: movq %rsi, %rcx 3611; SSE2-NEXT: movq 
%rsi, %rdx 3612; SSE2-NEXT: movq %rsi, %rdi 3613; SSE2-NEXT: movq %rsi, %rbp 3614; SSE2-NEXT: shrq $15, %rbp 3615; SSE2-NEXT: movd %ebp, %xmm1 3616; SSE2-NEXT: movq %rsi, %rbp 3617; SSE2-NEXT: movsbq %sil, %rsi 3618; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7] 3619; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3] 3620; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7] 3621; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7] 3622; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3] 3623; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3624; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7] 3625; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] 3626; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3] 3627; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] 3628; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7] 3629; SSE2-NEXT: shlq $49, %r8 3630; SSE2-NEXT: sarq $63, %r8 3631; SSE2-NEXT: movd %r8d, %xmm3 3632; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = 
xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] 3633; SSE2-NEXT: shlq $50, %r9 3634; SSE2-NEXT: sarq $63, %r9 3635; SSE2-NEXT: movd %r9d, %xmm4 3636; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] 3637; SSE2-NEXT: shlq $51, %r10 3638; SSE2-NEXT: sarq $63, %r10 3639; SSE2-NEXT: movd %r10d, %xmm5 3640; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3641; SSE2-NEXT: shlq $52, %r11 3642; SSE2-NEXT: sarq $63, %r11 3643; SSE2-NEXT: movd %r11d, %xmm2 3644; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 3645; SSE2-NEXT: shlq $53, %r14 3646; SSE2-NEXT: sarq $63, %r14 3647; SSE2-NEXT: movd %r14d, %xmm1 3648; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 3649; SSE2-NEXT: shlq $54, %r15 3650; SSE2-NEXT: sarq $63, %r15 3651; SSE2-NEXT: movd %r15d, %xmm4 3652; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] 3653; SSE2-NEXT: shlq $55, %r12 3654; SSE2-NEXT: sarq $63, %r12 3655; SSE2-NEXT: movd %r12d, %xmm3 3656; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 3657; SSE2-NEXT: shlq $60, %r13 3658; SSE2-NEXT: sarq $63, %r13 3659; SSE2-NEXT: movd %r13d, %xmm2 3660; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 3661; SSE2-NEXT: shlq $61, %rbx 3662; SSE2-NEXT: sarq $63, %rbx 3663; SSE2-NEXT: movd %ebx, %xmm4 3664; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] 3665; SSE2-NEXT: shlq $62, %rax 3666; SSE2-NEXT: sarq $63, %rax 3667; SSE2-NEXT: movd %eax, %xmm6 3668; SSE2-NEXT: 
punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] 3669; SSE2-NEXT: shlq $63, %rcx 3670; SSE2-NEXT: sarq $63, %rcx 3671; SSE2-NEXT: movd %ecx, %xmm1 3672; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 3673; SSE2-NEXT: shlq $58, %rdx 3674; SSE2-NEXT: sarq $63, %rdx 3675; SSE2-NEXT: movd %edx, %xmm2 3676; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 3677; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 3678; SSE2-NEXT: shlq $59, %rdi 3679; SSE2-NEXT: sarq $63, %rdi 3680; SSE2-NEXT: movd %edi, %xmm4 3681; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 3682; SSE2-NEXT: shlq $57, %rbp 3683; SSE2-NEXT: sarq $63, %rbp 3684; SSE2-NEXT: movd %ebp, %xmm2 3685; SSE2-NEXT: shrq $7, %rsi 3686; SSE2-NEXT: movd %esi, %xmm5 3687; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] 3688; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 3689; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] 3690; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 3691; SSE2-NEXT: popq %rbx 3692; SSE2-NEXT: popq %r12 3693; SSE2-NEXT: popq %r13 3694; SSE2-NEXT: popq %r14 3695; SSE2-NEXT: popq %r15 3696; SSE2-NEXT: popq %rbp 3697; SSE2-NEXT: retq 3698; 3699; SSSE3-LABEL: load_sext_32i1_to_32i8: 3700; SSSE3: # %bb.0: # %entry 3701; SSSE3-NEXT: pushq %rbp 3702; SSSE3-NEXT: pushq %r15 3703; SSSE3-NEXT: pushq %r14 3704; SSSE3-NEXT: pushq %r13 3705; SSSE3-NEXT: pushq %r12 3706; SSSE3-NEXT: pushq %rbx 3707; 
SSSE3-NEXT: movswq (%rdi), %rax 3708; SSSE3-NEXT: movq %rax, %r10 3709; SSSE3-NEXT: movq %rax, %r8 3710; SSSE3-NEXT: movq %rax, %r9 3711; SSSE3-NEXT: movq %rax, %r11 3712; SSSE3-NEXT: movq %rax, %r14 3713; SSSE3-NEXT: movq %rax, %r15 3714; SSSE3-NEXT: movq %rax, %r12 3715; SSSE3-NEXT: movq %rax, %r13 3716; SSSE3-NEXT: movq %rax, %rdx 3717; SSSE3-NEXT: movq %rax, %rsi 3718; SSSE3-NEXT: movq %rax, %rcx 3719; SSSE3-NEXT: movq %rax, %rbp 3720; SSSE3-NEXT: movq %rax, %rbx 3721; SSSE3-NEXT: shrq $15, %rbx 3722; SSSE3-NEXT: movd %ebx, %xmm0 3723; SSSE3-NEXT: movq %rax, %rbx 3724; SSSE3-NEXT: shlq $49, %r10 3725; SSSE3-NEXT: sarq $63, %r10 3726; SSSE3-NEXT: movd %r10d, %xmm15 3727; SSSE3-NEXT: movq %rax, %r10 3728; SSSE3-NEXT: movsbq %al, %rax 3729; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] 3730; SSSE3-NEXT: shlq $50, %r8 3731; SSSE3-NEXT: sarq $63, %r8 3732; SSSE3-NEXT: movd %r8d, %xmm8 3733; SSSE3-NEXT: shlq $51, %r9 3734; SSSE3-NEXT: sarq $63, %r9 3735; SSSE3-NEXT: movd %r9d, %xmm3 3736; SSSE3-NEXT: shlq $52, %r11 3737; SSSE3-NEXT: sarq $63, %r11 3738; SSSE3-NEXT: movd %r11d, %xmm9 3739; SSSE3-NEXT: shlq $53, %r14 3740; SSSE3-NEXT: sarq $63, %r14 3741; SSSE3-NEXT: movd %r14d, %xmm6 3742; SSSE3-NEXT: shlq $54, %r15 3743; SSSE3-NEXT: sarq $63, %r15 3744; SSSE3-NEXT: movd %r15d, %xmm10 3745; SSSE3-NEXT: shlq $55, %r12 3746; SSSE3-NEXT: sarq $63, %r12 3747; SSSE3-NEXT: movd %r12d, %xmm2 3748; SSSE3-NEXT: shlq $60, %r13 3749; SSSE3-NEXT: sarq $63, %r13 3750; SSSE3-NEXT: movd %r13d, %xmm11 3751; SSSE3-NEXT: shlq $61, %rdx 3752; SSSE3-NEXT: sarq $63, %rdx 3753; SSSE3-NEXT: movd %edx, %xmm5 3754; SSSE3-NEXT: shlq $62, %rsi 3755; SSSE3-NEXT: sarq $63, %rsi 3756; SSSE3-NEXT: movd %esi, %xmm12 3757; SSSE3-NEXT: shlq $63, %rcx 3758; SSSE3-NEXT: sarq $63, %rcx 3759; SSSE3-NEXT: movd %ecx, %xmm0 3760; SSSE3-NEXT: shlq $58, %rbp 3761; SSSE3-NEXT: sarq 
$63, %rbp 3762; SSSE3-NEXT: movd %ebp, %xmm13 3763; SSSE3-NEXT: shlq $59, %rbx 3764; SSSE3-NEXT: sarq $63, %rbx 3765; SSSE3-NEXT: movd %ebx, %xmm7 3766; SSSE3-NEXT: shlq $57, %r10 3767; SSSE3-NEXT: sarq $63, %r10 3768; SSSE3-NEXT: movd %r10d, %xmm4 3769; SSSE3-NEXT: shrq $7, %rax 3770; SSSE3-NEXT: movd %eax, %xmm14 3771; SSSE3-NEXT: movswq 2(%rdi), %rsi 3772; SSSE3-NEXT: movq %rsi, %r8 3773; SSSE3-NEXT: movq %rsi, %r9 3774; SSSE3-NEXT: movq %rsi, %r10 3775; SSSE3-NEXT: movq %rsi, %r11 3776; SSSE3-NEXT: movq %rsi, %r14 3777; SSSE3-NEXT: movq %rsi, %r15 3778; SSSE3-NEXT: movq %rsi, %r12 3779; SSSE3-NEXT: movq %rsi, %r13 3780; SSSE3-NEXT: movq %rsi, %rbx 3781; SSSE3-NEXT: movq %rsi, %rax 3782; SSSE3-NEXT: movq %rsi, %rcx 3783; SSSE3-NEXT: movq %rsi, %rdx 3784; SSSE3-NEXT: movq %rsi, %rdi 3785; SSSE3-NEXT: movq %rsi, %rbp 3786; SSSE3-NEXT: shrq $15, %rbp 3787; SSSE3-NEXT: movd %ebp, %xmm1 3788; SSSE3-NEXT: movq %rsi, %rbp 3789; SSSE3-NEXT: movsbq %sil, %rsi 3790; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7] 3791; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3] 3792; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7] 3793; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7] 3794; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3] 3795; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3796; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7] 3797; SSSE3-NEXT: punpcklbw 
{{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] 3798; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3] 3799; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] 3800; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7] 3801; SSSE3-NEXT: shlq $49, %r8 3802; SSSE3-NEXT: sarq $63, %r8 3803; SSSE3-NEXT: movd %r8d, %xmm3 3804; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] 3805; SSSE3-NEXT: shlq $50, %r9 3806; SSSE3-NEXT: sarq $63, %r9 3807; SSSE3-NEXT: movd %r9d, %xmm4 3808; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] 3809; SSSE3-NEXT: shlq $51, %r10 3810; SSSE3-NEXT: sarq $63, %r10 3811; SSSE3-NEXT: movd %r10d, %xmm5 3812; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3813; SSSE3-NEXT: shlq $52, %r11 3814; SSSE3-NEXT: sarq $63, %r11 3815; SSSE3-NEXT: movd %r11d, %xmm2 3816; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 3817; SSSE3-NEXT: shlq $53, %r14 3818; SSSE3-NEXT: sarq $63, %r14 3819; SSSE3-NEXT: movd %r14d, %xmm1 3820; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 3821; SSSE3-NEXT: shlq $54, %r15 3822; SSSE3-NEXT: sarq $63, %r15 3823; SSSE3-NEXT: movd %r15d, %xmm4 3824; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] 3825; SSSE3-NEXT: shlq $55, %r12 3826; SSSE3-NEXT: sarq $63, %r12 3827; 
SSSE3-NEXT: movd %r12d, %xmm3 3828; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 3829; SSSE3-NEXT: shlq $60, %r13 3830; SSSE3-NEXT: sarq $63, %r13 3831; SSSE3-NEXT: movd %r13d, %xmm2 3832; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 3833; SSSE3-NEXT: shlq $61, %rbx 3834; SSSE3-NEXT: sarq $63, %rbx 3835; SSSE3-NEXT: movd %ebx, %xmm4 3836; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] 3837; SSSE3-NEXT: shlq $62, %rax 3838; SSSE3-NEXT: sarq $63, %rax 3839; SSSE3-NEXT: movd %eax, %xmm6 3840; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] 3841; SSSE3-NEXT: shlq $63, %rcx 3842; SSSE3-NEXT: sarq $63, %rcx 3843; SSSE3-NEXT: movd %ecx, %xmm1 3844; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 3845; SSSE3-NEXT: shlq $58, %rdx 3846; SSSE3-NEXT: sarq $63, %rdx 3847; SSSE3-NEXT: movd %edx, %xmm2 3848; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 3849; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 3850; SSSE3-NEXT: shlq $59, %rdi 3851; SSSE3-NEXT: sarq $63, %rdi 3852; SSSE3-NEXT: movd %edi, %xmm4 3853; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 3854; SSSE3-NEXT: shlq $57, %rbp 3855; SSSE3-NEXT: sarq $63, %rbp 3856; SSSE3-NEXT: movd %ebp, %xmm2 3857; SSSE3-NEXT: shrq $7, %rsi 3858; SSSE3-NEXT: movd %esi, %xmm5 3859; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 
= xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] 3860; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 3861; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] 3862; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 3863; SSSE3-NEXT: popq %rbx 3864; SSSE3-NEXT: popq %r12 3865; SSSE3-NEXT: popq %r13 3866; SSSE3-NEXT: popq %r14 3867; SSSE3-NEXT: popq %r15 3868; SSSE3-NEXT: popq %rbp 3869; SSSE3-NEXT: retq 3870; 3871; SSE41-LABEL: load_sext_32i1_to_32i8: 3872; SSE41: # %bb.0: # %entry 3873; SSE41-NEXT: movswq (%rdi), %rax 3874; SSE41-NEXT: movq %rax, %rcx 3875; SSE41-NEXT: shlq $62, %rcx 3876; SSE41-NEXT: sarq $63, %rcx 3877; SSE41-NEXT: movq %rax, %rdx 3878; SSE41-NEXT: shlq $63, %rdx 3879; SSE41-NEXT: sarq $63, %rdx 3880; SSE41-NEXT: movd %edx, %xmm0 3881; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 3882; SSE41-NEXT: movq %rax, %rcx 3883; SSE41-NEXT: shlq $61, %rcx 3884; SSE41-NEXT: sarq $63, %rcx 3885; SSE41-NEXT: pinsrb $2, %ecx, %xmm0 3886; SSE41-NEXT: movq %rax, %rcx 3887; SSE41-NEXT: shlq $60, %rcx 3888; SSE41-NEXT: sarq $63, %rcx 3889; SSE41-NEXT: pinsrb $3, %ecx, %xmm0 3890; SSE41-NEXT: movq %rax, %rcx 3891; SSE41-NEXT: shlq $59, %rcx 3892; SSE41-NEXT: sarq $63, %rcx 3893; SSE41-NEXT: pinsrb $4, %ecx, %xmm0 3894; SSE41-NEXT: movq %rax, %rcx 3895; SSE41-NEXT: shlq $58, %rcx 3896; SSE41-NEXT: sarq $63, %rcx 3897; SSE41-NEXT: pinsrb $5, %ecx, %xmm0 3898; SSE41-NEXT: movq %rax, %rcx 3899; SSE41-NEXT: shlq $57, %rcx 3900; SSE41-NEXT: sarq $63, %rcx 3901; SSE41-NEXT: pinsrb $6, %ecx, %xmm0 3902; SSE41-NEXT: movsbq %al, %rcx 3903; SSE41-NEXT: shrq $7, %rcx 3904; SSE41-NEXT: pinsrb $7, %ecx, %xmm0 3905; SSE41-NEXT: movq %rax, %rcx 3906; SSE41-NEXT: shlq $55, %rcx 3907; SSE41-NEXT: sarq $63, %rcx 3908; SSE41-NEXT: pinsrb $8, %ecx, %xmm0 3909; SSE41-NEXT: movq %rax, %rcx 3910; SSE41-NEXT: shlq $54, %rcx 3911; SSE41-NEXT: 
sarq $63, %rcx 3912; SSE41-NEXT: pinsrb $9, %ecx, %xmm0 3913; SSE41-NEXT: movq %rax, %rcx 3914; SSE41-NEXT: shlq $53, %rcx 3915; SSE41-NEXT: sarq $63, %rcx 3916; SSE41-NEXT: pinsrb $10, %ecx, %xmm0 3917; SSE41-NEXT: movq %rax, %rcx 3918; SSE41-NEXT: shlq $52, %rcx 3919; SSE41-NEXT: sarq $63, %rcx 3920; SSE41-NEXT: pinsrb $11, %ecx, %xmm0 3921; SSE41-NEXT: movq %rax, %rcx 3922; SSE41-NEXT: shlq $51, %rcx 3923; SSE41-NEXT: sarq $63, %rcx 3924; SSE41-NEXT: pinsrb $12, %ecx, %xmm0 3925; SSE41-NEXT: movq %rax, %rcx 3926; SSE41-NEXT: shlq $50, %rcx 3927; SSE41-NEXT: sarq $63, %rcx 3928; SSE41-NEXT: pinsrb $13, %ecx, %xmm0 3929; SSE41-NEXT: movq %rax, %rcx 3930; SSE41-NEXT: shlq $49, %rcx 3931; SSE41-NEXT: sarq $63, %rcx 3932; SSE41-NEXT: pinsrb $14, %ecx, %xmm0 3933; SSE41-NEXT: shrq $15, %rax 3934; SSE41-NEXT: pinsrb $15, %eax, %xmm0 3935; SSE41-NEXT: movswq 2(%rdi), %rax 3936; SSE41-NEXT: movq %rax, %rcx 3937; SSE41-NEXT: shlq $62, %rcx 3938; SSE41-NEXT: sarq $63, %rcx 3939; SSE41-NEXT: movq %rax, %rdx 3940; SSE41-NEXT: shlq $63, %rdx 3941; SSE41-NEXT: sarq $63, %rdx 3942; SSE41-NEXT: movd %edx, %xmm1 3943; SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3944; SSE41-NEXT: movq %rax, %rcx 3945; SSE41-NEXT: shlq $61, %rcx 3946; SSE41-NEXT: sarq $63, %rcx 3947; SSE41-NEXT: pinsrb $2, %ecx, %xmm1 3948; SSE41-NEXT: movq %rax, %rcx 3949; SSE41-NEXT: shlq $60, %rcx 3950; SSE41-NEXT: sarq $63, %rcx 3951; SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3952; SSE41-NEXT: movq %rax, %rcx 3953; SSE41-NEXT: shlq $59, %rcx 3954; SSE41-NEXT: sarq $63, %rcx 3955; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3956; SSE41-NEXT: movq %rax, %rcx 3957; SSE41-NEXT: shlq $58, %rcx 3958; SSE41-NEXT: sarq $63, %rcx 3959; SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3960; SSE41-NEXT: movq %rax, %rcx 3961; SSE41-NEXT: shlq $57, %rcx 3962; SSE41-NEXT: sarq $63, %rcx 3963; SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3964; SSE41-NEXT: movsbq %al, %rcx 3965; SSE41-NEXT: shrq $7, %rcx 3966; SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3967; SSE41-NEXT: movq %rax, 
%rcx 3968; SSE41-NEXT: shlq $55, %rcx 3969; SSE41-NEXT: sarq $63, %rcx 3970; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3971; SSE41-NEXT: movq %rax, %rcx 3972; SSE41-NEXT: shlq $54, %rcx 3973; SSE41-NEXT: sarq $63, %rcx 3974; SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3975; SSE41-NEXT: movq %rax, %rcx 3976; SSE41-NEXT: shlq $53, %rcx 3977; SSE41-NEXT: sarq $63, %rcx 3978; SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3979; SSE41-NEXT: movq %rax, %rcx 3980; SSE41-NEXT: shlq $52, %rcx 3981; SSE41-NEXT: sarq $63, %rcx 3982; SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3983; SSE41-NEXT: movq %rax, %rcx 3984; SSE41-NEXT: shlq $51, %rcx 3985; SSE41-NEXT: sarq $63, %rcx 3986; SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3987; SSE41-NEXT: movq %rax, %rcx 3988; SSE41-NEXT: shlq $50, %rcx 3989; SSE41-NEXT: sarq $63, %rcx 3990; SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3991; SSE41-NEXT: movq %rax, %rcx 3992; SSE41-NEXT: shlq $49, %rcx 3993; SSE41-NEXT: sarq $63, %rcx 3994; SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3995; SSE41-NEXT: shrq $15, %rax 3996; SSE41-NEXT: pinsrb $15, %eax, %xmm1 3997; SSE41-NEXT: retq 3998; 3999; AVX1-LABEL: load_sext_32i1_to_32i8: 4000; AVX1: # %bb.0: # %entry 4001; AVX1-NEXT: pushq %rbp 4002; AVX1-NEXT: pushq %r15 4003; AVX1-NEXT: pushq %r14 4004; AVX1-NEXT: pushq %r13 4005; AVX1-NEXT: pushq %r12 4006; AVX1-NEXT: pushq %rbx 4007; AVX1-NEXT: movslq (%rdi), %rax 4008; AVX1-NEXT: movq %rax, %rcx 4009; AVX1-NEXT: shlq $47, %rcx 4010; AVX1-NEXT: sarq $63, %rcx 4011; AVX1-NEXT: vmovd %ecx, %xmm0 4012; AVX1-NEXT: movq %rax, %r8 4013; AVX1-NEXT: movq %rax, %rdx 4014; AVX1-NEXT: movq %rax, %rcx 4015; AVX1-NEXT: movq %rax, %rdi 4016; AVX1-NEXT: movq %rax, %r13 4017; AVX1-NEXT: movq %rax, %rsi 4018; AVX1-NEXT: movq %rax, %r10 4019; AVX1-NEXT: movq %rax, %r11 4020; AVX1-NEXT: movq %rax, %r9 4021; AVX1-NEXT: movq %rax, %rbx 4022; AVX1-NEXT: movq %rax, %r14 4023; AVX1-NEXT: movq %rax, %r15 4024; AVX1-NEXT: movq %rax, %r12 4025; AVX1-NEXT: movq %rax, %rbp 4026; AVX1-NEXT: shlq $46, %rbp 4027; AVX1-NEXT: sarq $63, %rbp 
4028; AVX1-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0 4029; AVX1-NEXT: movq %rax, %rbp 4030; AVX1-NEXT: shlq $45, %r8 4031; AVX1-NEXT: sarq $63, %r8 4032; AVX1-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0 4033; AVX1-NEXT: movq %rax, %r8 4034; AVX1-NEXT: shlq $44, %rdx 4035; AVX1-NEXT: sarq $63, %rdx 4036; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 4037; AVX1-NEXT: movq %rax, %rdx 4038; AVX1-NEXT: shlq $43, %rcx 4039; AVX1-NEXT: sarq $63, %rcx 4040; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 4041; AVX1-NEXT: movq %rax, %rcx 4042; AVX1-NEXT: shlq $42, %rdi 4043; AVX1-NEXT: sarq $63, %rdi 4044; AVX1-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 4045; AVX1-NEXT: movq %rax, %rdi 4046; AVX1-NEXT: shlq $41, %r13 4047; AVX1-NEXT: sarq $63, %r13 4048; AVX1-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0 4049; AVX1-NEXT: movq %rax, %r13 4050; AVX1-NEXT: shlq $40, %rsi 4051; AVX1-NEXT: sarq $63, %rsi 4052; AVX1-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0 4053; AVX1-NEXT: movq %rax, %rsi 4054; AVX1-NEXT: shlq $39, %r10 4055; AVX1-NEXT: sarq $63, %r10 4056; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 4057; AVX1-NEXT: movq %rax, %r10 4058; AVX1-NEXT: shlq $38, %r11 4059; AVX1-NEXT: sarq $63, %r11 4060; AVX1-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0 4061; AVX1-NEXT: movsbq %al, %r11 4062; AVX1-NEXT: shlq $37, %r9 4063; AVX1-NEXT: sarq $63, %r9 4064; AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 4065; AVX1-NEXT: movq %rax, %r9 4066; AVX1-NEXT: shlq $36, %rbx 4067; AVX1-NEXT: sarq $63, %rbx 4068; AVX1-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 4069; AVX1-NEXT: movq %rax, %rbx 4070; AVX1-NEXT: shlq $35, %r14 4071; AVX1-NEXT: sarq $63, %r14 4072; AVX1-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0 4073; AVX1-NEXT: movq %rax, %r14 4074; AVX1-NEXT: shlq $34, %r15 4075; AVX1-NEXT: sarq $63, %r15 4076; AVX1-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0 4077; AVX1-NEXT: movq %rax, %r15 4078; AVX1-NEXT: shlq $33, %r12 4079; AVX1-NEXT: sarq $63, %r12 4080; AVX1-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0 4081; AVX1-NEXT: movq %rax, %r12 4082; AVX1-NEXT: shrq $31, 
%rbp 4083; AVX1-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0 4084; AVX1-NEXT: movq %rax, %rbp 4085; AVX1-NEXT: shlq $63, %rdx 4086; AVX1-NEXT: sarq $63, %rdx 4087; AVX1-NEXT: vmovd %edx, %xmm1 4088; AVX1-NEXT: movq %rax, %rdx 4089; AVX1-NEXT: movswq %ax, %rax 4090; AVX1-NEXT: shlq $62, %r8 4091; AVX1-NEXT: sarq $63, %r8 4092; AVX1-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 4093; AVX1-NEXT: shlq $61, %rcx 4094; AVX1-NEXT: sarq $63, %rcx 4095; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 4096; AVX1-NEXT: shlq $60, %rdi 4097; AVX1-NEXT: sarq $63, %rdi 4098; AVX1-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1 4099; AVX1-NEXT: shlq $59, %r13 4100; AVX1-NEXT: sarq $63, %r13 4101; AVX1-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1 4102; AVX1-NEXT: shlq $58, %rsi 4103; AVX1-NEXT: sarq $63, %rsi 4104; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 4105; AVX1-NEXT: shlq $57, %r10 4106; AVX1-NEXT: sarq $63, %r10 4107; AVX1-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1 4108; AVX1-NEXT: shrq $7, %r11 4109; AVX1-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1 4110; AVX1-NEXT: shlq $55, %r9 4111; AVX1-NEXT: sarq $63, %r9 4112; AVX1-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1 4113; AVX1-NEXT: shlq $54, %rbx 4114; AVX1-NEXT: sarq $63, %rbx 4115; AVX1-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1 4116; AVX1-NEXT: shlq $53, %r14 4117; AVX1-NEXT: sarq $63, %r14 4118; AVX1-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 4119; AVX1-NEXT: shlq $52, %r15 4120; AVX1-NEXT: sarq $63, %r15 4121; AVX1-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1 4122; AVX1-NEXT: shlq $51, %r12 4123; AVX1-NEXT: sarq $63, %r12 4124; AVX1-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1 4125; AVX1-NEXT: shlq $50, %rbp 4126; AVX1-NEXT: sarq $63, %rbp 4127; AVX1-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1 4128; AVX1-NEXT: shlq $49, %rdx 4129; AVX1-NEXT: sarq $63, %rdx 4130; AVX1-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1 4131; AVX1-NEXT: shrq $15, %rax 4132; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 4133; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4134; AVX1-NEXT: popq %rbx 4135; AVX1-NEXT: popq %r12 4136; AVX1-NEXT: 
popq %r13 4137; AVX1-NEXT: popq %r14 4138; AVX1-NEXT: popq %r15 4139; AVX1-NEXT: popq %rbp 4140; AVX1-NEXT: retq 4141; 4142; AVX2-LABEL: load_sext_32i1_to_32i8: 4143; AVX2: # %bb.0: # %entry 4144; AVX2-NEXT: pushq %rbp 4145; AVX2-NEXT: pushq %r15 4146; AVX2-NEXT: pushq %r14 4147; AVX2-NEXT: pushq %r13 4148; AVX2-NEXT: pushq %r12 4149; AVX2-NEXT: pushq %rbx 4150; AVX2-NEXT: movslq (%rdi), %rax 4151; AVX2-NEXT: movq %rax, %rcx 4152; AVX2-NEXT: shlq $47, %rcx 4153; AVX2-NEXT: sarq $63, %rcx 4154; AVX2-NEXT: vmovd %ecx, %xmm0 4155; AVX2-NEXT: movq %rax, %r8 4156; AVX2-NEXT: movq %rax, %rdx 4157; AVX2-NEXT: movq %rax, %rcx 4158; AVX2-NEXT: movq %rax, %rdi 4159; AVX2-NEXT: movq %rax, %r13 4160; AVX2-NEXT: movq %rax, %rsi 4161; AVX2-NEXT: movq %rax, %r10 4162; AVX2-NEXT: movq %rax, %r11 4163; AVX2-NEXT: movq %rax, %r9 4164; AVX2-NEXT: movq %rax, %rbx 4165; AVX2-NEXT: movq %rax, %r14 4166; AVX2-NEXT: movq %rax, %r15 4167; AVX2-NEXT: movq %rax, %r12 4168; AVX2-NEXT: movq %rax, %rbp 4169; AVX2-NEXT: shlq $46, %rbp 4170; AVX2-NEXT: sarq $63, %rbp 4171; AVX2-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0 4172; AVX2-NEXT: movq %rax, %rbp 4173; AVX2-NEXT: shlq $45, %r8 4174; AVX2-NEXT: sarq $63, %r8 4175; AVX2-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0 4176; AVX2-NEXT: movq %rax, %r8 4177; AVX2-NEXT: shlq $44, %rdx 4178; AVX2-NEXT: sarq $63, %rdx 4179; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 4180; AVX2-NEXT: movq %rax, %rdx 4181; AVX2-NEXT: shlq $43, %rcx 4182; AVX2-NEXT: sarq $63, %rcx 4183; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 4184; AVX2-NEXT: movq %rax, %rcx 4185; AVX2-NEXT: shlq $42, %rdi 4186; AVX2-NEXT: sarq $63, %rdi 4187; AVX2-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 4188; AVX2-NEXT: movq %rax, %rdi 4189; AVX2-NEXT: shlq $41, %r13 4190; AVX2-NEXT: sarq $63, %r13 4191; AVX2-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0 4192; AVX2-NEXT: movq %rax, %r13 4193; AVX2-NEXT: shlq $40, %rsi 4194; AVX2-NEXT: sarq $63, %rsi 4195; AVX2-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0 4196; AVX2-NEXT: movq %rax, 
%rsi 4197; AVX2-NEXT: shlq $39, %r10 4198; AVX2-NEXT: sarq $63, %r10 4199; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 4200; AVX2-NEXT: movq %rax, %r10 4201; AVX2-NEXT: shlq $38, %r11 4202; AVX2-NEXT: sarq $63, %r11 4203; AVX2-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0 4204; AVX2-NEXT: movsbq %al, %r11 4205; AVX2-NEXT: shlq $37, %r9 4206; AVX2-NEXT: sarq $63, %r9 4207; AVX2-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 4208; AVX2-NEXT: movq %rax, %r9 4209; AVX2-NEXT: shlq $36, %rbx 4210; AVX2-NEXT: sarq $63, %rbx 4211; AVX2-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 4212; AVX2-NEXT: movq %rax, %rbx 4213; AVX2-NEXT: shlq $35, %r14 4214; AVX2-NEXT: sarq $63, %r14 4215; AVX2-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0 4216; AVX2-NEXT: movq %rax, %r14 4217; AVX2-NEXT: shlq $34, %r15 4218; AVX2-NEXT: sarq $63, %r15 4219; AVX2-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0 4220; AVX2-NEXT: movq %rax, %r15 4221; AVX2-NEXT: shlq $33, %r12 4222; AVX2-NEXT: sarq $63, %r12 4223; AVX2-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0 4224; AVX2-NEXT: movq %rax, %r12 4225; AVX2-NEXT: shrq $31, %rbp 4226; AVX2-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0 4227; AVX2-NEXT: movq %rax, %rbp 4228; AVX2-NEXT: shlq $63, %rdx 4229; AVX2-NEXT: sarq $63, %rdx 4230; AVX2-NEXT: vmovd %edx, %xmm1 4231; AVX2-NEXT: movq %rax, %rdx 4232; AVX2-NEXT: movswq %ax, %rax 4233; AVX2-NEXT: shlq $62, %r8 4234; AVX2-NEXT: sarq $63, %r8 4235; AVX2-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 4236; AVX2-NEXT: shlq $61, %rcx 4237; AVX2-NEXT: sarq $63, %rcx 4238; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 4239; AVX2-NEXT: shlq $60, %rdi 4240; AVX2-NEXT: sarq $63, %rdi 4241; AVX2-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1 4242; AVX2-NEXT: shlq $59, %r13 4243; AVX2-NEXT: sarq $63, %r13 4244; AVX2-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1 4245; AVX2-NEXT: shlq $58, %rsi 4246; AVX2-NEXT: sarq $63, %rsi 4247; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 4248; AVX2-NEXT: shlq $57, %r10 4249; AVX2-NEXT: sarq $63, %r10 4250; AVX2-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1 4251; AVX2-NEXT: 
shrq $7, %r11 4252; AVX2-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1 4253; AVX2-NEXT: shlq $55, %r9 4254; AVX2-NEXT: sarq $63, %r9 4255; AVX2-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1 4256; AVX2-NEXT: shlq $54, %rbx 4257; AVX2-NEXT: sarq $63, %rbx 4258; AVX2-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1 4259; AVX2-NEXT: shlq $53, %r14 4260; AVX2-NEXT: sarq $63, %r14 4261; AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 4262; AVX2-NEXT: shlq $52, %r15 4263; AVX2-NEXT: sarq $63, %r15 4264; AVX2-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1 4265; AVX2-NEXT: shlq $51, %r12 4266; AVX2-NEXT: sarq $63, %r12 4267; AVX2-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1 4268; AVX2-NEXT: shlq $50, %rbp 4269; AVX2-NEXT: sarq $63, %rbp 4270; AVX2-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1 4271; AVX2-NEXT: shlq $49, %rdx 4272; AVX2-NEXT: sarq $63, %rdx 4273; AVX2-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1 4274; AVX2-NEXT: shrq $15, %rax 4275; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 4276; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 4277; AVX2-NEXT: popq %rbx 4278; AVX2-NEXT: popq %r12 4279; AVX2-NEXT: popq %r13 4280; AVX2-NEXT: popq %r14 4281; AVX2-NEXT: popq %r15 4282; AVX2-NEXT: popq %rbp 4283; AVX2-NEXT: retq 4284; 4285; AVX512F-LABEL: load_sext_32i1_to_32i8: 4286; AVX512F: # %bb.0: # %entry 4287; AVX512F-NEXT: kmovw (%rdi), %k1 4288; AVX512F-NEXT: kmovw 2(%rdi), %k2 4289; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 4290; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 4291; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} 4292; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 4293; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4294; AVX512F-NEXT: retq 4295; 4296; AVX512BW-LABEL: load_sext_32i1_to_32i8: 4297; AVX512BW: # %bb.0: # %entry 4298; AVX512BW-NEXT: kmovd (%rdi), %k0 4299; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 4300; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 4301; AVX512BW-NEXT: retq 4302; 4303; X32-SSE41-LABEL: load_sext_32i1_to_32i8: 4304; X32-SSE41: # %bb.0: # %entry 4305; X32-SSE41-NEXT: 
pushl %esi 4306; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4307; X32-SSE41-NEXT: movswl (%eax), %ecx 4308; X32-SSE41-NEXT: movl %ecx, %edx 4309; X32-SSE41-NEXT: shll $30, %edx 4310; X32-SSE41-NEXT: sarl $31, %edx 4311; X32-SSE41-NEXT: movl %ecx, %esi 4312; X32-SSE41-NEXT: shll $31, %esi 4313; X32-SSE41-NEXT: sarl $31, %esi 4314; X32-SSE41-NEXT: movd %esi, %xmm0 4315; X32-SSE41-NEXT: pinsrb $1, %edx, %xmm0 4316; X32-SSE41-NEXT: movl %ecx, %edx 4317; X32-SSE41-NEXT: shll $29, %edx 4318; X32-SSE41-NEXT: sarl $31, %edx 4319; X32-SSE41-NEXT: pinsrb $2, %edx, %xmm0 4320; X32-SSE41-NEXT: movl %ecx, %edx 4321; X32-SSE41-NEXT: shll $28, %edx 4322; X32-SSE41-NEXT: sarl $31, %edx 4323; X32-SSE41-NEXT: pinsrb $3, %edx, %xmm0 4324; X32-SSE41-NEXT: movl %ecx, %edx 4325; X32-SSE41-NEXT: shll $27, %edx 4326; X32-SSE41-NEXT: sarl $31, %edx 4327; X32-SSE41-NEXT: pinsrb $4, %edx, %xmm0 4328; X32-SSE41-NEXT: movl %ecx, %edx 4329; X32-SSE41-NEXT: shll $26, %edx 4330; X32-SSE41-NEXT: sarl $31, %edx 4331; X32-SSE41-NEXT: pinsrb $5, %edx, %xmm0 4332; X32-SSE41-NEXT: movl %ecx, %edx 4333; X32-SSE41-NEXT: shll $25, %edx 4334; X32-SSE41-NEXT: sarl $31, %edx 4335; X32-SSE41-NEXT: pinsrb $6, %edx, %xmm0 4336; X32-SSE41-NEXT: movsbl %cl, %edx 4337; X32-SSE41-NEXT: shrl $7, %edx 4338; X32-SSE41-NEXT: pinsrb $7, %edx, %xmm0 4339; X32-SSE41-NEXT: movl %ecx, %edx 4340; X32-SSE41-NEXT: shll $23, %edx 4341; X32-SSE41-NEXT: sarl $31, %edx 4342; X32-SSE41-NEXT: pinsrb $8, %edx, %xmm0 4343; X32-SSE41-NEXT: movl %ecx, %edx 4344; X32-SSE41-NEXT: shll $22, %edx 4345; X32-SSE41-NEXT: sarl $31, %edx 4346; X32-SSE41-NEXT: pinsrb $9, %edx, %xmm0 4347; X32-SSE41-NEXT: movl %ecx, %edx 4348; X32-SSE41-NEXT: shll $21, %edx 4349; X32-SSE41-NEXT: sarl $31, %edx 4350; X32-SSE41-NEXT: pinsrb $10, %edx, %xmm0 4351; X32-SSE41-NEXT: movl %ecx, %edx 4352; X32-SSE41-NEXT: shll $20, %edx 4353; X32-SSE41-NEXT: sarl $31, %edx 4354; X32-SSE41-NEXT: pinsrb $11, %edx, %xmm0 4355; X32-SSE41-NEXT: movl %ecx, %edx 4356; 
X32-SSE41-NEXT: shll $19, %edx 4357; X32-SSE41-NEXT: sarl $31, %edx 4358; X32-SSE41-NEXT: pinsrb $12, %edx, %xmm0 4359; X32-SSE41-NEXT: movl %ecx, %edx 4360; X32-SSE41-NEXT: shll $18, %edx 4361; X32-SSE41-NEXT: sarl $31, %edx 4362; X32-SSE41-NEXT: pinsrb $13, %edx, %xmm0 4363; X32-SSE41-NEXT: movl %ecx, %edx 4364; X32-SSE41-NEXT: shll $17, %edx 4365; X32-SSE41-NEXT: sarl $31, %edx 4366; X32-SSE41-NEXT: pinsrb $14, %edx, %xmm0 4367; X32-SSE41-NEXT: shrl $15, %ecx 4368; X32-SSE41-NEXT: pinsrb $15, %ecx, %xmm0 4369; X32-SSE41-NEXT: movswl 2(%eax), %eax 4370; X32-SSE41-NEXT: movl %eax, %ecx 4371; X32-SSE41-NEXT: shll $30, %ecx 4372; X32-SSE41-NEXT: sarl $31, %ecx 4373; X32-SSE41-NEXT: movl %eax, %edx 4374; X32-SSE41-NEXT: shll $31, %edx 4375; X32-SSE41-NEXT: sarl $31, %edx 4376; X32-SSE41-NEXT: movd %edx, %xmm1 4377; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1 4378; X32-SSE41-NEXT: movl %eax, %ecx 4379; X32-SSE41-NEXT: shll $29, %ecx 4380; X32-SSE41-NEXT: sarl $31, %ecx 4381; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1 4382; X32-SSE41-NEXT: movl %eax, %ecx 4383; X32-SSE41-NEXT: shll $28, %ecx 4384; X32-SSE41-NEXT: sarl $31, %ecx 4385; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1 4386; X32-SSE41-NEXT: movl %eax, %ecx 4387; X32-SSE41-NEXT: shll $27, %ecx 4388; X32-SSE41-NEXT: sarl $31, %ecx 4389; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 4390; X32-SSE41-NEXT: movl %eax, %ecx 4391; X32-SSE41-NEXT: shll $26, %ecx 4392; X32-SSE41-NEXT: sarl $31, %ecx 4393; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1 4394; X32-SSE41-NEXT: movl %eax, %ecx 4395; X32-SSE41-NEXT: shll $25, %ecx 4396; X32-SSE41-NEXT: sarl $31, %ecx 4397; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1 4398; X32-SSE41-NEXT: movsbl %al, %ecx 4399; X32-SSE41-NEXT: shrl $7, %ecx 4400; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1 4401; X32-SSE41-NEXT: movl %eax, %ecx 4402; X32-SSE41-NEXT: shll $23, %ecx 4403; X32-SSE41-NEXT: sarl $31, %ecx 4404; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 4405; X32-SSE41-NEXT: movl %eax, %ecx 4406; X32-SSE41-NEXT: shll $22, 
%ecx 4407; X32-SSE41-NEXT: sarl $31, %ecx 4408; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1 4409; X32-SSE41-NEXT: movl %eax, %ecx 4410; X32-SSE41-NEXT: shll $21, %ecx 4411; X32-SSE41-NEXT: sarl $31, %ecx 4412; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1 4413; X32-SSE41-NEXT: movl %eax, %ecx 4414; X32-SSE41-NEXT: shll $20, %ecx 4415; X32-SSE41-NEXT: sarl $31, %ecx 4416; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1 4417; X32-SSE41-NEXT: movl %eax, %ecx 4418; X32-SSE41-NEXT: shll $19, %ecx 4419; X32-SSE41-NEXT: sarl $31, %ecx 4420; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1 4421; X32-SSE41-NEXT: movl %eax, %ecx 4422; X32-SSE41-NEXT: shll $18, %ecx 4423; X32-SSE41-NEXT: sarl $31, %ecx 4424; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1 4425; X32-SSE41-NEXT: movl %eax, %ecx 4426; X32-SSE41-NEXT: shll $17, %ecx 4427; X32-SSE41-NEXT: sarl $31, %ecx 4428; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1 4429; X32-SSE41-NEXT: shrl $15, %eax 4430; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm1 4431; X32-SSE41-NEXT: popl %esi 4432; X32-SSE41-NEXT: retl 4433entry: 4434 %X = load <32 x i1>, <32 x i1>* %ptr 4435 %Y = sext <32 x i1> %X to <32 x i8> 4436 ret <32 x i8> %Y 4437} 4438 4439define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) { 4440; SSE2-LABEL: load_sext_16i8_to_16i16: 4441; SSE2: # %bb.0: # %entry 4442; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4443; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4444; SSE2-NEXT: psraw $8, %xmm0 4445; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4446; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4447; SSE2-NEXT: psraw $8, %xmm1 4448; SSE2-NEXT: retq 4449; 4450; SSSE3-LABEL: load_sext_16i8_to_16i16: 4451; SSSE3: # %bb.0: # %entry 4452; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4453; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4454; SSSE3-NEXT: psraw $8, %xmm0 4455; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4456; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4457; SSSE3-NEXT: psraw $8, %xmm1 4458; SSSE3-NEXT: retq 4459; 4460; SSE41-LABEL: load_sext_16i8_to_16i16: 4461; SSE41: # %bb.0: # %entry 4462; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 4463; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1 4464; SSE41-NEXT: retq 4465; 4466; AVX1-LABEL: load_sext_16i8_to_16i16: 4467; AVX1: # %bb.0: # %entry 4468; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm0 4469; AVX1-NEXT: vpmovsxbw (%rdi), %xmm1 4470; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4471; AVX1-NEXT: retq 4472; 4473; AVX2-LABEL: load_sext_16i8_to_16i16: 4474; AVX2: # %bb.0: # %entry 4475; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0 4476; AVX2-NEXT: retq 4477; 4478; AVX512-LABEL: load_sext_16i8_to_16i16: 4479; AVX512: # %bb.0: # %entry 4480; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0 4481; AVX512-NEXT: retq 4482; 4483; X32-SSE41-LABEL: load_sext_16i8_to_16i16: 4484; X32-SSE41: # %bb.0: # %entry 4485; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4486; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 4487; X32-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1 4488; X32-SSE41-NEXT: retl 4489entry: 4490 %X = load <16 x i8>, <16 x i8>* %ptr 4491 %Y = sext <16 x i8> %X to <16 x i16> 4492 ret <16 x i16> %Y 4493} 4494 4495define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { 4496; SSE2-LABEL: load_sext_2i16_to_2i64: 4497; SSE2: # %bb.0: # %entry 4498; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 4499; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 4500; SSE2-NEXT: movdqa %xmm0, %xmm1 4501; SSE2-NEXT: psrad $31, %xmm1 4502; SSE2-NEXT: psrad $16, %xmm0 4503; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4504; SSE2-NEXT: retq 4505; 4506; SSSE3-LABEL: load_sext_2i16_to_2i64: 4507; SSSE3: # %bb.0: # %entry 4508; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 4509; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 4510; SSSE3-NEXT: movdqa %xmm0, %xmm1 4511; SSSE3-NEXT: psrad $31, %xmm1 4512; SSSE3-NEXT: psrad $16, %xmm0 4513; SSSE3-NEXT: punpckldq 
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4514; SSSE3-NEXT: retq 4515; 4516; SSE41-LABEL: load_sext_2i16_to_2i64: 4517; SSE41: # %bb.0: # %entry 4518; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 4519; SSE41-NEXT: retq 4520; 4521; AVX-LABEL: load_sext_2i16_to_2i64: 4522; AVX: # %bb.0: # %entry 4523; AVX-NEXT: vpmovsxwq (%rdi), %xmm0 4524; AVX-NEXT: retq 4525; 4526; X32-SSE41-LABEL: load_sext_2i16_to_2i64: 4527; X32-SSE41: # %bb.0: # %entry 4528; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4529; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0 4530; X32-SSE41-NEXT: retl 4531entry: 4532 %X = load <2 x i16>, <2 x i16>* %ptr 4533 %Y = sext <2 x i16> %X to <2 x i64> 4534 ret <2 x i64> %Y 4535} 4536 4537define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) { 4538; SSE2-LABEL: load_sext_4i16_to_4i32: 4539; SSE2: # %bb.0: # %entry 4540; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4541; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4542; SSE2-NEXT: psrad $16, %xmm0 4543; SSE2-NEXT: retq 4544; 4545; SSSE3-LABEL: load_sext_4i16_to_4i32: 4546; SSSE3: # %bb.0: # %entry 4547; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4548; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4549; SSSE3-NEXT: psrad $16, %xmm0 4550; SSSE3-NEXT: retq 4551; 4552; SSE41-LABEL: load_sext_4i16_to_4i32: 4553; SSE41: # %bb.0: # %entry 4554; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 4555; SSE41-NEXT: retq 4556; 4557; AVX-LABEL: load_sext_4i16_to_4i32: 4558; AVX: # %bb.0: # %entry 4559; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 4560; AVX-NEXT: retq 4561; 4562; X32-SSE41-LABEL: load_sext_4i16_to_4i32: 4563; X32-SSE41: # %bb.0: # %entry 4564; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4565; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0 4566; X32-SSE41-NEXT: retl 4567entry: 4568 %X = load <4 x i16>, <4 x i16>* %ptr 4569 %Y = sext <4 x i16> %X to <4 x i32> 4570 ret <4 x i32> %Y 4571} 4572 4573define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { 4574; SSE2-LABEL: load_sext_4i16_to_4i64: 4575; SSE2: # %bb.0: # 
%entry 4576; SSE2-NEXT: movswq 2(%rdi), %rax 4577; SSE2-NEXT: movq %rax, %xmm1 4578; SSE2-NEXT: movswq (%rdi), %rax 4579; SSE2-NEXT: movq %rax, %xmm0 4580; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4581; SSE2-NEXT: movswq 6(%rdi), %rax 4582; SSE2-NEXT: movq %rax, %xmm2 4583; SSE2-NEXT: movswq 4(%rdi), %rax 4584; SSE2-NEXT: movq %rax, %xmm1 4585; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4586; SSE2-NEXT: retq 4587; 4588; SSSE3-LABEL: load_sext_4i16_to_4i64: 4589; SSSE3: # %bb.0: # %entry 4590; SSSE3-NEXT: movswq 2(%rdi), %rax 4591; SSSE3-NEXT: movq %rax, %xmm1 4592; SSSE3-NEXT: movswq (%rdi), %rax 4593; SSSE3-NEXT: movq %rax, %xmm0 4594; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4595; SSSE3-NEXT: movswq 6(%rdi), %rax 4596; SSSE3-NEXT: movq %rax, %xmm2 4597; SSSE3-NEXT: movswq 4(%rdi), %rax 4598; SSSE3-NEXT: movq %rax, %xmm1 4599; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4600; SSSE3-NEXT: retq 4601; 4602; SSE41-LABEL: load_sext_4i16_to_4i64: 4603; SSE41: # %bb.0: # %entry 4604; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 4605; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1 4606; SSE41-NEXT: retq 4607; 4608; AVX1-LABEL: load_sext_4i16_to_4i64: 4609; AVX1: # %bb.0: # %entry 4610; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0 4611; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 4612; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4613; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 4614; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4615; AVX1-NEXT: retq 4616; 4617; AVX2-LABEL: load_sext_4i16_to_4i64: 4618; AVX2: # %bb.0: # %entry 4619; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0 4620; AVX2-NEXT: retq 4621; 4622; AVX512-LABEL: load_sext_4i16_to_4i64: 4623; AVX512: # %bb.0: # %entry 4624; AVX512-NEXT: vpmovsxwq (%rdi), %ymm0 4625; AVX512-NEXT: retq 4626; 4627; X32-SSE41-LABEL: load_sext_4i16_to_4i64: 4628; X32-SSE41: # %bb.0: # %entry 4629; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4630; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0 4631; X32-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1 4632; 
X32-SSE41-NEXT: retl 4633entry: 4634 %X = load <4 x i16>, <4 x i16>* %ptr 4635 %Y = sext <4 x i16> %X to <4 x i64> 4636 ret <4 x i64> %Y 4637} 4638 4639define <8 x i32> @load_sext_8i16_to_8i32(<8 x i16> *%ptr) { 4640; SSE2-LABEL: load_sext_8i16_to_8i32: 4641; SSE2: # %bb.0: # %entry 4642; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4643; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4644; SSE2-NEXT: psrad $16, %xmm0 4645; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4646; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 4647; SSE2-NEXT: psrad $16, %xmm1 4648; SSE2-NEXT: retq 4649; 4650; SSSE3-LABEL: load_sext_8i16_to_8i32: 4651; SSSE3: # %bb.0: # %entry 4652; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4653; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4654; SSSE3-NEXT: psrad $16, %xmm0 4655; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4656; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 4657; SSSE3-NEXT: psrad $16, %xmm1 4658; SSSE3-NEXT: retq 4659; 4660; SSE41-LABEL: load_sext_8i16_to_8i32: 4661; SSE41: # %bb.0: # %entry 4662; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 4663; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 4664; SSE41-NEXT: retq 4665; 4666; AVX1-LABEL: load_sext_8i16_to_8i32: 4667; AVX1: # %bb.0: # %entry 4668; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0 4669; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1 4670; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4671; AVX1-NEXT: retq 4672; 4673; AVX2-LABEL: load_sext_8i16_to_8i32: 4674; AVX2: # %bb.0: # %entry 4675; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 4676; AVX2-NEXT: retq 4677; 4678; AVX512-LABEL: load_sext_8i16_to_8i32: 4679; AVX512: # %bb.0: # %entry 4680; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0 4681; AVX512-NEXT: retq 4682; 4683; X32-SSE41-LABEL: load_sext_8i16_to_8i32: 4684; X32-SSE41: # %bb.0: # %entry 4685; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4686; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0 4687; X32-SSE41-NEXT: pmovsxwd 8(%eax), %xmm1 4688; X32-SSE41-NEXT: retl 4689entry: 4690 %X = 
load <8 x i16>, <8 x i16>* %ptr 4691 %Y = sext <8 x i16> %X to <8 x i32> 4692 ret <8 x i32> %Y 4693} 4694 4695define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) { 4696; SSE2-LABEL: load_sext_2i32_to_2i64: 4697; SSE2: # %bb.0: # %entry 4698; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4699; SSE2-NEXT: movdqa %xmm0, %xmm1 4700; SSE2-NEXT: psrad $31, %xmm1 4701; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4702; SSE2-NEXT: retq 4703; 4704; SSSE3-LABEL: load_sext_2i32_to_2i64: 4705; SSSE3: # %bb.0: # %entry 4706; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4707; SSSE3-NEXT: movdqa %xmm0, %xmm1 4708; SSSE3-NEXT: psrad $31, %xmm1 4709; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4710; SSSE3-NEXT: retq 4711; 4712; SSE41-LABEL: load_sext_2i32_to_2i64: 4713; SSE41: # %bb.0: # %entry 4714; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 4715; SSE41-NEXT: retq 4716; 4717; AVX-LABEL: load_sext_2i32_to_2i64: 4718; AVX: # %bb.0: # %entry 4719; AVX-NEXT: vpmovsxdq (%rdi), %xmm0 4720; AVX-NEXT: retq 4721; 4722; X32-SSE41-LABEL: load_sext_2i32_to_2i64: 4723; X32-SSE41: # %bb.0: # %entry 4724; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4725; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0 4726; X32-SSE41-NEXT: retl 4727entry: 4728 %X = load <2 x i32>, <2 x i32>* %ptr 4729 %Y = sext <2 x i32> %X to <2 x i64> 4730 ret <2 x i64> %Y 4731} 4732 4733define <4 x i64> @load_sext_4i32_to_4i64(<4 x i32> *%ptr) { 4734; SSE2-LABEL: load_sext_4i32_to_4i64: 4735; SSE2: # %bb.0: # %entry 4736; SSE2-NEXT: movdqa (%rdi), %xmm0 4737; SSE2-NEXT: movdqa %xmm0, %xmm2 4738; SSE2-NEXT: psrad $31, %xmm2 4739; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 4740; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4741; SSE2-NEXT: movdqa %xmm1, %xmm2 4742; SSE2-NEXT: psrad $31, %xmm2 4743; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4744; SSE2-NEXT: retq 4745; 4746; SSSE3-LABEL: load_sext_4i32_to_4i64: 4747; SSSE3: # %bb.0: # 
%entry 4748; SSSE3-NEXT: movdqa (%rdi), %xmm0 4749; SSSE3-NEXT: movdqa %xmm0, %xmm2 4750; SSSE3-NEXT: psrad $31, %xmm2 4751; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 4752; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4753; SSSE3-NEXT: movdqa %xmm1, %xmm2 4754; SSSE3-NEXT: psrad $31, %xmm2 4755; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4756; SSSE3-NEXT: retq 4757; 4758; SSE41-LABEL: load_sext_4i32_to_4i64: 4759; SSE41: # %bb.0: # %entry 4760; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 4761; SSE41-NEXT: pmovsxdq 8(%rdi), %xmm1 4762; SSE41-NEXT: retq 4763; 4764; AVX1-LABEL: load_sext_4i32_to_4i64: 4765; AVX1: # %bb.0: # %entry 4766; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm0 4767; AVX1-NEXT: vpmovsxdq (%rdi), %xmm1 4768; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4769; AVX1-NEXT: retq 4770; 4771; AVX2-LABEL: load_sext_4i32_to_4i64: 4772; AVX2: # %bb.0: # %entry 4773; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0 4774; AVX2-NEXT: retq 4775; 4776; AVX512-LABEL: load_sext_4i32_to_4i64: 4777; AVX512: # %bb.0: # %entry 4778; AVX512-NEXT: vpmovsxdq (%rdi), %ymm0 4779; AVX512-NEXT: retq 4780; 4781; X32-SSE41-LABEL: load_sext_4i32_to_4i64: 4782; X32-SSE41: # %bb.0: # %entry 4783; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4784; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0 4785; X32-SSE41-NEXT: pmovsxdq 8(%eax), %xmm1 4786; X32-SSE41-NEXT: retl 4787entry: 4788 %X = load <4 x i32>, <4 x i32>* %ptr 4789 %Y = sext <4 x i32> %X to <4 x i64> 4790 ret <4 x i64> %Y 4791} 4792 4793define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { 4794; SSE2-LABEL: sext_2i8_to_i32: 4795; SSE2: # %bb.0: # %entry 4796; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4797; SSE2-NEXT: psraw $8, %xmm0 4798; SSE2-NEXT: movd %xmm0, %eax 4799; SSE2-NEXT: retq 4800; 4801; SSSE3-LABEL: sext_2i8_to_i32: 4802; SSSE3: # %bb.0: # %entry 4803; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4804; 
SSSE3-NEXT: psraw $8, %xmm0 4805; SSSE3-NEXT: movd %xmm0, %eax 4806; SSSE3-NEXT: retq 4807; 4808; SSE41-LABEL: sext_2i8_to_i32: 4809; SSE41: # %bb.0: # %entry 4810; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 4811; SSE41-NEXT: movd %xmm0, %eax 4812; SSE41-NEXT: retq 4813; 4814; AVX-LABEL: sext_2i8_to_i32: 4815; AVX: # %bb.0: # %entry 4816; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 4817; AVX-NEXT: vmovd %xmm0, %eax 4818; AVX-NEXT: retq 4819; 4820; X32-SSE41-LABEL: sext_2i8_to_i32: 4821; X32-SSE41: # %bb.0: # %entry 4822; X32-SSE41-NEXT: pushl %eax 4823; X32-SSE41-NEXT: .cfi_def_cfa_offset 8 4824; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 4825; X32-SSE41-NEXT: movd %xmm0, %eax 4826; X32-SSE41-NEXT: popl %ecx 4827; X32-SSE41-NEXT: .cfi_def_cfa_offset 4 4828; X32-SSE41-NEXT: retl 4829entry: 4830 %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 4831 %Ex = sext <2 x i8> %Shuf to <2 x i16> 4832 %Bc = bitcast <2 x i16> %Ex to i32 4833 ret i32 %Bc 4834} 4835 4836define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) { 4837; SSE2-LABEL: sext_4i1_to_4i64: 4838; SSE2: # %bb.0: 4839; SSE2-NEXT: pslld $31, %xmm0 4840; SSE2-NEXT: psrad $31, %xmm0 4841; SSE2-NEXT: movdqa %xmm0, %xmm2 4842; SSE2-NEXT: psrad $31, %xmm2 4843; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 4844; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4845; SSE2-NEXT: movdqa %xmm1, %xmm2 4846; SSE2-NEXT: psrad $31, %xmm2 4847; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4848; SSE2-NEXT: retq 4849; 4850; SSSE3-LABEL: sext_4i1_to_4i64: 4851; SSSE3: # %bb.0: 4852; SSSE3-NEXT: pslld $31, %xmm0 4853; SSSE3-NEXT: psrad $31, %xmm0 4854; SSSE3-NEXT: movdqa %xmm0, %xmm2 4855; SSSE3-NEXT: psrad $31, %xmm2 4856; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 4857; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4858; SSSE3-NEXT: movdqa %xmm1, %xmm2 4859; SSSE3-NEXT: psrad $31, %xmm2 4860; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4861; SSSE3-NEXT: retq 4862; 4863; SSE41-LABEL: sext_4i1_to_4i64: 4864; SSE41: # %bb.0: 4865; SSE41-NEXT: pslld $31, %xmm0 4866; SSE41-NEXT: psrad $31, %xmm0 4867; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 4868; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4869; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 4870; SSE41-NEXT: movdqa %xmm2, %xmm0 4871; SSE41-NEXT: retq 4872; 4873; AVX1-LABEL: sext_4i1_to_4i64: 4874; AVX1: # %bb.0: 4875; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 4876; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 4877; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 4878; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4879; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 4880; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4881; AVX1-NEXT: retq 4882; 4883; AVX2-LABEL: sext_4i1_to_4i64: 4884; AVX2: # %bb.0: 4885; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 4886; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 4887; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 4888; AVX2-NEXT: retq 4889; 4890; AVX512-LABEL: sext_4i1_to_4i64: 4891; AVX512: # %bb.0: 4892; AVX512-NEXT: vpslld $31, %xmm0, %xmm0 4893; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 4894; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 4895; AVX512-NEXT: retq 4896; 4897; X32-SSE41-LABEL: sext_4i1_to_4i64: 4898; X32-SSE41: # %bb.0: 4899; X32-SSE41-NEXT: pslld $31, %xmm0 4900; X32-SSE41-NEXT: psrad $31, %xmm0 4901; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 4902; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4903; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 4904; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 4905; X32-SSE41-NEXT: retl 4906 %extmask = sext <4 x i1> %mask to <4 x i64> 4907 ret <4 x i64> %extmask 4908} 4909 4910define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) { 4911; SSE2-LABEL: sext_4i8_to_4i64: 4912; SSE2: # %bb.0: 4913; SSE2-NEXT: pslld $24, %xmm0 4914; SSE2-NEXT: psrad $24, %xmm0 4915; SSE2-NEXT: movdqa %xmm0, %xmm2 4916; SSE2-NEXT: psrad $31, %xmm2 4917; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 4918; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4919; SSE2-NEXT: movdqa %xmm1, %xmm2 4920; SSE2-NEXT: psrad $31, %xmm2 4921; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4922; SSE2-NEXT: retq 4923; 4924; SSSE3-LABEL: sext_4i8_to_4i64: 4925; SSSE3: # %bb.0: 4926; SSSE3-NEXT: pslld $24, %xmm0 4927; SSSE3-NEXT: psrad $24, %xmm0 4928; SSSE3-NEXT: movdqa %xmm0, %xmm2 4929; SSSE3-NEXT: psrad $31, %xmm2 4930; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 4931; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4932; SSSE3-NEXT: movdqa %xmm1, %xmm2 4933; SSSE3-NEXT: psrad $31, %xmm2 4934; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4935; SSSE3-NEXT: retq 4936; 4937; SSE41-LABEL: sext_4i8_to_4i64: 4938; SSE41: # %bb.0: 4939; SSE41-NEXT: pslld $24, %xmm0 4940; SSE41-NEXT: psrad $24, %xmm0 4941; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 4942; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4943; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 4944; SSE41-NEXT: movdqa %xmm2, %xmm0 4945; SSE41-NEXT: retq 4946; 4947; AVX1-LABEL: sext_4i8_to_4i64: 4948; AVX1: # %bb.0: 4949; AVX1-NEXT: vpslld $24, %xmm0, %xmm0 4950; AVX1-NEXT: vpsrad $24, %xmm0, %xmm0 4951; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 4952; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4953; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 4954; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4955; AVX1-NEXT: retq 4956; 4957; AVX2-LABEL: sext_4i8_to_4i64: 4958; AVX2: # %bb.0: 4959; AVX2-NEXT: vpslld $24, %xmm0, %xmm0 4960; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0 4961; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 4962; AVX2-NEXT: retq 4963; 4964; AVX512-LABEL: sext_4i8_to_4i64: 4965; AVX512: # %bb.0: 4966; AVX512-NEXT: vpslld $24, %xmm0, %xmm0 4967; AVX512-NEXT: vpsrad $24, %xmm0, %xmm0 4968; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 4969; AVX512-NEXT: retq 4970; 4971; X32-SSE41-LABEL: sext_4i8_to_4i64: 4972; X32-SSE41: # %bb.0: 4973; X32-SSE41-NEXT: pslld $24, %xmm0 4974; X32-SSE41-NEXT: psrad $24, %xmm0 4975; 
X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 4976; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4977; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 4978; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 4979; X32-SSE41-NEXT: retl 4980 %extmask = sext <4 x i8> %mask to <4 x i64> 4981 ret <4 x i64> %extmask 4982} 4983 4984define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind { 4985; SSE-LABEL: sext_32xi1_to_32xi8: 4986; SSE: # %bb.0: 4987; SSE-NEXT: pcmpeqw %xmm5, %xmm1 4988; SSE-NEXT: pcmpeqw %xmm4, %xmm0 4989; SSE-NEXT: packsswb %xmm1, %xmm0 4990; SSE-NEXT: pcmpeqw %xmm7, %xmm3 4991; SSE-NEXT: pcmpeqw %xmm6, %xmm2 4992; SSE-NEXT: packsswb %xmm3, %xmm2 4993; SSE-NEXT: movdqa %xmm2, %xmm1 4994; SSE-NEXT: retq 4995; 4996; AVX1-LABEL: sext_32xi1_to_32xi8: 4997; AVX1: # %bb.0: 4998; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 4999; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 5000; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4 5001; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 5002; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1 5003; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 5004; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 5005; AVX1-NEXT: vpcmpeqw %xmm3, %xmm4, %xmm3 5006; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 5007; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0 5008; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5009; AVX1-NEXT: retq 5010; 5011; AVX2-LABEL: sext_32xi1_to_32xi8: 5012; AVX2: # %bb.0: 5013; AVX2-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 5014; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 5015; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 5016; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5017; AVX2-NEXT: retq 5018; 5019; AVX512F-LABEL: sext_32xi1_to_32xi8: 5020; AVX512F: # %bb.0: 5021; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 5022; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 5023; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 5024; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 5025; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 5026; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 5027; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, 
%ymm0 5028; AVX512F-NEXT: retq 5029; 5030; AVX512BW-LABEL: sext_32xi1_to_32xi8: 5031; AVX512BW: # %bb.0: 5032; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 5033; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 5034; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 5035; AVX512BW-NEXT: retq 5036; 5037; X32-SSE41-LABEL: sext_32xi1_to_32xi8: 5038; X32-SSE41: # %bb.0: 5039; X32-SSE41-NEXT: pushl %ebp 5040; X32-SSE41-NEXT: movl %esp, %ebp 5041; X32-SSE41-NEXT: andl $-16, %esp 5042; X32-SSE41-NEXT: subl $16, %esp 5043; X32-SSE41-NEXT: movdqa 8(%ebp), %xmm3 5044; X32-SSE41-NEXT: pcmpeqw 40(%ebp), %xmm1 5045; X32-SSE41-NEXT: pcmpeqw 24(%ebp), %xmm0 5046; X32-SSE41-NEXT: packsswb %xmm1, %xmm0 5047; X32-SSE41-NEXT: pcmpeqw 72(%ebp), %xmm3 5048; X32-SSE41-NEXT: pcmpeqw 56(%ebp), %xmm2 5049; X32-SSE41-NEXT: packsswb %xmm3, %xmm2 5050; X32-SSE41-NEXT: movdqa %xmm2, %xmm1 5051; X32-SSE41-NEXT: movl %ebp, %esp 5052; X32-SSE41-NEXT: popl %ebp 5053; X32-SSE41-NEXT: retl 5054 %a = icmp eq <32 x i16> %c1, %c2 5055 %b = sext <32 x i1> %a to <32 x i8> 5056 ret <32 x i8> %b 5057} 5058 5059define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) { 5060; SSE2-LABEL: sext_2i8_to_2i32: 5061; SSE2: # %bb.0: 5062; SSE2-NEXT: movzwl (%rdi), %eax 5063; SSE2-NEXT: movd %eax, %xmm0 5064; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 5065; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 5066; SSE2-NEXT: psrad $24, %xmm0 5067; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 5068; SSE2-NEXT: paddq %xmm0, %xmm0 5069; SSE2-NEXT: retq 5070; 5071; SSSE3-LABEL: sext_2i8_to_2i32: 5072; SSSE3: # %bb.0: 5073; SSSE3-NEXT: movzwl (%rdi), %eax 5074; SSSE3-NEXT: movd %eax, %xmm0 5075; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,0,u,u,u,1,u,u,u,u,u,u,u,u] 5076; SSSE3-NEXT: psrad $24, %xmm0 5077; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 5078; SSSE3-NEXT: paddq %xmm0, %xmm0 5079; SSSE3-NEXT: retq 5080; 5081; SSE41-LABEL: sext_2i8_to_2i32: 5082; SSE41: # %bb.0: 5083; 
SSE41-NEXT: pmovsxbq (%rdi), %xmm0 5084; SSE41-NEXT: paddq %xmm0, %xmm0 5085; SSE41-NEXT: retq 5086; 5087; AVX-LABEL: sext_2i8_to_2i32: 5088; AVX: # %bb.0: 5089; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 5090; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 5091; AVX-NEXT: retq 5092; 5093; X32-SSE41-LABEL: sext_2i8_to_2i32: 5094; X32-SSE41: # %bb.0: 5095; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 5096; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 5097; X32-SSE41-NEXT: paddq %xmm0, %xmm0 5098; X32-SSE41-NEXT: retl 5099 %x = load <2 x i8>, <2 x i8>* %addr, align 1 5100 %y = sext <2 x i8> %x to <2 x i32> 5101 %z = add <2 x i32>%y, %y 5102 ret <2 x i32>%z 5103} 5104 5105