1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 8; 9; Just one 32-bit run to make sure we do reasonable things there. 10; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41 11 12define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 13; SSE2-LABEL: sext_16i8_to_8i16: 14; SSE2: # BB#0: # %entry 15; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 16; SSE2-NEXT: psraw $8, %xmm0 17; SSE2-NEXT: retq 18; 19; SSSE3-LABEL: sext_16i8_to_8i16: 20; SSSE3: # BB#0: # %entry 21; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 22; SSSE3-NEXT: psraw $8, %xmm0 23; SSSE3-NEXT: retq 24; 25; SSE41-LABEL: sext_16i8_to_8i16: 26; SSE41: # BB#0: # %entry 27; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 28; SSE41-NEXT: retq 29; 30; AVX-LABEL: sext_16i8_to_8i16: 31; AVX: # BB#0: # %entry 32; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 33; AVX-NEXT: retq 34; 35; X32-SSE41-LABEL: sext_16i8_to_8i16: 36; X32-SSE41: # BB#0: # %entry 37; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 38; X32-SSE41-NEXT: retl 39entry: 40 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 41 %C = sext <8 x 
i8> %B to <8 x i16> 42 ret <8 x i16> %C 43} 44 45define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp { 46; SSE2-LABEL: sext_16i8_to_16i16: 47; SSE2: # BB#0: # %entry 48; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 49; SSE2-NEXT: psraw $8, %xmm2 50; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 51; SSE2-NEXT: psraw $8, %xmm1 52; SSE2-NEXT: movdqa %xmm2, %xmm0 53; SSE2-NEXT: retq 54; 55; SSSE3-LABEL: sext_16i8_to_16i16: 56; SSSE3: # BB#0: # %entry 57; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 58; SSSE3-NEXT: psraw $8, %xmm2 59; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 60; SSSE3-NEXT: psraw $8, %xmm1 61; SSSE3-NEXT: movdqa %xmm2, %xmm0 62; SSSE3-NEXT: retq 63; 64; SSE41-LABEL: sext_16i8_to_16i16: 65; SSE41: # BB#0: # %entry 66; SSE41-NEXT: pmovsxbw %xmm0, %xmm2 67; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 68; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 69; SSE41-NEXT: movdqa %xmm2, %xmm0 70; SSE41-NEXT: retq 71; 72; AVX1-LABEL: sext_16i8_to_16i16: 73; AVX1: # BB#0: # %entry 74; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 75; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 76; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 77; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 78; AVX1-NEXT: retq 79; 80; AVX2-LABEL: sext_16i8_to_16i16: 81; AVX2: # BB#0: # %entry 82; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 83; AVX2-NEXT: retq 84; 85; AVX512-LABEL: sext_16i8_to_16i16: 86; AVX512: # BB#0: # %entry 87; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0 88; AVX512-NEXT: retq 89; 90; 
X32-SSE41-LABEL: sext_16i8_to_16i16: 91; X32-SSE41: # BB#0: # %entry 92; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm2 93; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 94; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm1 95; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 96; X32-SSE41-NEXT: retl 97entry: 98 %B = sext <16 x i8> %A to <16 x i16> 99 ret <16 x i16> %B 100} 101 102define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 103; SSE2-LABEL: sext_16i8_to_4i32: 104; SSE2: # BB#0: # %entry 105; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 106; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 107; SSE2-NEXT: psrad $24, %xmm0 108; SSE2-NEXT: retq 109; 110; SSSE3-LABEL: sext_16i8_to_4i32: 111; SSSE3: # BB#0: # %entry 112; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 113; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 114; SSSE3-NEXT: psrad $24, %xmm0 115; SSSE3-NEXT: retq 116; 117; SSE41-LABEL: sext_16i8_to_4i32: 118; SSE41: # BB#0: # %entry 119; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 120; SSE41-NEXT: retq 121; 122; AVX-LABEL: sext_16i8_to_4i32: 123; AVX: # BB#0: # %entry 124; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 125; AVX-NEXT: retq 126; 127; X32-SSE41-LABEL: sext_16i8_to_4i32: 128; X32-SSE41: # BB#0: # %entry 129; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 130; X32-SSE41-NEXT: retl 131entry: 132 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 133 %C = sext <4 x i8> %B to <4 x i32> 134 ret <4 x i32> %C 135} 136 137define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 138; SSE2-LABEL: sext_16i8_to_8i32: 139; SSE2: # BB#0: # %entry 140; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 141; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 142; SSE2-NEXT: psrad 
$24, %xmm2 143; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 144; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 145; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 146; SSE2-NEXT: psrad $24, %xmm1 147; SSE2-NEXT: movdqa %xmm2, %xmm0 148; SSE2-NEXT: retq 149; 150; SSSE3-LABEL: sext_16i8_to_8i32: 151; SSSE3: # BB#0: # %entry 152; SSSE3-NEXT: movdqa %xmm0, %xmm1 153; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 154; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 155; SSSE3-NEXT: psrad $24, %xmm0 156; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,4,u,u,u,5,u,u,u,6,u,u,u,7] 157; SSSE3-NEXT: psrad $24, %xmm1 158; SSSE3-NEXT: retq 159; 160; SSE41-LABEL: sext_16i8_to_8i32: 161; SSE41: # BB#0: # %entry 162; SSE41-NEXT: pmovsxbd %xmm0, %xmm2 163; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 164; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 165; SSE41-NEXT: movdqa %xmm2, %xmm0 166; SSE41-NEXT: retq 167; 168; AVX1-LABEL: sext_16i8_to_8i32: 169; AVX1: # BB#0: # %entry 170; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 171; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 172; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 173; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 174; AVX1-NEXT: retq 175; 176; AVX2-LABEL: sext_16i8_to_8i32: 177; AVX2: # BB#0: # %entry 178; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 179; AVX2-NEXT: retq 180; 181; AVX512-LABEL: sext_16i8_to_8i32: 182; AVX512: # BB#0: # %entry 183; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0 184; AVX512-NEXT: retq 185; 186; X32-SSE41-LABEL: sext_16i8_to_8i32: 187; X32-SSE41: # BB#0: # %entry 188; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm2 189; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 190; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm1 191; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 192; X32-SSE41-NEXT: retl 193entry: 194 %B = shufflevector <16 x i8> %A, <16 x 
i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 195 %C = sext <8 x i8> %B to <8 x i32> 196 ret <8 x i32> %C 197} 198 199define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 200; SSE2-LABEL: sext_16i8_to_2i64: 201; SSE2: # BB#0: # %entry 202; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 203; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 204; SSE2-NEXT: movdqa %xmm0, %xmm1 205; SSE2-NEXT: psrad $31, %xmm1 206; SSE2-NEXT: psrad $24, %xmm0 207; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 208; SSE2-NEXT: retq 209; 210; SSSE3-LABEL: sext_16i8_to_2i64: 211; SSSE3: # BB#0: # %entry 212; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 213; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 214; SSSE3-NEXT: movdqa %xmm0, %xmm1 215; SSSE3-NEXT: psrad $31, %xmm1 216; SSSE3-NEXT: psrad $24, %xmm0 217; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 218; SSSE3-NEXT: retq 219; 220; SSE41-LABEL: sext_16i8_to_2i64: 221; SSE41: # BB#0: # %entry 222; SSE41-NEXT: pmovsxbq %xmm0, %xmm0 223; SSE41-NEXT: retq 224; 225; AVX-LABEL: sext_16i8_to_2i64: 226; AVX: # BB#0: # %entry 227; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 228; AVX-NEXT: retq 229; 230; X32-SSE41-LABEL: sext_16i8_to_2i64: 231; X32-SSE41: # BB#0: # %entry 232; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm0 233; X32-SSE41-NEXT: retl 234entry: 235 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 236 %C = sext <2 x i8> %B to <2 x i64> 237 ret <2 x i64> %C 238} 239 240define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 241; SSE2-LABEL: sext_16i8_to_4i64: 242; SSE2: # BB#0: # %entry 243; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 244; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 245; SSE2-NEXT: movdqa %xmm2, %xmm1 246; SSE2-NEXT: psrad $31, %xmm1 247; SSE2-NEXT: psrad $24, %xmm2 248; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 249; SSE2-NEXT: psrld $16, %xmm0 250; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 251; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 252; SSE2-NEXT: movdqa %xmm1, %xmm0 253; SSE2-NEXT: psrad $31, %xmm0 254; SSE2-NEXT: psrad $24, %xmm1 255; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 256; SSE2-NEXT: movdqa %xmm2, %xmm0 257; SSE2-NEXT: retq 258; 259; SSSE3-LABEL: sext_16i8_to_4i64: 260; SSSE3: # BB#0: # %entry 261; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 262; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 263; SSSE3-NEXT: movdqa %xmm2, %xmm1 264; SSSE3-NEXT: psrad $31, %xmm1 265; SSSE3-NEXT: psrad $24, %xmm2 266; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 267; SSSE3-NEXT: psrld $16, %xmm0 268; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 269; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 270; SSSE3-NEXT: movdqa %xmm1, %xmm0 271; SSSE3-NEXT: psrad $31, %xmm0 272; SSSE3-NEXT: psrad $24, %xmm1 273; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 274; SSSE3-NEXT: movdqa %xmm2, %xmm0 275; SSSE3-NEXT: retq 276; 277; SSE41-LABEL: sext_16i8_to_4i64: 278; SSE41: # BB#0: # %entry 279; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 280; SSE41-NEXT: psrld $16, %xmm0 281; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 282; SSE41-NEXT: movdqa %xmm2, %xmm0 283; SSE41-NEXT: retq 284; 285; AVX1-LABEL: sext_16i8_to_4i64: 286; AVX1: # BB#0: # %entry 287; 
AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 288; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 289; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 290; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 291; AVX1-NEXT: retq 292; 293; AVX2-LABEL: sext_16i8_to_4i64: 294; AVX2: # BB#0: # %entry 295; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0 296; AVX2-NEXT: retq 297; 298; AVX512-LABEL: sext_16i8_to_4i64: 299; AVX512: # BB#0: # %entry 300; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0 301; AVX512-NEXT: retq 302; 303; X32-SSE41-LABEL: sext_16i8_to_4i64: 304; X32-SSE41: # BB#0: # %entry 305; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm2 306; X32-SSE41-NEXT: psrld $16, %xmm0 307; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm1 308; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 309; X32-SSE41-NEXT: retl 310entry: 311 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 312 %C = sext <4 x i8> %B to <4 x i64> 313 ret <4 x i64> %C 314} 315 316define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp { 317; SSE2-LABEL: sext_16i8_to_8i64: 318; SSE2: # BB#0: # %entry 319; SSE2-NEXT: movdqa %xmm0, %xmm1 320; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 321; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 322; SSE2-NEXT: movdqa %xmm0, %xmm2 323; SSE2-NEXT: psrad $31, %xmm2 324; SSE2-NEXT: psrad $24, %xmm0 325; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 326; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 327; SSE2-NEXT: psrld $16, %xmm1 328; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 329; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 330; SSE2-NEXT: movdqa %xmm1, %xmm2 331; SSE2-NEXT: psrad $31, %xmm2 332; SSE2-NEXT: psrad $24, %xmm1 333; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 334; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] 335; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3] 336; SSE2-NEXT: movdqa %xmm2, %xmm4 337; SSE2-NEXT: psrad $31, %xmm4 338; SSE2-NEXT: psrad $24, %xmm2 339; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 340; SSE2-NEXT: psrld $16, %xmm3 341; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 342; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3] 343; SSE2-NEXT: movdqa %xmm3, %xmm4 344; SSE2-NEXT: psrad $31, %xmm4 345; SSE2-NEXT: psrad $24, %xmm3 346; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 347; SSE2-NEXT: retq 348; 349; SSSE3-LABEL: sext_16i8_to_8i64: 350; SSSE3: # BB#0: # %entry 351; SSSE3-NEXT: movdqa %xmm0, %xmm1 352; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 353; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 354; SSSE3-NEXT: movdqa %xmm0, %xmm2 355; SSSE3-NEXT: psrad $31, %xmm2 356; SSSE3-NEXT: psrad $24, %xmm0 357; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 358; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 359; SSSE3-NEXT: psrld $16, %xmm1 360; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 361; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 362; SSSE3-NEXT: movdqa %xmm1, %xmm2 363; SSSE3-NEXT: psrad $31, %xmm2 364; SSSE3-NEXT: psrad $24, %xmm1 365; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 366; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] 367; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3] 368; SSSE3-NEXT: movdqa %xmm2, %xmm4 369; SSSE3-NEXT: psrad $31, %xmm4 370; 
SSSE3-NEXT: psrad $24, %xmm2 371; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 372; SSSE3-NEXT: psrld $16, %xmm3 373; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 374; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3] 375; SSSE3-NEXT: movdqa %xmm3, %xmm4 376; SSSE3-NEXT: psrad $31, %xmm4 377; SSSE3-NEXT: psrad $24, %xmm3 378; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 379; SSSE3-NEXT: retq 380; 381; SSE41-LABEL: sext_16i8_to_8i64: 382; SSE41: # BB#0: # %entry 383; SSE41-NEXT: pmovsxbq %xmm0, %xmm4 384; SSE41-NEXT: movdqa %xmm0, %xmm1 385; SSE41-NEXT: psrld $16, %xmm1 386; SSE41-NEXT: pmovsxbq %xmm1, %xmm1 387; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 388; SSE41-NEXT: pmovsxbq %xmm2, %xmm2 389; SSE41-NEXT: psrlq $48, %xmm0 390; SSE41-NEXT: pmovsxbq %xmm0, %xmm3 391; SSE41-NEXT: movdqa %xmm4, %xmm0 392; SSE41-NEXT: retq 393; 394; AVX1-LABEL: sext_16i8_to_8i64: 395; AVX1: # BB#0: # %entry 396; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 397; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 398; AVX1-NEXT: vpmovsxbq %xmm2, %xmm2 399; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 400; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 401; AVX1-NEXT: vpmovsxbq %xmm1, %xmm1 402; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 403; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 404; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 405; AVX1-NEXT: vmovaps %ymm2, %ymm0 406; AVX1-NEXT: retq 407; 408; AVX2-LABEL: sext_16i8_to_8i64: 409; AVX2: # BB#0: # %entry 410; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 411; AVX2-NEXT: vpslld $24, %xmm1, %xmm1 412; AVX2-NEXT: vpsrad $24, %xmm1, %xmm1 413; AVX2-NEXT: vpmovsxdq %xmm1, %ymm2 414; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 415; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 416; AVX2-NEXT: vpslld $24, 
%xmm0, %xmm0 417; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0 418; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1 419; AVX2-NEXT: vmovdqa %ymm2, %ymm0 420; AVX2-NEXT: retq 421; 422; AVX512-LABEL: sext_16i8_to_8i64: 423; AVX512: # BB#0: # %entry 424; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 425; AVX512-NEXT: vpsllq $56, %zmm0, %zmm0 426; AVX512-NEXT: vpsraq $56, %zmm0, %zmm0 427; AVX512-NEXT: retq 428; 429; X32-SSE41-LABEL: sext_16i8_to_8i64: 430; X32-SSE41: # BB#0: # %entry 431; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm4 432; X32-SSE41-NEXT: movdqa %xmm0, %xmm1 433; X32-SSE41-NEXT: psrld $16, %xmm1 434; X32-SSE41-NEXT: pmovsxbq %xmm1, %xmm1 435; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 436; X32-SSE41-NEXT: pmovsxbq %xmm2, %xmm2 437; X32-SSE41-NEXT: psrlq $48, %xmm0 438; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm3 439; X32-SSE41-NEXT: movdqa %xmm4, %xmm0 440; X32-SSE41-NEXT: retl 441entry: 442 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 443 %C = sext <8 x i8> %B to <8 x i64> 444 ret <8 x i64> %C 445} 446 447define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 448; SSE2-LABEL: sext_8i16_to_4i32: 449; SSE2: # BB#0: # %entry 450; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 451; SSE2-NEXT: psrad $16, %xmm0 452; SSE2-NEXT: retq 453; 454; SSSE3-LABEL: sext_8i16_to_4i32: 455; SSSE3: # BB#0: # %entry 456; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 457; SSSE3-NEXT: psrad $16, %xmm0 458; SSSE3-NEXT: retq 459; 460; SSE41-LABEL: sext_8i16_to_4i32: 461; SSE41: # BB#0: # %entry 462; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 463; SSE41-NEXT: 
retq 464; 465; AVX-LABEL: sext_8i16_to_4i32: 466; AVX: # BB#0: # %entry 467; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 468; AVX-NEXT: retq 469; 470; X32-SSE41-LABEL: sext_8i16_to_4i32: 471; X32-SSE41: # BB#0: # %entry 472; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm0 473; X32-SSE41-NEXT: retl 474entry: 475 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 476 %C = sext <4 x i16> %B to <4 x i32> 477 ret <4 x i32> %C 478} 479 480define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 481; SSE2-LABEL: sext_8i16_to_8i32: 482; SSE2: # BB#0: # %entry 483; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 484; SSE2-NEXT: psrad $16, %xmm2 485; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 486; SSE2-NEXT: psrad $16, %xmm1 487; SSE2-NEXT: movdqa %xmm2, %xmm0 488; SSE2-NEXT: retq 489; 490; SSSE3-LABEL: sext_8i16_to_8i32: 491; SSSE3: # BB#0: # %entry 492; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 493; SSSE3-NEXT: psrad $16, %xmm2 494; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 495; SSSE3-NEXT: psrad $16, %xmm1 496; SSSE3-NEXT: movdqa %xmm2, %xmm0 497; SSSE3-NEXT: retq 498; 499; SSE41-LABEL: sext_8i16_to_8i32: 500; SSE41: # BB#0: # %entry 501; SSE41-NEXT: pmovsxwd %xmm0, %xmm2 502; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 503; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 504; SSE41-NEXT: movdqa %xmm2, %xmm0 505; SSE41-NEXT: retq 506; 507; AVX1-LABEL: sext_8i16_to_8i32: 508; AVX1: # BB#0: # %entry 509; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 510; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 511; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 512; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 513; AVX1-NEXT: retq 514; 515; AVX2-LABEL: sext_8i16_to_8i32: 516; AVX2: # BB#0: # %entry 517; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 518; 
AVX2-NEXT: retq 519; 520; AVX512-LABEL: sext_8i16_to_8i32: 521; AVX512: # BB#0: # %entry 522; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 523; AVX512-NEXT: retq 524; 525; X32-SSE41-LABEL: sext_8i16_to_8i32: 526; X32-SSE41: # BB#0: # %entry 527; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm2 528; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 529; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm1 530; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 531; X32-SSE41-NEXT: retl 532entry: 533 %B = sext <8 x i16> %A to <8 x i32> 534 ret <8 x i32> %B 535} 536 537define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 538; SSE2-LABEL: sext_8i16_to_2i64: 539; SSE2: # BB#0: # %entry 540; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 541; SSE2-NEXT: movdqa %xmm0, %xmm1 542; SSE2-NEXT: psrad $31, %xmm1 543; SSE2-NEXT: psrad $16, %xmm0 544; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 545; SSE2-NEXT: retq 546; 547; SSSE3-LABEL: sext_8i16_to_2i64: 548; SSSE3: # BB#0: # %entry 549; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 550; SSSE3-NEXT: movdqa %xmm0, %xmm1 551; SSSE3-NEXT: psrad $31, %xmm1 552; SSSE3-NEXT: psrad $16, %xmm0 553; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 554; SSSE3-NEXT: retq 555; 556; SSE41-LABEL: sext_8i16_to_2i64: 557; SSE41: # BB#0: # %entry 558; SSE41-NEXT: pmovsxwq %xmm0, %xmm0 559; SSE41-NEXT: retq 560; 561; AVX-LABEL: sext_8i16_to_2i64: 562; AVX: # BB#0: # %entry 563; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 564; AVX-NEXT: retq 565; 566; X32-SSE41-LABEL: sext_8i16_to_2i64: 567; X32-SSE41: # BB#0: # %entry 568; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm0 569; X32-SSE41-NEXT: retl 570entry: 571 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 572 %C = sext <2 x i16> %B to <2 x i64> 573 ret <2 x i64> %C 574} 575 576define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 577; SSE2-LABEL: sext_8i16_to_4i64: 578; SSE2: # BB#0: # %entry 579; SSE2-NEXT: 
punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 580; SSE2-NEXT: movdqa %xmm2, %xmm1 581; SSE2-NEXT: psrad $31, %xmm1 582; SSE2-NEXT: psrad $16, %xmm2 583; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 584; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 585; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 586; SSE2-NEXT: movdqa %xmm1, %xmm0 587; SSE2-NEXT: psrad $31, %xmm0 588; SSE2-NEXT: psrad $16, %xmm1 589; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 590; SSE2-NEXT: movdqa %xmm2, %xmm0 591; SSE2-NEXT: retq 592; 593; SSSE3-LABEL: sext_8i16_to_4i64: 594; SSSE3: # BB#0: # %entry 595; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 596; SSSE3-NEXT: movdqa %xmm2, %xmm1 597; SSSE3-NEXT: psrad $31, %xmm1 598; SSSE3-NEXT: psrad $16, %xmm2 599; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 600; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 601; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 602; SSSE3-NEXT: movdqa %xmm1, %xmm0 603; SSSE3-NEXT: psrad $31, %xmm0 604; SSSE3-NEXT: psrad $16, %xmm1 605; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 606; SSSE3-NEXT: movdqa %xmm2, %xmm0 607; SSSE3-NEXT: retq 608; 609; SSE41-LABEL: sext_8i16_to_4i64: 610; SSE41: # BB#0: # %entry 611; SSE41-NEXT: pmovsxwq %xmm0, %xmm2 612; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 613; SSE41-NEXT: pmovsxwq %xmm0, %xmm1 614; SSE41-NEXT: movdqa %xmm2, %xmm0 615; SSE41-NEXT: retq 616; 617; AVX1-LABEL: sext_8i16_to_4i64: 618; AVX1: # BB#0: # %entry 619; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 620; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 621; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 622; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 623; AVX1-NEXT: retq 624; 625; AVX2-LABEL: sext_8i16_to_4i64: 626; 
AVX2: # BB#0: # %entry 627; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 628; AVX2-NEXT: retq 629; 630; AVX512-LABEL: sext_8i16_to_4i64: 631; AVX512: # BB#0: # %entry 632; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0 633; AVX512-NEXT: retq 634; 635; X32-SSE41-LABEL: sext_8i16_to_4i64: 636; X32-SSE41: # BB#0: # %entry 637; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm2 638; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 639; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm1 640; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 641; X32-SSE41-NEXT: retl 642entry: 643 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 644 %C = sext <4 x i16> %B to <4 x i64> 645 ret <4 x i64> %C 646} 647 648define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 649; SSE2-LABEL: sext_4i32_to_2i64: 650; SSE2: # BB#0: # %entry 651; SSE2-NEXT: movdqa %xmm0, %xmm1 652; SSE2-NEXT: psrad $31, %xmm1 653; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 654; SSE2-NEXT: retq 655; 656; SSSE3-LABEL: sext_4i32_to_2i64: 657; SSSE3: # BB#0: # %entry 658; SSSE3-NEXT: movdqa %xmm0, %xmm1 659; SSSE3-NEXT: psrad $31, %xmm1 660; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 661; SSSE3-NEXT: retq 662; 663; SSE41-LABEL: sext_4i32_to_2i64: 664; SSE41: # BB#0: # %entry 665; SSE41-NEXT: pmovsxdq %xmm0, %xmm0 666; SSE41-NEXT: retq 667; 668; AVX-LABEL: sext_4i32_to_2i64: 669; AVX: # BB#0: # %entry 670; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 671; AVX-NEXT: retq 672; 673; X32-SSE41-LABEL: sext_4i32_to_2i64: 674; X32-SSE41: # BB#0: # %entry 675; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm0 676; X32-SSE41-NEXT: retl 677entry: 678 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 679 %C = sext <2 x i32> %B to <2 x i64> 680 ret <2 x i64> %C 681} 682 683define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 684; SSE2-LABEL: sext_4i32_to_4i64: 685; SSE2: # BB#0: # %entry 686; SSE2-NEXT: movdqa %xmm0, %xmm2 687; SSE2-NEXT: 
psrad $31, %xmm2 688; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 689; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 690; SSE2-NEXT: movdqa %xmm1, %xmm2 691; SSE2-NEXT: psrad $31, %xmm2 692; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 693; SSE2-NEXT: retq 694; 695; SSSE3-LABEL: sext_4i32_to_4i64: 696; SSSE3: # BB#0: # %entry 697; SSSE3-NEXT: movdqa %xmm0, %xmm2 698; SSSE3-NEXT: psrad $31, %xmm2 699; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 700; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 701; SSSE3-NEXT: movdqa %xmm1, %xmm2 702; SSSE3-NEXT: psrad $31, %xmm2 703; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 704; SSSE3-NEXT: retq 705; 706; SSE41-LABEL: sext_4i32_to_4i64: 707; SSE41: # BB#0: # %entry 708; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 709; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 710; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 711; SSE41-NEXT: movdqa %xmm2, %xmm0 712; SSE41-NEXT: retq 713; 714; AVX1-LABEL: sext_4i32_to_4i64: 715; AVX1: # BB#0: # %entry 716; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 717; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 718; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 719; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 720; AVX1-NEXT: retq 721; 722; AVX2-LABEL: sext_4i32_to_4i64: 723; AVX2: # BB#0: # %entry 724; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 725; AVX2-NEXT: retq 726; 727; AVX512-LABEL: sext_4i32_to_4i64: 728; AVX512: # BB#0: # %entry 729; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 730; AVX512-NEXT: retq 731; 732; X32-SSE41-LABEL: sext_4i32_to_4i64: 733; X32-SSE41: # BB#0: # %entry 734; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 735; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 736; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 737; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 738; X32-SSE41-NEXT: retl 739entry: 740 %B = sext <4 x i32> %A to <4 x i64> 741 ret <4 x i64> %B 742} 743 744define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { 745; SSE-LABEL: 
load_sext_2i1_to_2i64: 746; SSE: # BB#0: # %entry 747; SSE-NEXT: movzbl (%rdi), %eax 748; SSE-NEXT: movq %rax, %rcx 749; SSE-NEXT: shlq $62, %rcx 750; SSE-NEXT: sarq $63, %rcx 751; SSE-NEXT: movd %rcx, %xmm1 752; SSE-NEXT: shlq $63, %rax 753; SSE-NEXT: sarq $63, %rax 754; SSE-NEXT: movd %rax, %xmm0 755; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 756; SSE-NEXT: retq 757; 758; AVX1-LABEL: load_sext_2i1_to_2i64: 759; AVX1: # BB#0: # %entry 760; AVX1-NEXT: movzbl (%rdi), %eax 761; AVX1-NEXT: movq %rax, %rcx 762; AVX1-NEXT: shlq $62, %rcx 763; AVX1-NEXT: sarq $63, %rcx 764; AVX1-NEXT: vmovq %rcx, %xmm0 765; AVX1-NEXT: shlq $63, %rax 766; AVX1-NEXT: sarq $63, %rax 767; AVX1-NEXT: vmovq %rax, %xmm1 768; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 769; AVX1-NEXT: retq 770; 771; AVX2-LABEL: load_sext_2i1_to_2i64: 772; AVX2: # BB#0: # %entry 773; AVX2-NEXT: movzbl (%rdi), %eax 774; AVX2-NEXT: movq %rax, %rcx 775; AVX2-NEXT: shlq $62, %rcx 776; AVX2-NEXT: sarq $63, %rcx 777; AVX2-NEXT: vmovq %rcx, %xmm0 778; AVX2-NEXT: shlq $63, %rax 779; AVX2-NEXT: sarq $63, %rax 780; AVX2-NEXT: vmovq %rax, %xmm1 781; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 782; AVX2-NEXT: retq 783; 784; AVX512-LABEL: load_sext_2i1_to_2i64: 785; AVX512: # BB#0: # %entry 786; AVX512-NEXT: movzbl (%rdi), %eax 787; AVX512-NEXT: kmovw %eax, %k1 788; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 789; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 790; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> 791; AVX512-NEXT: retq 792; 793; X32-SSE41-LABEL: load_sext_2i1_to_2i64: 794; X32-SSE41: # BB#0: # %entry 795; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 796; X32-SSE41-NEXT: movzbl (%eax), %eax 797; X32-SSE41-NEXT: movl %eax, %ecx 798; X32-SSE41-NEXT: shll $31, %ecx 799; X32-SSE41-NEXT: sarl $31, %ecx 800; X32-SSE41-NEXT: movd %ecx, %xmm0 801; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 802; X32-SSE41-NEXT: shll $30, %eax 803; X32-SSE41-NEXT: sarl $31, %eax 804; 
X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0 805; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 806; X32-SSE41-NEXT: retl 807entry: 808 %X = load <2 x i1>, <2 x i1>* %ptr 809 %Y = sext <2 x i1> %X to <2 x i64> 810 ret <2 x i64> %Y 811} 812 813define <2 x i64> @load_sext_2i8_to_2i64(<2 x i8> *%ptr) { 814; SSE2-LABEL: load_sext_2i8_to_2i64: 815; SSE2: # BB#0: # %entry 816; SSE2-NEXT: movzwl (%rdi), %eax 817; SSE2-NEXT: movd %eax, %xmm0 818; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 819; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 820; SSE2-NEXT: movdqa %xmm0, %xmm1 821; SSE2-NEXT: psrad $31, %xmm1 822; SSE2-NEXT: psrad $24, %xmm0 823; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 824; SSE2-NEXT: retq 825; 826; SSSE3-LABEL: load_sext_2i8_to_2i64: 827; SSSE3: # BB#0: # %entry 828; SSSE3-NEXT: movzwl (%rdi), %eax 829; SSSE3-NEXT: movd %eax, %xmm0 830; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 831; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 832; SSSE3-NEXT: movdqa %xmm0, %xmm1 833; SSSE3-NEXT: psrad $31, %xmm1 834; SSSE3-NEXT: psrad $24, %xmm0 835; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 836; SSSE3-NEXT: retq 837; 838; SSE41-LABEL: load_sext_2i8_to_2i64: 839; SSE41: # BB#0: # %entry 840; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 841; SSE41-NEXT: retq 842; 843; AVX-LABEL: load_sext_2i8_to_2i64: 844; AVX: # BB#0: # %entry 845; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 846; AVX-NEXT: retq 847; 848; X32-SSE41-LABEL: load_sext_2i8_to_2i64: 849; X32-SSE41: # BB#0: # %entry 850; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 851; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 852; X32-SSE41-NEXT: retl 853entry: 854 %X = load <2 x i8>, <2 x i8>* %ptr 855 %Y = sext <2 x i8> %X to <2 x i64> 856 ret <2 x i64> %Y 857} 858 859define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { 860; SSE2-LABEL: load_sext_4i1_to_4i32: 861; SSE2: # BB#0: # %entry 862; SSE2-NEXT: movzbl 
(%rdi), %eax 863; SSE2-NEXT: movq %rax, %rcx 864; SSE2-NEXT: shlq $60, %rcx 865; SSE2-NEXT: sarq $63, %rcx 866; SSE2-NEXT: movd %ecx, %xmm0 867; SSE2-NEXT: movq %rax, %rcx 868; SSE2-NEXT: shlq $62, %rcx 869; SSE2-NEXT: sarq $63, %rcx 870; SSE2-NEXT: movd %ecx, %xmm1 871; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 872; SSE2-NEXT: movq %rax, %rcx 873; SSE2-NEXT: shlq $61, %rcx 874; SSE2-NEXT: sarq $63, %rcx 875; SSE2-NEXT: movd %ecx, %xmm2 876; SSE2-NEXT: shlq $63, %rax 877; SSE2-NEXT: sarq $63, %rax 878; SSE2-NEXT: movd %eax, %xmm0 879; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 880; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 881; SSE2-NEXT: retq 882; 883; SSSE3-LABEL: load_sext_4i1_to_4i32: 884; SSSE3: # BB#0: # %entry 885; SSSE3-NEXT: movzbl (%rdi), %eax 886; SSSE3-NEXT: movq %rax, %rcx 887; SSSE3-NEXT: shlq $60, %rcx 888; SSSE3-NEXT: sarq $63, %rcx 889; SSSE3-NEXT: movd %ecx, %xmm0 890; SSSE3-NEXT: movq %rax, %rcx 891; SSSE3-NEXT: shlq $62, %rcx 892; SSSE3-NEXT: sarq $63, %rcx 893; SSSE3-NEXT: movd %ecx, %xmm1 894; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 895; SSSE3-NEXT: movq %rax, %rcx 896; SSSE3-NEXT: shlq $61, %rcx 897; SSSE3-NEXT: sarq $63, %rcx 898; SSSE3-NEXT: movd %ecx, %xmm2 899; SSSE3-NEXT: shlq $63, %rax 900; SSSE3-NEXT: sarq $63, %rax 901; SSSE3-NEXT: movd %eax, %xmm0 902; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 903; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 904; SSSE3-NEXT: retq 905; 906; SSE41-LABEL: load_sext_4i1_to_4i32: 907; SSE41: # BB#0: # %entry 908; SSE41-NEXT: movzbl (%rdi), %eax 909; SSE41-NEXT: movq %rax, %rcx 910; SSE41-NEXT: shlq $62, %rcx 911; SSE41-NEXT: sarq $63, %rcx 912; SSE41-NEXT: movq %rax, %rdx 913; SSE41-NEXT: shlq $63, %rdx 914; SSE41-NEXT: sarq $63, %rdx 915; SSE41-NEXT: movd %edx, %xmm0 916; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 917; SSE41-NEXT: movq %rax, %rcx 918; 
SSE41-NEXT: shlq $61, %rcx 919; SSE41-NEXT: sarq $63, %rcx 920; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 921; SSE41-NEXT: shlq $60, %rax 922; SSE41-NEXT: sarq $63, %rax 923; SSE41-NEXT: pinsrd $3, %eax, %xmm0 924; SSE41-NEXT: retq 925; 926; AVX1-LABEL: load_sext_4i1_to_4i32: 927; AVX1: # BB#0: # %entry 928; AVX1-NEXT: movzbl (%rdi), %eax 929; AVX1-NEXT: movq %rax, %rcx 930; AVX1-NEXT: shlq $62, %rcx 931; AVX1-NEXT: sarq $63, %rcx 932; AVX1-NEXT: movq %rax, %rdx 933; AVX1-NEXT: shlq $63, %rdx 934; AVX1-NEXT: sarq $63, %rdx 935; AVX1-NEXT: vmovd %edx, %xmm0 936; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 937; AVX1-NEXT: movq %rax, %rcx 938; AVX1-NEXT: shlq $61, %rcx 939; AVX1-NEXT: sarq $63, %rcx 940; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 941; AVX1-NEXT: shlq $60, %rax 942; AVX1-NEXT: sarq $63, %rax 943; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 944; AVX1-NEXT: retq 945; 946; AVX2-LABEL: load_sext_4i1_to_4i32: 947; AVX2: # BB#0: # %entry 948; AVX2-NEXT: movzbl (%rdi), %eax 949; AVX2-NEXT: movq %rax, %rcx 950; AVX2-NEXT: shlq $62, %rcx 951; AVX2-NEXT: sarq $63, %rcx 952; AVX2-NEXT: movq %rax, %rdx 953; AVX2-NEXT: shlq $63, %rdx 954; AVX2-NEXT: sarq $63, %rdx 955; AVX2-NEXT: vmovd %edx, %xmm0 956; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 957; AVX2-NEXT: movq %rax, %rcx 958; AVX2-NEXT: shlq $61, %rcx 959; AVX2-NEXT: sarq $63, %rcx 960; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 961; AVX2-NEXT: shlq $60, %rax 962; AVX2-NEXT: sarq $63, %rax 963; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 964; AVX2-NEXT: retq 965; 966; AVX512-LABEL: load_sext_4i1_to_4i32: 967; AVX512: # BB#0: # %entry 968; AVX512-NEXT: movzbl (%rdi), %eax 969; AVX512-NEXT: kmovw %eax, %k1 970; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 971; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 972; AVX512-NEXT: vpmovqd %zmm0, %ymm0 973; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 974; AVX512-NEXT: retq 975; 976; X32-SSE41-LABEL: load_sext_4i1_to_4i32: 977; X32-SSE41: # BB#0: # %entry 978; 
X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 979; X32-SSE41-NEXT: movl (%eax), %eax 980; X32-SSE41-NEXT: movl %eax, %ecx 981; X32-SSE41-NEXT: shll $30, %ecx 982; X32-SSE41-NEXT: sarl $31, %ecx 983; X32-SSE41-NEXT: movl %eax, %edx 984; X32-SSE41-NEXT: shll $31, %edx 985; X32-SSE41-NEXT: sarl $31, %edx 986; X32-SSE41-NEXT: movd %edx, %xmm0 987; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 988; X32-SSE41-NEXT: movl %eax, %ecx 989; X32-SSE41-NEXT: shll $29, %ecx 990; X32-SSE41-NEXT: sarl $31, %ecx 991; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0 992; X32-SSE41-NEXT: shll $28, %eax 993; X32-SSE41-NEXT: sarl $31, %eax 994; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 995; X32-SSE41-NEXT: retl 996entry: 997 %X = load <4 x i1>, <4 x i1>* %ptr 998 %Y = sext <4 x i1> %X to <4 x i32> 999 ret <4 x i32> %Y 1000} 1001 1002define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) { 1003; SSE2-LABEL: load_sext_4i8_to_4i32: 1004; SSE2: # BB#0: # %entry 1005; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1006; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1007; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1008; SSE2-NEXT: psrad $24, %xmm0 1009; SSE2-NEXT: retq 1010; 1011; SSSE3-LABEL: load_sext_4i8_to_4i32: 1012; SSSE3: # BB#0: # %entry 1013; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1014; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1015; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1016; SSSE3-NEXT: psrad $24, %xmm0 1017; SSSE3-NEXT: retq 1018; 1019; SSE41-LABEL: load_sext_4i8_to_4i32: 1020; SSE41: # BB#0: # %entry 1021; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 1022; SSE41-NEXT: retq 1023; 1024; AVX-LABEL: load_sext_4i8_to_4i32: 1025; AVX: # BB#0: # %entry 1026; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 1027; AVX-NEXT: retq 1028; 1029; X32-SSE41-LABEL: load_sext_4i8_to_4i32: 1030; X32-SSE41: # BB#0: # %entry 1031; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1032; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 
1033; X32-SSE41-NEXT: retl 1034entry: 1035 %X = load <4 x i8>, <4 x i8>* %ptr 1036 %Y = sext <4 x i8> %X to <4 x i32> 1037 ret <4 x i32> %Y 1038} 1039 1040define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { 1041; SSE2-LABEL: load_sext_4i1_to_4i64: 1042; SSE2: # BB#0: # %entry 1043; SSE2-NEXT: movl (%rdi), %eax 1044; SSE2-NEXT: movl %eax, %ecx 1045; SSE2-NEXT: shrl $3, %ecx 1046; SSE2-NEXT: movd %ecx, %xmm0 1047; SSE2-NEXT: movl %eax, %ecx 1048; SSE2-NEXT: shrl %ecx 1049; SSE2-NEXT: movd %ecx, %xmm1 1050; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1051; SSE2-NEXT: movd %eax, %xmm2 1052; SSE2-NEXT: shrl $2, %eax 1053; SSE2-NEXT: movd %eax, %xmm0 1054; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1055; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1056; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 1057; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] 1058; SSE2-NEXT: psllq $63, %xmm0 1059; SSE2-NEXT: psrad $31, %xmm0 1060; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1061; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] 1062; SSE2-NEXT: psllq $63, %xmm1 1063; SSE2-NEXT: psrad $31, %xmm1 1064; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1065; SSE2-NEXT: retq 1066; 1067; SSSE3-LABEL: load_sext_4i1_to_4i64: 1068; SSSE3: # BB#0: # %entry 1069; SSSE3-NEXT: movl (%rdi), %eax 1070; SSSE3-NEXT: movl %eax, %ecx 1071; SSSE3-NEXT: shrl $3, %ecx 1072; SSSE3-NEXT: movd %ecx, %xmm0 1073; SSSE3-NEXT: movl %eax, %ecx 1074; SSSE3-NEXT: shrl %ecx 1075; SSSE3-NEXT: movd %ecx, %xmm1 1076; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1077; SSSE3-NEXT: movd %eax, %xmm2 1078; SSSE3-NEXT: shrl $2, %eax 1079; SSSE3-NEXT: movd %eax, %xmm0 1080; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1081; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1082; SSSE3-NEXT: pand {{.*}}(%rip), %xmm2 1083; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] 1084; 
SSSE3-NEXT: psllq $63, %xmm0 1085; SSSE3-NEXT: psrad $31, %xmm0 1086; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1087; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] 1088; SSSE3-NEXT: psllq $63, %xmm1 1089; SSSE3-NEXT: psrad $31, %xmm1 1090; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1091; SSSE3-NEXT: retq 1092; 1093; SSE41-LABEL: load_sext_4i1_to_4i64: 1094; SSE41: # BB#0: # %entry 1095; SSE41-NEXT: movl (%rdi), %eax 1096; SSE41-NEXT: movl %eax, %ecx 1097; SSE41-NEXT: shrl %ecx 1098; SSE41-NEXT: movd %eax, %xmm1 1099; SSE41-NEXT: pinsrd $1, %ecx, %xmm1 1100; SSE41-NEXT: movl %eax, %ecx 1101; SSE41-NEXT: shrl $2, %ecx 1102; SSE41-NEXT: pinsrd $2, %ecx, %xmm1 1103; SSE41-NEXT: shrl $3, %eax 1104; SSE41-NEXT: pinsrd $3, %eax, %xmm1 1105; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 1106; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1107; SSE41-NEXT: psllq $63, %xmm0 1108; SSE41-NEXT: psrad $31, %xmm0 1109; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1110; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1111; SSE41-NEXT: psllq $63, %xmm1 1112; SSE41-NEXT: psrad $31, %xmm1 1113; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1114; SSE41-NEXT: retq 1115; 1116; AVX1-LABEL: load_sext_4i1_to_4i64: 1117; AVX1: # BB#0: # %entry 1118; AVX1-NEXT: movzbl (%rdi), %eax 1119; AVX1-NEXT: movq %rax, %rcx 1120; AVX1-NEXT: shlq $62, %rcx 1121; AVX1-NEXT: sarq $63, %rcx 1122; AVX1-NEXT: movq %rax, %rdx 1123; AVX1-NEXT: shlq $63, %rdx 1124; AVX1-NEXT: sarq $63, %rdx 1125; AVX1-NEXT: vmovd %edx, %xmm0 1126; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1127; AVX1-NEXT: movq %rax, %rcx 1128; AVX1-NEXT: shlq $61, %rcx 1129; AVX1-NEXT: sarq $63, %rcx 1130; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1131; AVX1-NEXT: shlq $60, %rax 1132; AVX1-NEXT: sarq $63, %rax 1133; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1134; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1135; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1136; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1137; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm1, %ymm0 1138; AVX1-NEXT: retq 1139; 1140; AVX2-LABEL: load_sext_4i1_to_4i64: 1141; AVX2: # BB#0: # %entry 1142; AVX2-NEXT: movzbl (%rdi), %eax 1143; AVX2-NEXT: movq %rax, %rcx 1144; AVX2-NEXT: shlq $60, %rcx 1145; AVX2-NEXT: sarq $63, %rcx 1146; AVX2-NEXT: vmovq %rcx, %xmm0 1147; AVX2-NEXT: movq %rax, %rcx 1148; AVX2-NEXT: shlq $61, %rcx 1149; AVX2-NEXT: sarq $63, %rcx 1150; AVX2-NEXT: vmovq %rcx, %xmm1 1151; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1152; AVX2-NEXT: movq %rax, %rcx 1153; AVX2-NEXT: shlq $62, %rcx 1154; AVX2-NEXT: sarq $63, %rcx 1155; AVX2-NEXT: vmovq %rcx, %xmm1 1156; AVX2-NEXT: shlq $63, %rax 1157; AVX2-NEXT: sarq $63, %rax 1158; AVX2-NEXT: vmovq %rax, %xmm2 1159; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1160; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1161; AVX2-NEXT: retq 1162; 1163; AVX512-LABEL: load_sext_4i1_to_4i64: 1164; AVX512: # BB#0: # %entry 1165; AVX512-NEXT: movzbl (%rdi), %eax 1166; AVX512-NEXT: kmovw %eax, %k1 1167; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 1168; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1169; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> 1170; AVX512-NEXT: retq 1171; 1172; X32-SSE41-LABEL: load_sext_4i1_to_4i64: 1173; X32-SSE41: # BB#0: # %entry 1174; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1175; X32-SSE41-NEXT: movzbl (%eax), %eax 1176; X32-SSE41-NEXT: movl %eax, %ecx 1177; X32-SSE41-NEXT: shrl %ecx 1178; X32-SSE41-NEXT: movd %eax, %xmm1 1179; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm1 1180; X32-SSE41-NEXT: movl %eax, %ecx 1181; X32-SSE41-NEXT: shrl $2, %ecx 1182; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm1 1183; X32-SSE41-NEXT: shrl $3, %eax 1184; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm1 1185; X32-SSE41-NEXT: pand {{\.LCPI.*}}, %xmm1 1186; X32-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1187; X32-SSE41-NEXT: psllq $63, %xmm0 1188; X32-SSE41-NEXT: psrad $31, %xmm0 1189; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 
1190; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1191; X32-SSE41-NEXT: psllq $63, %xmm1 1192; X32-SSE41-NEXT: psrad $31, %xmm1 1193; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1194; X32-SSE41-NEXT: retl 1195entry: 1196 %X = load <4 x i1>, <4 x i1>* %ptr 1197 %Y = sext <4 x i1> %X to <4 x i64> 1198 ret <4 x i64> %Y 1199} 1200 1201define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { 1202; SSE2-LABEL: load_sext_4i8_to_4i64: 1203; SSE2: # BB#0: # %entry 1204; SSE2-NEXT: movsbq 1(%rdi), %rax 1205; SSE2-NEXT: movd %rax, %xmm1 1206; SSE2-NEXT: movsbq (%rdi), %rax 1207; SSE2-NEXT: movd %rax, %xmm0 1208; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1209; SSE2-NEXT: movsbq 3(%rdi), %rax 1210; SSE2-NEXT: movd %rax, %xmm2 1211; SSE2-NEXT: movsbq 2(%rdi), %rax 1212; SSE2-NEXT: movd %rax, %xmm1 1213; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1214; SSE2-NEXT: retq 1215; 1216; SSSE3-LABEL: load_sext_4i8_to_4i64: 1217; SSSE3: # BB#0: # %entry 1218; SSSE3-NEXT: movsbq 1(%rdi), %rax 1219; SSSE3-NEXT: movd %rax, %xmm1 1220; SSSE3-NEXT: movsbq (%rdi), %rax 1221; SSSE3-NEXT: movd %rax, %xmm0 1222; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1223; SSSE3-NEXT: movsbq 3(%rdi), %rax 1224; SSSE3-NEXT: movd %rax, %xmm2 1225; SSSE3-NEXT: movsbq 2(%rdi), %rax 1226; SSSE3-NEXT: movd %rax, %xmm1 1227; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1228; SSSE3-NEXT: retq 1229; 1230; SSE41-LABEL: load_sext_4i8_to_4i64: 1231; SSE41: # BB#0: # %entry 1232; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1233; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 1234; SSE41-NEXT: retq 1235; 1236; AVX1-LABEL: load_sext_4i8_to_4i64: 1237; AVX1: # BB#0: # %entry 1238; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0 1239; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1240; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1241; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1242; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1243; AVX1-NEXT: retq 1244; 1245; AVX2-LABEL: load_sext_4i8_to_4i64: 1246; AVX2: # 
BB#0: # %entry 1247; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 1248; AVX2-NEXT: retq 1249; 1250; AVX512-LABEL: load_sext_4i8_to_4i64: 1251; AVX512: # BB#0: # %entry 1252; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 1253; AVX512-NEXT: retq 1254; 1255; X32-SSE41-LABEL: load_sext_4i8_to_4i64: 1256; X32-SSE41: # BB#0: # %entry 1257; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1258; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1259; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 1260; X32-SSE41-NEXT: retl 1261entry: 1262 %X = load <4 x i8>, <4 x i8>* %ptr 1263 %Y = sext <4 x i8> %X to <4 x i64> 1264 ret <4 x i64> %Y 1265} 1266 1267define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { 1268; SSE2-LABEL: load_sext_8i1_to_8i16: 1269; SSE2: # BB#0: # %entry 1270; SSE2-NEXT: movsbq (%rdi), %rax 1271; SSE2-NEXT: movq %rax, %rcx 1272; SSE2-NEXT: shrq $7, %rcx 1273; SSE2-NEXT: movd %ecx, %xmm0 1274; SSE2-NEXT: movq %rax, %rcx 1275; SSE2-NEXT: shlq $60, %rcx 1276; SSE2-NEXT: sarq $63, %rcx 1277; SSE2-NEXT: movd %ecx, %xmm2 1278; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1279; SSE2-NEXT: movq %rax, %rcx 1280; SSE2-NEXT: shlq $58, %rcx 1281; SSE2-NEXT: sarq $63, %rcx 1282; SSE2-NEXT: movd %ecx, %xmm0 1283; SSE2-NEXT: movq %rax, %rcx 1284; SSE2-NEXT: shlq $62, %rcx 1285; SSE2-NEXT: sarq $63, %rcx 1286; SSE2-NEXT: movd %ecx, %xmm1 1287; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1288; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1289; SSE2-NEXT: movq %rax, %rcx 1290; SSE2-NEXT: shlq $57, %rcx 1291; SSE2-NEXT: sarq $63, %rcx 1292; SSE2-NEXT: movd %ecx, %xmm0 1293; SSE2-NEXT: movq %rax, %rcx 1294; SSE2-NEXT: shlq $61, %rcx 1295; SSE2-NEXT: sarq $63, %rcx 1296; SSE2-NEXT: movd %ecx, %xmm2 1297; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1298; SSE2-NEXT: movq %rax, %rcx 1299; 
SSE2-NEXT: shlq $59, %rcx 1300; SSE2-NEXT: sarq $63, %rcx 1301; SSE2-NEXT: movd %ecx, %xmm3 1302; SSE2-NEXT: shlq $63, %rax 1303; SSE2-NEXT: sarq $63, %rax 1304; SSE2-NEXT: movd %eax, %xmm0 1305; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1306; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1307; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1308; SSE2-NEXT: retq 1309; 1310; SSSE3-LABEL: load_sext_8i1_to_8i16: 1311; SSSE3: # BB#0: # %entry 1312; SSSE3-NEXT: movsbq (%rdi), %rax 1313; SSSE3-NEXT: movq %rax, %rcx 1314; SSSE3-NEXT: shrq $7, %rcx 1315; SSSE3-NEXT: movd %ecx, %xmm0 1316; SSSE3-NEXT: movq %rax, %rcx 1317; SSSE3-NEXT: shlq $60, %rcx 1318; SSSE3-NEXT: sarq $63, %rcx 1319; SSSE3-NEXT: movd %ecx, %xmm2 1320; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1321; SSSE3-NEXT: movq %rax, %rcx 1322; SSSE3-NEXT: shlq $58, %rcx 1323; SSSE3-NEXT: sarq $63, %rcx 1324; SSSE3-NEXT: movd %ecx, %xmm0 1325; SSSE3-NEXT: movq %rax, %rcx 1326; SSSE3-NEXT: shlq $62, %rcx 1327; SSSE3-NEXT: sarq $63, %rcx 1328; SSSE3-NEXT: movd %ecx, %xmm1 1329; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1330; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1331; SSSE3-NEXT: movq %rax, %rcx 1332; SSSE3-NEXT: shlq $57, %rcx 1333; SSSE3-NEXT: sarq $63, %rcx 1334; SSSE3-NEXT: movd %ecx, %xmm0 1335; SSSE3-NEXT: movq %rax, %rcx 1336; SSSE3-NEXT: shlq $61, %rcx 1337; SSSE3-NEXT: sarq $63, %rcx 1338; SSSE3-NEXT: movd %ecx, %xmm2 1339; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1340; SSSE3-NEXT: movq %rax, %rcx 1341; SSSE3-NEXT: shlq $59, %rcx 1342; SSSE3-NEXT: sarq $63, %rcx 1343; SSSE3-NEXT: movd %ecx, %xmm3 1344; 
SSSE3-NEXT: shlq $63, %rax 1345; SSSE3-NEXT: sarq $63, %rax 1346; SSSE3-NEXT: movd %eax, %xmm0 1347; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1348; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1349; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1350; SSSE3-NEXT: retq 1351; 1352; SSE41-LABEL: load_sext_8i1_to_8i16: 1353; SSE41: # BB#0: # %entry 1354; SSE41-NEXT: movsbq (%rdi), %rax 1355; SSE41-NEXT: movq %rax, %rcx 1356; SSE41-NEXT: shlq $62, %rcx 1357; SSE41-NEXT: sarq $63, %rcx 1358; SSE41-NEXT: movq %rax, %rdx 1359; SSE41-NEXT: shlq $63, %rdx 1360; SSE41-NEXT: sarq $63, %rdx 1361; SSE41-NEXT: movd %edx, %xmm0 1362; SSE41-NEXT: pinsrw $1, %ecx, %xmm0 1363; SSE41-NEXT: movq %rax, %rcx 1364; SSE41-NEXT: shlq $61, %rcx 1365; SSE41-NEXT: sarq $63, %rcx 1366; SSE41-NEXT: pinsrw $2, %ecx, %xmm0 1367; SSE41-NEXT: movq %rax, %rcx 1368; SSE41-NEXT: shlq $60, %rcx 1369; SSE41-NEXT: sarq $63, %rcx 1370; SSE41-NEXT: pinsrw $3, %ecx, %xmm0 1371; SSE41-NEXT: movq %rax, %rcx 1372; SSE41-NEXT: shlq $59, %rcx 1373; SSE41-NEXT: sarq $63, %rcx 1374; SSE41-NEXT: pinsrw $4, %ecx, %xmm0 1375; SSE41-NEXT: movq %rax, %rcx 1376; SSE41-NEXT: shlq $58, %rcx 1377; SSE41-NEXT: sarq $63, %rcx 1378; SSE41-NEXT: pinsrw $5, %ecx, %xmm0 1379; SSE41-NEXT: movq %rax, %rcx 1380; SSE41-NEXT: shlq $57, %rcx 1381; SSE41-NEXT: sarq $63, %rcx 1382; SSE41-NEXT: pinsrw $6, %ecx, %xmm0 1383; SSE41-NEXT: shrq $7, %rax 1384; SSE41-NEXT: pinsrw $7, %eax, %xmm0 1385; SSE41-NEXT: retq 1386; 1387; AVX1-LABEL: load_sext_8i1_to_8i16: 1388; AVX1: # BB#0: # %entry 1389; AVX1-NEXT: movsbq (%rdi), %rax 1390; AVX1-NEXT: movq %rax, %rcx 1391; AVX1-NEXT: shlq $62, %rcx 1392; AVX1-NEXT: sarq $63, %rcx 1393; AVX1-NEXT: movq %rax, %rdx 1394; AVX1-NEXT: shlq $63, %rdx 1395; AVX1-NEXT: sarq $63, %rdx 1396; AVX1-NEXT: vmovd %edx, %xmm0 1397; AVX1-NEXT: 
vpinsrw $1, %ecx, %xmm0, %xmm0 1398; AVX1-NEXT: movq %rax, %rcx 1399; AVX1-NEXT: shlq $61, %rcx 1400; AVX1-NEXT: sarq $63, %rcx 1401; AVX1-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 1402; AVX1-NEXT: movq %rax, %rcx 1403; AVX1-NEXT: shlq $60, %rcx 1404; AVX1-NEXT: sarq $63, %rcx 1405; AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 1406; AVX1-NEXT: movq %rax, %rcx 1407; AVX1-NEXT: shlq $59, %rcx 1408; AVX1-NEXT: sarq $63, %rcx 1409; AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 1410; AVX1-NEXT: movq %rax, %rcx 1411; AVX1-NEXT: shlq $58, %rcx 1412; AVX1-NEXT: sarq $63, %rcx 1413; AVX1-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 1414; AVX1-NEXT: movq %rax, %rcx 1415; AVX1-NEXT: shlq $57, %rcx 1416; AVX1-NEXT: sarq $63, %rcx 1417; AVX1-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1418; AVX1-NEXT: shrq $7, %rax 1419; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1420; AVX1-NEXT: retq 1421; 1422; AVX2-LABEL: load_sext_8i1_to_8i16: 1423; AVX2: # BB#0: # %entry 1424; AVX2-NEXT: movsbq (%rdi), %rax 1425; AVX2-NEXT: movq %rax, %rcx 1426; AVX2-NEXT: shlq $62, %rcx 1427; AVX2-NEXT: sarq $63, %rcx 1428; AVX2-NEXT: movq %rax, %rdx 1429; AVX2-NEXT: shlq $63, %rdx 1430; AVX2-NEXT: sarq $63, %rdx 1431; AVX2-NEXT: vmovd %edx, %xmm0 1432; AVX2-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 1433; AVX2-NEXT: movq %rax, %rcx 1434; AVX2-NEXT: shlq $61, %rcx 1435; AVX2-NEXT: sarq $63, %rcx 1436; AVX2-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 1437; AVX2-NEXT: movq %rax, %rcx 1438; AVX2-NEXT: shlq $60, %rcx 1439; AVX2-NEXT: sarq $63, %rcx 1440; AVX2-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 1441; AVX2-NEXT: movq %rax, %rcx 1442; AVX2-NEXT: shlq $59, %rcx 1443; AVX2-NEXT: sarq $63, %rcx 1444; AVX2-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 1445; AVX2-NEXT: movq %rax, %rcx 1446; AVX2-NEXT: shlq $58, %rcx 1447; AVX2-NEXT: sarq $63, %rcx 1448; AVX2-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 1449; AVX2-NEXT: movq %rax, %rcx 1450; AVX2-NEXT: shlq $57, %rcx 1451; AVX2-NEXT: sarq $63, %rcx 1452; AVX2-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1453; AVX2-NEXT: shrq $7, 
%rax 1454; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1455; AVX2-NEXT: retq 1456; 1457; AVX512-LABEL: load_sext_8i1_to_8i16: 1458; AVX512: # BB#0: # %entry 1459; AVX512-NEXT: movzbl (%rdi), %eax 1460; AVX512-NEXT: kmovw %eax, %k1 1461; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 1462; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1463; AVX512-NEXT: vpmovqw %zmm0, %xmm0 1464; AVX512-NEXT: retq 1465; 1466; X32-SSE41-LABEL: load_sext_8i1_to_8i16: 1467; X32-SSE41: # BB#0: # %entry 1468; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1469; X32-SSE41-NEXT: movsbl (%eax), %eax 1470; X32-SSE41-NEXT: movl %eax, %ecx 1471; X32-SSE41-NEXT: shll $30, %ecx 1472; X32-SSE41-NEXT: sarl $31, %ecx 1473; X32-SSE41-NEXT: movl %eax, %edx 1474; X32-SSE41-NEXT: shll $31, %edx 1475; X32-SSE41-NEXT: sarl $31, %edx 1476; X32-SSE41-NEXT: movd %edx, %xmm0 1477; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm0 1478; X32-SSE41-NEXT: movl %eax, %ecx 1479; X32-SSE41-NEXT: shll $29, %ecx 1480; X32-SSE41-NEXT: sarl $31, %ecx 1481; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm0 1482; X32-SSE41-NEXT: movl %eax, %ecx 1483; X32-SSE41-NEXT: shll $28, %ecx 1484; X32-SSE41-NEXT: sarl $31, %ecx 1485; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm0 1486; X32-SSE41-NEXT: movl %eax, %ecx 1487; X32-SSE41-NEXT: shll $27, %ecx 1488; X32-SSE41-NEXT: sarl $31, %ecx 1489; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm0 1490; X32-SSE41-NEXT: movl %eax, %ecx 1491; X32-SSE41-NEXT: shll $26, %ecx 1492; X32-SSE41-NEXT: sarl $31, %ecx 1493; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm0 1494; X32-SSE41-NEXT: movl %eax, %ecx 1495; X32-SSE41-NEXT: shll $25, %ecx 1496; X32-SSE41-NEXT: sarl $31, %ecx 1497; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm0 1498; X32-SSE41-NEXT: shrl $7, %eax 1499; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm0 1500; X32-SSE41-NEXT: retl 1501entry: 1502 %X = load <8 x i1>, <8 x i1>* %ptr 1503 %Y = sext <8 x i1> %X to <8 x i16> 1504 ret <8 x i16> %Y 1505} 1506 1507define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) { 1508; SSE2-LABEL: 
load_sext_8i8_to_8i16: 1509; SSE2: # BB#0: # %entry 1510; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1511; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1512; SSE2-NEXT: psraw $8, %xmm0 1513; SSE2-NEXT: retq 1514; 1515; SSSE3-LABEL: load_sext_8i8_to_8i16: 1516; SSSE3: # BB#0: # %entry 1517; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1518; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1519; SSSE3-NEXT: psraw $8, %xmm0 1520; SSSE3-NEXT: retq 1521; 1522; SSE41-LABEL: load_sext_8i8_to_8i16: 1523; SSE41: # BB#0: # %entry 1524; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 1525; SSE41-NEXT: retq 1526; 1527; AVX-LABEL: load_sext_8i8_to_8i16: 1528; AVX: # BB#0: # %entry 1529; AVX-NEXT: vpmovsxbw (%rdi), %xmm0 1530; AVX-NEXT: retq 1531; 1532; X32-SSE41-LABEL: load_sext_8i8_to_8i16: 1533; X32-SSE41: # BB#0: # %entry 1534; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1535; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 1536; X32-SSE41-NEXT: retl 1537entry: 1538 %X = load <8 x i8>, <8 x i8>* %ptr 1539 %Y = sext <8 x i8> %X to <8 x i16> 1540 ret <8 x i16> %Y 1541} 1542 1543define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) { 1544; SSE2-LABEL: load_sext_8i8_to_8i64: 1545; SSE2: # BB#0: # %entry 1546; SSE2-NEXT: movsbq 1(%rdi), %rax 1547; SSE2-NEXT: movd %rax, %xmm1 1548; SSE2-NEXT: movsbq (%rdi), %rax 1549; SSE2-NEXT: movd %rax, %xmm0 1550; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1551; SSE2-NEXT: movsbq 3(%rdi), %rax 1552; SSE2-NEXT: movd %rax, %xmm2 1553; SSE2-NEXT: movsbq 2(%rdi), %rax 1554; SSE2-NEXT: movd %rax, %xmm1 1555; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1556; SSE2-NEXT: movsbq 5(%rdi), %rax 1557; SSE2-NEXT: movd %rax, %xmm3 1558; SSE2-NEXT: movsbq 4(%rdi), %rax 1559; SSE2-NEXT: movd %rax, %xmm2 1560; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1561; SSE2-NEXT: movsbq 7(%rdi), %rax 1562; SSE2-NEXT: movd %rax, %xmm4 1563; SSE2-NEXT: movsbq 6(%rdi), %rax 1564; SSE2-NEXT: movd %rax, 
%xmm3 1565; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1566; SSE2-NEXT: retq 1567; 1568; SSSE3-LABEL: load_sext_8i8_to_8i64: 1569; SSSE3: # BB#0: # %entry 1570; SSSE3-NEXT: movsbq 1(%rdi), %rax 1571; SSSE3-NEXT: movd %rax, %xmm1 1572; SSSE3-NEXT: movsbq (%rdi), %rax 1573; SSSE3-NEXT: movd %rax, %xmm0 1574; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1575; SSSE3-NEXT: movsbq 3(%rdi), %rax 1576; SSSE3-NEXT: movd %rax, %xmm2 1577; SSSE3-NEXT: movsbq 2(%rdi), %rax 1578; SSSE3-NEXT: movd %rax, %xmm1 1579; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1580; SSSE3-NEXT: movsbq 5(%rdi), %rax 1581; SSSE3-NEXT: movd %rax, %xmm3 1582; SSSE3-NEXT: movsbq 4(%rdi), %rax 1583; SSSE3-NEXT: movd %rax, %xmm2 1584; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1585; SSSE3-NEXT: movsbq 7(%rdi), %rax 1586; SSSE3-NEXT: movd %rax, %xmm4 1587; SSSE3-NEXT: movsbq 6(%rdi), %rax 1588; SSSE3-NEXT: movd %rax, %xmm3 1589; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1590; SSSE3-NEXT: retq 1591; 1592; SSE41-LABEL: load_sext_8i8_to_8i64: 1593; SSE41: # BB#0: # %entry 1594; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1595; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 1596; SSE41-NEXT: pmovsxbq 4(%rdi), %xmm2 1597; SSE41-NEXT: pmovsxbq 6(%rdi), %xmm3 1598; SSE41-NEXT: retq 1599; 1600; AVX1-LABEL: load_sext_8i8_to_8i64: 1601; AVX1: # BB#0: # %entry 1602; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0 1603; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1604; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1605; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1606; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1607; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm1 1608; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2 1609; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1610; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1 1611; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1612; AVX1-NEXT: retq 1613; 1614; AVX2-LABEL: load_sext_8i8_to_8i64: 1615; AVX2: # BB#0: # %entry 1616; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 1617; AVX2-NEXT: vpmovsxbq 
4(%rdi), %ymm1 1618; AVX2-NEXT: retq 1619; 1620; AVX512-LABEL: load_sext_8i8_to_8i64: 1621; AVX512: # BB#0: # %entry 1622; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0 1623; AVX512-NEXT: retq 1624; 1625; X32-SSE41-LABEL: load_sext_8i8_to_8i64: 1626; X32-SSE41: # BB#0: # %entry 1627; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1628; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1629; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 1630; X32-SSE41-NEXT: pmovsxbq 4(%eax), %xmm2 1631; X32-SSE41-NEXT: pmovsxbq 6(%eax), %xmm3 1632; X32-SSE41-NEXT: retl 1633entry: 1634 %X = load <8 x i8>, <8 x i8>* %ptr 1635 %Y = sext <8 x i8> %X to <8 x i64> 1636 ret <8 x i64> %Y 1637} 1638 1639define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { 1640; SSE2-LABEL: load_sext_8i1_to_8i32: 1641; SSE2: # BB#0: # %entry 1642; SSE2-NEXT: movzbl (%rdi), %eax 1643; SSE2-NEXT: movl %eax, %ecx 1644; SSE2-NEXT: shrl $6, %ecx 1645; SSE2-NEXT: andl $1, %ecx 1646; SSE2-NEXT: movd %ecx, %xmm0 1647; SSE2-NEXT: movl %eax, %ecx 1648; SSE2-NEXT: shrl $2, %ecx 1649; SSE2-NEXT: andl $1, %ecx 1650; SSE2-NEXT: movd %ecx, %xmm2 1651; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1652; SSE2-NEXT: movl %eax, %ecx 1653; SSE2-NEXT: andl $1, %ecx 1654; SSE2-NEXT: movd %ecx, %xmm1 1655; SSE2-NEXT: movl %eax, %ecx 1656; SSE2-NEXT: shrl $4, %ecx 1657; SSE2-NEXT: andl $1, %ecx 1658; SSE2-NEXT: movd %ecx, %xmm0 1659; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1660; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1661; SSE2-NEXT: movl %eax, %ecx 1662; SSE2-NEXT: shrl $5, %ecx 1663; SSE2-NEXT: andl $1, %ecx 1664; SSE2-NEXT: movd %ecx, %xmm0 1665; SSE2-NEXT: movl %eax, %ecx 1666; SSE2-NEXT: shrl %ecx 1667; SSE2-NEXT: andl $1, %ecx 1668; SSE2-NEXT: movd %ecx, %xmm2 1669; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1670; 
SSE2-NEXT: movl %eax, %ecx 1671; SSE2-NEXT: shrl $3, %ecx 1672; SSE2-NEXT: andl $1, %ecx 1673; SSE2-NEXT: movd %ecx, %xmm0 1674; SSE2-NEXT: shrl $7, %eax 1675; SSE2-NEXT: movzwl %ax, %eax 1676; SSE2-NEXT: movd %eax, %xmm3 1677; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1678; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1679; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1680; SSE2-NEXT: movdqa %xmm1, %xmm0 1681; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1682; SSE2-NEXT: pslld $31, %xmm0 1683; SSE2-NEXT: psrad $31, %xmm0 1684; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1685; SSE2-NEXT: pslld $31, %xmm1 1686; SSE2-NEXT: psrad $31, %xmm1 1687; SSE2-NEXT: retq 1688; 1689; SSSE3-LABEL: load_sext_8i1_to_8i32: 1690; SSSE3: # BB#0: # %entry 1691; SSSE3-NEXT: movzbl (%rdi), %eax 1692; SSSE3-NEXT: movl %eax, %ecx 1693; SSSE3-NEXT: shrl $6, %ecx 1694; SSSE3-NEXT: andl $1, %ecx 1695; SSSE3-NEXT: movd %ecx, %xmm0 1696; SSSE3-NEXT: movl %eax, %ecx 1697; SSSE3-NEXT: shrl $2, %ecx 1698; SSSE3-NEXT: andl $1, %ecx 1699; SSSE3-NEXT: movd %ecx, %xmm2 1700; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1701; SSSE3-NEXT: movl %eax, %ecx 1702; SSSE3-NEXT: andl $1, %ecx 1703; SSSE3-NEXT: movd %ecx, %xmm1 1704; SSSE3-NEXT: movl %eax, %ecx 1705; SSSE3-NEXT: shrl $4, %ecx 1706; SSSE3-NEXT: andl $1, %ecx 1707; SSSE3-NEXT: movd %ecx, %xmm0 1708; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1709; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1710; SSSE3-NEXT: movl %eax, %ecx 1711; SSSE3-NEXT: shrl $5, %ecx 1712; SSSE3-NEXT: andl $1, %ecx 1713; SSSE3-NEXT: movd %ecx, %xmm0 1714; 
SSSE3-NEXT: movl %eax, %ecx 1715; SSSE3-NEXT: shrl %ecx 1716; SSSE3-NEXT: andl $1, %ecx 1717; SSSE3-NEXT: movd %ecx, %xmm2 1718; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1719; SSSE3-NEXT: movl %eax, %ecx 1720; SSSE3-NEXT: shrl $3, %ecx 1721; SSSE3-NEXT: andl $1, %ecx 1722; SSSE3-NEXT: movd %ecx, %xmm0 1723; SSSE3-NEXT: shrl $7, %eax 1724; SSSE3-NEXT: movzwl %ax, %eax 1725; SSSE3-NEXT: movd %eax, %xmm3 1726; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1727; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1728; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1729; SSSE3-NEXT: movdqa %xmm1, %xmm0 1730; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1731; SSSE3-NEXT: pslld $31, %xmm0 1732; SSSE3-NEXT: psrad $31, %xmm0 1733; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1734; SSSE3-NEXT: pslld $31, %xmm1 1735; SSSE3-NEXT: psrad $31, %xmm1 1736; SSSE3-NEXT: retq 1737; 1738; SSE41-LABEL: load_sext_8i1_to_8i32: 1739; SSE41: # BB#0: # %entry 1740; SSE41-NEXT: movzbl (%rdi), %eax 1741; SSE41-NEXT: movl %eax, %ecx 1742; SSE41-NEXT: shrl %ecx 1743; SSE41-NEXT: andl $1, %ecx 1744; SSE41-NEXT: movl %eax, %edx 1745; SSE41-NEXT: andl $1, %edx 1746; SSE41-NEXT: movd %edx, %xmm1 1747; SSE41-NEXT: pinsrw $1, %ecx, %xmm1 1748; SSE41-NEXT: movl %eax, %ecx 1749; SSE41-NEXT: shrl $2, %ecx 1750; SSE41-NEXT: andl $1, %ecx 1751; SSE41-NEXT: pinsrw $2, %ecx, %xmm1 1752; SSE41-NEXT: movl %eax, %ecx 1753; SSE41-NEXT: shrl $3, %ecx 1754; SSE41-NEXT: andl $1, %ecx 1755; SSE41-NEXT: pinsrw $3, %ecx, %xmm1 1756; SSE41-NEXT: movl %eax, %ecx 1757; SSE41-NEXT: shrl $4, %ecx 1758; SSE41-NEXT: andl $1, %ecx 1759; SSE41-NEXT: pinsrw $4, %ecx, %xmm1 1760; SSE41-NEXT: movl %eax, %ecx 1761; SSE41-NEXT: shrl $5, %ecx 
1762; SSE41-NEXT: andl $1, %ecx 1763; SSE41-NEXT: pinsrw $5, %ecx, %xmm1 1764; SSE41-NEXT: movl %eax, %ecx 1765; SSE41-NEXT: shrl $6, %ecx 1766; SSE41-NEXT: andl $1, %ecx 1767; SSE41-NEXT: pinsrw $6, %ecx, %xmm1 1768; SSE41-NEXT: shrl $7, %eax 1769; SSE41-NEXT: movzwl %ax, %eax 1770; SSE41-NEXT: pinsrw $7, %eax, %xmm1 1771; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 1772; SSE41-NEXT: pslld $31, %xmm0 1773; SSE41-NEXT: psrad $31, %xmm0 1774; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1775; SSE41-NEXT: pslld $31, %xmm1 1776; SSE41-NEXT: psrad $31, %xmm1 1777; SSE41-NEXT: retq 1778; 1779; AVX1-LABEL: load_sext_8i1_to_8i32: 1780; AVX1: # BB#0: # %entry 1781; AVX1-NEXT: movsbq (%rdi), %rax 1782; AVX1-NEXT: movq %rax, %rcx 1783; AVX1-NEXT: shlq $58, %rcx 1784; AVX1-NEXT: sarq $63, %rcx 1785; AVX1-NEXT: movq %rax, %rdx 1786; AVX1-NEXT: shlq $59, %rdx 1787; AVX1-NEXT: sarq $63, %rdx 1788; AVX1-NEXT: vmovd %edx, %xmm0 1789; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1790; AVX1-NEXT: movq %rax, %rcx 1791; AVX1-NEXT: shlq $57, %rcx 1792; AVX1-NEXT: sarq $63, %rcx 1793; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1794; AVX1-NEXT: movq %rax, %rcx 1795; AVX1-NEXT: shrq $7, %rcx 1796; AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 1797; AVX1-NEXT: movq %rax, %rcx 1798; AVX1-NEXT: shlq $62, %rcx 1799; AVX1-NEXT: sarq $63, %rcx 1800; AVX1-NEXT: movq %rax, %rdx 1801; AVX1-NEXT: shlq $63, %rdx 1802; AVX1-NEXT: sarq $63, %rdx 1803; AVX1-NEXT: vmovd %edx, %xmm1 1804; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 1805; AVX1-NEXT: movq %rax, %rcx 1806; AVX1-NEXT: shlq $61, %rcx 1807; AVX1-NEXT: sarq $63, %rcx 1808; AVX1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 1809; AVX1-NEXT: shlq $60, %rax 1810; AVX1-NEXT: sarq $63, %rax 1811; AVX1-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 1812; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1813; AVX1-NEXT: retq 1814; 1815; AVX2-LABEL: load_sext_8i1_to_8i32: 1816; 
AVX2: # BB#0: # %entry 1817; AVX2-NEXT: movsbq (%rdi), %rax 1818; AVX2-NEXT: movq %rax, %rcx 1819; AVX2-NEXT: shlq $58, %rcx 1820; AVX2-NEXT: sarq $63, %rcx 1821; AVX2-NEXT: movq %rax, %rdx 1822; AVX2-NEXT: shlq $59, %rdx 1823; AVX2-NEXT: sarq $63, %rdx 1824; AVX2-NEXT: vmovd %edx, %xmm0 1825; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1826; AVX2-NEXT: movq %rax, %rcx 1827; AVX2-NEXT: shlq $57, %rcx 1828; AVX2-NEXT: sarq $63, %rcx 1829; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1830; AVX2-NEXT: movq %rax, %rcx 1831; AVX2-NEXT: shrq $7, %rcx 1832; AVX2-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 1833; AVX2-NEXT: movq %rax, %rcx 1834; AVX2-NEXT: shlq $62, %rcx 1835; AVX2-NEXT: sarq $63, %rcx 1836; AVX2-NEXT: movq %rax, %rdx 1837; AVX2-NEXT: shlq $63, %rdx 1838; AVX2-NEXT: sarq $63, %rdx 1839; AVX2-NEXT: vmovd %edx, %xmm1 1840; AVX2-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 1841; AVX2-NEXT: movq %rax, %rcx 1842; AVX2-NEXT: shlq $61, %rcx 1843; AVX2-NEXT: sarq $63, %rcx 1844; AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 1845; AVX2-NEXT: shlq $60, %rax 1846; AVX2-NEXT: sarq $63, %rax 1847; AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 1848; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1849; AVX2-NEXT: retq 1850; 1851; AVX512-LABEL: load_sext_8i1_to_8i32: 1852; AVX512: # BB#0: # %entry 1853; AVX512-NEXT: movzbl (%rdi), %eax 1854; AVX512-NEXT: kmovw %eax, %k1 1855; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 1856; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1857; AVX512-NEXT: vpmovqd %zmm0, %ymm0 1858; AVX512-NEXT: retq 1859; 1860; X32-SSE41-LABEL: load_sext_8i1_to_8i32: 1861; X32-SSE41: # BB#0: # %entry 1862; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1863; X32-SSE41-NEXT: movzbl (%eax), %eax 1864; X32-SSE41-NEXT: movl %eax, %ecx 1865; X32-SSE41-NEXT: shrl %ecx 1866; X32-SSE41-NEXT: andl $1, %ecx 1867; X32-SSE41-NEXT: movl %eax, %edx 1868; X32-SSE41-NEXT: andl $1, %edx 1869; X32-SSE41-NEXT: movd %edx, %xmm1 1870; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm1 1871; X32-SSE41-NEXT: movl 
%eax, %ecx 1872; X32-SSE41-NEXT: shrl $2, %ecx 1873; X32-SSE41-NEXT: andl $1, %ecx 1874; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm1 1875; X32-SSE41-NEXT: movl %eax, %ecx 1876; X32-SSE41-NEXT: shrl $3, %ecx 1877; X32-SSE41-NEXT: andl $1, %ecx 1878; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm1 1879; X32-SSE41-NEXT: movl %eax, %ecx 1880; X32-SSE41-NEXT: shrl $4, %ecx 1881; X32-SSE41-NEXT: andl $1, %ecx 1882; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm1 1883; X32-SSE41-NEXT: movl %eax, %ecx 1884; X32-SSE41-NEXT: shrl $5, %ecx 1885; X32-SSE41-NEXT: andl $1, %ecx 1886; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm1 1887; X32-SSE41-NEXT: movl %eax, %ecx 1888; X32-SSE41-NEXT: shrl $6, %ecx 1889; X32-SSE41-NEXT: andl $1, %ecx 1890; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm1 1891; X32-SSE41-NEXT: shrl $7, %eax 1892; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm1 1893; X32-SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 1894; X32-SSE41-NEXT: pslld $31, %xmm0 1895; X32-SSE41-NEXT: psrad $31, %xmm0 1896; X32-SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1897; X32-SSE41-NEXT: pslld $31, %xmm1 1898; X32-SSE41-NEXT: psrad $31, %xmm1 1899; X32-SSE41-NEXT: retl 1900entry: 1901 %X = load <8 x i1>, <8 x i1>* %ptr 1902 %Y = sext <8 x i1> %X to <8 x i32> 1903 ret <8 x i32> %Y 1904} 1905 1906define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) { 1907; SSE2-LABEL: load_sext_8i8_to_8i32: 1908; SSE2: # BB#0: # %entry 1909; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1910; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1911; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1912; SSE2-NEXT: psrad $24, %xmm0 1913; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1914; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1915; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 1916; SSE2-NEXT: psrad $24, %xmm1 1917; SSE2-NEXT: retq 1918; 1919; 
SSSE3-LABEL: load_sext_8i8_to_8i32: 1920; SSSE3: # BB#0: # %entry 1921; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1922; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1923; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1924; SSSE3-NEXT: psrad $24, %xmm0 1925; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1926; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1927; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 1928; SSSE3-NEXT: psrad $24, %xmm1 1929; SSSE3-NEXT: retq 1930; 1931; SSE41-LABEL: load_sext_8i8_to_8i32: 1932; SSE41: # BB#0: # %entry 1933; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 1934; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 1935; SSE41-NEXT: retq 1936; 1937; AVX1-LABEL: load_sext_8i8_to_8i32: 1938; AVX1: # BB#0: # %entry 1939; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0 1940; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 1941; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1942; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 1943; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1944; AVX1-NEXT: retq 1945; 1946; AVX2-LABEL: load_sext_8i8_to_8i32: 1947; AVX2: # BB#0: # %entry 1948; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 1949; AVX2-NEXT: retq 1950; 1951; AVX512-LABEL: load_sext_8i8_to_8i32: 1952; AVX512: # BB#0: # %entry 1953; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0 1954; AVX512-NEXT: retq 1955; 1956; X32-SSE41-LABEL: load_sext_8i8_to_8i32: 1957; X32-SSE41: # BB#0: # %entry 1958; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1959; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 1960; X32-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1 1961; X32-SSE41-NEXT: retl 1962entry: 1963 %X = load <8 x i8>, <8 x i8>* %ptr 1964 %Y = sext <8 x i8> %X to <8 x i32> 1965 ret <8 x i32> %Y 1966} 1967 1968define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone { 1969; SSE2-LABEL: load_sext_16i1_to_16i8: 1970; SSE2: # BB#0: # %entry 1971; SSE2-NEXT: pushq %rbp 1972; SSE2-NEXT: pushq %r15 1973; SSE2-NEXT: pushq %r14 1974; 
SSE2-NEXT: pushq %r13 1975; SSE2-NEXT: pushq %r12 1976; SSE2-NEXT: pushq %rbx 1977; SSE2-NEXT: movswq (%rdi), %rax 1978; SSE2-NEXT: movq %rax, %r8 1979; SSE2-NEXT: movq %rax, %r9 1980; SSE2-NEXT: movq %rax, %r10 1981; SSE2-NEXT: movq %rax, %r11 1982; SSE2-NEXT: movq %rax, %r14 1983; SSE2-NEXT: movq %rax, %r15 1984; SSE2-NEXT: movq %rax, %r12 1985; SSE2-NEXT: movq %rax, %r13 1986; SSE2-NEXT: movq %rax, %rbx 1987; SSE2-NEXT: movq %rax, %rcx 1988; SSE2-NEXT: movq %rax, %rdx 1989; SSE2-NEXT: movq %rax, %rsi 1990; SSE2-NEXT: movq %rax, %rdi 1991; SSE2-NEXT: movq %rax, %rbp 1992; SSE2-NEXT: shlq $49, %rbp 1993; SSE2-NEXT: sarq $63, %rbp 1994; SSE2-NEXT: movd %ebp, %xmm0 1995; SSE2-NEXT: movq %rax, %rbp 1996; SSE2-NEXT: movsbq %al, %rax 1997; SSE2-NEXT: shlq $57, %r8 1998; SSE2-NEXT: sarq $63, %r8 1999; SSE2-NEXT: movd %r8d, %xmm1 2000; SSE2-NEXT: shlq $53, %r9 2001; SSE2-NEXT: sarq $63, %r9 2002; SSE2-NEXT: movd %r9d, %xmm2 2003; SSE2-NEXT: shlq $61, %r10 2004; SSE2-NEXT: sarq $63, %r10 2005; SSE2-NEXT: movd %r10d, %xmm3 2006; SSE2-NEXT: shlq $51, %r11 2007; SSE2-NEXT: sarq $63, %r11 2008; SSE2-NEXT: movd %r11d, %xmm4 2009; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2010; SSE2-NEXT: shlq $59, %r14 2011; SSE2-NEXT: sarq $63, %r14 2012; SSE2-NEXT: movd %r14d, %xmm5 2013; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2014; SSE2-NEXT: shlq $55, %r15 2015; SSE2-NEXT: sarq $63, %r15 2016; SSE2-NEXT: movd %r15d, %xmm2 2017; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 2018; SSE2-NEXT: shlq $63, %r12 2019; SSE2-NEXT: sarq $63, %r12 2020; SSE2-NEXT: movd %r12d, %xmm0 2021; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = 
xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 2022; SSE2-NEXT: shlq $50, %r13 2023; SSE2-NEXT: sarq $63, %r13 2024; SSE2-NEXT: movd %r13d, %xmm1 2025; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2026; SSE2-NEXT: shlq $58, %rbx 2027; SSE2-NEXT: sarq $63, %rbx 2028; SSE2-NEXT: movd %ebx, %xmm2 2029; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 2030; SSE2-NEXT: shlq $54, %rcx 2031; SSE2-NEXT: sarq $63, %rcx 2032; SSE2-NEXT: movd %ecx, %xmm4 2033; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2034; SSE2-NEXT: shlq $62, %rdx 2035; SSE2-NEXT: sarq $63, %rdx 2036; SSE2-NEXT: movd %edx, %xmm3 2037; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2038; SSE2-NEXT: shlq $52, %rsi 2039; SSE2-NEXT: sarq $63, %rsi 2040; SSE2-NEXT: movd %esi, %xmm1 2041; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 2042; SSE2-NEXT: shlq $60, %rdi 2043; SSE2-NEXT: sarq $63, %rdi 2044; SSE2-NEXT: movd %edi, %xmm4 2045; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2046; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 2047; SSE2-NEXT: shrq $15, %rbp 2048; SSE2-NEXT: movd %ebp, %xmm1 2049; SSE2-NEXT: 
shrq $7, %rax 2050; SSE2-NEXT: movd %eax, %xmm2 2051; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2052; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 2053; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 2054; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2055; SSE2-NEXT: popq %rbx 2056; SSE2-NEXT: popq %r12 2057; SSE2-NEXT: popq %r13 2058; SSE2-NEXT: popq %r14 2059; SSE2-NEXT: popq %r15 2060; SSE2-NEXT: popq %rbp 2061; SSE2-NEXT: retq 2062; 2063; SSSE3-LABEL: load_sext_16i1_to_16i8: 2064; SSSE3: # BB#0: # %entry 2065; SSSE3-NEXT: pushq %rbp 2066; SSSE3-NEXT: pushq %r15 2067; SSSE3-NEXT: pushq %r14 2068; SSSE3-NEXT: pushq %r13 2069; SSSE3-NEXT: pushq %r12 2070; SSSE3-NEXT: pushq %rbx 2071; SSSE3-NEXT: movswq (%rdi), %rax 2072; SSSE3-NEXT: movq %rax, %r8 2073; SSSE3-NEXT: movq %rax, %r9 2074; SSSE3-NEXT: movq %rax, %r10 2075; SSSE3-NEXT: movq %rax, %r11 2076; SSSE3-NEXT: movq %rax, %r14 2077; SSSE3-NEXT: movq %rax, %r15 2078; SSSE3-NEXT: movq %rax, %r12 2079; SSSE3-NEXT: movq %rax, %r13 2080; SSSE3-NEXT: movq %rax, %rbx 2081; SSSE3-NEXT: movq %rax, %rcx 2082; SSSE3-NEXT: movq %rax, %rdx 2083; SSSE3-NEXT: movq %rax, %rsi 2084; SSSE3-NEXT: movq %rax, %rdi 2085; SSSE3-NEXT: movq %rax, %rbp 2086; SSSE3-NEXT: shlq $49, %rbp 2087; SSSE3-NEXT: sarq $63, %rbp 2088; SSSE3-NEXT: movd %ebp, %xmm0 2089; SSSE3-NEXT: movq %rax, %rbp 2090; SSSE3-NEXT: movsbq %al, %rax 2091; SSSE3-NEXT: shlq $57, %r8 2092; SSSE3-NEXT: sarq $63, %r8 2093; SSSE3-NEXT: movd %r8d, %xmm1 2094; SSSE3-NEXT: shlq $53, %r9 2095; SSSE3-NEXT: 
sarq $63, %r9 2096; SSSE3-NEXT: movd %r9d, %xmm2 2097; SSSE3-NEXT: shlq $61, %r10 2098; SSSE3-NEXT: sarq $63, %r10 2099; SSSE3-NEXT: movd %r10d, %xmm3 2100; SSSE3-NEXT: shlq $51, %r11 2101; SSSE3-NEXT: sarq $63, %r11 2102; SSSE3-NEXT: movd %r11d, %xmm4 2103; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2104; SSSE3-NEXT: shlq $59, %r14 2105; SSSE3-NEXT: sarq $63, %r14 2106; SSSE3-NEXT: movd %r14d, %xmm5 2107; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2108; SSSE3-NEXT: shlq $55, %r15 2109; SSSE3-NEXT: sarq $63, %r15 2110; SSSE3-NEXT: movd %r15d, %xmm2 2111; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 2112; SSSE3-NEXT: shlq $63, %r12 2113; SSSE3-NEXT: sarq $63, %r12 2114; SSSE3-NEXT: movd %r12d, %xmm0 2115; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 2116; SSSE3-NEXT: shlq $50, %r13 2117; SSSE3-NEXT: sarq $63, %r13 2118; SSSE3-NEXT: movd %r13d, %xmm1 2119; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2120; SSSE3-NEXT: shlq $58, %rbx 2121; SSSE3-NEXT: sarq $63, %rbx 2122; SSSE3-NEXT: movd %ebx, %xmm2 2123; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 2124; SSSE3-NEXT: shlq $54, %rcx 2125; SSSE3-NEXT: sarq $63, %rcx 2126; SSSE3-NEXT: movd %ecx, %xmm4 2127; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2128; SSSE3-NEXT: shlq $62, %rdx 2129; SSSE3-NEXT: sarq $63, %rdx 2130; SSSE3-NEXT: movd %edx, %xmm3 2131; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2132; SSSE3-NEXT: shlq $52, %rsi 2133; SSSE3-NEXT: sarq $63, %rsi 2134; SSSE3-NEXT: movd %esi, %xmm1 2135; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 2136; SSSE3-NEXT: shlq $60, %rdi 2137; SSSE3-NEXT: sarq $63, %rdi 2138; SSSE3-NEXT: movd %edi, %xmm4 2139; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2140; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 2141; SSSE3-NEXT: shrq $15, %rbp 2142; SSSE3-NEXT: movd %ebp, %xmm1 2143; SSSE3-NEXT: shrq $7, %rax 2144; SSSE3-NEXT: movd %eax, %xmm2 2145; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2146; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 2147; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 2148; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2149; SSSE3-NEXT: popq %rbx 2150; SSSE3-NEXT: popq %r12 
2151; SSSE3-NEXT: popq %r13 2152; SSSE3-NEXT: popq %r14 2153; SSSE3-NEXT: popq %r15 2154; SSSE3-NEXT: popq %rbp 2155; SSSE3-NEXT: retq 2156; 2157; SSE41-LABEL: load_sext_16i1_to_16i8: 2158; SSE41: # BB#0: # %entry 2159; SSE41-NEXT: movswq (%rdi), %rax 2160; SSE41-NEXT: movq %rax, %rcx 2161; SSE41-NEXT: shlq $62, %rcx 2162; SSE41-NEXT: sarq $63, %rcx 2163; SSE41-NEXT: movq %rax, %rdx 2164; SSE41-NEXT: shlq $63, %rdx 2165; SSE41-NEXT: sarq $63, %rdx 2166; SSE41-NEXT: movd %edx, %xmm0 2167; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 2168; SSE41-NEXT: movq %rax, %rcx 2169; SSE41-NEXT: shlq $61, %rcx 2170; SSE41-NEXT: sarq $63, %rcx 2171; SSE41-NEXT: pinsrb $2, %ecx, %xmm0 2172; SSE41-NEXT: movq %rax, %rcx 2173; SSE41-NEXT: shlq $60, %rcx 2174; SSE41-NEXT: sarq $63, %rcx 2175; SSE41-NEXT: pinsrb $3, %ecx, %xmm0 2176; SSE41-NEXT: movq %rax, %rcx 2177; SSE41-NEXT: shlq $59, %rcx 2178; SSE41-NEXT: sarq $63, %rcx 2179; SSE41-NEXT: pinsrb $4, %ecx, %xmm0 2180; SSE41-NEXT: movq %rax, %rcx 2181; SSE41-NEXT: shlq $58, %rcx 2182; SSE41-NEXT: sarq $63, %rcx 2183; SSE41-NEXT: pinsrb $5, %ecx, %xmm0 2184; SSE41-NEXT: movq %rax, %rcx 2185; SSE41-NEXT: shlq $57, %rcx 2186; SSE41-NEXT: sarq $63, %rcx 2187; SSE41-NEXT: pinsrb $6, %ecx, %xmm0 2188; SSE41-NEXT: movsbq %al, %rcx 2189; SSE41-NEXT: shrq $7, %rcx 2190; SSE41-NEXT: pinsrb $7, %ecx, %xmm0 2191; SSE41-NEXT: movq %rax, %rcx 2192; SSE41-NEXT: shlq $55, %rcx 2193; SSE41-NEXT: sarq $63, %rcx 2194; SSE41-NEXT: pinsrb $8, %ecx, %xmm0 2195; SSE41-NEXT: movq %rax, %rcx 2196; SSE41-NEXT: shlq $54, %rcx 2197; SSE41-NEXT: sarq $63, %rcx 2198; SSE41-NEXT: pinsrb $9, %ecx, %xmm0 2199; SSE41-NEXT: movq %rax, %rcx 2200; SSE41-NEXT: shlq $53, %rcx 2201; SSE41-NEXT: sarq $63, %rcx 2202; SSE41-NEXT: pinsrb $10, %ecx, %xmm0 2203; SSE41-NEXT: movq %rax, %rcx 2204; SSE41-NEXT: shlq $52, %rcx 2205; SSE41-NEXT: sarq $63, %rcx 2206; SSE41-NEXT: pinsrb $11, %ecx, %xmm0 2207; SSE41-NEXT: movq %rax, %rcx 2208; SSE41-NEXT: shlq $51, %rcx 2209; SSE41-NEXT: sarq 
$63, %rcx 2210; SSE41-NEXT: pinsrb $12, %ecx, %xmm0 2211; SSE41-NEXT: movq %rax, %rcx 2212; SSE41-NEXT: shlq $50, %rcx 2213; SSE41-NEXT: sarq $63, %rcx 2214; SSE41-NEXT: pinsrb $13, %ecx, %xmm0 2215; SSE41-NEXT: movq %rax, %rcx 2216; SSE41-NEXT: shlq $49, %rcx 2217; SSE41-NEXT: sarq $63, %rcx 2218; SSE41-NEXT: pinsrb $14, %ecx, %xmm0 2219; SSE41-NEXT: shrq $15, %rax 2220; SSE41-NEXT: pinsrb $15, %eax, %xmm0 2221; SSE41-NEXT: retq 2222; 2223; AVX1-LABEL: load_sext_16i1_to_16i8: 2224; AVX1: # BB#0: # %entry 2225; AVX1-NEXT: movswq (%rdi), %rax 2226; AVX1-NEXT: movq %rax, %rcx 2227; AVX1-NEXT: shlq $62, %rcx 2228; AVX1-NEXT: sarq $63, %rcx 2229; AVX1-NEXT: movq %rax, %rdx 2230; AVX1-NEXT: shlq $63, %rdx 2231; AVX1-NEXT: sarq $63, %rdx 2232; AVX1-NEXT: vmovd %edx, %xmm0 2233; AVX1-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 2234; AVX1-NEXT: movq %rax, %rcx 2235; AVX1-NEXT: shlq $61, %rcx 2236; AVX1-NEXT: sarq $63, %rcx 2237; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 2238; AVX1-NEXT: movq %rax, %rcx 2239; AVX1-NEXT: shlq $60, %rcx 2240; AVX1-NEXT: sarq $63, %rcx 2241; AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 2242; AVX1-NEXT: movq %rax, %rcx 2243; AVX1-NEXT: shlq $59, %rcx 2244; AVX1-NEXT: sarq $63, %rcx 2245; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 2246; AVX1-NEXT: movq %rax, %rcx 2247; AVX1-NEXT: shlq $58, %rcx 2248; AVX1-NEXT: sarq $63, %rcx 2249; AVX1-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 2250; AVX1-NEXT: movq %rax, %rcx 2251; AVX1-NEXT: shlq $57, %rcx 2252; AVX1-NEXT: sarq $63, %rcx 2253; AVX1-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 2254; AVX1-NEXT: movsbq %al, %rcx 2255; AVX1-NEXT: shrq $7, %rcx 2256; AVX1-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 2257; AVX1-NEXT: movq %rax, %rcx 2258; AVX1-NEXT: shlq $55, %rcx 2259; AVX1-NEXT: sarq $63, %rcx 2260; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 2261; AVX1-NEXT: movq %rax, %rcx 2262; AVX1-NEXT: shlq $54, %rcx 2263; AVX1-NEXT: sarq $63, %rcx 2264; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 2265; AVX1-NEXT: movq %rax, %rcx 2266; 
AVX1-NEXT: shlq $53, %rcx 2267; AVX1-NEXT: sarq $63, %rcx 2268; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 2269; AVX1-NEXT: movq %rax, %rcx 2270; AVX1-NEXT: shlq $52, %rcx 2271; AVX1-NEXT: sarq $63, %rcx 2272; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 2273; AVX1-NEXT: movq %rax, %rcx 2274; AVX1-NEXT: shlq $51, %rcx 2275; AVX1-NEXT: sarq $63, %rcx 2276; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 2277; AVX1-NEXT: movq %rax, %rcx 2278; AVX1-NEXT: shlq $50, %rcx 2279; AVX1-NEXT: sarq $63, %rcx 2280; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 2281; AVX1-NEXT: movq %rax, %rcx 2282; AVX1-NEXT: shlq $49, %rcx 2283; AVX1-NEXT: sarq $63, %rcx 2284; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 2285; AVX1-NEXT: shrq $15, %rax 2286; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 2287; AVX1-NEXT: retq 2288; 2289; AVX2-LABEL: load_sext_16i1_to_16i8: 2290; AVX2: # BB#0: # %entry 2291; AVX2-NEXT: movswq (%rdi), %rax 2292; AVX2-NEXT: movq %rax, %rcx 2293; AVX2-NEXT: shlq $62, %rcx 2294; AVX2-NEXT: sarq $63, %rcx 2295; AVX2-NEXT: movq %rax, %rdx 2296; AVX2-NEXT: shlq $63, %rdx 2297; AVX2-NEXT: sarq $63, %rdx 2298; AVX2-NEXT: vmovd %edx, %xmm0 2299; AVX2-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 2300; AVX2-NEXT: movq %rax, %rcx 2301; AVX2-NEXT: shlq $61, %rcx 2302; AVX2-NEXT: sarq $63, %rcx 2303; AVX2-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 2304; AVX2-NEXT: movq %rax, %rcx 2305; AVX2-NEXT: shlq $60, %rcx 2306; AVX2-NEXT: sarq $63, %rcx 2307; AVX2-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 2308; AVX2-NEXT: movq %rax, %rcx 2309; AVX2-NEXT: shlq $59, %rcx 2310; AVX2-NEXT: sarq $63, %rcx 2311; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 2312; AVX2-NEXT: movq %rax, %rcx 2313; AVX2-NEXT: shlq $58, %rcx 2314; AVX2-NEXT: sarq $63, %rcx 2315; AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 2316; AVX2-NEXT: movq %rax, %rcx 2317; AVX2-NEXT: shlq $57, %rcx 2318; AVX2-NEXT: sarq $63, %rcx 2319; AVX2-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 2320; AVX2-NEXT: movsbq %al, %rcx 2321; AVX2-NEXT: shrq $7, %rcx 2322; AVX2-NEXT: vpinsrb 
$7, %ecx, %xmm0, %xmm0 2323; AVX2-NEXT: movq %rax, %rcx 2324; AVX2-NEXT: shlq $55, %rcx 2325; AVX2-NEXT: sarq $63, %rcx 2326; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 2327; AVX2-NEXT: movq %rax, %rcx 2328; AVX2-NEXT: shlq $54, %rcx 2329; AVX2-NEXT: sarq $63, %rcx 2330; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 2331; AVX2-NEXT: movq %rax, %rcx 2332; AVX2-NEXT: shlq $53, %rcx 2333; AVX2-NEXT: sarq $63, %rcx 2334; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 2335; AVX2-NEXT: movq %rax, %rcx 2336; AVX2-NEXT: shlq $52, %rcx 2337; AVX2-NEXT: sarq $63, %rcx 2338; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 2339; AVX2-NEXT: movq %rax, %rcx 2340; AVX2-NEXT: shlq $51, %rcx 2341; AVX2-NEXT: sarq $63, %rcx 2342; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 2343; AVX2-NEXT: movq %rax, %rcx 2344; AVX2-NEXT: shlq $50, %rcx 2345; AVX2-NEXT: sarq $63, %rcx 2346; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 2347; AVX2-NEXT: movq %rax, %rcx 2348; AVX2-NEXT: shlq $49, %rcx 2349; AVX2-NEXT: sarq $63, %rcx 2350; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 2351; AVX2-NEXT: shrq $15, %rax 2352; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 2353; AVX2-NEXT: retq 2354; 2355; AVX512-LABEL: load_sext_16i1_to_16i8: 2356; AVX512: # BB#0: # %entry 2357; AVX512-NEXT: kmovw (%rdi), %k1 2358; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 2359; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 2360; AVX512-NEXT: vpmovdb %zmm0, %xmm0 2361; AVX512-NEXT: retq 2362; 2363; X32-SSE41-LABEL: load_sext_16i1_to_16i8: 2364; X32-SSE41: # BB#0: # %entry 2365; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2366; X32-SSE41-NEXT: movswl (%eax), %eax 2367; X32-SSE41-NEXT: movl %eax, %ecx 2368; X32-SSE41-NEXT: shll $30, %ecx 2369; X32-SSE41-NEXT: sarl $31, %ecx 2370; X32-SSE41-NEXT: movl %eax, %edx 2371; X32-SSE41-NEXT: shll $31, %edx 2372; X32-SSE41-NEXT: sarl $31, %edx 2373; X32-SSE41-NEXT: movd %edx, %xmm0 2374; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm0 2375; X32-SSE41-NEXT: movl %eax, %ecx 2376; X32-SSE41-NEXT: shll $29, %ecx 
2377; X32-SSE41-NEXT: sarl $31, %ecx 2378; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm0 2379; X32-SSE41-NEXT: movl %eax, %ecx 2380; X32-SSE41-NEXT: shll $28, %ecx 2381; X32-SSE41-NEXT: sarl $31, %ecx 2382; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm0 2383; X32-SSE41-NEXT: movl %eax, %ecx 2384; X32-SSE41-NEXT: shll $27, %ecx 2385; X32-SSE41-NEXT: sarl $31, %ecx 2386; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm0 2387; X32-SSE41-NEXT: movl %eax, %ecx 2388; X32-SSE41-NEXT: shll $26, %ecx 2389; X32-SSE41-NEXT: sarl $31, %ecx 2390; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm0 2391; X32-SSE41-NEXT: movl %eax, %ecx 2392; X32-SSE41-NEXT: shll $25, %ecx 2393; X32-SSE41-NEXT: sarl $31, %ecx 2394; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm0 2395; X32-SSE41-NEXT: movsbl %al, %ecx 2396; X32-SSE41-NEXT: shrl $7, %ecx 2397; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm0 2398; X32-SSE41-NEXT: movl %eax, %ecx 2399; X32-SSE41-NEXT: shll $23, %ecx 2400; X32-SSE41-NEXT: sarl $31, %ecx 2401; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm0 2402; X32-SSE41-NEXT: movl %eax, %ecx 2403; X32-SSE41-NEXT: shll $22, %ecx 2404; X32-SSE41-NEXT: sarl $31, %ecx 2405; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm0 2406; X32-SSE41-NEXT: movl %eax, %ecx 2407; X32-SSE41-NEXT: shll $21, %ecx 2408; X32-SSE41-NEXT: sarl $31, %ecx 2409; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm0 2410; X32-SSE41-NEXT: movl %eax, %ecx 2411; X32-SSE41-NEXT: shll $20, %ecx 2412; X32-SSE41-NEXT: sarl $31, %ecx 2413; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm0 2414; X32-SSE41-NEXT: movl %eax, %ecx 2415; X32-SSE41-NEXT: shll $19, %ecx 2416; X32-SSE41-NEXT: sarl $31, %ecx 2417; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm0 2418; X32-SSE41-NEXT: movl %eax, %ecx 2419; X32-SSE41-NEXT: shll $18, %ecx 2420; X32-SSE41-NEXT: sarl $31, %ecx 2421; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm0 2422; X32-SSE41-NEXT: movl %eax, %ecx 2423; X32-SSE41-NEXT: shll $17, %ecx 2424; X32-SSE41-NEXT: sarl $31, %ecx 2425; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm0 2426; X32-SSE41-NEXT: shrl $15, %eax 2427; X32-SSE41-NEXT: pinsrb 
$15, %eax, %xmm0 2428; X32-SSE41-NEXT: retl 2429entry: 2430 %X = load <16 x i1>, <16 x i1>* %ptr 2431 %Y = sext <16 x i1> %X to <16 x i8> 2432 ret <16 x i8> %Y 2433} 2434 2435define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { 2436; SSE2-LABEL: load_sext_16i1_to_16i16: 2437; SSE2: # BB#0: # %entry 2438; SSE2-NEXT: movzwl (%rdi), %eax 2439; SSE2-NEXT: movl %eax, %ecx 2440; SSE2-NEXT: shrl $14, %ecx 2441; SSE2-NEXT: andl $1, %ecx 2442; SSE2-NEXT: movd %ecx, %xmm0 2443; SSE2-NEXT: movl %eax, %ecx 2444; SSE2-NEXT: shrl $6, %ecx 2445; SSE2-NEXT: andl $1, %ecx 2446; SSE2-NEXT: movd %ecx, %xmm1 2447; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2448; SSE2-NEXT: movl %eax, %ecx 2449; SSE2-NEXT: shrl $10, %ecx 2450; SSE2-NEXT: andl $1, %ecx 2451; SSE2-NEXT: movd %ecx, %xmm0 2452; SSE2-NEXT: movl %eax, %ecx 2453; SSE2-NEXT: shrl $2, %ecx 2454; SSE2-NEXT: andl $1, %ecx 2455; SSE2-NEXT: movd %ecx, %xmm2 2456; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2457; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2458; SSE2-NEXT: movl %eax, %ecx 2459; SSE2-NEXT: shrl $12, %ecx 2460; SSE2-NEXT: andl $1, %ecx 2461; SSE2-NEXT: movd %ecx, %xmm0 2462; SSE2-NEXT: movl %eax, %ecx 2463; SSE2-NEXT: shrl $4, %ecx 2464; SSE2-NEXT: andl $1, %ecx 2465; SSE2-NEXT: movd %ecx, %xmm3 2466; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2467; SSE2-NEXT: movl %eax, %ecx 2468; SSE2-NEXT: andl $1, %ecx 2469; SSE2-NEXT: movd %ecx, %xmm1 2470; SSE2-NEXT: movl %eax, %ecx 2471; SSE2-NEXT: shrl $8, %ecx 2472; SSE2-NEXT: andl $1, %ecx 
2473; SSE2-NEXT: movd %ecx, %xmm0 2474; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2475; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2476; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2477; SSE2-NEXT: movl %eax, %ecx 2478; SSE2-NEXT: shrl $13, %ecx 2479; SSE2-NEXT: andl $1, %ecx 2480; SSE2-NEXT: movd %ecx, %xmm0 2481; SSE2-NEXT: movl %eax, %ecx 2482; SSE2-NEXT: shrl $5, %ecx 2483; SSE2-NEXT: andl $1, %ecx 2484; SSE2-NEXT: movd %ecx, %xmm2 2485; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2486; SSE2-NEXT: movl %eax, %ecx 2487; SSE2-NEXT: shrl $9, %ecx 2488; SSE2-NEXT: andl $1, %ecx 2489; SSE2-NEXT: movd %ecx, %xmm3 2490; SSE2-NEXT: movl %eax, %ecx 2491; SSE2-NEXT: shrl %ecx 2492; SSE2-NEXT: andl $1, %ecx 2493; SSE2-NEXT: movd %ecx, %xmm0 2494; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2495; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2496; SSE2-NEXT: movl %eax, %ecx 2497; SSE2-NEXT: shrl $11, %ecx 2498; SSE2-NEXT: andl $1, %ecx 2499; SSE2-NEXT: movd %ecx, %xmm2 2500; SSE2-NEXT: movl %eax, %ecx 2501; SSE2-NEXT: shrl $3, %ecx 2502; SSE2-NEXT: andl $1, %ecx 2503; SSE2-NEXT: movd %ecx, %xmm3 2504; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2505; SSE2-NEXT: movl %eax, %ecx 2506; SSE2-NEXT: shrl $7, %ecx 2507; SSE2-NEXT: andl $1, %ecx 2508; SSE2-NEXT: movd %ecx, %xmm2 2509; SSE2-NEXT: shrl $15, %eax 2510; SSE2-NEXT: movzwl %ax, %eax 2511; SSE2-NEXT: movd %eax, %xmm4 2512; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 2513; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2514; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2515; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2516; SSE2-NEXT: movdqa %xmm1, %xmm0 2517; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2518; SSE2-NEXT: psllw $15, %xmm0 2519; SSE2-NEXT: psraw $15, %xmm0 2520; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 2521; SSE2-NEXT: psllw $15, %xmm1 2522; SSE2-NEXT: psraw $15, %xmm1 2523; SSE2-NEXT: retq 2524; 2525; SSSE3-LABEL: load_sext_16i1_to_16i16: 2526; SSSE3: # BB#0: # %entry 2527; SSSE3-NEXT: movzwl (%rdi), %eax 2528; SSSE3-NEXT: movl %eax, %ecx 2529; SSSE3-NEXT: shrl $14, %ecx 2530; SSSE3-NEXT: andl $1, %ecx 2531; SSSE3-NEXT: movd %ecx, %xmm0 2532; SSSE3-NEXT: movl %eax, %ecx 2533; SSSE3-NEXT: shrl $6, %ecx 2534; SSSE3-NEXT: andl $1, %ecx 2535; SSSE3-NEXT: movd %ecx, %xmm1 2536; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2537; SSSE3-NEXT: movl %eax, %ecx 2538; SSSE3-NEXT: shrl $10, %ecx 2539; SSSE3-NEXT: andl $1, %ecx 2540; SSSE3-NEXT: movd %ecx, %xmm0 2541; SSSE3-NEXT: movl %eax, %ecx 2542; SSSE3-NEXT: shrl $2, %ecx 2543; SSSE3-NEXT: andl $1, %ecx 2544; SSSE3-NEXT: movd %ecx, %xmm2 2545; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2546; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2547; SSSE3-NEXT: movl %eax, %ecx 2548; SSSE3-NEXT: shrl $12, %ecx 2549; SSSE3-NEXT: andl $1, %ecx 2550; SSSE3-NEXT: movd %ecx, %xmm0 2551; SSSE3-NEXT: movl %eax, %ecx 2552; SSSE3-NEXT: shrl $4, %ecx 2553; SSSE3-NEXT: andl $1, %ecx 2554; SSSE3-NEXT: movd %ecx, %xmm3 2555; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2556; SSSE3-NEXT: movl %eax, %ecx 2557; SSSE3-NEXT: andl $1, %ecx 2558; SSSE3-NEXT: movd %ecx, %xmm1 2559; SSSE3-NEXT: movl %eax, %ecx 2560; SSSE3-NEXT: shrl $8, %ecx 2561; SSSE3-NEXT: andl $1, %ecx 2562; SSSE3-NEXT: movd %ecx, %xmm0 2563; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2564; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2565; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2566; SSSE3-NEXT: movl %eax, %ecx 2567; SSSE3-NEXT: shrl $13, %ecx 
2568; SSSE3-NEXT: andl $1, %ecx 2569; SSSE3-NEXT: movd %ecx, %xmm0 2570; SSSE3-NEXT: movl %eax, %ecx 2571; SSSE3-NEXT: shrl $5, %ecx 2572; SSSE3-NEXT: andl $1, %ecx 2573; SSSE3-NEXT: movd %ecx, %xmm2 2574; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2575; SSSE3-NEXT: movl %eax, %ecx 2576; SSSE3-NEXT: shrl $9, %ecx 2577; SSSE3-NEXT: andl $1, %ecx 2578; SSSE3-NEXT: movd %ecx, %xmm3 2579; SSSE3-NEXT: movl %eax, %ecx 2580; SSSE3-NEXT: shrl %ecx 2581; SSSE3-NEXT: andl $1, %ecx 2582; SSSE3-NEXT: movd %ecx, %xmm0 2583; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2584; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2585; SSSE3-NEXT: movl %eax, %ecx 2586; SSSE3-NEXT: shrl $11, %ecx 2587; SSSE3-NEXT: andl $1, %ecx 2588; SSSE3-NEXT: movd %ecx, %xmm2 2589; SSSE3-NEXT: movl %eax, %ecx 2590; SSSE3-NEXT: shrl $3, %ecx 2591; SSSE3-NEXT: andl $1, %ecx 2592; SSSE3-NEXT: movd %ecx, %xmm3 2593; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2594; SSSE3-NEXT: movl %eax, %ecx 2595; SSSE3-NEXT: shrl $7, %ecx 2596; SSSE3-NEXT: andl $1, %ecx 2597; SSSE3-NEXT: movd %ecx, %xmm2 2598; SSSE3-NEXT: shrl $15, %eax 2599; SSSE3-NEXT: movzwl %ax, %eax 2600; SSSE3-NEXT: movd %eax, %xmm4 2601; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 2602; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 
2603; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2604; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2605; SSSE3-NEXT: movdqa %xmm1, %xmm0 2606; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2607; SSSE3-NEXT: psllw $15, %xmm0 2608; SSSE3-NEXT: psraw $15, %xmm0 2609; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 2610; SSSE3-NEXT: psllw $15, %xmm1 2611; SSSE3-NEXT: psraw $15, %xmm1 2612; SSSE3-NEXT: retq 2613; 2614; SSE41-LABEL: load_sext_16i1_to_16i16: 2615; SSE41: # BB#0: # %entry 2616; SSE41-NEXT: movzwl (%rdi), %eax 2617; SSE41-NEXT: movl %eax, %ecx 2618; SSE41-NEXT: shrl %ecx 2619; SSE41-NEXT: andl $1, %ecx 2620; SSE41-NEXT: movl %eax, %edx 2621; SSE41-NEXT: andl $1, %edx 2622; SSE41-NEXT: movd %edx, %xmm1 2623; SSE41-NEXT: pinsrb $1, %ecx, %xmm1 2624; SSE41-NEXT: movl %eax, %ecx 2625; SSE41-NEXT: shrl $2, %ecx 2626; SSE41-NEXT: andl $1, %ecx 2627; SSE41-NEXT: pinsrb $2, %ecx, %xmm1 2628; SSE41-NEXT: movl %eax, %ecx 2629; SSE41-NEXT: shrl $3, %ecx 2630; SSE41-NEXT: andl $1, %ecx 2631; SSE41-NEXT: pinsrb $3, %ecx, %xmm1 2632; SSE41-NEXT: movl %eax, %ecx 2633; SSE41-NEXT: shrl $4, %ecx 2634; SSE41-NEXT: andl $1, %ecx 2635; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 2636; SSE41-NEXT: movl %eax, %ecx 2637; SSE41-NEXT: shrl $5, %ecx 2638; SSE41-NEXT: andl $1, %ecx 2639; SSE41-NEXT: pinsrb $5, %ecx, %xmm1 2640; SSE41-NEXT: movl %eax, %ecx 2641; SSE41-NEXT: shrl $6, %ecx 2642; SSE41-NEXT: andl $1, %ecx 2643; SSE41-NEXT: pinsrb $6, %ecx, %xmm1 2644; SSE41-NEXT: movl %eax, %ecx 2645; SSE41-NEXT: shrl $7, %ecx 2646; SSE41-NEXT: andl $1, %ecx 2647; SSE41-NEXT: pinsrb $7, %ecx, %xmm1 
2648; SSE41-NEXT: movl %eax, %ecx 2649; SSE41-NEXT: shrl $8, %ecx 2650; SSE41-NEXT: andl $1, %ecx 2651; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 2652; SSE41-NEXT: movl %eax, %ecx 2653; SSE41-NEXT: shrl $9, %ecx 2654; SSE41-NEXT: andl $1, %ecx 2655; SSE41-NEXT: pinsrb $9, %ecx, %xmm1 2656; SSE41-NEXT: movl %eax, %ecx 2657; SSE41-NEXT: shrl $10, %ecx 2658; SSE41-NEXT: andl $1, %ecx 2659; SSE41-NEXT: pinsrb $10, %ecx, %xmm1 2660; SSE41-NEXT: movl %eax, %ecx 2661; SSE41-NEXT: shrl $11, %ecx 2662; SSE41-NEXT: andl $1, %ecx 2663; SSE41-NEXT: pinsrb $11, %ecx, %xmm1 2664; SSE41-NEXT: movl %eax, %ecx 2665; SSE41-NEXT: shrl $12, %ecx 2666; SSE41-NEXT: andl $1, %ecx 2667; SSE41-NEXT: pinsrb $12, %ecx, %xmm1 2668; SSE41-NEXT: movl %eax, %ecx 2669; SSE41-NEXT: shrl $13, %ecx 2670; SSE41-NEXT: andl $1, %ecx 2671; SSE41-NEXT: pinsrb $13, %ecx, %xmm1 2672; SSE41-NEXT: movl %eax, %ecx 2673; SSE41-NEXT: shrl $14, %ecx 2674; SSE41-NEXT: andl $1, %ecx 2675; SSE41-NEXT: pinsrb $14, %ecx, %xmm1 2676; SSE41-NEXT: shrl $15, %eax 2677; SSE41-NEXT: movzwl %ax, %eax 2678; SSE41-NEXT: pinsrb $15, %eax, %xmm1 2679; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2680; SSE41-NEXT: psllw $15, %xmm0 2681; SSE41-NEXT: psraw $15, %xmm0 2682; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2683; SSE41-NEXT: psllw $15, %xmm1 2684; SSE41-NEXT: psraw $15, %xmm1 2685; SSE41-NEXT: retq 2686; 2687; AVX1-LABEL: load_sext_16i1_to_16i16: 2688; AVX1: # BB#0: # %entry 2689; AVX1-NEXT: pushq %rbp 2690; AVX1-NEXT: .Ltmp0: 2691; AVX1-NEXT: .cfi_def_cfa_offset 16 2692; AVX1-NEXT: pushq %r15 2693; AVX1-NEXT: .Ltmp1: 2694; AVX1-NEXT: .cfi_def_cfa_offset 24 2695; AVX1-NEXT: pushq %r14 2696; AVX1-NEXT: .Ltmp2: 2697; AVX1-NEXT: .cfi_def_cfa_offset 32 2698; AVX1-NEXT: pushq %r13 2699; AVX1-NEXT: .Ltmp3: 2700; AVX1-NEXT: .cfi_def_cfa_offset 40 2701; AVX1-NEXT: pushq %r12 2702; AVX1-NEXT: .Ltmp4: 
2703; AVX1-NEXT: .cfi_def_cfa_offset 48 2704; AVX1-NEXT: pushq %rbx 2705; AVX1-NEXT: .Ltmp5: 2706; AVX1-NEXT: .cfi_def_cfa_offset 56 2707; AVX1-NEXT: .Ltmp6: 2708; AVX1-NEXT: .cfi_offset %rbx, -56 2709; AVX1-NEXT: .Ltmp7: 2710; AVX1-NEXT: .cfi_offset %r12, -48 2711; AVX1-NEXT: .Ltmp8: 2712; AVX1-NEXT: .cfi_offset %r13, -40 2713; AVX1-NEXT: .Ltmp9: 2714; AVX1-NEXT: .cfi_offset %r14, -32 2715; AVX1-NEXT: .Ltmp10: 2716; AVX1-NEXT: .cfi_offset %r15, -24 2717; AVX1-NEXT: .Ltmp11: 2718; AVX1-NEXT: .cfi_offset %rbp, -16 2719; AVX1-NEXT: movswq (%rdi), %rax 2720; AVX1-NEXT: movq %rax, %rcx 2721; AVX1-NEXT: shlq $55, %rcx 2722; AVX1-NEXT: sarq $63, %rcx 2723; AVX1-NEXT: vmovd %ecx, %xmm0 2724; AVX1-NEXT: movq %rax, %r8 2725; AVX1-NEXT: movq %rax, %r10 2726; AVX1-NEXT: movq %rax, %r11 2727; AVX1-NEXT: movq %rax, %r14 2728; AVX1-NEXT: movq %rax, %r15 2729; AVX1-NEXT: movq %rax, %r9 2730; AVX1-NEXT: movq %rax, %r12 2731; AVX1-NEXT: movq %rax, %r13 2732; AVX1-NEXT: movq %rax, %rbx 2733; AVX1-NEXT: movq %rax, %rdi 2734; AVX1-NEXT: movq %rax, %rcx 2735; AVX1-NEXT: movq %rax, %rdx 2736; AVX1-NEXT: movq %rax, %rsi 2737; AVX1-NEXT: movsbq %al, %rbp 2738; AVX1-NEXT: shlq $54, %rax 2739; AVX1-NEXT: sarq $63, %rax 2740; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 2741; AVX1-NEXT: shlq $53, %r8 2742; AVX1-NEXT: sarq $63, %r8 2743; AVX1-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0 2744; AVX1-NEXT: shlq $52, %r10 2745; AVX1-NEXT: sarq $63, %r10 2746; AVX1-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0 2747; AVX1-NEXT: shlq $51, %r11 2748; AVX1-NEXT: sarq $63, %r11 2749; AVX1-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0 2750; AVX1-NEXT: shlq $50, %r14 2751; AVX1-NEXT: sarq $63, %r14 2752; AVX1-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 2753; AVX1-NEXT: shlq $49, %r15 2754; AVX1-NEXT: sarq $63, %r15 2755; AVX1-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0 2756; AVX1-NEXT: shrq $15, %r9 2757; AVX1-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0 2758; AVX1-NEXT: shlq $63, %r13 2759; AVX1-NEXT: sarq $63, %r13 2760; AVX1-NEXT: vmovd %r13d, 
%xmm1 2761; AVX1-NEXT: shlq $62, %r12 2762; AVX1-NEXT: sarq $63, %r12 2763; AVX1-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1 2764; AVX1-NEXT: shlq $61, %rbx 2765; AVX1-NEXT: sarq $63, %rbx 2766; AVX1-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1 2767; AVX1-NEXT: shlq $60, %rdi 2768; AVX1-NEXT: sarq $63, %rdi 2769; AVX1-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1 2770; AVX1-NEXT: shlq $59, %rcx 2771; AVX1-NEXT: sarq $63, %rcx 2772; AVX1-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 2773; AVX1-NEXT: shlq $58, %rdx 2774; AVX1-NEXT: sarq $63, %rdx 2775; AVX1-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1 2776; AVX1-NEXT: shlq $57, %rsi 2777; AVX1-NEXT: sarq $63, %rsi 2778; AVX1-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1 2779; AVX1-NEXT: shrq $7, %rbp 2780; AVX1-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 2781; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2782; AVX1-NEXT: popq %rbx 2783; AVX1-NEXT: popq %r12 2784; AVX1-NEXT: popq %r13 2785; AVX1-NEXT: popq %r14 2786; AVX1-NEXT: popq %r15 2787; AVX1-NEXT: popq %rbp 2788; AVX1-NEXT: retq 2789; 2790; AVX2-LABEL: load_sext_16i1_to_16i16: 2791; AVX2: # BB#0: # %entry 2792; AVX2-NEXT: pushq %rbp 2793; AVX2-NEXT: .Ltmp0: 2794; AVX2-NEXT: .cfi_def_cfa_offset 16 2795; AVX2-NEXT: pushq %r15 2796; AVX2-NEXT: .Ltmp1: 2797; AVX2-NEXT: .cfi_def_cfa_offset 24 2798; AVX2-NEXT: pushq %r14 2799; AVX2-NEXT: .Ltmp2: 2800; AVX2-NEXT: .cfi_def_cfa_offset 32 2801; AVX2-NEXT: pushq %r13 2802; AVX2-NEXT: .Ltmp3: 2803; AVX2-NEXT: .cfi_def_cfa_offset 40 2804; AVX2-NEXT: pushq %r12 2805; AVX2-NEXT: .Ltmp4: 2806; AVX2-NEXT: .cfi_def_cfa_offset 48 2807; AVX2-NEXT: pushq %rbx 2808; AVX2-NEXT: .Ltmp5: 2809; AVX2-NEXT: .cfi_def_cfa_offset 56 2810; AVX2-NEXT: .Ltmp6: 2811; AVX2-NEXT: .cfi_offset %rbx, -56 2812; AVX2-NEXT: .Ltmp7: 2813; AVX2-NEXT: .cfi_offset %r12, -48 2814; AVX2-NEXT: .Ltmp8: 2815; AVX2-NEXT: .cfi_offset %r13, -40 2816; AVX2-NEXT: .Ltmp9: 2817; AVX2-NEXT: .cfi_offset %r14, -32 2818; AVX2-NEXT: .Ltmp10: 2819; AVX2-NEXT: .cfi_offset %r15, -24 2820; AVX2-NEXT: .Ltmp11: 2821; AVX2-NEXT: 
.cfi_offset %rbp, -16 2822; AVX2-NEXT: movswq (%rdi), %rax 2823; AVX2-NEXT: movq %rax, %rcx 2824; AVX2-NEXT: shlq $55, %rcx 2825; AVX2-NEXT: sarq $63, %rcx 2826; AVX2-NEXT: vmovd %ecx, %xmm0 2827; AVX2-NEXT: movq %rax, %r8 2828; AVX2-NEXT: movq %rax, %r10 2829; AVX2-NEXT: movq %rax, %r11 2830; AVX2-NEXT: movq %rax, %r14 2831; AVX2-NEXT: movq %rax, %r15 2832; AVX2-NEXT: movq %rax, %r9 2833; AVX2-NEXT: movq %rax, %r12 2834; AVX2-NEXT: movq %rax, %r13 2835; AVX2-NEXT: movq %rax, %rbx 2836; AVX2-NEXT: movq %rax, %rdi 2837; AVX2-NEXT: movq %rax, %rcx 2838; AVX2-NEXT: movq %rax, %rdx 2839; AVX2-NEXT: movq %rax, %rsi 2840; AVX2-NEXT: movsbq %al, %rbp 2841; AVX2-NEXT: shlq $54, %rax 2842; AVX2-NEXT: sarq $63, %rax 2843; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 2844; AVX2-NEXT: shlq $53, %r8 2845; AVX2-NEXT: sarq $63, %r8 2846; AVX2-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0 2847; AVX2-NEXT: shlq $52, %r10 2848; AVX2-NEXT: sarq $63, %r10 2849; AVX2-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0 2850; AVX2-NEXT: shlq $51, %r11 2851; AVX2-NEXT: sarq $63, %r11 2852; AVX2-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0 2853; AVX2-NEXT: shlq $50, %r14 2854; AVX2-NEXT: sarq $63, %r14 2855; AVX2-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 2856; AVX2-NEXT: shlq $49, %r15 2857; AVX2-NEXT: sarq $63, %r15 2858; AVX2-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0 2859; AVX2-NEXT: shrq $15, %r9 2860; AVX2-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0 2861; AVX2-NEXT: shlq $63, %r13 2862; AVX2-NEXT: sarq $63, %r13 2863; AVX2-NEXT: vmovd %r13d, %xmm1 2864; AVX2-NEXT: shlq $62, %r12 2865; AVX2-NEXT: sarq $63, %r12 2866; AVX2-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1 2867; AVX2-NEXT: shlq $61, %rbx 2868; AVX2-NEXT: sarq $63, %rbx 2869; AVX2-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1 2870; AVX2-NEXT: shlq $60, %rdi 2871; AVX2-NEXT: sarq $63, %rdi 2872; AVX2-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1 2873; AVX2-NEXT: shlq $59, %rcx 2874; AVX2-NEXT: sarq $63, %rcx 2875; AVX2-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 2876; AVX2-NEXT: shlq $58, %rdx 2877; 
AVX2-NEXT: sarq $63, %rdx 2878; AVX2-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1 2879; AVX2-NEXT: shlq $57, %rsi 2880; AVX2-NEXT: sarq $63, %rsi 2881; AVX2-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1 2882; AVX2-NEXT: shrq $7, %rbp 2883; AVX2-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 2884; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 2885; AVX2-NEXT: popq %rbx 2886; AVX2-NEXT: popq %r12 2887; AVX2-NEXT: popq %r13 2888; AVX2-NEXT: popq %r14 2889; AVX2-NEXT: popq %r15 2890; AVX2-NEXT: popq %rbp 2891; AVX2-NEXT: retq 2892; 2893; AVX512-LABEL: load_sext_16i1_to_16i16: 2894; AVX512: # BB#0: # %entry 2895; AVX512-NEXT: kmovw (%rdi), %k1 2896; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 2897; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 2898; AVX512-NEXT: vpmovdw %zmm0, %ymm0 2899; AVX512-NEXT: retq 2900; 2901; X32-SSE41-LABEL: load_sext_16i1_to_16i16: 2902; X32-SSE41: # BB#0: # %entry 2903; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2904; X32-SSE41-NEXT: movzwl (%eax), %eax 2905; X32-SSE41-NEXT: movl %eax, %ecx 2906; X32-SSE41-NEXT: shrl %ecx 2907; X32-SSE41-NEXT: andl $1, %ecx 2908; X32-SSE41-NEXT: movl %eax, %edx 2909; X32-SSE41-NEXT: andl $1, %edx 2910; X32-SSE41-NEXT: movd %edx, %xmm1 2911; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1 2912; X32-SSE41-NEXT: movl %eax, %ecx 2913; X32-SSE41-NEXT: shrl $2, %ecx 2914; X32-SSE41-NEXT: andl $1, %ecx 2915; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1 2916; X32-SSE41-NEXT: movl %eax, %ecx 2917; X32-SSE41-NEXT: shrl $3, %ecx 2918; X32-SSE41-NEXT: andl $1, %ecx 2919; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1 2920; X32-SSE41-NEXT: movl %eax, %ecx 2921; X32-SSE41-NEXT: shrl $4, %ecx 2922; X32-SSE41-NEXT: andl $1, %ecx 2923; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 2924; X32-SSE41-NEXT: movl %eax, %ecx 2925; X32-SSE41-NEXT: shrl $5, %ecx 2926; X32-SSE41-NEXT: andl $1, %ecx 2927; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1 2928; X32-SSE41-NEXT: movl %eax, %ecx 2929; X32-SSE41-NEXT: shrl $6, %ecx 2930; X32-SSE41-NEXT: andl $1, %ecx 2931; X32-SSE41-NEXT: pinsrb $6, 
%ecx, %xmm1 2932; X32-SSE41-NEXT: movl %eax, %ecx 2933; X32-SSE41-NEXT: shrl $7, %ecx 2934; X32-SSE41-NEXT: andl $1, %ecx 2935; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1 2936; X32-SSE41-NEXT: movl %eax, %ecx 2937; X32-SSE41-NEXT: shrl $8, %ecx 2938; X32-SSE41-NEXT: andl $1, %ecx 2939; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 2940; X32-SSE41-NEXT: movl %eax, %ecx 2941; X32-SSE41-NEXT: shrl $9, %ecx 2942; X32-SSE41-NEXT: andl $1, %ecx 2943; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1 2944; X32-SSE41-NEXT: movl %eax, %ecx 2945; X32-SSE41-NEXT: shrl $10, %ecx 2946; X32-SSE41-NEXT: andl $1, %ecx 2947; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1 2948; X32-SSE41-NEXT: movl %eax, %ecx 2949; X32-SSE41-NEXT: shrl $11, %ecx 2950; X32-SSE41-NEXT: andl $1, %ecx 2951; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1 2952; X32-SSE41-NEXT: movl %eax, %ecx 2953; X32-SSE41-NEXT: shrl $12, %ecx 2954; X32-SSE41-NEXT: andl $1, %ecx 2955; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1 2956; X32-SSE41-NEXT: movl %eax, %ecx 2957; X32-SSE41-NEXT: shrl $13, %ecx 2958; X32-SSE41-NEXT: andl $1, %ecx 2959; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1 2960; X32-SSE41-NEXT: movl %eax, %ecx 2961; X32-SSE41-NEXT: shrl $14, %ecx 2962; X32-SSE41-NEXT: andl $1, %ecx 2963; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1 2964; X32-SSE41-NEXT: shrl $15, %eax 2965; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm1 2966; X32-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2967; X32-SSE41-NEXT: psllw $15, %xmm0 2968; X32-SSE41-NEXT: psraw $15, %xmm0 2969; X32-SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2970; X32-SSE41-NEXT: psllw $15, %xmm1 2971; X32-SSE41-NEXT: psraw $15, %xmm1 2972; X32-SSE41-NEXT: retl 2973entry: 2974 %X = load <16 x i1>, <16 x i1>* %ptr 2975 %Y = sext <16 x i1> %X to <16 x i16> 2976 ret <16 x i16> %Y 2977} 2978 2979define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { 2980; 
SSE2-LABEL: load_sext_32i1_to_32i8: 2981; SSE2: # BB#0: # %entry 2982; SSE2-NEXT: pushq %rbp 2983; SSE2-NEXT: pushq %r15 2984; SSE2-NEXT: pushq %r14 2985; SSE2-NEXT: pushq %r13 2986; SSE2-NEXT: pushq %r12 2987; SSE2-NEXT: pushq %rbx 2988; SSE2-NEXT: movswq (%rdi), %rbx 2989; SSE2-NEXT: movq %rbx, %r10 2990; SSE2-NEXT: movq %rbx, %r8 2991; SSE2-NEXT: movq %rbx, %r9 2992; SSE2-NEXT: movq %rbx, %r11 2993; SSE2-NEXT: movq %rbx, %r14 2994; SSE2-NEXT: movq %rbx, %r15 2995; SSE2-NEXT: movq %rbx, %r12 2996; SSE2-NEXT: movq %rbx, %r13 2997; SSE2-NEXT: movq %rbx, %rdx 2998; SSE2-NEXT: movq %rbx, %rsi 2999; SSE2-NEXT: movq %rbx, %rcx 3000; SSE2-NEXT: movq %rbx, %rbp 3001; SSE2-NEXT: movq %rbx, %rax 3002; SSE2-NEXT: shlq $49, %rax 3003; SSE2-NEXT: sarq $63, %rax 3004; SSE2-NEXT: movd %eax, %xmm0 3005; SSE2-NEXT: movq %rbx, %rax 3006; SSE2-NEXT: shlq $57, %r10 3007; SSE2-NEXT: sarq $63, %r10 3008; SSE2-NEXT: movd %r10d, %xmm15 3009; SSE2-NEXT: movq %rbx, %r10 3010; SSE2-NEXT: movsbq %bl, %rbx 3011; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] 3012; SSE2-NEXT: shlq $53, %r8 3013; SSE2-NEXT: sarq $63, %r8 3014; SSE2-NEXT: movd %r8d, %xmm8 3015; SSE2-NEXT: shlq $61, %r9 3016; SSE2-NEXT: sarq $63, %r9 3017; SSE2-NEXT: movd %r9d, %xmm2 3018; SSE2-NEXT: shlq $51, %r11 3019; SSE2-NEXT: sarq $63, %r11 3020; SSE2-NEXT: movd %r11d, %xmm9 3021; SSE2-NEXT: shlq $59, %r14 3022; SSE2-NEXT: sarq $63, %r14 3023; SSE2-NEXT: movd %r14d, %xmm5 3024; SSE2-NEXT: shlq $55, %r15 3025; SSE2-NEXT: sarq $63, %r15 3026; SSE2-NEXT: movd %r15d, %xmm10 3027; SSE2-NEXT: shlq $63, %r12 3028; SSE2-NEXT: sarq $63, %r12 3029; SSE2-NEXT: movd %r12d, %xmm0 3030; SSE2-NEXT: shlq $50, %r13 3031; SSE2-NEXT: sarq $63, %r13 3032; SSE2-NEXT: movd %r13d, %xmm11 3033; SSE2-NEXT: shlq $58, %rdx 3034; SSE2-NEXT: sarq $63, %rdx 3035; SSE2-NEXT: movd %edx, %xmm4 3036; SSE2-NEXT: shlq $54, %rsi 3037; 
SSE2-NEXT: sarq $63, %rsi 3038; SSE2-NEXT: movd %esi, %xmm12 3039; SSE2-NEXT: shlq $62, %rcx 3040; SSE2-NEXT: sarq $63, %rcx 3041; SSE2-NEXT: movd %ecx, %xmm6 3042; SSE2-NEXT: shlq $52, %rbp 3043; SSE2-NEXT: sarq $63, %rbp 3044; SSE2-NEXT: movd %ebp, %xmm13 3045; SSE2-NEXT: shlq $60, %rax 3046; SSE2-NEXT: sarq $63, %rax 3047; SSE2-NEXT: movd %eax, %xmm7 3048; SSE2-NEXT: shrq $15, %r10 3049; SSE2-NEXT: movd %r10d, %xmm14 3050; SSE2-NEXT: shrq $7, %rbx 3051; SSE2-NEXT: movd %ebx, %xmm3 3052; SSE2-NEXT: movswq 2(%rdi), %rdx 3053; SSE2-NEXT: movq %rdx, %r8 3054; SSE2-NEXT: movq %rdx, %r9 3055; SSE2-NEXT: movq %rdx, %r10 3056; SSE2-NEXT: movq %rdx, %r11 3057; SSE2-NEXT: movq %rdx, %r14 3058; SSE2-NEXT: movq %rdx, %r15 3059; SSE2-NEXT: movq %rdx, %r12 3060; SSE2-NEXT: movq %rdx, %r13 3061; SSE2-NEXT: movq %rdx, %rbx 3062; SSE2-NEXT: movq %rdx, %rax 3063; SSE2-NEXT: movq %rdx, %rcx 3064; SSE2-NEXT: movq %rdx, %rsi 3065; SSE2-NEXT: movq %rdx, %rdi 3066; SSE2-NEXT: movq %rdx, %rbp 3067; SSE2-NEXT: shlq $49, %rbp 3068; SSE2-NEXT: sarq $63, %rbp 3069; SSE2-NEXT: movd %ebp, %xmm1 3070; SSE2-NEXT: movq %rdx, %rbp 3071; SSE2-NEXT: movsbq %dl, %rdx 3072; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] 3073; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7] 3074; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7] 3075; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7] 3076; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 3077; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 3078; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7] 3079; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7] 3080; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] 3081; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] 3082; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7] 3083; SSE2-NEXT: shlq $57, %r8 3084; SSE2-NEXT: sarq $63, %r8 3085; SSE2-NEXT: movd %r8d, %xmm2 3086; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] 3087; SSE2-NEXT: shlq $53, %r9 3088; SSE2-NEXT: sarq $63, %r9 3089; SSE2-NEXT: movd %r9d, %xmm3 3090; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] 3091; SSE2-NEXT: shlq $61, %r10 3092; SSE2-NEXT: sarq $63, %r10 3093; SSE2-NEXT: movd %r10d, %xmm4 3094; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] 3095; 
SSE2-NEXT: shlq $51, %r11 3096; SSE2-NEXT: sarq $63, %r11 3097; SSE2-NEXT: movd %r11d, %xmm5 3098; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3099; SSE2-NEXT: shlq $59, %r14 3100; SSE2-NEXT: sarq $63, %r14 3101; SSE2-NEXT: movd %r14d, %xmm6 3102; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3103; SSE2-NEXT: shlq $55, %r15 3104; SSE2-NEXT: sarq $63, %r15 3105; SSE2-NEXT: movd %r15d, %xmm3 3106; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 3107; SSE2-NEXT: shlq $63, %r12 3108; SSE2-NEXT: sarq $63, %r12 3109; SSE2-NEXT: movd %r12d, %xmm1 3110; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] 3111; SSE2-NEXT: shlq $50, %r13 3112; SSE2-NEXT: sarq $63, %r13 3113; SSE2-NEXT: movd %r13d, %xmm2 3114; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 3115; SSE2-NEXT: shlq $58, %rbx 3116; SSE2-NEXT: sarq $63, %rbx 3117; SSE2-NEXT: movd %ebx, %xmm3 3118; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 3119; SSE2-NEXT: shlq $54, %rax 3120; SSE2-NEXT: sarq $63, %rax 3121; SSE2-NEXT: movd %eax, %xmm5 3122; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3123; SSE2-NEXT: shlq $62, %rcx 3124; SSE2-NEXT: sarq $63, %rcx 3125; SSE2-NEXT: movd %ecx, %xmm4 3126; SSE2-NEXT: 
punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3127; SSE2-NEXT: shlq $52, %rsi 3128; SSE2-NEXT: sarq $63, %rsi 3129; SSE2-NEXT: movd %esi, %xmm2 3130; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] 3131; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3132; SSE2-NEXT: shlq $60, %rdi 3133; SSE2-NEXT: sarq $63, %rdi 3134; SSE2-NEXT: movd %edi, %xmm3 3135; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3136; SSE2-NEXT: shrq $15, %rbp 3137; SSE2-NEXT: movd %ebp, %xmm2 3138; SSE2-NEXT: shrq $7, %rdx 3139; SSE2-NEXT: movd %edx, %xmm5 3140; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] 3141; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] 3142; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3143; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3144; SSE2-NEXT: popq %rbx 3145; SSE2-NEXT: popq %r12 3146; SSE2-NEXT: popq %r13 3147; SSE2-NEXT: popq %r14 3148; SSE2-NEXT: popq %r15 3149; SSE2-NEXT: popq %rbp 3150; SSE2-NEXT: retq 3151; 3152; SSSE3-LABEL: load_sext_32i1_to_32i8: 3153; SSSE3: # BB#0: # %entry 3154; SSSE3-NEXT: pushq %rbp 3155; SSSE3-NEXT: pushq %r15 
3156; SSSE3-NEXT: pushq %r14 3157; SSSE3-NEXT: pushq %r13 3158; SSSE3-NEXT: pushq %r12 3159; SSSE3-NEXT: pushq %rbx 3160; SSSE3-NEXT: movswq (%rdi), %rbx 3161; SSSE3-NEXT: movq %rbx, %r10 3162; SSSE3-NEXT: movq %rbx, %r8 3163; SSSE3-NEXT: movq %rbx, %r9 3164; SSSE3-NEXT: movq %rbx, %r11 3165; SSSE3-NEXT: movq %rbx, %r14 3166; SSSE3-NEXT: movq %rbx, %r15 3167; SSSE3-NEXT: movq %rbx, %r12 3168; SSSE3-NEXT: movq %rbx, %r13 3169; SSSE3-NEXT: movq %rbx, %rdx 3170; SSSE3-NEXT: movq %rbx, %rsi 3171; SSSE3-NEXT: movq %rbx, %rcx 3172; SSSE3-NEXT: movq %rbx, %rbp 3173; SSSE3-NEXT: movq %rbx, %rax 3174; SSSE3-NEXT: shlq $49, %rax 3175; SSSE3-NEXT: sarq $63, %rax 3176; SSSE3-NEXT: movd %eax, %xmm0 3177; SSSE3-NEXT: movq %rbx, %rax 3178; SSSE3-NEXT: shlq $57, %r10 3179; SSSE3-NEXT: sarq $63, %r10 3180; SSSE3-NEXT: movd %r10d, %xmm15 3181; SSSE3-NEXT: movq %rbx, %r10 3182; SSSE3-NEXT: movsbq %bl, %rbx 3183; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] 3184; SSSE3-NEXT: shlq $53, %r8 3185; SSSE3-NEXT: sarq $63, %r8 3186; SSSE3-NEXT: movd %r8d, %xmm8 3187; SSSE3-NEXT: shlq $61, %r9 3188; SSSE3-NEXT: sarq $63, %r9 3189; SSSE3-NEXT: movd %r9d, %xmm2 3190; SSSE3-NEXT: shlq $51, %r11 3191; SSSE3-NEXT: sarq $63, %r11 3192; SSSE3-NEXT: movd %r11d, %xmm9 3193; SSSE3-NEXT: shlq $59, %r14 3194; SSSE3-NEXT: sarq $63, %r14 3195; SSSE3-NEXT: movd %r14d, %xmm5 3196; SSSE3-NEXT: shlq $55, %r15 3197; SSSE3-NEXT: sarq $63, %r15 3198; SSSE3-NEXT: movd %r15d, %xmm10 3199; SSSE3-NEXT: shlq $63, %r12 3200; SSSE3-NEXT: sarq $63, %r12 3201; SSSE3-NEXT: movd %r12d, %xmm0 3202; SSSE3-NEXT: shlq $50, %r13 3203; SSSE3-NEXT: sarq $63, %r13 3204; SSSE3-NEXT: movd %r13d, %xmm11 3205; SSSE3-NEXT: shlq $58, %rdx 3206; SSSE3-NEXT: sarq $63, %rdx 3207; SSSE3-NEXT: movd %edx, %xmm4 3208; SSSE3-NEXT: shlq $54, %rsi 3209; SSSE3-NEXT: sarq $63, %rsi 3210; SSSE3-NEXT: movd %esi, %xmm12 
3211; SSSE3-NEXT: shlq $62, %rcx 3212; SSSE3-NEXT: sarq $63, %rcx 3213; SSSE3-NEXT: movd %ecx, %xmm6 3214; SSSE3-NEXT: shlq $52, %rbp 3215; SSSE3-NEXT: sarq $63, %rbp 3216; SSSE3-NEXT: movd %ebp, %xmm13 3217; SSSE3-NEXT: shlq $60, %rax 3218; SSSE3-NEXT: sarq $63, %rax 3219; SSSE3-NEXT: movd %eax, %xmm7 3220; SSSE3-NEXT: shrq $15, %r10 3221; SSSE3-NEXT: movd %r10d, %xmm14 3222; SSSE3-NEXT: shrq $7, %rbx 3223; SSSE3-NEXT: movd %ebx, %xmm3 3224; SSSE3-NEXT: movswq 2(%rdi), %rdx 3225; SSSE3-NEXT: movq %rdx, %r8 3226; SSSE3-NEXT: movq %rdx, %r9 3227; SSSE3-NEXT: movq %rdx, %r10 3228; SSSE3-NEXT: movq %rdx, %r11 3229; SSSE3-NEXT: movq %rdx, %r14 3230; SSSE3-NEXT: movq %rdx, %r15 3231; SSSE3-NEXT: movq %rdx, %r12 3232; SSSE3-NEXT: movq %rdx, %r13 3233; SSSE3-NEXT: movq %rdx, %rbx 3234; SSSE3-NEXT: movq %rdx, %rax 3235; SSSE3-NEXT: movq %rdx, %rcx 3236; SSSE3-NEXT: movq %rdx, %rsi 3237; SSSE3-NEXT: movq %rdx, %rdi 3238; SSSE3-NEXT: movq %rdx, %rbp 3239; SSSE3-NEXT: shlq $49, %rbp 3240; SSSE3-NEXT: sarq $63, %rbp 3241; SSSE3-NEXT: movd %ebp, %xmm1 3242; SSSE3-NEXT: movq %rdx, %rbp 3243; SSSE3-NEXT: movsbq %dl, %rdx 3244; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] 3245; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7] 3246; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7] 3247; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7] 3248; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 
3249; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 3250; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7] 3251; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7] 3252; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] 3253; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] 3254; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7] 3255; SSSE3-NEXT: shlq $57, %r8 3256; SSSE3-NEXT: sarq $63, %r8 3257; SSSE3-NEXT: movd %r8d, %xmm2 3258; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] 3259; SSSE3-NEXT: shlq $53, %r9 3260; SSSE3-NEXT: sarq $63, %r9 3261; SSSE3-NEXT: movd %r9d, %xmm3 3262; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] 3263; SSSE3-NEXT: shlq $61, %r10 3264; SSSE3-NEXT: sarq $63, %r10 3265; SSSE3-NEXT: movd %r10d, %xmm4 3266; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] 3267; SSSE3-NEXT: shlq $51, %r11 3268; SSSE3-NEXT: sarq $63, %r11 3269; SSSE3-NEXT: movd %r11d, %xmm5 3270; SSSE3-NEXT: 
punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3271; SSSE3-NEXT: shlq $59, %r14 3272; SSSE3-NEXT: sarq $63, %r14 3273; SSSE3-NEXT: movd %r14d, %xmm6 3274; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3275; SSSE3-NEXT: shlq $55, %r15 3276; SSSE3-NEXT: sarq $63, %r15 3277; SSSE3-NEXT: movd %r15d, %xmm3 3278; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 3279; SSSE3-NEXT: shlq $63, %r12 3280; SSSE3-NEXT: sarq $63, %r12 3281; SSSE3-NEXT: movd %r12d, %xmm1 3282; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] 3283; SSSE3-NEXT: shlq $50, %r13 3284; SSSE3-NEXT: sarq $63, %r13 3285; SSSE3-NEXT: movd %r13d, %xmm2 3286; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 3287; SSSE3-NEXT: shlq $58, %rbx 3288; SSSE3-NEXT: sarq $63, %rbx 3289; SSSE3-NEXT: movd %ebx, %xmm3 3290; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 3291; SSSE3-NEXT: shlq $54, %rax 3292; SSSE3-NEXT: sarq $63, %rax 3293; SSSE3-NEXT: movd %eax, %xmm5 3294; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3295; SSSE3-NEXT: shlq $62, %rcx 3296; SSSE3-NEXT: sarq $63, %rcx 3297; SSSE3-NEXT: movd %ecx, %xmm4 3298; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3299; SSSE3-NEXT: shlq $52, %rsi 3300; SSSE3-NEXT: sarq $63, %rsi 3301; SSSE3-NEXT: movd %esi, %xmm2 3302; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] 3303; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3304; SSSE3-NEXT: shlq $60, %rdi 3305; SSSE3-NEXT: sarq $63, %rdi 3306; SSSE3-NEXT: movd %edi, %xmm3 3307; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3308; SSSE3-NEXT: shrq $15, %rbp 3309; SSSE3-NEXT: movd %ebp, %xmm2 3310; SSSE3-NEXT: shrq $7, %rdx 3311; SSSE3-NEXT: movd %edx, %xmm5 3312; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] 3313; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] 3314; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3315; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3316; SSSE3-NEXT: popq %rbx 3317; SSSE3-NEXT: popq %r12 3318; SSSE3-NEXT: popq %r13 3319; SSSE3-NEXT: popq %r14 3320; SSSE3-NEXT: popq %r15 3321; SSSE3-NEXT: popq %rbp 3322; SSSE3-NEXT: retq 3323; 3324; SSE41-LABEL: load_sext_32i1_to_32i8: 3325; SSE41: # BB#0: # %entry 3326; SSE41-NEXT: movswq (%rdi), %rax 3327; SSE41-NEXT: movq 
%rax, %rcx 3328; SSE41-NEXT: shlq $62, %rcx 3329; SSE41-NEXT: sarq $63, %rcx 3330; SSE41-NEXT: movq %rax, %rdx 3331; SSE41-NEXT: shlq $63, %rdx 3332; SSE41-NEXT: sarq $63, %rdx 3333; SSE41-NEXT: movd %edx, %xmm0 3334; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 3335; SSE41-NEXT: movq %rax, %rcx 3336; SSE41-NEXT: shlq $61, %rcx 3337; SSE41-NEXT: sarq $63, %rcx 3338; SSE41-NEXT: pinsrb $2, %ecx, %xmm0 3339; SSE41-NEXT: movq %rax, %rcx 3340; SSE41-NEXT: shlq $60, %rcx 3341; SSE41-NEXT: sarq $63, %rcx 3342; SSE41-NEXT: pinsrb $3, %ecx, %xmm0 3343; SSE41-NEXT: movq %rax, %rcx 3344; SSE41-NEXT: shlq $59, %rcx 3345; SSE41-NEXT: sarq $63, %rcx 3346; SSE41-NEXT: pinsrb $4, %ecx, %xmm0 3347; SSE41-NEXT: movq %rax, %rcx 3348; SSE41-NEXT: shlq $58, %rcx 3349; SSE41-NEXT: sarq $63, %rcx 3350; SSE41-NEXT: pinsrb $5, %ecx, %xmm0 3351; SSE41-NEXT: movq %rax, %rcx 3352; SSE41-NEXT: shlq $57, %rcx 3353; SSE41-NEXT: sarq $63, %rcx 3354; SSE41-NEXT: pinsrb $6, %ecx, %xmm0 3355; SSE41-NEXT: movsbq %al, %rcx 3356; SSE41-NEXT: shrq $7, %rcx 3357; SSE41-NEXT: pinsrb $7, %ecx, %xmm0 3358; SSE41-NEXT: movq %rax, %rcx 3359; SSE41-NEXT: shlq $55, %rcx 3360; SSE41-NEXT: sarq $63, %rcx 3361; SSE41-NEXT: pinsrb $8, %ecx, %xmm0 3362; SSE41-NEXT: movq %rax, %rcx 3363; SSE41-NEXT: shlq $54, %rcx 3364; SSE41-NEXT: sarq $63, %rcx 3365; SSE41-NEXT: pinsrb $9, %ecx, %xmm0 3366; SSE41-NEXT: movq %rax, %rcx 3367; SSE41-NEXT: shlq $53, %rcx 3368; SSE41-NEXT: sarq $63, %rcx 3369; SSE41-NEXT: pinsrb $10, %ecx, %xmm0 3370; SSE41-NEXT: movq %rax, %rcx 3371; SSE41-NEXT: shlq $52, %rcx 3372; SSE41-NEXT: sarq $63, %rcx 3373; SSE41-NEXT: pinsrb $11, %ecx, %xmm0 3374; SSE41-NEXT: movq %rax, %rcx 3375; SSE41-NEXT: shlq $51, %rcx 3376; SSE41-NEXT: sarq $63, %rcx 3377; SSE41-NEXT: pinsrb $12, %ecx, %xmm0 3378; SSE41-NEXT: movq %rax, %rcx 3379; SSE41-NEXT: shlq $50, %rcx 3380; SSE41-NEXT: sarq $63, %rcx 3381; SSE41-NEXT: pinsrb $13, %ecx, %xmm0 3382; SSE41-NEXT: movq %rax, %rcx 3383; SSE41-NEXT: shlq $49, %rcx 3384; 
SSE41-NEXT: sarq $63, %rcx 3385; SSE41-NEXT: pinsrb $14, %ecx, %xmm0 3386; SSE41-NEXT: shrq $15, %rax 3387; SSE41-NEXT: pinsrb $15, %eax, %xmm0 3388; SSE41-NEXT: movswq 2(%rdi), %rax 3389; SSE41-NEXT: movq %rax, %rcx 3390; SSE41-NEXT: shlq $62, %rcx 3391; SSE41-NEXT: sarq $63, %rcx 3392; SSE41-NEXT: movq %rax, %rdx 3393; SSE41-NEXT: shlq $63, %rdx 3394; SSE41-NEXT: sarq $63, %rdx 3395; SSE41-NEXT: movd %edx, %xmm1 3396; SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3397; SSE41-NEXT: movq %rax, %rcx 3398; SSE41-NEXT: shlq $61, %rcx 3399; SSE41-NEXT: sarq $63, %rcx 3400; SSE41-NEXT: pinsrb $2, %ecx, %xmm1 3401; SSE41-NEXT: movq %rax, %rcx 3402; SSE41-NEXT: shlq $60, %rcx 3403; SSE41-NEXT: sarq $63, %rcx 3404; SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3405; SSE41-NEXT: movq %rax, %rcx 3406; SSE41-NEXT: shlq $59, %rcx 3407; SSE41-NEXT: sarq $63, %rcx 3408; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3409; SSE41-NEXT: movq %rax, %rcx 3410; SSE41-NEXT: shlq $58, %rcx 3411; SSE41-NEXT: sarq $63, %rcx 3412; SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3413; SSE41-NEXT: movq %rax, %rcx 3414; SSE41-NEXT: shlq $57, %rcx 3415; SSE41-NEXT: sarq $63, %rcx 3416; SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3417; SSE41-NEXT: movsbq %al, %rcx 3418; SSE41-NEXT: shrq $7, %rcx 3419; SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3420; SSE41-NEXT: movq %rax, %rcx 3421; SSE41-NEXT: shlq $55, %rcx 3422; SSE41-NEXT: sarq $63, %rcx 3423; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3424; SSE41-NEXT: movq %rax, %rcx 3425; SSE41-NEXT: shlq $54, %rcx 3426; SSE41-NEXT: sarq $63, %rcx 3427; SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3428; SSE41-NEXT: movq %rax, %rcx 3429; SSE41-NEXT: shlq $53, %rcx 3430; SSE41-NEXT: sarq $63, %rcx 3431; SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3432; SSE41-NEXT: movq %rax, %rcx 3433; SSE41-NEXT: shlq $52, %rcx 3434; SSE41-NEXT: sarq $63, %rcx 3435; SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3436; SSE41-NEXT: movq %rax, %rcx 3437; SSE41-NEXT: shlq $51, %rcx 3438; SSE41-NEXT: sarq $63, %rcx 3439; SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3440; SSE41-NEXT: 
movq %rax, %rcx 3441; SSE41-NEXT: shlq $50, %rcx 3442; SSE41-NEXT: sarq $63, %rcx 3443; SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3444; SSE41-NEXT: movq %rax, %rcx 3445; SSE41-NEXT: shlq $49, %rcx 3446; SSE41-NEXT: sarq $63, %rcx 3447; SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3448; SSE41-NEXT: shrq $15, %rax 3449; SSE41-NEXT: pinsrb $15, %eax, %xmm1 3450; SSE41-NEXT: retq 3451; 3452; AVX1-LABEL: load_sext_32i1_to_32i8: 3453; AVX1: # BB#0: # %entry 3454; AVX1-NEXT: pushq %rbp 3455; AVX1-NEXT: pushq %r15 3456; AVX1-NEXT: pushq %r14 3457; AVX1-NEXT: pushq %r13 3458; AVX1-NEXT: pushq %r12 3459; AVX1-NEXT: pushq %rbx 3460; AVX1-NEXT: movslq (%rdi), %rax 3461; AVX1-NEXT: movq %rax, %rcx 3462; AVX1-NEXT: shlq $47, %rcx 3463; AVX1-NEXT: sarq $63, %rcx 3464; AVX1-NEXT: vmovd %ecx, %xmm0 3465; AVX1-NEXT: movq %rax, %r8 3466; AVX1-NEXT: movq %rax, %rdx 3467; AVX1-NEXT: movq %rax, %rcx 3468; AVX1-NEXT: movq %rax, %rdi 3469; AVX1-NEXT: movq %rax, %r13 3470; AVX1-NEXT: movq %rax, %rsi 3471; AVX1-NEXT: movq %rax, %r10 3472; AVX1-NEXT: movq %rax, %r11 3473; AVX1-NEXT: movq %rax, %r9 3474; AVX1-NEXT: movq %rax, %rbx 3475; AVX1-NEXT: movq %rax, %r14 3476; AVX1-NEXT: movq %rax, %r15 3477; AVX1-NEXT: movq %rax, %r12 3478; AVX1-NEXT: movq %rax, %rbp 3479; AVX1-NEXT: shlq $46, %rbp 3480; AVX1-NEXT: sarq $63, %rbp 3481; AVX1-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0 3482; AVX1-NEXT: movq %rax, %rbp 3483; AVX1-NEXT: shlq $45, %r8 3484; AVX1-NEXT: sarq $63, %r8 3485; AVX1-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0 3486; AVX1-NEXT: movq %rax, %r8 3487; AVX1-NEXT: shlq $44, %rdx 3488; AVX1-NEXT: sarq $63, %rdx 3489; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 3490; AVX1-NEXT: movq %rax, %rdx 3491; AVX1-NEXT: shlq $43, %rcx 3492; AVX1-NEXT: sarq $63, %rcx 3493; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 3494; AVX1-NEXT: movq %rax, %rcx 3495; AVX1-NEXT: shlq $42, %rdi 3496; AVX1-NEXT: sarq $63, %rdi 3497; AVX1-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 3498; AVX1-NEXT: movq %rax, %rdi 3499; AVX1-NEXT: shlq $41, %r13 3500; 
AVX1-NEXT: sarq $63, %r13 3501; AVX1-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0 3502; AVX1-NEXT: movq %rax, %r13 3503; AVX1-NEXT: shlq $40, %rsi 3504; AVX1-NEXT: sarq $63, %rsi 3505; AVX1-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0 3506; AVX1-NEXT: movq %rax, %rsi 3507; AVX1-NEXT: shlq $39, %r10 3508; AVX1-NEXT: sarq $63, %r10 3509; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 3510; AVX1-NEXT: movq %rax, %r10 3511; AVX1-NEXT: shlq $38, %r11 3512; AVX1-NEXT: sarq $63, %r11 3513; AVX1-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0 3514; AVX1-NEXT: movsbq %al, %r11 3515; AVX1-NEXT: shlq $37, %r9 3516; AVX1-NEXT: sarq $63, %r9 3517; AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 3518; AVX1-NEXT: movq %rax, %r9 3519; AVX1-NEXT: shlq $36, %rbx 3520; AVX1-NEXT: sarq $63, %rbx 3521; AVX1-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 3522; AVX1-NEXT: movq %rax, %rbx 3523; AVX1-NEXT: shlq $35, %r14 3524; AVX1-NEXT: sarq $63, %r14 3525; AVX1-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0 3526; AVX1-NEXT: movq %rax, %r14 3527; AVX1-NEXT: shlq $34, %r15 3528; AVX1-NEXT: sarq $63, %r15 3529; AVX1-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0 3530; AVX1-NEXT: movq %rax, %r15 3531; AVX1-NEXT: shlq $33, %r12 3532; AVX1-NEXT: sarq $63, %r12 3533; AVX1-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0 3534; AVX1-NEXT: movq %rax, %r12 3535; AVX1-NEXT: shrq $31, %rbp 3536; AVX1-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0 3537; AVX1-NEXT: movq %rax, %rbp 3538; AVX1-NEXT: shlq $63, %rdx 3539; AVX1-NEXT: sarq $63, %rdx 3540; AVX1-NEXT: vmovd %edx, %xmm1 3541; AVX1-NEXT: movq %rax, %rdx 3542; AVX1-NEXT: movswq %ax, %rax 3543; AVX1-NEXT: shlq $62, %r8 3544; AVX1-NEXT: sarq $63, %r8 3545; AVX1-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 3546; AVX1-NEXT: shlq $61, %rcx 3547; AVX1-NEXT: sarq $63, %rcx 3548; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 3549; AVX1-NEXT: shlq $60, %rdi 3550; AVX1-NEXT: sarq $63, %rdi 3551; AVX1-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1 3552; AVX1-NEXT: shlq $59, %r13 3553; AVX1-NEXT: sarq $63, %r13 3554; AVX1-NEXT: vpinsrb $4, %r13d, %xmm1, 
%xmm1 3555; AVX1-NEXT: shlq $58, %rsi 3556; AVX1-NEXT: sarq $63, %rsi 3557; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 3558; AVX1-NEXT: shlq $57, %r10 3559; AVX1-NEXT: sarq $63, %r10 3560; AVX1-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1 3561; AVX1-NEXT: shrq $7, %r11 3562; AVX1-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1 3563; AVX1-NEXT: shlq $55, %r9 3564; AVX1-NEXT: sarq $63, %r9 3565; AVX1-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1 3566; AVX1-NEXT: shlq $54, %rbx 3567; AVX1-NEXT: sarq $63, %rbx 3568; AVX1-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1 3569; AVX1-NEXT: shlq $53, %r14 3570; AVX1-NEXT: sarq $63, %r14 3571; AVX1-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 3572; AVX1-NEXT: shlq $52, %r15 3573; AVX1-NEXT: sarq $63, %r15 3574; AVX1-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1 3575; AVX1-NEXT: shlq $51, %r12 3576; AVX1-NEXT: sarq $63, %r12 3577; AVX1-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1 3578; AVX1-NEXT: shlq $50, %rbp 3579; AVX1-NEXT: sarq $63, %rbp 3580; AVX1-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1 3581; AVX1-NEXT: shlq $49, %rdx 3582; AVX1-NEXT: sarq $63, %rdx 3583; AVX1-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1 3584; AVX1-NEXT: shrq $15, %rax 3585; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 3586; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3587; AVX1-NEXT: popq %rbx 3588; AVX1-NEXT: popq %r12 3589; AVX1-NEXT: popq %r13 3590; AVX1-NEXT: popq %r14 3591; AVX1-NEXT: popq %r15 3592; AVX1-NEXT: popq %rbp 3593; AVX1-NEXT: retq 3594; 3595; AVX2-LABEL: load_sext_32i1_to_32i8: 3596; AVX2: # BB#0: # %entry 3597; AVX2-NEXT: pushq %rbp 3598; AVX2-NEXT: pushq %r15 3599; AVX2-NEXT: pushq %r14 3600; AVX2-NEXT: pushq %r13 3601; AVX2-NEXT: pushq %r12 3602; AVX2-NEXT: pushq %rbx 3603; AVX2-NEXT: movslq (%rdi), %rax 3604; AVX2-NEXT: movq %rax, %rcx 3605; AVX2-NEXT: shlq $47, %rcx 3606; AVX2-NEXT: sarq $63, %rcx 3607; AVX2-NEXT: vmovd %ecx, %xmm0 3608; AVX2-NEXT: movq %rax, %r8 3609; AVX2-NEXT: movq %rax, %rdx 3610; AVX2-NEXT: movq %rax, %rcx 3611; AVX2-NEXT: movq %rax, %rdi 3612; AVX2-NEXT: movq %rax, %r13 
3613; AVX2-NEXT: movq %rax, %rsi 3614; AVX2-NEXT: movq %rax, %r10 3615; AVX2-NEXT: movq %rax, %r11 3616; AVX2-NEXT: movq %rax, %r9 3617; AVX2-NEXT: movq %rax, %rbx 3618; AVX2-NEXT: movq %rax, %r14 3619; AVX2-NEXT: movq %rax, %r15 3620; AVX2-NEXT: movq %rax, %r12 3621; AVX2-NEXT: movq %rax, %rbp 3622; AVX2-NEXT: shlq $46, %rbp 3623; AVX2-NEXT: sarq $63, %rbp 3624; AVX2-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0 3625; AVX2-NEXT: movq %rax, %rbp 3626; AVX2-NEXT: shlq $45, %r8 3627; AVX2-NEXT: sarq $63, %r8 3628; AVX2-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0 3629; AVX2-NEXT: movq %rax, %r8 3630; AVX2-NEXT: shlq $44, %rdx 3631; AVX2-NEXT: sarq $63, %rdx 3632; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 3633; AVX2-NEXT: movq %rax, %rdx 3634; AVX2-NEXT: shlq $43, %rcx 3635; AVX2-NEXT: sarq $63, %rcx 3636; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 3637; AVX2-NEXT: movq %rax, %rcx 3638; AVX2-NEXT: shlq $42, %rdi 3639; AVX2-NEXT: sarq $63, %rdi 3640; AVX2-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 3641; AVX2-NEXT: movq %rax, %rdi 3642; AVX2-NEXT: shlq $41, %r13 3643; AVX2-NEXT: sarq $63, %r13 3644; AVX2-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0 3645; AVX2-NEXT: movq %rax, %r13 3646; AVX2-NEXT: shlq $40, %rsi 3647; AVX2-NEXT: sarq $63, %rsi 3648; AVX2-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0 3649; AVX2-NEXT: movq %rax, %rsi 3650; AVX2-NEXT: shlq $39, %r10 3651; AVX2-NEXT: sarq $63, %r10 3652; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 3653; AVX2-NEXT: movq %rax, %r10 3654; AVX2-NEXT: shlq $38, %r11 3655; AVX2-NEXT: sarq $63, %r11 3656; AVX2-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0 3657; AVX2-NEXT: movsbq %al, %r11 3658; AVX2-NEXT: shlq $37, %r9 3659; AVX2-NEXT: sarq $63, %r9 3660; AVX2-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 3661; AVX2-NEXT: movq %rax, %r9 3662; AVX2-NEXT: shlq $36, %rbx 3663; AVX2-NEXT: sarq $63, %rbx 3664; AVX2-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 3665; AVX2-NEXT: movq %rax, %rbx 3666; AVX2-NEXT: shlq $35, %r14 3667; AVX2-NEXT: sarq $63, %r14 3668; AVX2-NEXT: vpinsrb $12, %r14d, %xmm0, 
%xmm0 3669; AVX2-NEXT: movq %rax, %r14 3670; AVX2-NEXT: shlq $34, %r15 3671; AVX2-NEXT: sarq $63, %r15 3672; AVX2-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0 3673; AVX2-NEXT: movq %rax, %r15 3674; AVX2-NEXT: shlq $33, %r12 3675; AVX2-NEXT: sarq $63, %r12 3676; AVX2-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0 3677; AVX2-NEXT: movq %rax, %r12 3678; AVX2-NEXT: shrq $31, %rbp 3679; AVX2-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0 3680; AVX2-NEXT: movq %rax, %rbp 3681; AVX2-NEXT: shlq $63, %rdx 3682; AVX2-NEXT: sarq $63, %rdx 3683; AVX2-NEXT: vmovd %edx, %xmm1 3684; AVX2-NEXT: movq %rax, %rdx 3685; AVX2-NEXT: movswq %ax, %rax 3686; AVX2-NEXT: shlq $62, %r8 3687; AVX2-NEXT: sarq $63, %r8 3688; AVX2-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 3689; AVX2-NEXT: shlq $61, %rcx 3690; AVX2-NEXT: sarq $63, %rcx 3691; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 3692; AVX2-NEXT: shlq $60, %rdi 3693; AVX2-NEXT: sarq $63, %rdi 3694; AVX2-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1 3695; AVX2-NEXT: shlq $59, %r13 3696; AVX2-NEXT: sarq $63, %r13 3697; AVX2-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1 3698; AVX2-NEXT: shlq $58, %rsi 3699; AVX2-NEXT: sarq $63, %rsi 3700; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 3701; AVX2-NEXT: shlq $57, %r10 3702; AVX2-NEXT: sarq $63, %r10 3703; AVX2-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1 3704; AVX2-NEXT: shrq $7, %r11 3705; AVX2-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1 3706; AVX2-NEXT: shlq $55, %r9 3707; AVX2-NEXT: sarq $63, %r9 3708; AVX2-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1 3709; AVX2-NEXT: shlq $54, %rbx 3710; AVX2-NEXT: sarq $63, %rbx 3711; AVX2-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1 3712; AVX2-NEXT: shlq $53, %r14 3713; AVX2-NEXT: sarq $63, %r14 3714; AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 3715; AVX2-NEXT: shlq $52, %r15 3716; AVX2-NEXT: sarq $63, %r15 3717; AVX2-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1 3718; AVX2-NEXT: shlq $51, %r12 3719; AVX2-NEXT: sarq $63, %r12 3720; AVX2-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1 3721; AVX2-NEXT: shlq $50, %rbp 3722; AVX2-NEXT: sarq $63, %rbp 3723; 
AVX2-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1 3724; AVX2-NEXT: shlq $49, %rdx 3725; AVX2-NEXT: sarq $63, %rdx 3726; AVX2-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1 3727; AVX2-NEXT: shrq $15, %rax 3728; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 3729; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 3730; AVX2-NEXT: popq %rbx 3731; AVX2-NEXT: popq %r12 3732; AVX2-NEXT: popq %r13 3733; AVX2-NEXT: popq %r14 3734; AVX2-NEXT: popq %r15 3735; AVX2-NEXT: popq %rbp 3736; AVX2-NEXT: retq 3737; 3738; AVX512-LABEL: load_sext_32i1_to_32i8: 3739; AVX512: # BB#0: # %entry 3740; AVX512-NEXT: kmovw (%rdi), %k1 3741; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 3742; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} 3743; AVX512-NEXT: vpmovdb %zmm1, %xmm1 3744; AVX512-NEXT: kmovw 2(%rdi), %k1 3745; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 3746; AVX512-NEXT: vpmovdb %zmm0, %xmm0 3747; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3748; AVX512-NEXT: retq 3749; 3750; X32-SSE41-LABEL: load_sext_32i1_to_32i8: 3751; X32-SSE41: # BB#0: # %entry 3752; X32-SSE41-NEXT: pushl %esi 3753; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3754; X32-SSE41-NEXT: movswl (%eax), %ecx 3755; X32-SSE41-NEXT: movl %ecx, %edx 3756; X32-SSE41-NEXT: shll $30, %edx 3757; X32-SSE41-NEXT: sarl $31, %edx 3758; X32-SSE41-NEXT: movl %ecx, %esi 3759; X32-SSE41-NEXT: shll $31, %esi 3760; X32-SSE41-NEXT: sarl $31, %esi 3761; X32-SSE41-NEXT: movd %esi, %xmm0 3762; X32-SSE41-NEXT: pinsrb $1, %edx, %xmm0 3763; X32-SSE41-NEXT: movl %ecx, %edx 3764; X32-SSE41-NEXT: shll $29, %edx 3765; X32-SSE41-NEXT: sarl $31, %edx 3766; X32-SSE41-NEXT: pinsrb $2, %edx, %xmm0 3767; X32-SSE41-NEXT: movl %ecx, %edx 3768; X32-SSE41-NEXT: shll $28, %edx 3769; X32-SSE41-NEXT: sarl $31, %edx 3770; X32-SSE41-NEXT: pinsrb $3, %edx, %xmm0 3771; X32-SSE41-NEXT: movl %ecx, %edx 3772; X32-SSE41-NEXT: shll $27, %edx 3773; X32-SSE41-NEXT: sarl $31, %edx 3774; X32-SSE41-NEXT: pinsrb $4, %edx, %xmm0 3775; X32-SSE41-NEXT: movl %ecx, %edx 3776; X32-SSE41-NEXT: 
shll $26, %edx 3777; X32-SSE41-NEXT: sarl $31, %edx 3778; X32-SSE41-NEXT: pinsrb $5, %edx, %xmm0 3779; X32-SSE41-NEXT: movl %ecx, %edx 3780; X32-SSE41-NEXT: shll $25, %edx 3781; X32-SSE41-NEXT: sarl $31, %edx 3782; X32-SSE41-NEXT: pinsrb $6, %edx, %xmm0 3783; X32-SSE41-NEXT: movsbl %cl, %edx 3784; X32-SSE41-NEXT: shrl $7, %edx 3785; X32-SSE41-NEXT: pinsrb $7, %edx, %xmm0 3786; X32-SSE41-NEXT: movl %ecx, %edx 3787; X32-SSE41-NEXT: shll $23, %edx 3788; X32-SSE41-NEXT: sarl $31, %edx 3789; X32-SSE41-NEXT: pinsrb $8, %edx, %xmm0 3790; X32-SSE41-NEXT: movl %ecx, %edx 3791; X32-SSE41-NEXT: shll $22, %edx 3792; X32-SSE41-NEXT: sarl $31, %edx 3793; X32-SSE41-NEXT: pinsrb $9, %edx, %xmm0 3794; X32-SSE41-NEXT: movl %ecx, %edx 3795; X32-SSE41-NEXT: shll $21, %edx 3796; X32-SSE41-NEXT: sarl $31, %edx 3797; X32-SSE41-NEXT: pinsrb $10, %edx, %xmm0 3798; X32-SSE41-NEXT: movl %ecx, %edx 3799; X32-SSE41-NEXT: shll $20, %edx 3800; X32-SSE41-NEXT: sarl $31, %edx 3801; X32-SSE41-NEXT: pinsrb $11, %edx, %xmm0 3802; X32-SSE41-NEXT: movl %ecx, %edx 3803; X32-SSE41-NEXT: shll $19, %edx 3804; X32-SSE41-NEXT: sarl $31, %edx 3805; X32-SSE41-NEXT: pinsrb $12, %edx, %xmm0 3806; X32-SSE41-NEXT: movl %ecx, %edx 3807; X32-SSE41-NEXT: shll $18, %edx 3808; X32-SSE41-NEXT: sarl $31, %edx 3809; X32-SSE41-NEXT: pinsrb $13, %edx, %xmm0 3810; X32-SSE41-NEXT: movl %ecx, %edx 3811; X32-SSE41-NEXT: shll $17, %edx 3812; X32-SSE41-NEXT: sarl $31, %edx 3813; X32-SSE41-NEXT: pinsrb $14, %edx, %xmm0 3814; X32-SSE41-NEXT: shrl $15, %ecx 3815; X32-SSE41-NEXT: pinsrb $15, %ecx, %xmm0 3816; X32-SSE41-NEXT: movswl 2(%eax), %eax 3817; X32-SSE41-NEXT: movl %eax, %ecx 3818; X32-SSE41-NEXT: shll $30, %ecx 3819; X32-SSE41-NEXT: sarl $31, %ecx 3820; X32-SSE41-NEXT: movl %eax, %edx 3821; X32-SSE41-NEXT: shll $31, %edx 3822; X32-SSE41-NEXT: sarl $31, %edx 3823; X32-SSE41-NEXT: movd %edx, %xmm1 3824; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3825; X32-SSE41-NEXT: movl %eax, %ecx 3826; X32-SSE41-NEXT: shll $29, %ecx 3827; 
X32-SSE41-NEXT: sarl $31, %ecx 3828; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1 3829; X32-SSE41-NEXT: movl %eax, %ecx 3830; X32-SSE41-NEXT: shll $28, %ecx 3831; X32-SSE41-NEXT: sarl $31, %ecx 3832; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3833; X32-SSE41-NEXT: movl %eax, %ecx 3834; X32-SSE41-NEXT: shll $27, %ecx 3835; X32-SSE41-NEXT: sarl $31, %ecx 3836; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3837; X32-SSE41-NEXT: movl %eax, %ecx 3838; X32-SSE41-NEXT: shll $26, %ecx 3839; X32-SSE41-NEXT: sarl $31, %ecx 3840; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3841; X32-SSE41-NEXT: movl %eax, %ecx 3842; X32-SSE41-NEXT: shll $25, %ecx 3843; X32-SSE41-NEXT: sarl $31, %ecx 3844; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3845; X32-SSE41-NEXT: movsbl %al, %ecx 3846; X32-SSE41-NEXT: shrl $7, %ecx 3847; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3848; X32-SSE41-NEXT: movl %eax, %ecx 3849; X32-SSE41-NEXT: shll $23, %ecx 3850; X32-SSE41-NEXT: sarl $31, %ecx 3851; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3852; X32-SSE41-NEXT: movl %eax, %ecx 3853; X32-SSE41-NEXT: shll $22, %ecx 3854; X32-SSE41-NEXT: sarl $31, %ecx 3855; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3856; X32-SSE41-NEXT: movl %eax, %ecx 3857; X32-SSE41-NEXT: shll $21, %ecx 3858; X32-SSE41-NEXT: sarl $31, %ecx 3859; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3860; X32-SSE41-NEXT: movl %eax, %ecx 3861; X32-SSE41-NEXT: shll $20, %ecx 3862; X32-SSE41-NEXT: sarl $31, %ecx 3863; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3864; X32-SSE41-NEXT: movl %eax, %ecx 3865; X32-SSE41-NEXT: shll $19, %ecx 3866; X32-SSE41-NEXT: sarl $31, %ecx 3867; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3868; X32-SSE41-NEXT: movl %eax, %ecx 3869; X32-SSE41-NEXT: shll $18, %ecx 3870; X32-SSE41-NEXT: sarl $31, %ecx 3871; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3872; X32-SSE41-NEXT: movl %eax, %ecx 3873; X32-SSE41-NEXT: shll $17, %ecx 3874; X32-SSE41-NEXT: sarl $31, %ecx 3875; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3876; X32-SSE41-NEXT: shrl $15, %eax 3877; X32-SSE41-NEXT: pinsrb $15, 
%eax, %xmm1 3878; X32-SSE41-NEXT: popl %esi 3879; X32-SSE41-NEXT: retl 3880entry: 3881 %X = load <32 x i1>, <32 x i1>* %ptr 3882 %Y = sext <32 x i1> %X to <32 x i8> 3883 ret <32 x i8> %Y 3884} 3885 3886define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) { 3887; SSE2-LABEL: load_sext_16i8_to_16i16: 3888; SSE2: # BB#0: # %entry 3889; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3890; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3891; SSE2-NEXT: psraw $8, %xmm0 3892; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 3893; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3894; SSE2-NEXT: psraw $8, %xmm1 3895; SSE2-NEXT: retq 3896; 3897; SSSE3-LABEL: load_sext_16i8_to_16i16: 3898; SSSE3: # BB#0: # %entry 3899; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3900; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3901; SSSE3-NEXT: psraw $8, %xmm0 3902; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 3903; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3904; SSSE3-NEXT: psraw $8, %xmm1 3905; SSSE3-NEXT: retq 3906; 3907; SSE41-LABEL: load_sext_16i8_to_16i16: 3908; SSE41: # BB#0: # %entry 3909; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 3910; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1 3911; SSE41-NEXT: retq 3912; 3913; AVX1-LABEL: load_sext_16i8_to_16i16: 3914; AVX1: # BB#0: # %entry 3915; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0 3916; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm1 3917; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3918; AVX1-NEXT: retq 3919; 3920; AVX2-LABEL: load_sext_16i8_to_16i16: 3921; AVX2: # BB#0: # %entry 3922; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0 3923; AVX2-NEXT: retq 3924; 3925; AVX512-LABEL: load_sext_16i8_to_16i16: 3926; AVX512: # BB#0: # %entry 3927; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0 3928; AVX512-NEXT: retq 3929; 3930; X32-SSE41-LABEL: load_sext_16i8_to_16i16: 3931; X32-SSE41: # BB#0: # %entry 3932; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3933; X32-SSE41-NEXT: 
pmovsxbw (%eax), %xmm0 3934; X32-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1 3935; X32-SSE41-NEXT: retl 3936entry: 3937 %X = load <16 x i8>, <16 x i8>* %ptr 3938 %Y = sext <16 x i8> %X to <16 x i16> 3939 ret <16 x i16> %Y 3940} 3941 3942define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { 3943; SSE2-LABEL: load_sext_2i16_to_2i64: 3944; SSE2: # BB#0: # %entry 3945; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3946; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3947; SSE2-NEXT: movdqa %xmm0, %xmm1 3948; SSE2-NEXT: psrad $31, %xmm1 3949; SSE2-NEXT: psrad $16, %xmm0 3950; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3951; SSE2-NEXT: retq 3952; 3953; SSSE3-LABEL: load_sext_2i16_to_2i64: 3954; SSSE3: # BB#0: # %entry 3955; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3956; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3957; SSSE3-NEXT: movdqa %xmm0, %xmm1 3958; SSSE3-NEXT: psrad $31, %xmm1 3959; SSSE3-NEXT: psrad $16, %xmm0 3960; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3961; SSSE3-NEXT: retq 3962; 3963; SSE41-LABEL: load_sext_2i16_to_2i64: 3964; SSE41: # BB#0: # %entry 3965; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 3966; SSE41-NEXT: retq 3967; 3968; AVX-LABEL: load_sext_2i16_to_2i64: 3969; AVX: # BB#0: # %entry 3970; AVX-NEXT: vpmovsxwq (%rdi), %xmm0 3971; AVX-NEXT: retq 3972; 3973; X32-SSE41-LABEL: load_sext_2i16_to_2i64: 3974; X32-SSE41: # BB#0: # %entry 3975; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3976; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0 3977; X32-SSE41-NEXT: retl 3978entry: 3979 %X = load <2 x i16>, <2 x i16>* %ptr 3980 %Y = sext <2 x i16> %X to <2 x i64> 3981 ret <2 x i64> %Y 3982} 3983 3984define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) { 3985; SSE2-LABEL: load_sext_4i16_to_4i32: 3986; SSE2: # BB#0: # %entry 3987; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3988; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3989; SSE2-NEXT: psrad $16, %xmm0 
3990; SSE2-NEXT: retq 3991; 3992; SSSE3-LABEL: load_sext_4i16_to_4i32: 3993; SSSE3: # BB#0: # %entry 3994; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3995; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3996; SSSE3-NEXT: psrad $16, %xmm0 3997; SSSE3-NEXT: retq 3998; 3999; SSE41-LABEL: load_sext_4i16_to_4i32: 4000; SSE41: # BB#0: # %entry 4001; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 4002; SSE41-NEXT: retq 4003; 4004; AVX-LABEL: load_sext_4i16_to_4i32: 4005; AVX: # BB#0: # %entry 4006; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 4007; AVX-NEXT: retq 4008; 4009; X32-SSE41-LABEL: load_sext_4i16_to_4i32: 4010; X32-SSE41: # BB#0: # %entry 4011; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4012; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0 4013; X32-SSE41-NEXT: retl 4014entry: 4015 %X = load <4 x i16>, <4 x i16>* %ptr 4016 %Y = sext <4 x i16> %X to <4 x i32> 4017 ret <4 x i32> %Y 4018} 4019 4020define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { 4021; SSE2-LABEL: load_sext_4i16_to_4i64: 4022; SSE2: # BB#0: # %entry 4023; SSE2-NEXT: movswq 2(%rdi), %rax 4024; SSE2-NEXT: movd %rax, %xmm1 4025; SSE2-NEXT: movswq (%rdi), %rax 4026; SSE2-NEXT: movd %rax, %xmm0 4027; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4028; SSE2-NEXT: movswq 6(%rdi), %rax 4029; SSE2-NEXT: movd %rax, %xmm2 4030; SSE2-NEXT: movswq 4(%rdi), %rax 4031; SSE2-NEXT: movd %rax, %xmm1 4032; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4033; SSE2-NEXT: retq 4034; 4035; SSSE3-LABEL: load_sext_4i16_to_4i64: 4036; SSSE3: # BB#0: # %entry 4037; SSSE3-NEXT: movswq 2(%rdi), %rax 4038; SSSE3-NEXT: movd %rax, %xmm1 4039; SSSE3-NEXT: movswq (%rdi), %rax 4040; SSSE3-NEXT: movd %rax, %xmm0 4041; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4042; SSSE3-NEXT: movswq 6(%rdi), %rax 4043; SSSE3-NEXT: movd %rax, %xmm2 4044; SSSE3-NEXT: movswq 4(%rdi), %rax 4045; SSSE3-NEXT: movd %rax, %xmm1 4046; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4047; SSSE3-NEXT: retq 4048; 4049; SSE41-LABEL: 
load_sext_4i16_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxwq (%rdi), %xmm0
; SSE41-NEXT:    pmovsxwq 4(%rdi), %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_sext_4i16_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_sext_4i16_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovsxwq (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_sext_4i16_to_4i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovsxwq (%rdi), %ymm0
; AVX512-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxwq (%eax), %xmm0
; X32-SSE41-NEXT:    pmovsxwq 4(%eax), %xmm1
; X32-SSE41-NEXT:    retl
entry:
  %X = load <4 x i16>, <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i64>
  ret <4 x i64> %Y
}

define <8 x i32> @load_sext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-LABEL: load_sext_8i16_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_8i16_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    psrad $16, %xmm0
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    psrad $16, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_8i16_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxwd (%rdi), %xmm0
; SSE41-NEXT:    pmovsxwd 8(%rdi), %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_sext_8i16_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm0
; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_sext_8i16_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovsxwd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_sext_8i16_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovsxwd (%rdi), %ymm0
; AVX512-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_8i16_to_8i32:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxwd (%eax), %xmm0
; X32-SSE41-NEXT:    pmovsxwd 8(%eax), %xmm1
; X32-SSE41-NEXT:    retl
entry:
  %X = load <8 x i16>, <8 x i16>* %ptr
  %Y = sext <8 x i16> %X to <8 x i32>
  ret <8 x i32> %Y
}

define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) {
; SSE2-LABEL: load_sext_2i32_to_2i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_2i32_to_2i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_2i32_to_2i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxdq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_sext_2i32_to_2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovsxdq (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_2i32_to_2i64:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxdq (%eax), %xmm0
; X32-SSE41-NEXT:    retl
entry:
  %X = load <2 x i32>, <2 x i32>* %ptr
  %Y = sext <2 x i32> %X to <2 x i64>
  ret <2 x i64> %Y
}

define <4 x i64> @load_sext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_sext_4i32_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_sext_4i32_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm0
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_sext_4i32_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxdq (%rdi), %xmm0
; SSE41-NEXT:    pmovsxdq 8(%rdi), %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_sext_4i32_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovsxdq (%rdi), %xmm0
; AVX1-NEXT:    vpmovsxdq 8(%rdi), %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_sext_4i32_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovsxdq (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_sext_4i32_to_4i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovsxdq (%rdi), %ymm0
; AVX512-NEXT:    retq
;
; X32-SSE41-LABEL: load_sext_4i32_to_4i64:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT:    pmovsxdq (%eax), %xmm0
; X32-SSE41-NEXT:    pmovsxdq 8(%eax), %xmm1
; X32-SSE41-NEXT:    retl
entry:
  %X = load <4 x i32>, <4 x i32>* %ptr
  %Y = sext <4 x i32> %X to <4 x i64>
  ret <4 x i64> %Y
}

define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_2i8_to_i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_2i8_to_i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    psraw $8, %xmm0
; SSSE3-NEXT:    movd %xmm0, %eax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_2i8_to_i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: sext_2i8_to_i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
;
; X32-SSE41-LABEL: sext_2i8_to_i32:
; X32-SSE41:       # BB#0: # %entry
; X32-SSE41-NEXT:    pushl %eax
; X32-SSE41-NEXT:  .Ltmp0:
; X32-SSE41-NEXT:    .cfi_def_cfa_offset 8
; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
; X32-SSE41-NEXT:    movd %xmm0, %eax
; X32-SSE41-NEXT:    popl %ecx
; X32-SSE41-NEXT:    retl
entry:
  %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %Ex = sext <2 x i8> %Shuf to <2 x i16>
  %Bc = bitcast <2 x i16> %Ex to i32
  ret i32 %Bc
}

define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
; SSE2-LABEL: sext_4i1_to_4i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_4i1_to_4i64:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pslld $31, %xmm0
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_4i1_to_4i64:
; SSE41:       # BB#0:
; SSE41-NEXT:    pslld $31, %xmm0
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovsxdq %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sext_4i1_to_4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_4i1_to_4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sext_4i1_to_4i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-SSE41-LABEL: sext_4i1_to_4i64:
; X32-SSE41:       # BB#0:
; X32-SSE41-NEXT:    pslld $31, %xmm0
; X32-SSE41-NEXT:    psrad $31, %xmm0
; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm1
; X32-SSE41-NEXT:    movdqa %xmm2, %xmm0
; X32-SSE41-NEXT:    retl
  %extmask = sext <4 x i1> %mask to <4 x i64>
  ret <4 x i64> %extmask
}

define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
; SSE2-LABEL: sext_4i8_to_4i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    pslld $24, %xmm0
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_4i8_to_4i64:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pslld $24, %xmm0
; SSSE3-NEXT:    psrad $24, %xmm0
; SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_4i8_to_4i64:
; SSE41:       # BB#0:
; SSE41-NEXT:    pslld $24, %xmm0
; SSE41-NEXT:    psrad $24, %xmm0
; SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovsxdq %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sext_4i8_to_4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_4i8_to_4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sext_4i8_to_4i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpslld $24, %xmm0, %xmm0
; AVX512-NEXT:    vpsrad $24, %xmm0, %xmm0
; AVX512-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-SSE41-LABEL: sext_4i8_to_4i64:
; X32-SSE41:       # BB#0:
; X32-SSE41-NEXT:    pslld $24, %xmm0
; X32-SSE41-NEXT:    psrad $24, %xmm0
; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm1
; X32-SSE41-NEXT:    movdqa %xmm2, %xmm0
; X32-SSE41-NEXT:    retl
  %extmask = sext <4 x i8> %mask to <4 x i64>
  ret <4 x i64> %extmask
}