; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW

; Truncates an <8 x i64> argument to <8 x i32> and returns the result.
define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

; Truncates an <8 x i64> argument to <8 x i16> and returns the result.
define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pextrw $4, %xmm1, %eax
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm3, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm2, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %ecx, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movd %ecx, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

; Truncates an <8 x i64> argument to <8 x i8> and stores the result.
; The store address is undef, so the address register seen in the checks is
; whatever the backend happens to pick.
define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

; Truncates an <8 x i32> argument to <8 x i16> and returns the result.
define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

; Truncates an <8 x i32> argument to <8 x i8> and stores the result to an
; undef address.
define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movq %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    movq %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

; Truncates a <16 x i32> argument to <16 x i8> and stores the result to an
; undef address.
define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i32_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

; Truncates two <4 x i64> arguments to <4 x i32> each and concatenates them
; into one <8 x i32> result.
define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i64_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i32>
  %1 = trunc <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

; Truncates two <4 x i64> arguments to <4 x i16> each and concatenates them
; into one <8 x i16> result.
define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pextrw $4, %xmm1, %eax
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm3, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm2, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %ecx, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movd %ecx, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pextrw $4, %xmm0, %eax
; SSE41-NEXT:    pinsrw $1, %eax, %xmm0
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    pinsrw $2, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm1, %eax
; SSE41-NEXT:    pinsrw $3, %eax, %xmm0
; SSE41-NEXT:    movd %xmm2, %eax
; SSE41-NEXT:    pinsrw $4, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm2, %eax
; SSE41-NEXT:    pinsrw $5, %eax, %xmm0
; SSE41-NEXT:    movd %xmm3, %eax
; SSE41-NEXT:    pinsrw $6, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm3, %eax
; SSE41-NEXT:    pinsrw $7, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i16>
  %1 = trunc <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; Truncates two <2 x i64> arguments to <2 x i32> each and concatenates them
; into one <4 x i32> result.
define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: trunc2x2i64_4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x2i64_4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x2i64_4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x2i64_4i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x2i64_4i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

; Truncates a <2 x i64> argument to <2 x i32> and returns it bitcast to a
; scalar i64.
define i64 @trunc2i64_i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64_i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2i64_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2i64_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

; Truncates two <4 x i32> arguments to <4 x i16> each and concatenates them
; into one <8 x i16> result.
define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32_8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; Truncates a <4 x i32> argument to <4 x i16> and returns it bitcast to a
; scalar i64.
define i64 @trunc4i32_i64(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc4i32_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc4i32_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc4i32_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc4i32_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}

; Truncates two <8 x i16> arguments to <8 x i8> each and concatenates them
; into one <16 x i8> result.
define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16_16i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; Truncates an <8 x i16> argument to <8 x i8> and returns it bitcast to a
; scalar i64.
define i64 @trunc8i16_i64(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i16_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i16_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc8i16_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i16_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

; Truncates a zeroinitializer <16 x i64> constant to <16 x i8> and shuffles the
; result with itself; every check expects a single register-zeroing xor.
define <16 x i8> @trunc16i64_16i8_const() {
; SSE-LABEL: trunc16i64_16i8_const:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc16i64_16i8_const:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i64_16i8_const:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    retq

entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}