; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW

define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pextrw $4, %xmm1, %eax
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm3, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm2, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %ecx, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movd %ecx, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movq %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    movq %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i32_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i64_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i32>
  %1 = trunc <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pextrw $4, %xmm1, %eax
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm3, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm2, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %ecx, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movd %ecx, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pextrw $4, %xmm0, %eax
; SSE41-NEXT:    pinsrw $1, %eax, %xmm0
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    pinsrw $2, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm1, %eax
; SSE41-NEXT:    pinsrw $3, %eax, %xmm0
; SSE41-NEXT:    movd %xmm2, %eax
; SSE41-NEXT:    pinsrw $4, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm2, %eax
; SSE41-NEXT:    pinsrw $5, %eax, %xmm0
; SSE41-NEXT:    movd %xmm3, %eax
; SSE41-NEXT:    pinsrw $6, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm3, %eax
; SSE41-NEXT:    pinsrw $7, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i16>
  %1 = trunc <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: trunc2x2i64_4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x2i64_4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x2i64_4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x2i64_4i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x2i64_4i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

define i64 @trunc2i64_i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64_i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2i64_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2i64_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32_8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc4i32_i64(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc4i32_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc4i32_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc4i32_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc4i32_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}

define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16_16i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc8i16_i64(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i16_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i16_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc8i16_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i16_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

define <16 x i8> @trunc16i64_16i8_const() {
; SSE-LABEL: trunc16i64_16i8_const:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc16i64_16i8_const:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i64_16i8_const:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    retq

entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}