; NOTE(review): CHECK lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than hand-editing the assertions.
; Each test clamps vXi64 input to [INT32_MIN, INT32_MAX] via icmp+select, then truncates to vXi32.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL 12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=SKX 13 14; 15; Signed saturation truncation to vXi32 16; 17 18define <2 x i32> @trunc_ssat_v2i64_v2i32(<2 x i64> %a0) { 19; SSE2-LABEL: trunc_ssat_v2i64_v2i32: 20; SSE2: # %bb.0: 21; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 22; SSE2-NEXT: movdqa %xmm0, %xmm2 23; SSE2-NEXT: pxor %xmm1, %xmm2 24; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295] 25; SSE2-NEXT: movdqa %xmm3, %xmm4 26; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 27; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 28; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 29; SSE2-NEXT: pshufd {{.*#+}} 
xmm2 = xmm2[1,1,3,3] 30; SSE2-NEXT: pand %xmm5, %xmm2 31; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 32; SSE2-NEXT: por %xmm2, %xmm3 33; SSE2-NEXT: pand %xmm3, %xmm0 34; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3 35; SSE2-NEXT: por %xmm0, %xmm3 36; SSE2-NEXT: pxor %xmm3, %xmm1 37; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320] 38; SSE2-NEXT: movdqa %xmm1, %xmm2 39; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 40; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 41; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 42; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 43; SSE2-NEXT: pand %xmm4, %xmm0 44; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 45; SSE2-NEXT: por %xmm0, %xmm1 46; SSE2-NEXT: pand %xmm1, %xmm3 47; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1 48; SSE2-NEXT: por %xmm3, %xmm1 49; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 50; SSE2-NEXT: retq 51; 52; SSSE3-LABEL: trunc_ssat_v2i64_v2i32: 53; SSSE3: # %bb.0: 54; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 55; SSSE3-NEXT: movdqa %xmm0, %xmm2 56; SSSE3-NEXT: pxor %xmm1, %xmm2 57; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295] 58; SSSE3-NEXT: movdqa %xmm3, %xmm4 59; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 60; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 61; SSSE3-NEXT: pcmpeqd %xmm3, %xmm2 62; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 63; SSSE3-NEXT: pand %xmm5, %xmm2 64; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 65; SSSE3-NEXT: por %xmm2, %xmm3 66; SSSE3-NEXT: pand %xmm3, %xmm0 67; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm3 68; SSSE3-NEXT: por %xmm0, %xmm3 69; SSSE3-NEXT: pxor %xmm3, %xmm1 70; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320] 71; SSSE3-NEXT: movdqa %xmm1, %xmm2 72; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 73; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 74; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 75; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 76; SSSE3-NEXT: pand %xmm4, %xmm0 77; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 78; 
SSSE3-NEXT: por %xmm0, %xmm1 79; SSSE3-NEXT: pand %xmm1, %xmm3 80; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm1 81; SSSE3-NEXT: por %xmm3, %xmm1 82; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 83; SSSE3-NEXT: retq 84; 85; SSE41-LABEL: trunc_ssat_v2i64_v2i32: 86; SSE41: # %bb.0: 87; SSE41-NEXT: movdqa %xmm0, %xmm1 88; SSE41-NEXT: movapd {{.*#+}} xmm2 = [2147483647,2147483647] 89; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 90; SSE41-NEXT: pxor %xmm3, %xmm0 91; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295] 92; SSE41-NEXT: movdqa %xmm4, %xmm5 93; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 94; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 95; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 96; SSE41-NEXT: pand %xmm5, %xmm0 97; SSE41-NEXT: por %xmm4, %xmm0 98; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 99; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 100; SSE41-NEXT: pxor %xmm2, %xmm3 101; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320] 102; SSE41-NEXT: movdqa %xmm3, %xmm4 103; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 104; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 105; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 106; SSE41-NEXT: pand %xmm4, %xmm0 107; SSE41-NEXT: por %xmm3, %xmm0 108; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 109; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 110; SSE41-NEXT: retq 111; 112; AVX-LABEL: trunc_ssat_v2i64_v2i32: 113; AVX: # %bb.0: 114; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] 115; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 116; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 117; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 118; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 119; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 120; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] 121; AVX-NEXT: retq 122; 123; AVX512F-LABEL: trunc_ssat_v2i64_v2i32: 124; AVX512F: # %bb.0: 125; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 126; AVX512F-NEXT: 
vpmovsqd %zmm0, %ymm0 127; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 128; AVX512F-NEXT: vzeroupper 129; AVX512F-NEXT: retq 130; 131; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32: 132; AVX512VL: # %bb.0: 133; AVX512VL-NEXT: vpmovsqd %xmm0, %xmm0 134; AVX512VL-NEXT: retq 135; 136; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32: 137; AVX512BW: # %bb.0: 138; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 139; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 140; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 141; AVX512BW-NEXT: vzeroupper 142; AVX512BW-NEXT: retq 143; 144; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32: 145; AVX512BWVL: # %bb.0: 146; AVX512BWVL-NEXT: vpmovsqd %xmm0, %xmm0 147; AVX512BWVL-NEXT: retq 148; 149; SKX-LABEL: trunc_ssat_v2i64_v2i32: 150; SKX: # %bb.0: 151; SKX-NEXT: vpmovsqd %xmm0, %xmm0 152; SKX-NEXT: retq 153 %1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647> 154 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647> 155 %3 = icmp sgt <2 x i64> %2, <i64 -2147483648, i64 -2147483648> 156 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -2147483648, i64 -2147483648> 157 %5 = trunc <2 x i64> %4 to <2 x i32> 158 ret <2 x i32> %5 159} 160 
; Store form: same clamp+trunc pattern, but the <2 x i32> result is written through %p1 instead of returned.
161define void @trunc_ssat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) { 162; SSE2-LABEL: trunc_ssat_v2i64_v2i32_store: 163; SSE2: # %bb.0: 164; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 165; SSE2-NEXT: movdqa %xmm0, %xmm2 166; SSE2-NEXT: pxor %xmm1, %xmm2 167; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295] 168; SSE2-NEXT: movdqa %xmm3, %xmm4 169; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 170; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 171; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 172; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 173; SSE2-NEXT: pand %xmm5, %xmm2 174; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 175; SSE2-NEXT: por %xmm2, %xmm3 176; SSE2-NEXT: pand %xmm3, %xmm0 177; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3 178; SSE2-NEXT: por 
%xmm0, %xmm3 179; SSE2-NEXT: pxor %xmm3, %xmm1 180; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320] 181; SSE2-NEXT: movdqa %xmm1, %xmm2 182; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 183; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 184; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 185; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 186; SSE2-NEXT: pand %xmm4, %xmm0 187; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 188; SSE2-NEXT: por %xmm0, %xmm1 189; SSE2-NEXT: pand %xmm1, %xmm3 190; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1 191; SSE2-NEXT: por %xmm3, %xmm1 192; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 193; SSE2-NEXT: movq %xmm0, (%rdi) 194; SSE2-NEXT: retq 195; 196; SSSE3-LABEL: trunc_ssat_v2i64_v2i32_store: 197; SSSE3: # %bb.0: 198; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 199; SSSE3-NEXT: movdqa %xmm0, %xmm2 200; SSSE3-NEXT: pxor %xmm1, %xmm2 201; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295] 202; SSSE3-NEXT: movdqa %xmm3, %xmm4 203; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 204; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 205; SSSE3-NEXT: pcmpeqd %xmm3, %xmm2 206; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 207; SSSE3-NEXT: pand %xmm5, %xmm2 208; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 209; SSSE3-NEXT: por %xmm2, %xmm3 210; SSSE3-NEXT: pand %xmm3, %xmm0 211; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm3 212; SSSE3-NEXT: por %xmm0, %xmm3 213; SSSE3-NEXT: pxor %xmm3, %xmm1 214; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320] 215; SSSE3-NEXT: movdqa %xmm1, %xmm2 216; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 217; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 218; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 219; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 220; SSSE3-NEXT: pand %xmm4, %xmm0 221; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 222; SSSE3-NEXT: por %xmm0, %xmm1 223; SSSE3-NEXT: pand %xmm1, %xmm3 224; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm1 225; SSSE3-NEXT: por %xmm3, %xmm1 226; 
SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 227; SSSE3-NEXT: movq %xmm0, (%rdi) 228; SSSE3-NEXT: retq 229; 230; SSE41-LABEL: trunc_ssat_v2i64_v2i32_store: 231; SSE41: # %bb.0: 232; SSE41-NEXT: movdqa %xmm0, %xmm1 233; SSE41-NEXT: movapd {{.*#+}} xmm2 = [2147483647,2147483647] 234; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 235; SSE41-NEXT: pxor %xmm3, %xmm0 236; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295] 237; SSE41-NEXT: movdqa %xmm4, %xmm5 238; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 239; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 240; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 241; SSE41-NEXT: pand %xmm5, %xmm0 242; SSE41-NEXT: por %xmm4, %xmm0 243; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 244; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 245; SSE41-NEXT: pxor %xmm2, %xmm3 246; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744069414584320,18446744069414584320] 247; SSE41-NEXT: movdqa %xmm3, %xmm4 248; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 249; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 250; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 251; SSE41-NEXT: pand %xmm4, %xmm0 252; SSE41-NEXT: por %xmm3, %xmm0 253; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 254; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 255; SSE41-NEXT: movq %xmm0, (%rdi) 256; SSE41-NEXT: retq 257; 258; AVX-LABEL: trunc_ssat_v2i64_v2i32_store: 259; AVX: # %bb.0: 260; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] 261; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 262; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 263; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 264; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 265; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 266; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] 267; AVX-NEXT: vmovlpd %xmm0, (%rdi) 268; AVX-NEXT: retq 269; 270; AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store: 271; AVX512F: # %bb.0: 272; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 273; 
AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 274; AVX512F-NEXT: vmovq %xmm0, (%rdi) 275; AVX512F-NEXT: vzeroupper 276; AVX512F-NEXT: retq 277; 278; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32_store: 279; AVX512VL: # %bb.0: 280; AVX512VL-NEXT: vpmovsqd %xmm0, (%rdi) 281; AVX512VL-NEXT: retq 282; 283; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32_store: 284; AVX512BW: # %bb.0: 285; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 286; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 287; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 288; AVX512BW-NEXT: vzeroupper 289; AVX512BW-NEXT: retq 290; 291; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32_store: 292; AVX512BWVL: # %bb.0: 293; AVX512BWVL-NEXT: vpmovsqd %xmm0, (%rdi) 294; AVX512BWVL-NEXT: retq 295; 296; SKX-LABEL: trunc_ssat_v2i64_v2i32_store: 297; SKX: # %bb.0: 298; SKX-NEXT: vpmovsqd %xmm0, (%rdi) 299; SKX-NEXT: retq 300 %1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647> 301 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647> 302 %3 = icmp sgt <2 x i64> %2, <i64 -2147483648, i64 -2147483648> 303 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -2147483648, i64 -2147483648> 304 %5 = trunc <2 x i64> %4 to <2 x i32> 305 store <2 x i32> %5, <2 x i32>* %p1 306 ret void 307} 308 
; Widened variant: clamps four i64 lanes to the signed i32 range and truncates to <4 x i32>.
309define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) { 310; SSE2-LABEL: trunc_ssat_v4i64_v4i32: 311; SSE2: # %bb.0: 312; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483647,2147483647] 313; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 314; SSE2-NEXT: movdqa %xmm0, %xmm3 315; SSE2-NEXT: pxor %xmm2, %xmm3 316; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4294967295,4294967295] 317; SSE2-NEXT: movdqa %xmm5, %xmm6 318; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 319; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 320; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 321; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 322; SSE2-NEXT: pand %xmm7, %xmm4 323; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 324; SSE2-NEXT: por %xmm4, %xmm3 325; SSE2-NEXT: pand %xmm3, 
%xmm0 326; SSE2-NEXT: pandn %xmm8, %xmm3 327; SSE2-NEXT: por %xmm0, %xmm3 328; SSE2-NEXT: movdqa %xmm1, %xmm0 329; SSE2-NEXT: pxor %xmm2, %xmm0 330; SSE2-NEXT: movdqa %xmm5, %xmm4 331; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 332; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 333; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 334; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 335; SSE2-NEXT: pand %xmm6, %xmm0 336; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 337; SSE2-NEXT: por %xmm0, %xmm4 338; SSE2-NEXT: pand %xmm4, %xmm1 339; SSE2-NEXT: pandn %xmm8, %xmm4 340; SSE2-NEXT: por %xmm1, %xmm4 341; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 342; SSE2-NEXT: movdqa %xmm4, %xmm0 343; SSE2-NEXT: pxor %xmm2, %xmm0 344; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744069414584320,18446744069414584320] 345; SSE2-NEXT: movdqa %xmm0, %xmm6 346; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 347; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 348; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 349; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 350; SSE2-NEXT: pand %xmm7, %xmm0 351; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 352; SSE2-NEXT: por %xmm0, %xmm6 353; SSE2-NEXT: pand %xmm6, %xmm4 354; SSE2-NEXT: pandn %xmm1, %xmm6 355; SSE2-NEXT: por %xmm4, %xmm6 356; SSE2-NEXT: pxor %xmm3, %xmm2 357; SSE2-NEXT: movdqa %xmm2, %xmm0 358; SSE2-NEXT: pcmpgtd %xmm5, %xmm0 359; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] 360; SSE2-NEXT: pcmpeqd %xmm5, %xmm2 361; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 362; SSE2-NEXT: pand %xmm4, %xmm2 363; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 364; SSE2-NEXT: por %xmm2, %xmm0 365; SSE2-NEXT: pand %xmm0, %xmm3 366; SSE2-NEXT: pandn %xmm1, %xmm0 367; SSE2-NEXT: por %xmm3, %xmm0 368; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2] 369; SSE2-NEXT: retq 370; 371; SSSE3-LABEL: trunc_ssat_v4i64_v4i32: 372; SSSE3: # %bb.0: 373; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [2147483647,2147483647] 374; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = 
[2147483648,2147483648] 375; SSSE3-NEXT: movdqa %xmm0, %xmm3 376; SSSE3-NEXT: pxor %xmm2, %xmm3 377; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [4294967295,4294967295] 378; SSSE3-NEXT: movdqa %xmm5, %xmm6 379; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 380; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 381; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 382; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 383; SSSE3-NEXT: pand %xmm7, %xmm4 384; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 385; SSSE3-NEXT: por %xmm4, %xmm3 386; SSSE3-NEXT: pand %xmm3, %xmm0 387; SSSE3-NEXT: pandn %xmm8, %xmm3 388; SSSE3-NEXT: por %xmm0, %xmm3 389; SSSE3-NEXT: movdqa %xmm1, %xmm0 390; SSSE3-NEXT: pxor %xmm2, %xmm0 391; SSSE3-NEXT: movdqa %xmm5, %xmm4 392; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 393; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 394; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0 395; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 396; SSSE3-NEXT: pand %xmm6, %xmm0 397; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 398; SSSE3-NEXT: por %xmm0, %xmm4 399; SSSE3-NEXT: pand %xmm4, %xmm1 400; SSSE3-NEXT: pandn %xmm8, %xmm4 401; SSSE3-NEXT: por %xmm1, %xmm4 402; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 403; SSSE3-NEXT: movdqa %xmm4, %xmm0 404; SSSE3-NEXT: pxor %xmm2, %xmm0 405; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744069414584320,18446744069414584320] 406; SSSE3-NEXT: movdqa %xmm0, %xmm6 407; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 408; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 409; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0 410; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 411; SSSE3-NEXT: pand %xmm7, %xmm0 412; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 413; SSSE3-NEXT: por %xmm0, %xmm6 414; SSSE3-NEXT: pand %xmm6, %xmm4 415; SSSE3-NEXT: pandn %xmm1, %xmm6 416; SSSE3-NEXT: por %xmm4, %xmm6 417; SSSE3-NEXT: pxor %xmm3, %xmm2 418; SSSE3-NEXT: movdqa %xmm2, %xmm0 419; SSSE3-NEXT: pcmpgtd %xmm5, %xmm0 420; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] 421; SSSE3-NEXT: 
pcmpeqd %xmm5, %xmm2 422; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 423; SSSE3-NEXT: pand %xmm4, %xmm2 424; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 425; SSSE3-NEXT: por %xmm2, %xmm0 426; SSSE3-NEXT: pand %xmm0, %xmm3 427; SSSE3-NEXT: pandn %xmm1, %xmm0 428; SSSE3-NEXT: por %xmm3, %xmm0 429; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2] 430; SSSE3-NEXT: retq 431; 432; SSE41-LABEL: trunc_ssat_v4i64_v4i32: 433; SSE41: # %bb.0: 434; SSE41-NEXT: movdqa %xmm0, %xmm2 435; SSE41-NEXT: movapd {{.*#+}} xmm4 = [2147483647,2147483647] 436; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 437; SSE41-NEXT: pxor %xmm3, %xmm0 438; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [4294967295,4294967295] 439; SSE41-NEXT: movdqa %xmm6, %xmm5 440; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 441; SSE41-NEXT: movdqa %xmm6, %xmm7 442; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 443; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 444; SSE41-NEXT: pand %xmm5, %xmm0 445; SSE41-NEXT: por %xmm7, %xmm0 446; SSE41-NEXT: movapd %xmm4, %xmm5 447; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 448; SSE41-NEXT: movdqa %xmm1, %xmm0 449; SSE41-NEXT: pxor %xmm3, %xmm0 450; SSE41-NEXT: movdqa %xmm6, %xmm2 451; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 452; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 453; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 454; SSE41-NEXT: pand %xmm2, %xmm0 455; SSE41-NEXT: por %xmm6, %xmm0 456; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 457; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 458; SSE41-NEXT: movapd %xmm4, %xmm2 459; SSE41-NEXT: xorpd %xmm3, %xmm2 460; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744069414584320,18446744069414584320] 461; SSE41-NEXT: movapd %xmm2, %xmm7 462; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 463; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 464; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 465; SSE41-NEXT: pand %xmm7, %xmm0 466; SSE41-NEXT: por %xmm2, %xmm0 467; SSE41-NEXT: movapd %xmm1, %xmm2 468; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 469; 
SSE41-NEXT: xorpd %xmm5, %xmm3 470; SSE41-NEXT: movapd %xmm3, %xmm4 471; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 472; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 473; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 474; SSE41-NEXT: pand %xmm4, %xmm0 475; SSE41-NEXT: por %xmm3, %xmm0 476; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 477; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] 478; SSE41-NEXT: movaps %xmm1, %xmm0 479; SSE41-NEXT: retq 480; 481; AVX1-LABEL: trunc_ssat_v4i64_v4i32: 482; AVX1: # %bb.0: 483; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 484; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483647,2147483647] 485; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 486; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 487; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 488; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744071562067968,18446744071562067968] 489; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 490; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 491; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 492; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 493; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 494; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 495; AVX1-NEXT: vzeroupper 496; AVX1-NEXT: retq 497; 498; AVX2-SLOW-LABEL: trunc_ssat_v4i64_v4i32: 499; AVX2-SLOW: # %bb.0: 500; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647] 501; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 502; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 503; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 504; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 505; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 506; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 507; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 508; AVX2-SLOW-NEXT: vzeroupper 509; AVX2-SLOW-NEXT: retq 510; 511; AVX2-FAST-LABEL: trunc_ssat_v4i64_v4i32: 512; AVX2-FAST: # %bb.0: 513; AVX2-FAST-NEXT: vpbroadcastq 
{{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647] 514; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 515; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 516; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 517; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 518; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 519; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u> 520; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 521; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 522; AVX2-FAST-NEXT: vzeroupper 523; AVX2-FAST-NEXT: retq 524; 525; AVX512F-LABEL: trunc_ssat_v4i64_v4i32: 526; AVX512F: # %bb.0: 527; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 528; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 529; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 530; AVX512F-NEXT: vzeroupper 531; AVX512F-NEXT: retq 532; 533; AVX512VL-LABEL: trunc_ssat_v4i64_v4i32: 534; AVX512VL: # %bb.0: 535; AVX512VL-NEXT: vpmovsqd %ymm0, %xmm0 536; AVX512VL-NEXT: vzeroupper 537; AVX512VL-NEXT: retq 538; 539; AVX512BW-LABEL: trunc_ssat_v4i64_v4i32: 540; AVX512BW: # %bb.0: 541; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 542; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 543; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 544; AVX512BW-NEXT: vzeroupper 545; AVX512BW-NEXT: retq 546; 547; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i32: 548; AVX512BWVL: # %bb.0: 549; AVX512BWVL-NEXT: vpmovsqd %ymm0, %xmm0 550; AVX512BWVL-NEXT: vzeroupper 551; AVX512BWVL-NEXT: retq 552; 553; SKX-LABEL: trunc_ssat_v4i64_v4i32: 554; SKX: # %bb.0: 555; SKX-NEXT: vpmovsqd %ymm0, %xmm0 556; SKX-NEXT: vzeroupper 557; SKX-NEXT: retq 558 %1 = icmp slt <4 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> 559 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> 560 %3 = icmp sgt <4 x i64> %2, <i64 
-2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648> 561 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648> 562 %5 = trunc <4 x i64> %4 to <4 x i32> 563 ret <4 x i32> %5 564} 565 566 
; Memory-source variant: loads <8 x i64> from %p0, clamps, truncates to <8 x i32>.
; NOTE(review): "min-legal-vector-width"="256" presumably constrains codegen to 256-bit vectors - confirm against the AVX512 CHECK lines when regenerating.
567define <8 x i32> @trunc_ssat_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-width"="256" { 568; SSE2-LABEL: trunc_ssat_v8i64_v8i32: 569; SSE2: # %bb.0: 570; SSE2-NEXT: movdqa (%rdi), %xmm3 571; SSE2-NEXT: movdqa 16(%rdi), %xmm5 572; SSE2-NEXT: movdqa 32(%rdi), %xmm7 573; SSE2-NEXT: movdqa 48(%rdi), %xmm9 574; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483647,2147483647] 575; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] 576; SSE2-NEXT: movdqa %xmm3, %xmm2 577; SSE2-NEXT: pxor %xmm0, %xmm2 578; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [4294967295,4294967295] 579; SSE2-NEXT: movdqa %xmm10, %xmm6 580; SSE2-NEXT: pcmpgtd %xmm2, %xmm6 581; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2] 582; SSE2-NEXT: pcmpeqd %xmm10, %xmm2 583; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 584; SSE2-NEXT: pand %xmm1, %xmm4 585; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 586; SSE2-NEXT: por %xmm4, %xmm2 587; SSE2-NEXT: pand %xmm2, %xmm3 588; SSE2-NEXT: pandn %xmm8, %xmm2 589; SSE2-NEXT: por %xmm3, %xmm2 590; SSE2-NEXT: movdqa %xmm5, %xmm1 591; SSE2-NEXT: pxor %xmm0, %xmm1 592; SSE2-NEXT: movdqa %xmm10, %xmm3 593; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 594; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 595; SSE2-NEXT: pcmpeqd %xmm10, %xmm1 596; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 597; SSE2-NEXT: pand %xmm4, %xmm1 598; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 599; SSE2-NEXT: por %xmm1, %xmm3 600; SSE2-NEXT: pand %xmm3, %xmm5 601; SSE2-NEXT: pandn %xmm8, %xmm3 602; SSE2-NEXT: por %xmm5, %xmm3 603; SSE2-NEXT: movdqa %xmm7, %xmm1 604; SSE2-NEXT: pxor %xmm0, %xmm1 605; SSE2-NEXT: movdqa %xmm10, %xmm4 606; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 607; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 608; SSE2-NEXT: pcmpeqd 
%xmm10, %xmm1 609; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 610; SSE2-NEXT: pand %xmm5, %xmm1 611; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 612; SSE2-NEXT: por %xmm1, %xmm5 613; SSE2-NEXT: pand %xmm5, %xmm7 614; SSE2-NEXT: pandn %xmm8, %xmm5 615; SSE2-NEXT: por %xmm7, %xmm5 616; SSE2-NEXT: movdqa %xmm9, %xmm1 617; SSE2-NEXT: pxor %xmm0, %xmm1 618; SSE2-NEXT: movdqa %xmm10, %xmm4 619; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 620; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 621; SSE2-NEXT: pcmpeqd %xmm10, %xmm1 622; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 623; SSE2-NEXT: pand %xmm6, %xmm1 624; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3] 625; SSE2-NEXT: por %xmm1, %xmm7 626; SSE2-NEXT: pand %xmm7, %xmm9 627; SSE2-NEXT: pandn %xmm8, %xmm7 628; SSE2-NEXT: por %xmm9, %xmm7 629; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744071562067968,18446744071562067968] 630; SSE2-NEXT: movdqa %xmm7, %xmm1 631; SSE2-NEXT: pxor %xmm0, %xmm1 632; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [18446744069414584320,18446744069414584320] 633; SSE2-NEXT: movdqa %xmm1, %xmm4 634; SSE2-NEXT: pcmpgtd %xmm9, %xmm4 635; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 636; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 637; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 638; SSE2-NEXT: pand %xmm6, %xmm1 639; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 640; SSE2-NEXT: por %xmm1, %xmm4 641; SSE2-NEXT: pand %xmm4, %xmm7 642; SSE2-NEXT: pandn %xmm8, %xmm4 643; SSE2-NEXT: por %xmm7, %xmm4 644; SSE2-NEXT: movdqa %xmm5, %xmm1 645; SSE2-NEXT: pxor %xmm0, %xmm1 646; SSE2-NEXT: movdqa %xmm1, %xmm6 647; SSE2-NEXT: pcmpgtd %xmm9, %xmm6 648; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] 649; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 650; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3] 651; SSE2-NEXT: pand %xmm10, %xmm7 652; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3] 653; SSE2-NEXT: por %xmm7, %xmm1 654; SSE2-NEXT: pand %xmm1, %xmm5 655; SSE2-NEXT: pandn %xmm8, %xmm1 656; SSE2-NEXT: por %xmm5, %xmm1 657; SSE2-NEXT: 
shufps {{.*#+}} xmm1 = xmm1[0,2],xmm4[0,2] 658; SSE2-NEXT: movdqa %xmm3, %xmm4 659; SSE2-NEXT: pxor %xmm0, %xmm4 660; SSE2-NEXT: movdqa %xmm4, %xmm5 661; SSE2-NEXT: pcmpgtd %xmm9, %xmm5 662; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 663; SSE2-NEXT: pcmpeqd %xmm9, %xmm4 664; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 665; SSE2-NEXT: pand %xmm6, %xmm4 666; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 667; SSE2-NEXT: por %xmm4, %xmm5 668; SSE2-NEXT: pand %xmm5, %xmm3 669; SSE2-NEXT: pandn %xmm8, %xmm5 670; SSE2-NEXT: por %xmm3, %xmm5 671; SSE2-NEXT: pxor %xmm2, %xmm0 672; SSE2-NEXT: movdqa %xmm0, %xmm3 673; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 674; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 675; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 676; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] 677; SSE2-NEXT: pand %xmm4, %xmm6 678; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 679; SSE2-NEXT: por %xmm6, %xmm0 680; SSE2-NEXT: pand %xmm0, %xmm2 681; SSE2-NEXT: pandn %xmm8, %xmm0 682; SSE2-NEXT: por %xmm2, %xmm0 683; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2] 684; SSE2-NEXT: retq 685; 686; SSSE3-LABEL: trunc_ssat_v8i64_v8i32: 687; SSSE3: # %bb.0: 688; SSSE3-NEXT: movdqa (%rdi), %xmm3 689; SSSE3-NEXT: movdqa 16(%rdi), %xmm5 690; SSSE3-NEXT: movdqa 32(%rdi), %xmm7 691; SSSE3-NEXT: movdqa 48(%rdi), %xmm9 692; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [2147483647,2147483647] 693; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] 694; SSSE3-NEXT: movdqa %xmm3, %xmm2 695; SSSE3-NEXT: pxor %xmm0, %xmm2 696; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [4294967295,4294967295] 697; SSSE3-NEXT: movdqa %xmm10, %xmm6 698; SSSE3-NEXT: pcmpgtd %xmm2, %xmm6 699; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2] 700; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2 701; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 702; SSSE3-NEXT: pand %xmm1, %xmm4 703; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 704; SSSE3-NEXT: por %xmm4, %xmm2 705; SSSE3-NEXT: pand %xmm2, %xmm3 706; SSSE3-NEXT: pandn 
%xmm8, %xmm2 707; SSSE3-NEXT: por %xmm3, %xmm2 708; SSSE3-NEXT: movdqa %xmm5, %xmm1 709; SSSE3-NEXT: pxor %xmm0, %xmm1 710; SSSE3-NEXT: movdqa %xmm10, %xmm3 711; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 712; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 713; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1 714; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 715; SSSE3-NEXT: pand %xmm4, %xmm1 716; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 717; SSSE3-NEXT: por %xmm1, %xmm3 718; SSSE3-NEXT: pand %xmm3, %xmm5 719; SSSE3-NEXT: pandn %xmm8, %xmm3 720; SSSE3-NEXT: por %xmm5, %xmm3 721; SSSE3-NEXT: movdqa %xmm7, %xmm1 722; SSSE3-NEXT: pxor %xmm0, %xmm1 723; SSSE3-NEXT: movdqa %xmm10, %xmm4 724; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 725; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 726; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1 727; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 728; SSSE3-NEXT: pand %xmm5, %xmm1 729; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 730; SSSE3-NEXT: por %xmm1, %xmm5 731; SSSE3-NEXT: pand %xmm5, %xmm7 732; SSSE3-NEXT: pandn %xmm8, %xmm5 733; SSSE3-NEXT: por %xmm7, %xmm5 734; SSSE3-NEXT: movdqa %xmm9, %xmm1 735; SSSE3-NEXT: pxor %xmm0, %xmm1 736; SSSE3-NEXT: movdqa %xmm10, %xmm4 737; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 738; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 739; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1 740; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 741; SSSE3-NEXT: pand %xmm6, %xmm1 742; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3] 743; SSSE3-NEXT: por %xmm1, %xmm7 744; SSSE3-NEXT: pand %xmm7, %xmm9 745; SSSE3-NEXT: pandn %xmm8, %xmm7 746; SSSE3-NEXT: por %xmm9, %xmm7 747; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744071562067968,18446744071562067968] 748; SSSE3-NEXT: movdqa %xmm7, %xmm1 749; SSSE3-NEXT: pxor %xmm0, %xmm1 750; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [18446744069414584320,18446744069414584320] 751; SSSE3-NEXT: movdqa %xmm1, %xmm4 752; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4 753; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 754; 
SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 755; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 756; SSSE3-NEXT: pand %xmm6, %xmm1 757; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 758; SSSE3-NEXT: por %xmm1, %xmm4 759; SSSE3-NEXT: pand %xmm4, %xmm7 760; SSSE3-NEXT: pandn %xmm8, %xmm4 761; SSSE3-NEXT: por %xmm7, %xmm4 762; SSSE3-NEXT: movdqa %xmm5, %xmm1 763; SSSE3-NEXT: pxor %xmm0, %xmm1 764; SSSE3-NEXT: movdqa %xmm1, %xmm6 765; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6 766; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] 767; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 768; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3] 769; SSSE3-NEXT: pand %xmm10, %xmm7 770; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3] 771; SSSE3-NEXT: por %xmm7, %xmm1 772; SSSE3-NEXT: pand %xmm1, %xmm5 773; SSSE3-NEXT: pandn %xmm8, %xmm1 774; SSSE3-NEXT: por %xmm5, %xmm1 775; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm4[0,2] 776; SSSE3-NEXT: movdqa %xmm3, %xmm4 777; SSSE3-NEXT: pxor %xmm0, %xmm4 778; SSSE3-NEXT: movdqa %xmm4, %xmm5 779; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5 780; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 781; SSSE3-NEXT: pcmpeqd %xmm9, %xmm4 782; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 783; SSSE3-NEXT: pand %xmm6, %xmm4 784; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 785; SSSE3-NEXT: por %xmm4, %xmm5 786; SSSE3-NEXT: pand %xmm5, %xmm3 787; SSSE3-NEXT: pandn %xmm8, %xmm5 788; SSSE3-NEXT: por %xmm3, %xmm5 789; SSSE3-NEXT: pxor %xmm2, %xmm0 790; SSSE3-NEXT: movdqa %xmm0, %xmm3 791; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 792; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 793; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 794; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] 795; SSSE3-NEXT: pand %xmm4, %xmm6 796; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 797; SSSE3-NEXT: por %xmm6, %xmm0 798; SSSE3-NEXT: pand %xmm0, %xmm2 799; SSSE3-NEXT: pandn %xmm8, %xmm0 800; SSSE3-NEXT: por %xmm2, %xmm0 801; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2] 802; SSSE3-NEXT: retq 803; 804; 
SSE41-LABEL: trunc_ssat_v8i64_v8i32: 805; SSE41: # %bb.0: 806; SSE41-NEXT: movdqa (%rdi), %xmm5 807; SSE41-NEXT: movdqa 16(%rdi), %xmm4 808; SSE41-NEXT: movdqa 32(%rdi), %xmm10 809; SSE41-NEXT: movdqa 48(%rdi), %xmm9 810; SSE41-NEXT: movapd {{.*#+}} xmm1 = [2147483647,2147483647] 811; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 812; SSE41-NEXT: movdqa %xmm5, %xmm0 813; SSE41-NEXT: pxor %xmm3, %xmm0 814; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295] 815; SSE41-NEXT: movdqa %xmm2, %xmm7 816; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 817; SSE41-NEXT: movdqa %xmm2, %xmm6 818; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 819; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 820; SSE41-NEXT: pand %xmm7, %xmm0 821; SSE41-NEXT: por %xmm6, %xmm0 822; SSE41-NEXT: movapd %xmm1, %xmm8 823; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm8 824; SSE41-NEXT: movdqa %xmm4, %xmm0 825; SSE41-NEXT: pxor %xmm3, %xmm0 826; SSE41-NEXT: movdqa %xmm2, %xmm5 827; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 828; SSE41-NEXT: movdqa %xmm2, %xmm6 829; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 830; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 831; SSE41-NEXT: pand %xmm5, %xmm0 832; SSE41-NEXT: por %xmm6, %xmm0 833; SSE41-NEXT: movapd %xmm1, %xmm11 834; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm11 835; SSE41-NEXT: movdqa %xmm10, %xmm0 836; SSE41-NEXT: pxor %xmm3, %xmm0 837; SSE41-NEXT: movdqa %xmm2, %xmm4 838; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 839; SSE41-NEXT: movdqa %xmm2, %xmm6 840; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 841; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 842; SSE41-NEXT: pand %xmm4, %xmm0 843; SSE41-NEXT: por %xmm6, %xmm0 844; SSE41-NEXT: movapd %xmm1, %xmm4 845; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm4 846; SSE41-NEXT: movdqa %xmm9, %xmm0 847; SSE41-NEXT: pxor %xmm3, %xmm0 848; SSE41-NEXT: movdqa %xmm2, %xmm6 849; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 850; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 851; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 852; SSE41-NEXT: pand %xmm6, %xmm0 853; SSE41-NEXT: por %xmm2, 
%xmm0 854; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm1 855; SSE41-NEXT: movapd {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968] 856; SSE41-NEXT: movapd %xmm1, %xmm7 857; SSE41-NEXT: xorpd %xmm3, %xmm7 858; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744069414584320,18446744069414584320] 859; SSE41-NEXT: movapd %xmm7, %xmm5 860; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 861; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 862; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 863; SSE41-NEXT: pand %xmm5, %xmm0 864; SSE41-NEXT: por %xmm7, %xmm0 865; SSE41-NEXT: movapd %xmm2, %xmm5 866; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 867; SSE41-NEXT: movapd %xmm4, %xmm1 868; SSE41-NEXT: xorpd %xmm3, %xmm1 869; SSE41-NEXT: movapd %xmm1, %xmm7 870; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 871; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 872; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 873; SSE41-NEXT: pand %xmm7, %xmm0 874; SSE41-NEXT: por %xmm1, %xmm0 875; SSE41-NEXT: movapd %xmm2, %xmm1 876; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 877; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm5[0,2] 878; SSE41-NEXT: movapd %xmm11, %xmm4 879; SSE41-NEXT: xorpd %xmm3, %xmm4 880; SSE41-NEXT: movapd %xmm4, %xmm5 881; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 882; SSE41-NEXT: pcmpgtd %xmm6, %xmm4 883; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 884; SSE41-NEXT: pand %xmm5, %xmm0 885; SSE41-NEXT: por %xmm4, %xmm0 886; SSE41-NEXT: movapd %xmm2, %xmm4 887; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm4 888; SSE41-NEXT: xorpd %xmm8, %xmm3 889; SSE41-NEXT: movapd %xmm3, %xmm5 890; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 891; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 892; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 893; SSE41-NEXT: pand %xmm5, %xmm0 894; SSE41-NEXT: por %xmm3, %xmm0 895; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2 896; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm4[0,2] 897; SSE41-NEXT: movaps %xmm2, %xmm0 898; SSE41-NEXT: retq 899; 900; AVX1-LABEL: trunc_ssat_v8i64_v8i32: 901; AVX1: # %bb.0: 902; AVX1-NEXT: vmovdqa (%rdi), %xmm0 
903; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 904; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 905; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 906; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [2147483647,2147483647] 907; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm8 908; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm6 909; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm7 910; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5 911; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 912; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [18446744071562067968,18446744071562067968] 913; AVX1-NEXT: vpcmpgtq %xmm5, %xmm0, %xmm9 914; AVX1-NEXT: vblendvpd %xmm7, %xmm2, %xmm4, %xmm2 915; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm7 916; AVX1-NEXT: vblendvpd %xmm6, %xmm1, %xmm4, %xmm1 917; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm6 918; AVX1-NEXT: vblendvpd %xmm8, %xmm3, %xmm4, %xmm3 919; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm4 920; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm5, %xmm3 921; AVX1-NEXT: vblendvpd %xmm6, %xmm1, %xmm5, %xmm1 922; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 923; AVX1-NEXT: vblendvpd %xmm7, %xmm2, %xmm5, %xmm2 924; AVX1-NEXT: vblendvpd %xmm9, %xmm0, %xmm5, %xmm0 925; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 926; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 927; AVX1-NEXT: retq 928; 929; AVX2-SLOW-LABEL: trunc_ssat_v8i64_v8i32: 930; AVX2-SLOW: # %bb.0: 931; AVX2-SLOW-NEXT: vmovdqa (%rdi), %ymm0 932; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %ymm1 933; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647] 934; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 935; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 936; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 937; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 938; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 939; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 940; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 941; AVX2-SLOW-NEXT: 
vpcmpgtq %ymm2, %ymm0, %ymm3 942; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 943; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] 944; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 945; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] 946; AVX2-SLOW-NEXT: retq 947; 948; AVX2-FAST-LABEL: trunc_ssat_v8i64_v8i32: 949; AVX2-FAST: # %bb.0: 950; AVX2-FAST-NEXT: vmovdqa (%rdi), %ymm0 951; AVX2-FAST-NEXT: vmovdqa 32(%rdi), %ymm1 952; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647] 953; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 954; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 955; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 956; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 957; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 958; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 959; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 960; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 961; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 962; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] 963; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0 964; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1 965; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 966; AVX2-FAST-NEXT: retq 967; 968; AVX512-LABEL: trunc_ssat_v8i64_v8i32: 969; AVX512: # %bb.0: 970; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 971; AVX512-NEXT: vpmovsqd %zmm0, %ymm0 972; AVX512-NEXT: retq 973; 974; SKX-LABEL: trunc_ssat_v8i64_v8i32: 975; SKX: # %bb.0: 976; SKX-NEXT: vmovdqa (%rdi), %ymm0 977; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 978; SKX-NEXT: vpmovsqd %ymm0, %xmm0 979; SKX-NEXT: vpmovsqd %ymm1, %xmm1 980; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 981; SKX-NEXT: retq 982 %a0 = load <8 x i64>, <8 x i64>* %p0 983 %1 = icmp slt <8 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 
2147483647, i64 2147483647, i64 2147483647, i64 2147483647> 984 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> 985 %3 = icmp sgt <8 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648> 986 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648> 987 %5 = trunc <8 x i64> %4 to <8 x i32> 988 ret <8 x i32> %5 989} 990 991; 992; Signed saturation truncation to vXi16 993; 994 995define <2 x i16> @trunc_ssat_v2i64_v2i16(<2 x i64> %a0) { 996; SSE2-LABEL: trunc_ssat_v2i64_v2i16: 997; SSE2: # %bb.0: 998; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 999; SSE2-NEXT: movdqa %xmm0, %xmm2 1000; SSE2-NEXT: pxor %xmm1, %xmm2 1001; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147516415,2147516415] 1002; SSE2-NEXT: movdqa %xmm3, %xmm4 1003; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 1004; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1005; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 1006; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1007; SSE2-NEXT: pand %xmm5, %xmm2 1008; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 1009; SSE2-NEXT: por %xmm2, %xmm3 1010; SSE2-NEXT: pand %xmm3, %xmm0 1011; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3 1012; SSE2-NEXT: por %xmm0, %xmm3 1013; SSE2-NEXT: pxor %xmm3, %xmm1 1014; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200] 1015; SSE2-NEXT: movdqa %xmm1, %xmm2 1016; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1017; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 1018; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1019; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1020; SSE2-NEXT: pand %xmm4, %xmm0 1021; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1022; SSE2-NEXT: por %xmm0, %xmm1 1023; SSE2-NEXT: pand %xmm1, 
%xmm3 1024; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1 1025; SSE2-NEXT: por %xmm3, %xmm1 1026; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1027; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1028; SSE2-NEXT: retq 1029; 1030; SSSE3-LABEL: trunc_ssat_v2i64_v2i16: 1031; SSSE3: # %bb.0: 1032; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 1033; SSSE3-NEXT: movdqa %xmm0, %xmm2 1034; SSSE3-NEXT: pxor %xmm1, %xmm2 1035; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147516415,2147516415] 1036; SSSE3-NEXT: movdqa %xmm3, %xmm4 1037; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 1038; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1039; SSSE3-NEXT: pcmpeqd %xmm3, %xmm2 1040; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1041; SSSE3-NEXT: pand %xmm5, %xmm2 1042; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 1043; SSSE3-NEXT: por %xmm2, %xmm3 1044; SSSE3-NEXT: pand %xmm3, %xmm0 1045; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm3 1046; SSSE3-NEXT: por %xmm0, %xmm3 1047; SSSE3-NEXT: pxor %xmm3, %xmm1 1048; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200] 1049; SSSE3-NEXT: movdqa %xmm1, %xmm2 1050; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 1051; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 1052; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 1053; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1054; SSSE3-NEXT: pand %xmm4, %xmm0 1055; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1056; SSSE3-NEXT: por %xmm0, %xmm1 1057; SSSE3-NEXT: pand %xmm1, %xmm3 1058; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm1 1059; SSSE3-NEXT: por %xmm3, %xmm1 1060; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1061; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1062; SSSE3-NEXT: retq 1063; 1064; SSE41-LABEL: trunc_ssat_v2i64_v2i16: 1065; SSE41: # %bb.0: 1066; SSE41-NEXT: movdqa %xmm0, %xmm1 1067; SSE41-NEXT: movapd {{.*#+}} xmm2 = [32767,32767] 1068; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 1069; SSE41-NEXT: pxor %xmm3, %xmm0 1070; SSE41-NEXT: movdqa {{.*#+}} xmm4 = 
[2147516415,2147516415] 1071; SSE41-NEXT: movdqa %xmm4, %xmm5 1072; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 1073; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 1074; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 1075; SSE41-NEXT: pand %xmm5, %xmm0 1076; SSE41-NEXT: por %xmm4, %xmm0 1077; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 1078; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1079; SSE41-NEXT: pxor %xmm2, %xmm3 1080; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200] 1081; SSE41-NEXT: movdqa %xmm3, %xmm4 1082; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 1083; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 1084; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1085; SSE41-NEXT: pand %xmm4, %xmm0 1086; SSE41-NEXT: por %xmm3, %xmm0 1087; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 1088; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1089; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1090; SSE41-NEXT: retq 1091; 1092; AVX1-LABEL: trunc_ssat_v2i64_v2i16: 1093; AVX1: # %bb.0: 1094; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] 1095; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1096; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1097; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1098; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1099; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1100; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] 1101; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1102; AVX1-NEXT: retq 1103; 1104; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16: 1105; AVX2-SLOW: # %bb.0: 1106; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] 1107; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1108; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1109; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1110; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1111; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1112; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = 
xmm0[0,2,2,3] 1113; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1114; AVX2-SLOW-NEXT: retq 1115; 1116; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16: 1117; AVX2-FAST: # %bb.0: 1118; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] 1119; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1120; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1121; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1122; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1123; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1124; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] 1125; AVX2-FAST-NEXT: retq 1126; 1127; AVX512F-LABEL: trunc_ssat_v2i64_v2i16: 1128; AVX512F: # %bb.0: 1129; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1130; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 1131; AVX512F-NEXT: vzeroupper 1132; AVX512F-NEXT: retq 1133; 1134; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16: 1135; AVX512VL: # %bb.0: 1136; AVX512VL-NEXT: vpmovsqw %xmm0, %xmm0 1137; AVX512VL-NEXT: retq 1138; 1139; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16: 1140; AVX512BW: # %bb.0: 1141; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1142; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 1143; AVX512BW-NEXT: vzeroupper 1144; AVX512BW-NEXT: retq 1145; 1146; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16: 1147; AVX512BWVL: # %bb.0: 1148; AVX512BWVL-NEXT: vpmovsqw %xmm0, %xmm0 1149; AVX512BWVL-NEXT: retq 1150; 1151; SKX-LABEL: trunc_ssat_v2i64_v2i16: 1152; SKX: # %bb.0: 1153; SKX-NEXT: vpmovsqw %xmm0, %xmm0 1154; SKX-NEXT: retq 1155 %1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767> 1156 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767> 1157 %3 = icmp sgt <2 x i64> %2, <i64 -32768, i64 -32768> 1158 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -32768, i64 -32768> 1159 %5 = trunc <2 x i64> %4 to <2 x i16> 1160 ret <2 x i16> %5 1161} 1162 1163define void @trunc_ssat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16> 
*%p1) { 1164; SSE2-LABEL: trunc_ssat_v2i64_v2i16_store: 1165; SSE2: # %bb.0: 1166; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 1167; SSE2-NEXT: movdqa %xmm0, %xmm2 1168; SSE2-NEXT: pxor %xmm1, %xmm2 1169; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147516415,2147516415] 1170; SSE2-NEXT: movdqa %xmm3, %xmm4 1171; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 1172; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1173; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 1174; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1175; SSE2-NEXT: pand %xmm5, %xmm2 1176; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 1177; SSE2-NEXT: por %xmm2, %xmm3 1178; SSE2-NEXT: pand %xmm3, %xmm0 1179; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3 1180; SSE2-NEXT: por %xmm0, %xmm3 1181; SSE2-NEXT: pxor %xmm3, %xmm1 1182; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200] 1183; SSE2-NEXT: movdqa %xmm1, %xmm2 1184; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1185; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 1186; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1187; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1188; SSE2-NEXT: pand %xmm4, %xmm0 1189; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1190; SSE2-NEXT: por %xmm0, %xmm1 1191; SSE2-NEXT: pand %xmm1, %xmm3 1192; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1 1193; SSE2-NEXT: por %xmm3, %xmm1 1194; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1195; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1196; SSE2-NEXT: movd %xmm0, (%rdi) 1197; SSE2-NEXT: retq 1198; 1199; SSSE3-LABEL: trunc_ssat_v2i64_v2i16_store: 1200; SSSE3: # %bb.0: 1201; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 1202; SSSE3-NEXT: movdqa %xmm0, %xmm2 1203; SSSE3-NEXT: pxor %xmm1, %xmm2 1204; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147516415,2147516415] 1205; SSSE3-NEXT: movdqa %xmm3, %xmm4 1206; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 1207; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1208; SSSE3-NEXT: pcmpeqd %xmm3, %xmm2 1209; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1210; 
SSSE3-NEXT: pand %xmm5, %xmm2 1211; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 1212; SSSE3-NEXT: por %xmm2, %xmm3 1213; SSSE3-NEXT: pand %xmm3, %xmm0 1214; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm3 1215; SSSE3-NEXT: por %xmm0, %xmm3 1216; SSSE3-NEXT: pxor %xmm3, %xmm1 1217; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200] 1218; SSSE3-NEXT: movdqa %xmm1, %xmm2 1219; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 1220; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 1221; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 1222; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1223; SSSE3-NEXT: pand %xmm4, %xmm0 1224; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1225; SSSE3-NEXT: por %xmm0, %xmm1 1226; SSSE3-NEXT: pand %xmm1, %xmm3 1227; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm1 1228; SSSE3-NEXT: por %xmm3, %xmm1 1229; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1230; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1231; SSSE3-NEXT: movd %xmm0, (%rdi) 1232; SSSE3-NEXT: retq 1233; 1234; SSE41-LABEL: trunc_ssat_v2i64_v2i16_store: 1235; SSE41: # %bb.0: 1236; SSE41-NEXT: movdqa %xmm0, %xmm1 1237; SSE41-NEXT: movapd {{.*#+}} xmm2 = [32767,32767] 1238; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 1239; SSE41-NEXT: pxor %xmm3, %xmm0 1240; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147516415,2147516415] 1241; SSE41-NEXT: movdqa %xmm4, %xmm5 1242; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 1243; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 1244; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 1245; SSE41-NEXT: pand %xmm5, %xmm0 1246; SSE41-NEXT: por %xmm4, %xmm0 1247; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 1248; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1249; SSE41-NEXT: pxor %xmm2, %xmm3 1250; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200] 1251; SSE41-NEXT: movdqa %xmm3, %xmm4 1252; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 1253; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 1254; SSE41-NEXT: pshufd {{.*#+}} xmm0 = 
xmm3[0,0,2,2] 1255; SSE41-NEXT: pand %xmm4, %xmm0 1256; SSE41-NEXT: por %xmm3, %xmm0 1257; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 1258; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1259; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1260; SSE41-NEXT: movd %xmm0, (%rdi) 1261; SSE41-NEXT: retq 1262; 1263; AVX1-LABEL: trunc_ssat_v2i64_v2i16_store: 1264; AVX1: # %bb.0: 1265; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] 1266; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1267; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1268; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1269; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1270; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1271; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] 1272; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1273; AVX1-NEXT: vmovd %xmm0, (%rdi) 1274; AVX1-NEXT: retq 1275; 1276; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16_store: 1277; AVX2-SLOW: # %bb.0: 1278; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] 1279; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1280; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1281; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1282; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1283; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1284; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] 1285; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1286; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi) 1287; AVX2-SLOW-NEXT: retq 1288; 1289; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16_store: 1290; AVX2-FAST: # %bb.0: 1291; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] 1292; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1293; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1294; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1295; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1296; AVX2-FAST-NEXT: vblendvpd %xmm2, 
%xmm0, %xmm1, %xmm0 1297; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u] 1298; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi) 1299; AVX2-FAST-NEXT: retq 1300; 1301; AVX512F-LABEL: trunc_ssat_v2i64_v2i16_store: 1302; AVX512F: # %bb.0: 1303; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1304; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 1305; AVX512F-NEXT: vmovd %xmm0, (%rdi) 1306; AVX512F-NEXT: vzeroupper 1307; AVX512F-NEXT: retq 1308; 1309; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16_store: 1310; AVX512VL: # %bb.0: 1311; AVX512VL-NEXT: vpmovsqw %xmm0, (%rdi) 1312; AVX512VL-NEXT: retq 1313; 1314; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16_store: 1315; AVX512BW: # %bb.0: 1316; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1317; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 1318; AVX512BW-NEXT: vmovd %xmm0, (%rdi) 1319; AVX512BW-NEXT: vzeroupper 1320; AVX512BW-NEXT: retq 1321; 1322; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16_store: 1323; AVX512BWVL: # %bb.0: 1324; AVX512BWVL-NEXT: vpmovsqw %xmm0, (%rdi) 1325; AVX512BWVL-NEXT: retq 1326; 1327; SKX-LABEL: trunc_ssat_v2i64_v2i16_store: 1328; SKX: # %bb.0: 1329; SKX-NEXT: vpmovsqw %xmm0, (%rdi) 1330; SKX-NEXT: retq 1331 %1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767> 1332 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767> 1333 %3 = icmp sgt <2 x i64> %2, <i64 -32768, i64 -32768> 1334 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -32768, i64 -32768> 1335 %5 = trunc <2 x i64> %4 to <2 x i16> 1336 store <2 x i16> %5, <2 x i16> *%p1 1337 ret void 1338} 1339 1340define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) { 1341; SSE2-LABEL: trunc_ssat_v4i64_v4i16: 1342; SSE2: # %bb.0: 1343; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] 1344; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 1345; SSE2-NEXT: movdqa %xmm0, %xmm3 1346; SSE2-NEXT: pxor %xmm2, %xmm3 1347; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147516415,2147516415] 1348; SSE2-NEXT: movdqa %xmm5, %xmm6 1349; SSE2-NEXT: 
pcmpgtd %xmm3, %xmm6 1350; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1351; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 1352; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1353; SSE2-NEXT: pand %xmm7, %xmm4 1354; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 1355; SSE2-NEXT: por %xmm4, %xmm3 1356; SSE2-NEXT: pand %xmm3, %xmm0 1357; SSE2-NEXT: pandn %xmm8, %xmm3 1358; SSE2-NEXT: por %xmm0, %xmm3 1359; SSE2-NEXT: movdqa %xmm1, %xmm0 1360; SSE2-NEXT: pxor %xmm2, %xmm0 1361; SSE2-NEXT: movdqa %xmm5, %xmm4 1362; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1363; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 1364; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 1365; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1366; SSE2-NEXT: pand %xmm6, %xmm0 1367; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1368; SSE2-NEXT: por %xmm0, %xmm4 1369; SSE2-NEXT: pand %xmm4, %xmm1 1370; SSE2-NEXT: pandn %xmm8, %xmm4 1371; SSE2-NEXT: por %xmm1, %xmm4 1372; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1373; SSE2-NEXT: movdqa %xmm4, %xmm0 1374; SSE2-NEXT: pxor %xmm2, %xmm0 1375; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200] 1376; SSE2-NEXT: movdqa %xmm0, %xmm6 1377; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 1378; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1379; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 1380; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1381; SSE2-NEXT: pand %xmm7, %xmm0 1382; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1383; SSE2-NEXT: por %xmm0, %xmm6 1384; SSE2-NEXT: pand %xmm6, %xmm4 1385; SSE2-NEXT: pandn %xmm1, %xmm6 1386; SSE2-NEXT: por %xmm4, %xmm6 1387; SSE2-NEXT: pxor %xmm3, %xmm2 1388; SSE2-NEXT: movdqa %xmm2, %xmm0 1389; SSE2-NEXT: pcmpgtd %xmm5, %xmm0 1390; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] 1391; SSE2-NEXT: pcmpeqd %xmm5, %xmm2 1392; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1393; SSE2-NEXT: pand %xmm4, %xmm2 1394; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1395; SSE2-NEXT: por %xmm2, %xmm0 1396; 
SSE2-NEXT: pand %xmm0, %xmm3 1397; SSE2-NEXT: pandn %xmm1, %xmm0 1398; SSE2-NEXT: por %xmm3, %xmm0 1399; SSE2-NEXT: packssdw %xmm6, %xmm0 1400; SSE2-NEXT: packssdw %xmm0, %xmm0 1401; SSE2-NEXT: retq 1402; 1403; SSSE3-LABEL: trunc_ssat_v4i64_v4i16: 1404; SSSE3: # %bb.0: 1405; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] 1406; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 1407; SSSE3-NEXT: movdqa %xmm0, %xmm3 1408; SSSE3-NEXT: pxor %xmm2, %xmm3 1409; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147516415,2147516415] 1410; SSSE3-NEXT: movdqa %xmm5, %xmm6 1411; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 1412; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1413; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 1414; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1415; SSSE3-NEXT: pand %xmm7, %xmm4 1416; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 1417; SSSE3-NEXT: por %xmm4, %xmm3 1418; SSSE3-NEXT: pand %xmm3, %xmm0 1419; SSSE3-NEXT: pandn %xmm8, %xmm3 1420; SSSE3-NEXT: por %xmm0, %xmm3 1421; SSSE3-NEXT: movdqa %xmm1, %xmm0 1422; SSSE3-NEXT: pxor %xmm2, %xmm0 1423; SSSE3-NEXT: movdqa %xmm5, %xmm4 1424; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 1425; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 1426; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0 1427; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1428; SSSE3-NEXT: pand %xmm6, %xmm0 1429; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1430; SSSE3-NEXT: por %xmm0, %xmm4 1431; SSSE3-NEXT: pand %xmm4, %xmm1 1432; SSSE3-NEXT: pandn %xmm8, %xmm4 1433; SSSE3-NEXT: por %xmm1, %xmm4 1434; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1435; SSSE3-NEXT: movdqa %xmm4, %xmm0 1436; SSSE3-NEXT: pxor %xmm2, %xmm0 1437; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200] 1438; SSSE3-NEXT: movdqa %xmm0, %xmm6 1439; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 1440; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1441; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0 1442; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1443; 
SSSE3-NEXT: pand %xmm7, %xmm0 1444; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1445; SSSE3-NEXT: por %xmm0, %xmm6 1446; SSSE3-NEXT: pand %xmm6, %xmm4 1447; SSSE3-NEXT: pandn %xmm1, %xmm6 1448; SSSE3-NEXT: por %xmm4, %xmm6 1449; SSSE3-NEXT: pxor %xmm3, %xmm2 1450; SSSE3-NEXT: movdqa %xmm2, %xmm0 1451; SSSE3-NEXT: pcmpgtd %xmm5, %xmm0 1452; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] 1453; SSSE3-NEXT: pcmpeqd %xmm5, %xmm2 1454; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1455; SSSE3-NEXT: pand %xmm4, %xmm2 1456; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1457; SSSE3-NEXT: por %xmm2, %xmm0 1458; SSSE3-NEXT: pand %xmm0, %xmm3 1459; SSSE3-NEXT: pandn %xmm1, %xmm0 1460; SSSE3-NEXT: por %xmm3, %xmm0 1461; SSSE3-NEXT: packssdw %xmm6, %xmm0 1462; SSSE3-NEXT: packssdw %xmm0, %xmm0 1463; SSSE3-NEXT: retq 1464; 1465; SSE41-LABEL: trunc_ssat_v4i64_v4i16: 1466; SSE41: # %bb.0: 1467; SSE41-NEXT: movdqa %xmm0, %xmm2 1468; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767] 1469; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 1470; SSE41-NEXT: pxor %xmm3, %xmm0 1471; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147516415,2147516415] 1472; SSE41-NEXT: movdqa %xmm6, %xmm5 1473; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 1474; SSE41-NEXT: movdqa %xmm6, %xmm7 1475; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 1476; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1477; SSE41-NEXT: pand %xmm5, %xmm0 1478; SSE41-NEXT: por %xmm7, %xmm0 1479; SSE41-NEXT: movapd %xmm4, %xmm5 1480; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 1481; SSE41-NEXT: movdqa %xmm1, %xmm0 1482; SSE41-NEXT: pxor %xmm3, %xmm0 1483; SSE41-NEXT: movdqa %xmm6, %xmm2 1484; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 1485; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 1486; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1487; SSE41-NEXT: pand %xmm2, %xmm0 1488; SSE41-NEXT: por %xmm6, %xmm0 1489; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 1490; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1491; SSE41-NEXT: movapd 
%xmm4, %xmm2 1492; SSE41-NEXT: xorpd %xmm3, %xmm2 1493; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200] 1494; SSE41-NEXT: movapd %xmm2, %xmm7 1495; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 1496; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 1497; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 1498; SSE41-NEXT: pand %xmm7, %xmm0 1499; SSE41-NEXT: por %xmm2, %xmm0 1500; SSE41-NEXT: movapd %xmm1, %xmm2 1501; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1502; SSE41-NEXT: xorpd %xmm5, %xmm3 1503; SSE41-NEXT: movapd %xmm3, %xmm4 1504; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 1505; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 1506; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1507; SSE41-NEXT: pand %xmm4, %xmm0 1508; SSE41-NEXT: por %xmm3, %xmm0 1509; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 1510; SSE41-NEXT: packssdw %xmm2, %xmm1 1511; SSE41-NEXT: packssdw %xmm1, %xmm1 1512; SSE41-NEXT: movdqa %xmm1, %xmm0 1513; SSE41-NEXT: retq 1514; 1515; AVX1-LABEL: trunc_ssat_v4i64_v4i16: 1516; AVX1: # %bb.0: 1517; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1518; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,32767] 1519; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 1520; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 1521; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 1522; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848] 1523; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 1524; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 1525; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 1526; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 1527; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 1528; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1529; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1530; AVX1-NEXT: vzeroupper 1531; AVX1-NEXT: retq 1532; 1533; AVX2-LABEL: trunc_ssat_v4i64_v4i16: 1534; AVX2: # %bb.0: 1535; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767] 1536; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1537; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1538; AVX2-NEXT: vpbroadcastq 
{{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] 1539; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1540; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1541; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1542; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1543; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1544; AVX2-NEXT: vzeroupper 1545; AVX2-NEXT: retq 1546; 1547; AVX512F-LABEL: trunc_ssat_v4i64_v4i16: 1548; AVX512F: # %bb.0: 1549; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1550; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 1551; AVX512F-NEXT: vzeroupper 1552; AVX512F-NEXT: retq 1553; 1554; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16: 1555; AVX512VL: # %bb.0: 1556; AVX512VL-NEXT: vpmovsqw %ymm0, %xmm0 1557; AVX512VL-NEXT: vzeroupper 1558; AVX512VL-NEXT: retq 1559; 1560; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16: 1561; AVX512BW: # %bb.0: 1562; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1563; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 1564; AVX512BW-NEXT: vzeroupper 1565; AVX512BW-NEXT: retq 1566; 1567; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16: 1568; AVX512BWVL: # %bb.0: 1569; AVX512BWVL-NEXT: vpmovsqw %ymm0, %xmm0 1570; AVX512BWVL-NEXT: vzeroupper 1571; AVX512BWVL-NEXT: retq 1572; 1573; SKX-LABEL: trunc_ssat_v4i64_v4i16: 1574; SKX: # %bb.0: 1575; SKX-NEXT: vpmovsqw %ymm0, %xmm0 1576; SKX-NEXT: vzeroupper 1577; SKX-NEXT: retq 1578 %1 = icmp slt <4 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767> 1579 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767> 1580 %3 = icmp sgt <4 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1581 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1582 %5 = trunc <4 x i64> %4 to <4 x i16> 1583 ret <4 x i16> %5 1584} 1585 1586define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { 1587; SSE2-LABEL: trunc_ssat_v4i64_v4i16_store: 1588; SSE2: # %bb.0: 1589; SSE2-NEXT: movdqa 
{{.*#+}} xmm8 = [32767,32767] 1590; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 1591; SSE2-NEXT: movdqa %xmm0, %xmm3 1592; SSE2-NEXT: pxor %xmm2, %xmm3 1593; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147516415,2147516415] 1594; SSE2-NEXT: movdqa %xmm5, %xmm6 1595; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 1596; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1597; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 1598; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1599; SSE2-NEXT: pand %xmm7, %xmm4 1600; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 1601; SSE2-NEXT: por %xmm4, %xmm3 1602; SSE2-NEXT: pand %xmm3, %xmm0 1603; SSE2-NEXT: pandn %xmm8, %xmm3 1604; SSE2-NEXT: por %xmm0, %xmm3 1605; SSE2-NEXT: movdqa %xmm1, %xmm0 1606; SSE2-NEXT: pxor %xmm2, %xmm0 1607; SSE2-NEXT: movdqa %xmm5, %xmm4 1608; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1609; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 1610; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 1611; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1612; SSE2-NEXT: pand %xmm6, %xmm0 1613; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1614; SSE2-NEXT: por %xmm0, %xmm4 1615; SSE2-NEXT: pand %xmm4, %xmm1 1616; SSE2-NEXT: pandn %xmm8, %xmm4 1617; SSE2-NEXT: por %xmm1, %xmm4 1618; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848] 1619; SSE2-NEXT: movdqa %xmm4, %xmm1 1620; SSE2-NEXT: pxor %xmm2, %xmm1 1621; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200] 1622; SSE2-NEXT: movdqa %xmm1, %xmm6 1623; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 1624; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1625; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 1626; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1627; SSE2-NEXT: pand %xmm7, %xmm1 1628; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1629; SSE2-NEXT: por %xmm1, %xmm6 1630; SSE2-NEXT: pand %xmm6, %xmm4 1631; SSE2-NEXT: pandn %xmm0, %xmm6 1632; SSE2-NEXT: por %xmm4, %xmm6 1633; SSE2-NEXT: pxor %xmm3, %xmm2 1634; SSE2-NEXT: movdqa %xmm2, %xmm1 1635; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 
1636; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] 1637; SSE2-NEXT: pcmpeqd %xmm5, %xmm2 1638; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1639; SSE2-NEXT: pand %xmm4, %xmm2 1640; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1641; SSE2-NEXT: por %xmm2, %xmm1 1642; SSE2-NEXT: pand %xmm1, %xmm3 1643; SSE2-NEXT: pandn %xmm0, %xmm1 1644; SSE2-NEXT: por %xmm3, %xmm1 1645; SSE2-NEXT: packssdw %xmm6, %xmm1 1646; SSE2-NEXT: packssdw %xmm1, %xmm1 1647; SSE2-NEXT: movq %xmm1, (%rdi) 1648; SSE2-NEXT: retq 1649; 1650; SSSE3-LABEL: trunc_ssat_v4i64_v4i16_store: 1651; SSSE3: # %bb.0: 1652; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] 1653; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 1654; SSSE3-NEXT: movdqa %xmm0, %xmm3 1655; SSSE3-NEXT: pxor %xmm2, %xmm3 1656; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147516415,2147516415] 1657; SSSE3-NEXT: movdqa %xmm5, %xmm6 1658; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 1659; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1660; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 1661; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1662; SSSE3-NEXT: pand %xmm7, %xmm4 1663; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 1664; SSSE3-NEXT: por %xmm4, %xmm3 1665; SSSE3-NEXT: pand %xmm3, %xmm0 1666; SSSE3-NEXT: pandn %xmm8, %xmm3 1667; SSSE3-NEXT: por %xmm0, %xmm3 1668; SSSE3-NEXT: movdqa %xmm1, %xmm0 1669; SSSE3-NEXT: pxor %xmm2, %xmm0 1670; SSSE3-NEXT: movdqa %xmm5, %xmm4 1671; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 1672; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 1673; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0 1674; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1675; SSSE3-NEXT: pand %xmm6, %xmm0 1676; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1677; SSSE3-NEXT: por %xmm0, %xmm4 1678; SSSE3-NEXT: pand %xmm4, %xmm1 1679; SSSE3-NEXT: pandn %xmm8, %xmm4 1680; SSSE3-NEXT: por %xmm1, %xmm4 1681; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848] 1682; SSSE3-NEXT: movdqa %xmm4, %xmm1 1683; SSSE3-NEXT: pxor %xmm2, %xmm1 
1684; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200] 1685; SSSE3-NEXT: movdqa %xmm1, %xmm6 1686; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 1687; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1688; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 1689; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1690; SSSE3-NEXT: pand %xmm7, %xmm1 1691; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1692; SSSE3-NEXT: por %xmm1, %xmm6 1693; SSSE3-NEXT: pand %xmm6, %xmm4 1694; SSSE3-NEXT: pandn %xmm0, %xmm6 1695; SSSE3-NEXT: por %xmm4, %xmm6 1696; SSSE3-NEXT: pxor %xmm3, %xmm2 1697; SSSE3-NEXT: movdqa %xmm2, %xmm1 1698; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1 1699; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] 1700; SSSE3-NEXT: pcmpeqd %xmm5, %xmm2 1701; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1702; SSSE3-NEXT: pand %xmm4, %xmm2 1703; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1704; SSSE3-NEXT: por %xmm2, %xmm1 1705; SSSE3-NEXT: pand %xmm1, %xmm3 1706; SSSE3-NEXT: pandn %xmm0, %xmm1 1707; SSSE3-NEXT: por %xmm3, %xmm1 1708; SSSE3-NEXT: packssdw %xmm6, %xmm1 1709; SSSE3-NEXT: packssdw %xmm1, %xmm1 1710; SSSE3-NEXT: movq %xmm1, (%rdi) 1711; SSSE3-NEXT: retq 1712; 1713; SSE41-LABEL: trunc_ssat_v4i64_v4i16_store: 1714; SSE41: # %bb.0: 1715; SSE41-NEXT: movdqa %xmm0, %xmm2 1716; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767] 1717; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 1718; SSE41-NEXT: pxor %xmm3, %xmm0 1719; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147516415,2147516415] 1720; SSE41-NEXT: movdqa %xmm6, %xmm5 1721; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 1722; SSE41-NEXT: movdqa %xmm6, %xmm7 1723; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 1724; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1725; SSE41-NEXT: pand %xmm5, %xmm0 1726; SSE41-NEXT: por %xmm7, %xmm0 1727; SSE41-NEXT: movapd %xmm4, %xmm5 1728; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 1729; SSE41-NEXT: movdqa %xmm1, %xmm0 1730; SSE41-NEXT: pxor %xmm3, %xmm0 1731; SSE41-NEXT: movdqa %xmm6, %xmm2 1732; 
SSE41-NEXT: pcmpeqd %xmm0, %xmm2 1733; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 1734; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1735; SSE41-NEXT: pand %xmm2, %xmm0 1736; SSE41-NEXT: por %xmm6, %xmm0 1737; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 1738; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1739; SSE41-NEXT: movapd %xmm4, %xmm2 1740; SSE41-NEXT: xorpd %xmm3, %xmm2 1741; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200] 1742; SSE41-NEXT: movapd %xmm2, %xmm7 1743; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 1744; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 1745; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 1746; SSE41-NEXT: pand %xmm7, %xmm0 1747; SSE41-NEXT: por %xmm2, %xmm0 1748; SSE41-NEXT: movapd %xmm1, %xmm2 1749; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1750; SSE41-NEXT: xorpd %xmm5, %xmm3 1751; SSE41-NEXT: movapd %xmm3, %xmm4 1752; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 1753; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 1754; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1755; SSE41-NEXT: pand %xmm4, %xmm0 1756; SSE41-NEXT: por %xmm3, %xmm0 1757; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 1758; SSE41-NEXT: packssdw %xmm2, %xmm1 1759; SSE41-NEXT: packssdw %xmm1, %xmm1 1760; SSE41-NEXT: movq %xmm1, (%rdi) 1761; SSE41-NEXT: retq 1762; 1763; AVX1-LABEL: trunc_ssat_v4i64_v4i16_store: 1764; AVX1: # %bb.0: 1765; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1766; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,32767] 1767; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 1768; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 1769; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 1770; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848] 1771; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 1772; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 1773; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 1774; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 1775; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 1776; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1777; 
AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1778; AVX1-NEXT: vmovq %xmm0, (%rdi) 1779; AVX1-NEXT: vzeroupper 1780; AVX1-NEXT: retq 1781; 1782; AVX2-LABEL: trunc_ssat_v4i64_v4i16_store: 1783; AVX2: # %bb.0: 1784; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767] 1785; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1786; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1787; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] 1788; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1789; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1790; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1791; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1792; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1793; AVX2-NEXT: vmovq %xmm0, (%rdi) 1794; AVX2-NEXT: vzeroupper 1795; AVX2-NEXT: retq 1796; 1797; AVX512F-LABEL: trunc_ssat_v4i64_v4i16_store: 1798; AVX512F: # %bb.0: 1799; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1800; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 1801; AVX512F-NEXT: vmovq %xmm0, (%rdi) 1802; AVX512F-NEXT: vzeroupper 1803; AVX512F-NEXT: retq 1804; 1805; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16_store: 1806; AVX512VL: # %bb.0: 1807; AVX512VL-NEXT: vpmovsqw %ymm0, (%rdi) 1808; AVX512VL-NEXT: vzeroupper 1809; AVX512VL-NEXT: retq 1810; 1811; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16_store: 1812; AVX512BW: # %bb.0: 1813; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1814; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 1815; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 1816; AVX512BW-NEXT: vzeroupper 1817; AVX512BW-NEXT: retq 1818; 1819; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16_store: 1820; AVX512BWVL: # %bb.0: 1821; AVX512BWVL-NEXT: vpmovsqw %ymm0, (%rdi) 1822; AVX512BWVL-NEXT: vzeroupper 1823; AVX512BWVL-NEXT: retq 1824; 1825; SKX-LABEL: trunc_ssat_v4i64_v4i16_store: 1826; SKX: # %bb.0: 1827; SKX-NEXT: vpmovsqw %ymm0, (%rdi) 1828; SKX-NEXT: vzeroupper 1829; SKX-NEXT: retq 1830 %1 = icmp slt <4 x i64> %a0, <i64 32767, i64 32767, 
i64 32767, i64 32767> 1831 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767> 1832 %3 = icmp sgt <4 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1833 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1834 %5 = trunc <4 x i64> %4 to <4 x i16> 1835 store <4 x i16> %5, <4 x i16> *%p1 1836 ret void 1837} 1838 1839define <8 x i16> @trunc_ssat_v8i64_v8i16(<8 x i64>* %p0) "min-legal-vector-width"="256" { 1840; SSE2-LABEL: trunc_ssat_v8i64_v8i16: 1841; SSE2: # %bb.0: 1842; SSE2-NEXT: movdqa (%rdi), %xmm6 1843; SSE2-NEXT: movdqa 16(%rdi), %xmm9 1844; SSE2-NEXT: movdqa 32(%rdi), %xmm3 1845; SSE2-NEXT: movdqa 48(%rdi), %xmm5 1846; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] 1847; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 1848; SSE2-NEXT: movdqa %xmm3, %xmm2 1849; SSE2-NEXT: pxor %xmm1, %xmm2 1850; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147516415,2147516415] 1851; SSE2-NEXT: movdqa %xmm10, %xmm7 1852; SSE2-NEXT: pcmpgtd %xmm2, %xmm7 1853; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1854; SSE2-NEXT: pcmpeqd %xmm10, %xmm2 1855; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1856; SSE2-NEXT: pand %xmm0, %xmm4 1857; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3] 1858; SSE2-NEXT: por %xmm4, %xmm2 1859; SSE2-NEXT: pand %xmm2, %xmm3 1860; SSE2-NEXT: pandn %xmm8, %xmm2 1861; SSE2-NEXT: por %xmm3, %xmm2 1862; SSE2-NEXT: movdqa %xmm5, %xmm0 1863; SSE2-NEXT: pxor %xmm1, %xmm0 1864; SSE2-NEXT: movdqa %xmm10, %xmm3 1865; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1866; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 1867; SSE2-NEXT: pcmpeqd %xmm10, %xmm0 1868; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1869; SSE2-NEXT: pand %xmm4, %xmm0 1870; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1871; SSE2-NEXT: por %xmm0, %xmm3 1872; SSE2-NEXT: pand %xmm3, %xmm5 1873; SSE2-NEXT: pandn %xmm8, %xmm3 1874; SSE2-NEXT: por %xmm5, %xmm3 1875; SSE2-NEXT: movdqa %xmm6, %xmm0 
1876; SSE2-NEXT: pxor %xmm1, %xmm0 1877; SSE2-NEXT: movdqa %xmm10, %xmm4 1878; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1879; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1880; SSE2-NEXT: pcmpeqd %xmm10, %xmm0 1881; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1882; SSE2-NEXT: pand %xmm5, %xmm0 1883; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 1884; SSE2-NEXT: por %xmm0, %xmm5 1885; SSE2-NEXT: pand %xmm5, %xmm6 1886; SSE2-NEXT: pandn %xmm8, %xmm5 1887; SSE2-NEXT: por %xmm6, %xmm5 1888; SSE2-NEXT: movdqa %xmm9, %xmm0 1889; SSE2-NEXT: pxor %xmm1, %xmm0 1890; SSE2-NEXT: movdqa %xmm10, %xmm4 1891; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1892; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 1893; SSE2-NEXT: pcmpeqd %xmm10, %xmm0 1894; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1895; SSE2-NEXT: pand %xmm6, %xmm0 1896; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3] 1897; SSE2-NEXT: por %xmm0, %xmm7 1898; SSE2-NEXT: pand %xmm7, %xmm9 1899; SSE2-NEXT: pandn %xmm8, %xmm7 1900; SSE2-NEXT: por %xmm9, %xmm7 1901; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848] 1902; SSE2-NEXT: movdqa %xmm7, %xmm0 1903; SSE2-NEXT: pxor %xmm1, %xmm0 1904; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200] 1905; SSE2-NEXT: movdqa %xmm0, %xmm4 1906; SSE2-NEXT: pcmpgtd %xmm9, %xmm4 1907; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 1908; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 1909; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1910; SSE2-NEXT: pand %xmm6, %xmm0 1911; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1912; SSE2-NEXT: por %xmm0, %xmm4 1913; SSE2-NEXT: pand %xmm4, %xmm7 1914; SSE2-NEXT: pandn %xmm8, %xmm4 1915; SSE2-NEXT: por %xmm7, %xmm4 1916; SSE2-NEXT: movdqa %xmm5, %xmm0 1917; SSE2-NEXT: pxor %xmm1, %xmm0 1918; SSE2-NEXT: movdqa %xmm0, %xmm6 1919; SSE2-NEXT: pcmpgtd %xmm9, %xmm6 1920; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] 1921; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 1922; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] 1923; 
SSE2-NEXT: pand %xmm10, %xmm7 1924; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 1925; SSE2-NEXT: por %xmm7, %xmm0 1926; SSE2-NEXT: pand %xmm0, %xmm5 1927; SSE2-NEXT: pandn %xmm8, %xmm0 1928; SSE2-NEXT: por %xmm5, %xmm0 1929; SSE2-NEXT: packssdw %xmm4, %xmm0 1930; SSE2-NEXT: movdqa %xmm3, %xmm4 1931; SSE2-NEXT: pxor %xmm1, %xmm4 1932; SSE2-NEXT: movdqa %xmm4, %xmm5 1933; SSE2-NEXT: pcmpgtd %xmm9, %xmm5 1934; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 1935; SSE2-NEXT: pcmpeqd %xmm9, %xmm4 1936; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1937; SSE2-NEXT: pand %xmm6, %xmm4 1938; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 1939; SSE2-NEXT: por %xmm4, %xmm5 1940; SSE2-NEXT: pand %xmm5, %xmm3 1941; SSE2-NEXT: pandn %xmm8, %xmm5 1942; SSE2-NEXT: por %xmm3, %xmm5 1943; SSE2-NEXT: pxor %xmm2, %xmm1 1944; SSE2-NEXT: movdqa %xmm1, %xmm3 1945; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 1946; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 1947; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 1948; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1949; SSE2-NEXT: pand %xmm4, %xmm1 1950; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1951; SSE2-NEXT: por %xmm1, %xmm3 1952; SSE2-NEXT: pand %xmm3, %xmm2 1953; SSE2-NEXT: pandn %xmm8, %xmm3 1954; SSE2-NEXT: por %xmm2, %xmm3 1955; SSE2-NEXT: packssdw %xmm5, %xmm3 1956; SSE2-NEXT: packssdw %xmm3, %xmm0 1957; SSE2-NEXT: retq 1958; 1959; SSSE3-LABEL: trunc_ssat_v8i64_v8i16: 1960; SSSE3: # %bb.0: 1961; SSSE3-NEXT: movdqa (%rdi), %xmm6 1962; SSSE3-NEXT: movdqa 16(%rdi), %xmm9 1963; SSSE3-NEXT: movdqa 32(%rdi), %xmm3 1964; SSSE3-NEXT: movdqa 48(%rdi), %xmm5 1965; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] 1966; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 1967; SSSE3-NEXT: movdqa %xmm3, %xmm2 1968; SSSE3-NEXT: pxor %xmm1, %xmm2 1969; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147516415,2147516415] 1970; SSSE3-NEXT: movdqa %xmm10, %xmm7 1971; SSSE3-NEXT: pcmpgtd %xmm2, %xmm7 1972; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1973; 
SSSE3-NEXT: pcmpeqd %xmm10, %xmm2 1974; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1975; SSSE3-NEXT: pand %xmm0, %xmm4 1976; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3] 1977; SSSE3-NEXT: por %xmm4, %xmm2 1978; SSSE3-NEXT: pand %xmm2, %xmm3 1979; SSSE3-NEXT: pandn %xmm8, %xmm2 1980; SSSE3-NEXT: por %xmm3, %xmm2 1981; SSSE3-NEXT: movdqa %xmm5, %xmm0 1982; SSSE3-NEXT: pxor %xmm1, %xmm0 1983; SSSE3-NEXT: movdqa %xmm10, %xmm3 1984; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 1985; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 1986; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0 1987; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1988; SSSE3-NEXT: pand %xmm4, %xmm0 1989; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1990; SSSE3-NEXT: por %xmm0, %xmm3 1991; SSSE3-NEXT: pand %xmm3, %xmm5 1992; SSSE3-NEXT: pandn %xmm8, %xmm3 1993; SSSE3-NEXT: por %xmm5, %xmm3 1994; SSSE3-NEXT: movdqa %xmm6, %xmm0 1995; SSSE3-NEXT: pxor %xmm1, %xmm0 1996; SSSE3-NEXT: movdqa %xmm10, %xmm4 1997; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 1998; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1999; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0 2000; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 2001; SSSE3-NEXT: pand %xmm5, %xmm0 2002; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 2003; SSSE3-NEXT: por %xmm0, %xmm5 2004; SSSE3-NEXT: pand %xmm5, %xmm6 2005; SSSE3-NEXT: pandn %xmm8, %xmm5 2006; SSSE3-NEXT: por %xmm6, %xmm5 2007; SSSE3-NEXT: movdqa %xmm9, %xmm0 2008; SSSE3-NEXT: pxor %xmm1, %xmm0 2009; SSSE3-NEXT: movdqa %xmm10, %xmm4 2010; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 2011; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 2012; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0 2013; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 2014; SSSE3-NEXT: pand %xmm6, %xmm0 2015; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3] 2016; SSSE3-NEXT: por %xmm0, %xmm7 2017; SSSE3-NEXT: pand %xmm7, %xmm9 2018; SSSE3-NEXT: pandn %xmm8, %xmm7 2019; SSSE3-NEXT: por %xmm9, %xmm7 2020; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = 
[18446744073709518848,18446744073709518848] 2021; SSSE3-NEXT: movdqa %xmm7, %xmm0 2022; SSSE3-NEXT: pxor %xmm1, %xmm0 2023; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200] 2024; SSSE3-NEXT: movdqa %xmm0, %xmm4 2025; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4 2026; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 2027; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 2028; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 2029; SSSE3-NEXT: pand %xmm6, %xmm0 2030; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2031; SSSE3-NEXT: por %xmm0, %xmm4 2032; SSSE3-NEXT: pand %xmm4, %xmm7 2033; SSSE3-NEXT: pandn %xmm8, %xmm4 2034; SSSE3-NEXT: por %xmm7, %xmm4 2035; SSSE3-NEXT: movdqa %xmm5, %xmm0 2036; SSSE3-NEXT: pxor %xmm1, %xmm0 2037; SSSE3-NEXT: movdqa %xmm0, %xmm6 2038; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6 2039; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] 2040; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 2041; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] 2042; SSSE3-NEXT: pand %xmm10, %xmm7 2043; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 2044; SSSE3-NEXT: por %xmm7, %xmm0 2045; SSSE3-NEXT: pand %xmm0, %xmm5 2046; SSSE3-NEXT: pandn %xmm8, %xmm0 2047; SSSE3-NEXT: por %xmm5, %xmm0 2048; SSSE3-NEXT: packssdw %xmm4, %xmm0 2049; SSSE3-NEXT: movdqa %xmm3, %xmm4 2050; SSSE3-NEXT: pxor %xmm1, %xmm4 2051; SSSE3-NEXT: movdqa %xmm4, %xmm5 2052; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5 2053; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 2054; SSSE3-NEXT: pcmpeqd %xmm9, %xmm4 2055; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2056; SSSE3-NEXT: pand %xmm6, %xmm4 2057; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 2058; SSSE3-NEXT: por %xmm4, %xmm5 2059; SSSE3-NEXT: pand %xmm5, %xmm3 2060; SSSE3-NEXT: pandn %xmm8, %xmm5 2061; SSSE3-NEXT: por %xmm3, %xmm5 2062; SSSE3-NEXT: pxor %xmm2, %xmm1 2063; SSSE3-NEXT: movdqa %xmm1, %xmm3 2064; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 2065; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 2066; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 2067; SSSE3-NEXT: 
pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2068; SSSE3-NEXT: pand %xmm4, %xmm1 2069; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2070; SSSE3-NEXT: por %xmm1, %xmm3 2071; SSSE3-NEXT: pand %xmm3, %xmm2 2072; SSSE3-NEXT: pandn %xmm8, %xmm3 2073; SSSE3-NEXT: por %xmm2, %xmm3 2074; SSSE3-NEXT: packssdw %xmm5, %xmm3 2075; SSSE3-NEXT: packssdw %xmm3, %xmm0 2076; SSSE3-NEXT: retq 2077; 2078; SSE41-LABEL: trunc_ssat_v8i64_v8i16: 2079; SSE41: # %bb.0: 2080; SSE41-NEXT: movdqa (%rdi), %xmm10 2081; SSE41-NEXT: movdqa 16(%rdi), %xmm9 2082; SSE41-NEXT: movdqa 32(%rdi), %xmm3 2083; SSE41-NEXT: movdqa 48(%rdi), %xmm5 2084; SSE41-NEXT: movapd {{.*#+}} xmm1 = [32767,32767] 2085; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 2086; SSE41-NEXT: movdqa %xmm3, %xmm0 2087; SSE41-NEXT: pxor %xmm2, %xmm0 2088; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147516415,2147516415] 2089; SSE41-NEXT: movdqa %xmm4, %xmm7 2090; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 2091; SSE41-NEXT: movdqa %xmm4, %xmm6 2092; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 2093; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 2094; SSE41-NEXT: pand %xmm7, %xmm0 2095; SSE41-NEXT: por %xmm6, %xmm0 2096; SSE41-NEXT: movapd %xmm1, %xmm8 2097; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm8 2098; SSE41-NEXT: movdqa %xmm5, %xmm0 2099; SSE41-NEXT: pxor %xmm2, %xmm0 2100; SSE41-NEXT: movdqa %xmm4, %xmm3 2101; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 2102; SSE41-NEXT: movdqa %xmm4, %xmm6 2103; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 2104; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 2105; SSE41-NEXT: pand %xmm3, %xmm0 2106; SSE41-NEXT: por %xmm6, %xmm0 2107; SSE41-NEXT: movapd %xmm1, %xmm11 2108; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm11 2109; SSE41-NEXT: movdqa %xmm10, %xmm0 2110; SSE41-NEXT: pxor %xmm2, %xmm0 2111; SSE41-NEXT: movdqa %xmm4, %xmm3 2112; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 2113; SSE41-NEXT: movdqa %xmm4, %xmm5 2114; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 2115; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 2116; SSE41-NEXT: pand %xmm3, %xmm0 2117; 
SSE41-NEXT: por %xmm5, %xmm0 2118; SSE41-NEXT: movapd %xmm1, %xmm3 2119; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm3 2120; SSE41-NEXT: movdqa %xmm9, %xmm0 2121; SSE41-NEXT: pxor %xmm2, %xmm0 2122; SSE41-NEXT: movdqa %xmm4, %xmm5 2123; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 2124; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 2125; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 2126; SSE41-NEXT: pand %xmm5, %xmm0 2127; SSE41-NEXT: por %xmm4, %xmm0 2128; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm1 2129; SSE41-NEXT: movapd {{.*#+}} xmm5 = [18446744073709518848,18446744073709518848] 2130; SSE41-NEXT: movapd %xmm1, %xmm4 2131; SSE41-NEXT: xorpd %xmm2, %xmm4 2132; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200] 2133; SSE41-NEXT: movapd %xmm4, %xmm7 2134; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 2135; SSE41-NEXT: pcmpgtd %xmm6, %xmm4 2136; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 2137; SSE41-NEXT: pand %xmm7, %xmm0 2138; SSE41-NEXT: por %xmm4, %xmm0 2139; SSE41-NEXT: movapd %xmm5, %xmm4 2140; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 2141; SSE41-NEXT: movapd %xmm3, %xmm1 2142; SSE41-NEXT: xorpd %xmm2, %xmm1 2143; SSE41-NEXT: movapd %xmm1, %xmm7 2144; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 2145; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 2146; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 2147; SSE41-NEXT: pand %xmm7, %xmm0 2148; SSE41-NEXT: por %xmm1, %xmm0 2149; SSE41-NEXT: movapd %xmm5, %xmm1 2150; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 2151; SSE41-NEXT: packssdw %xmm4, %xmm1 2152; SSE41-NEXT: movapd %xmm11, %xmm3 2153; SSE41-NEXT: xorpd %xmm2, %xmm3 2154; SSE41-NEXT: movapd %xmm3, %xmm4 2155; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 2156; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 2157; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 2158; SSE41-NEXT: pand %xmm4, %xmm0 2159; SSE41-NEXT: por %xmm3, %xmm0 2160; SSE41-NEXT: movapd %xmm5, %xmm3 2161; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm3 2162; SSE41-NEXT: xorpd %xmm8, %xmm2 2163; SSE41-NEXT: movapd %xmm2, %xmm4 2164; SSE41-NEXT: pcmpeqd 
%xmm6, %xmm4 2165; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 2166; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 2167; SSE41-NEXT: pand %xmm4, %xmm0 2168; SSE41-NEXT: por %xmm2, %xmm0 2169; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 2170; SSE41-NEXT: packssdw %xmm3, %xmm5 2171; SSE41-NEXT: packssdw %xmm5, %xmm1 2172; SSE41-NEXT: movdqa %xmm1, %xmm0 2173; SSE41-NEXT: retq 2174; 2175; AVX1-LABEL: trunc_ssat_v8i64_v8i16: 2176; AVX1: # %bb.0: 2177; AVX1-NEXT: vmovdqa (%rdi), %xmm0 2178; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 2179; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 2180; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 2181; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32767,32767] 2182; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm8 2183; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm6 2184; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm7 2185; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5 2186; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 2187; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [18446744073709518848,18446744073709518848] 2188; AVX1-NEXT: vpcmpgtq %xmm5, %xmm0, %xmm9 2189; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm4, %xmm1 2190; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm7 2191; AVX1-NEXT: vblendvpd %xmm6, %xmm2, %xmm4, %xmm2 2192; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm6 2193; AVX1-NEXT: vblendvpd %xmm8, %xmm3, %xmm4, %xmm3 2194; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm4 2195; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm5, %xmm3 2196; AVX1-NEXT: vblendvpd %xmm6, %xmm2, %xmm5, %xmm2 2197; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 2198; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm5, %xmm1 2199; AVX1-NEXT: vblendvpd %xmm9, %xmm0, %xmm5, %xmm0 2200; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2201; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 2202; AVX1-NEXT: retq 2203; 2204; AVX2-LABEL: trunc_ssat_v8i64_v8i16: 2205; AVX2: # %bb.0: 2206; AVX2-NEXT: vmovdqa (%rdi), %ymm0 2207; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 2208; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [32767,32767,32767,32767] 2209; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 2210; AVX2-NEXT: vblendvpd 
%ymm3, %ymm0, %ymm2, %ymm0 2211; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 2212; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 2213; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] 2214; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 2215; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 2216; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 2217; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 2218; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 2219; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2220; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2221; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 2222; AVX2-NEXT: vzeroupper 2223; AVX2-NEXT: retq 2224; 2225; AVX512-LABEL: trunc_ssat_v8i64_v8i16: 2226; AVX512: # %bb.0: 2227; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 2228; AVX512-NEXT: vpmovsqw %zmm0, %xmm0 2229; AVX512-NEXT: vzeroupper 2230; AVX512-NEXT: retq 2231; 2232; SKX-LABEL: trunc_ssat_v8i64_v8i16: 2233; SKX: # %bb.0: 2234; SKX-NEXT: vmovdqa (%rdi), %ymm0 2235; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 2236; SKX-NEXT: vpmovsqw %ymm1, %xmm1 2237; SKX-NEXT: vpmovsqw %ymm0, %xmm0 2238; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2239; SKX-NEXT: vzeroupper 2240; SKX-NEXT: retq 2241 %a0 = load <8 x i64>, <8 x i64>* %p0 2242 %1 = icmp slt <8 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767> 2243 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767> 2244 %3 = icmp sgt <8 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768> 2245 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768> 2246 %5 = trunc <8 x i64> %4 to <8 x i16> 2247 ret <8 x i16> %5 2248} 2249 2250define <4 x i16> @trunc_ssat_v4i32_v4i16(<4 x i32> %a0) { 2251; SSE-LABEL: 
trunc_ssat_v4i32_v4i16: 2252; SSE: # %bb.0: 2253; SSE-NEXT: packssdw %xmm0, %xmm0 2254; SSE-NEXT: retq 2255; 2256; AVX-LABEL: trunc_ssat_v4i32_v4i16: 2257; AVX: # %bb.0: 2258; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2259; AVX-NEXT: retq 2260; 2261; AVX512-LABEL: trunc_ssat_v4i32_v4i16: 2262; AVX512: # %bb.0: 2263; AVX512-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2264; AVX512-NEXT: retq 2265; 2266; SKX-LABEL: trunc_ssat_v4i32_v4i16: 2267; SKX: # %bb.0: 2268; SKX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2269; SKX-NEXT: retq 2270 %1 = icmp slt <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767> 2271 %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767> 2272 %3 = icmp sgt <4 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768> 2273 %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768> 2274 %5 = trunc <4 x i32> %4 to <4 x i16> 2275 ret <4 x i16> %5 2276} 2277 2278define void @trunc_ssat_v4i32_v4i16_store(<4 x i32> %a0, <4 x i16> *%p1) { 2279; SSE-LABEL: trunc_ssat_v4i32_v4i16_store: 2280; SSE: # %bb.0: 2281; SSE-NEXT: packssdw %xmm0, %xmm0 2282; SSE-NEXT: movq %xmm0, (%rdi) 2283; SSE-NEXT: retq 2284; 2285; AVX-LABEL: trunc_ssat_v4i32_v4i16_store: 2286; AVX: # %bb.0: 2287; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2288; AVX-NEXT: vmovq %xmm0, (%rdi) 2289; AVX-NEXT: retq 2290; 2291; AVX512F-LABEL: trunc_ssat_v4i32_v4i16_store: 2292; AVX512F: # %bb.0: 2293; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2294; AVX512F-NEXT: vmovq %xmm0, (%rdi) 2295; AVX512F-NEXT: retq 2296; 2297; AVX512VL-LABEL: trunc_ssat_v4i32_v4i16_store: 2298; AVX512VL: # %bb.0: 2299; AVX512VL-NEXT: vpmovsdw %xmm0, (%rdi) 2300; AVX512VL-NEXT: retq 2301; 2302; AVX512BW-LABEL: trunc_ssat_v4i32_v4i16_store: 2303; AVX512BW: # %bb.0: 2304; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2305; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 2306; AVX512BW-NEXT: retq 2307; 2308; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i16_store: 2309; AVX512BWVL: 
# %bb.0: 2310; AVX512BWVL-NEXT: vpmovsdw %xmm0, (%rdi) 2311; AVX512BWVL-NEXT: retq 2312; 2313; SKX-LABEL: trunc_ssat_v4i32_v4i16_store: 2314; SKX: # %bb.0: 2315; SKX-NEXT: vpmovsdw %xmm0, (%rdi) 2316; SKX-NEXT: retq 2317 %1 = icmp slt <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767> 2318 %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767> 2319 %3 = icmp sgt <4 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768> 2320 %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768> 2321 %5 = trunc <4 x i32> %4 to <4 x i16> 2322 store <4 x i16> %5, <4 x i16> *%p1 2323 ret void 2324} 2325 2326define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) { 2327; SSE-LABEL: trunc_ssat_v8i32_v8i16: 2328; SSE: # %bb.0: 2329; SSE-NEXT: packssdw %xmm1, %xmm0 2330; SSE-NEXT: retq 2331; 2332; AVX1-LABEL: trunc_ssat_v8i32_v8i16: 2333; AVX1: # %bb.0: 2334; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2335; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2336; AVX1-NEXT: vzeroupper 2337; AVX1-NEXT: retq 2338; 2339; AVX2-LABEL: trunc_ssat_v8i32_v8i16: 2340; AVX2: # %bb.0: 2341; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2342; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2343; AVX2-NEXT: vzeroupper 2344; AVX2-NEXT: retq 2345; 2346; AVX512F-LABEL: trunc_ssat_v8i32_v8i16: 2347; AVX512F: # %bb.0: 2348; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 2349; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2350; AVX512F-NEXT: vzeroupper 2351; AVX512F-NEXT: retq 2352; 2353; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16: 2354; AVX512VL: # %bb.0: 2355; AVX512VL-NEXT: vpmovsdw %ymm0, %xmm0 2356; AVX512VL-NEXT: vzeroupper 2357; AVX512VL-NEXT: retq 2358; 2359; AVX512BW-LABEL: trunc_ssat_v8i32_v8i16: 2360; AVX512BW: # %bb.0: 2361; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 2362; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2363; AVX512BW-NEXT: vzeroupper 2364; AVX512BW-NEXT: retq 2365; 2366; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16: 
2367; AVX512BWVL: # %bb.0: 2368; AVX512BWVL-NEXT: vpmovsdw %ymm0, %xmm0 2369; AVX512BWVL-NEXT: vzeroupper 2370; AVX512BWVL-NEXT: retq 2371; 2372; SKX-LABEL: trunc_ssat_v8i32_v8i16: 2373; SKX: # %bb.0: 2374; SKX-NEXT: vpmovsdw %ymm0, %xmm0 2375; SKX-NEXT: vzeroupper 2376; SKX-NEXT: retq 2377 %1 = icmp slt <8 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> 2378 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> 2379 %3 = icmp sgt <8 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> 2380 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> 2381 %5 = trunc <8 x i32> %4 to <8 x i16> 2382 ret <8 x i16> %5 2383} 2384 2385define <16 x i16> @trunc_ssat_v16i32_v16i16(<16 x i32>* %p0) "min-legal-vector-width"="256" { 2386; SSE-LABEL: trunc_ssat_v16i32_v16i16: 2387; SSE: # %bb.0: 2388; SSE-NEXT: movdqa (%rdi), %xmm0 2389; SSE-NEXT: movdqa 32(%rdi), %xmm1 2390; SSE-NEXT: packssdw 16(%rdi), %xmm0 2391; SSE-NEXT: packssdw 48(%rdi), %xmm1 2392; SSE-NEXT: retq 2393; 2394; AVX1-LABEL: trunc_ssat_v16i32_v16i16: 2395; AVX1: # %bb.0: 2396; AVX1-NEXT: vmovdqa (%rdi), %xmm0 2397; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 2398; AVX1-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 2399; AVX1-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 2400; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2401; AVX1-NEXT: retq 2402; 2403; AVX2-LABEL: trunc_ssat_v16i32_v16i16: 2404; AVX2: # %bb.0: 2405; AVX2-NEXT: vmovdqa (%rdi), %ymm0 2406; AVX2-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 2407; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2408; AVX2-NEXT: retq 2409; 2410; AVX512-LABEL: trunc_ssat_v16i32_v16i16: 2411; AVX512: # %bb.0: 2412; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 2413; AVX512-NEXT: vpmovsdw %zmm0, %ymm0 2414; AVX512-NEXT: retq 
2415; 2416; SKX-LABEL: trunc_ssat_v16i32_v16i16: 2417; SKX: # %bb.0: 2418; SKX-NEXT: vmovdqa (%rdi), %ymm0 2419; SKX-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 2420; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2421; SKX-NEXT: retq 2422 %a0 = load <16 x i32>, <16 x i32>* %p0 2423 %1 = icmp slt <16 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> 2424 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> 2425 %3 = icmp sgt <16 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> 2426 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> 2427 %5 = trunc <16 x i32> %4 to <16 x i16> 2428 ret <16 x i16> %5 2429} 2430 2431; 2432; Signed saturation truncation to vXi8 2433; 2434 2435define <2 x i8> @trunc_ssat_v2i64_v2i8(<2 x i64> %a0) { 2436; SSE2-LABEL: trunc_ssat_v2i64_v2i8: 2437; SSE2: # %bb.0: 2438; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 2439; SSE2-NEXT: movdqa %xmm0, %xmm2 2440; SSE2-NEXT: pxor %xmm1, %xmm2 2441; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 2442; SSE2-NEXT: movdqa %xmm3, %xmm4 2443; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 2444; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 2445; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 2446; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2447; SSE2-NEXT: pand %xmm5, %xmm2 2448; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 2449; SSE2-NEXT: por %xmm2, 
%xmm3 2450; SSE2-NEXT: pand %xmm3, %xmm0 2451; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3 2452; SSE2-NEXT: por %xmm0, %xmm3 2453; SSE2-NEXT: pxor %xmm3, %xmm1 2454; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840] 2455; SSE2-NEXT: movdqa %xmm1, %xmm2 2456; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 2457; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 2458; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 2459; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2460; SSE2-NEXT: pand %xmm4, %xmm1 2461; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 2462; SSE2-NEXT: por %xmm1, %xmm0 2463; SSE2-NEXT: pand %xmm0, %xmm3 2464; SSE2-NEXT: pandn {{.*}}(%rip), %xmm0 2465; SSE2-NEXT: por %xmm3, %xmm0 2466; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 2467; SSE2-NEXT: packuswb %xmm0, %xmm0 2468; SSE2-NEXT: packuswb %xmm0, %xmm0 2469; SSE2-NEXT: packuswb %xmm0, %xmm0 2470; SSE2-NEXT: retq 2471; 2472; SSSE3-LABEL: trunc_ssat_v2i64_v2i8: 2473; SSSE3: # %bb.0: 2474; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 2475; SSSE3-NEXT: movdqa %xmm0, %xmm2 2476; SSSE3-NEXT: pxor %xmm1, %xmm2 2477; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 2478; SSSE3-NEXT: movdqa %xmm3, %xmm4 2479; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 2480; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 2481; SSSE3-NEXT: pcmpeqd %xmm3, %xmm2 2482; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2483; SSSE3-NEXT: pand %xmm5, %xmm2 2484; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 2485; SSSE3-NEXT: por %xmm2, %xmm3 2486; SSSE3-NEXT: pand %xmm3, %xmm0 2487; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm3 2488; SSSE3-NEXT: por %xmm0, %xmm3 2489; SSSE3-NEXT: pxor %xmm3, %xmm1 2490; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840] 2491; SSSE3-NEXT: movdqa %xmm1, %xmm2 2492; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 2493; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 2494; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 2495; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2496; SSSE3-NEXT: pand %xmm4, %xmm1 
2497; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 2498; SSSE3-NEXT: por %xmm1, %xmm0 2499; SSSE3-NEXT: pand %xmm0, %xmm3 2500; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm0 2501; SSSE3-NEXT: por %xmm3, %xmm0 2502; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2503; SSSE3-NEXT: retq 2504; 2505; SSE41-LABEL: trunc_ssat_v2i64_v2i8: 2506; SSE41: # %bb.0: 2507; SSE41-NEXT: movdqa %xmm0, %xmm1 2508; SSE41-NEXT: movapd {{.*#+}} xmm2 = [127,127] 2509; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 2510; SSE41-NEXT: pxor %xmm3, %xmm0 2511; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147483775,2147483775] 2512; SSE41-NEXT: movdqa %xmm4, %xmm5 2513; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 2514; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 2515; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 2516; SSE41-NEXT: pand %xmm5, %xmm0 2517; SSE41-NEXT: por %xmm4, %xmm0 2518; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 2519; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2520; SSE41-NEXT: pxor %xmm2, %xmm3 2521; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840] 2522; SSE41-NEXT: movdqa %xmm3, %xmm4 2523; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 2524; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 2525; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 2526; SSE41-NEXT: pand %xmm4, %xmm0 2527; SSE41-NEXT: por %xmm3, %xmm0 2528; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 2529; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2530; SSE41-NEXT: movdqa %xmm1, %xmm0 2531; SSE41-NEXT: retq 2532; 2533; AVX-LABEL: trunc_ssat_v2i64_v2i8: 2534; AVX: # %bb.0: 2535; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] 2536; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 2537; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 2538; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2539; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 2540; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 2541; AVX-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2542; AVX-NEXT: retq 2543; 2544; AVX512F-LABEL: trunc_ssat_v2i64_v2i8: 2545; AVX512F: # %bb.0: 2546; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2547; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 2548; AVX512F-NEXT: vzeroupper 2549; AVX512F-NEXT: retq 2550; 2551; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8: 2552; AVX512VL: # %bb.0: 2553; AVX512VL-NEXT: vpmovsqb %xmm0, %xmm0 2554; AVX512VL-NEXT: retq 2555; 2556; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8: 2557; AVX512BW: # %bb.0: 2558; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2559; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 2560; AVX512BW-NEXT: vzeroupper 2561; AVX512BW-NEXT: retq 2562; 2563; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8: 2564; AVX512BWVL: # %bb.0: 2565; AVX512BWVL-NEXT: vpmovsqb %xmm0, %xmm0 2566; AVX512BWVL-NEXT: retq 2567; 2568; SKX-LABEL: trunc_ssat_v2i64_v2i8: 2569; SKX: # %bb.0: 2570; SKX-NEXT: vpmovsqb %xmm0, %xmm0 2571; SKX-NEXT: retq 2572 %1 = icmp slt <2 x i64> %a0, <i64 127, i64 127> 2573 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127> 2574 %3 = icmp sgt <2 x i64> %2, <i64 -128, i64 -128> 2575 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -128, i64 -128> 2576 %5 = trunc <2 x i64> %4 to <2 x i8> 2577 ret <2 x i8> %5 2578} 2579 2580define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) { 2581; SSE2-LABEL: trunc_ssat_v2i64_v2i8_store: 2582; SSE2: # %bb.0: 2583; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 2584; SSE2-NEXT: movdqa %xmm0, %xmm2 2585; SSE2-NEXT: pxor %xmm1, %xmm2 2586; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 2587; SSE2-NEXT: movdqa %xmm3, %xmm4 2588; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 2589; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 2590; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 2591; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2592; SSE2-NEXT: pand %xmm5, %xmm2 2593; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 2594; SSE2-NEXT: por %xmm2, %xmm3 2595; SSE2-NEXT: pand 
%xmm3, %xmm0 2596; SSE2-NEXT: pandn {{.*}}(%rip), %xmm3 2597; SSE2-NEXT: por %xmm0, %xmm3 2598; SSE2-NEXT: pxor %xmm3, %xmm1 2599; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840] 2600; SSE2-NEXT: movdqa %xmm1, %xmm2 2601; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 2602; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 2603; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 2604; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 2605; SSE2-NEXT: pand %xmm4, %xmm0 2606; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 2607; SSE2-NEXT: por %xmm0, %xmm1 2608; SSE2-NEXT: pand %xmm1, %xmm3 2609; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1 2610; SSE2-NEXT: por %xmm3, %xmm1 2611; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 2612; SSE2-NEXT: packuswb %xmm1, %xmm1 2613; SSE2-NEXT: packuswb %xmm1, %xmm1 2614; SSE2-NEXT: packuswb %xmm1, %xmm1 2615; SSE2-NEXT: movd %xmm1, %eax 2616; SSE2-NEXT: movw %ax, (%rdi) 2617; SSE2-NEXT: retq 2618; 2619; SSSE3-LABEL: trunc_ssat_v2i64_v2i8_store: 2620; SSSE3: # %bb.0: 2621; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 2622; SSSE3-NEXT: movdqa %xmm0, %xmm2 2623; SSSE3-NEXT: pxor %xmm1, %xmm2 2624; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 2625; SSSE3-NEXT: movdqa %xmm3, %xmm4 2626; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 2627; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 2628; SSSE3-NEXT: pcmpeqd %xmm3, %xmm2 2629; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2630; SSSE3-NEXT: pand %xmm5, %xmm2 2631; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 2632; SSSE3-NEXT: por %xmm2, %xmm3 2633; SSSE3-NEXT: pand %xmm3, %xmm0 2634; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm3 2635; SSSE3-NEXT: por %xmm0, %xmm3 2636; SSSE3-NEXT: pxor %xmm3, %xmm1 2637; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840] 2638; SSSE3-NEXT: movdqa %xmm1, %xmm2 2639; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 2640; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 2641; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 2642; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = 
xmm1[1,1,3,3] 2643; SSSE3-NEXT: pand %xmm4, %xmm0 2644; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 2645; SSSE3-NEXT: por %xmm0, %xmm1 2646; SSSE3-NEXT: pand %xmm1, %xmm3 2647; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm1 2648; SSSE3-NEXT: por %xmm3, %xmm1 2649; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2650; SSSE3-NEXT: movd %xmm1, %eax 2651; SSSE3-NEXT: movw %ax, (%rdi) 2652; SSSE3-NEXT: retq 2653; 2654; SSE41-LABEL: trunc_ssat_v2i64_v2i8_store: 2655; SSE41: # %bb.0: 2656; SSE41-NEXT: movdqa %xmm0, %xmm1 2657; SSE41-NEXT: movapd {{.*#+}} xmm2 = [127,127] 2658; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 2659; SSE41-NEXT: pxor %xmm3, %xmm0 2660; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147483775,2147483775] 2661; SSE41-NEXT: movdqa %xmm4, %xmm5 2662; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 2663; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 2664; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 2665; SSE41-NEXT: pand %xmm5, %xmm0 2666; SSE41-NEXT: por %xmm4, %xmm0 2667; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 2668; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2669; SSE41-NEXT: pxor %xmm2, %xmm3 2670; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840] 2671; SSE41-NEXT: movdqa %xmm3, %xmm4 2672; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 2673; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 2674; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 2675; SSE41-NEXT: pand %xmm4, %xmm0 2676; SSE41-NEXT: por %xmm3, %xmm0 2677; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 2678; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2679; SSE41-NEXT: pextrw $0, %xmm1, (%rdi) 2680; SSE41-NEXT: retq 2681; 2682; AVX-LABEL: trunc_ssat_v2i64_v2i8_store: 2683; AVX: # %bb.0: 2684; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] 2685; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 2686; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 2687; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2688; 
AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 2689; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 2690; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2691; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) 2692; AVX-NEXT: retq 2693; 2694; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store: 2695; AVX512F: # %bb.0: 2696; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2697; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 2698; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) 2699; AVX512F-NEXT: vzeroupper 2700; AVX512F-NEXT: retq 2701; 2702; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8_store: 2703; AVX512VL: # %bb.0: 2704; AVX512VL-NEXT: vpmovsqb %xmm0, (%rdi) 2705; AVX512VL-NEXT: retq 2706; 2707; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8_store: 2708; AVX512BW: # %bb.0: 2709; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2710; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 2711; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) 2712; AVX512BW-NEXT: vzeroupper 2713; AVX512BW-NEXT: retq 2714; 2715; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8_store: 2716; AVX512BWVL: # %bb.0: 2717; AVX512BWVL-NEXT: vpmovsqb %xmm0, (%rdi) 2718; AVX512BWVL-NEXT: retq 2719; 2720; SKX-LABEL: trunc_ssat_v2i64_v2i8_store: 2721; SKX: # %bb.0: 2722; SKX-NEXT: vpmovsqb %xmm0, (%rdi) 2723; SKX-NEXT: retq 2724 %1 = icmp slt <2 x i64> %a0, <i64 127, i64 127> 2725 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127> 2726 %3 = icmp sgt <2 x i64> %2, <i64 -128, i64 -128> 2727 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -128, i64 -128> 2728 %5 = trunc <2 x i64> %4 to <2 x i8> 2729 store <2 x i8> %5, <2 x i8> *%p1 2730 ret void 2731} 2732 2733define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) { 2734; SSE2-LABEL: trunc_ssat_v4i64_v4i8: 2735; SSE2: # %bb.0: 2736; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 2737; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 2738; SSE2-NEXT: movdqa %xmm1, %xmm3 2739; SSE2-NEXT: pxor %xmm2, %xmm3 2740; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483775,2147483775] 2741; 
SSE2-NEXT: movdqa %xmm5, %xmm6 2742; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 2743; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 2744; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 2745; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2746; SSE2-NEXT: pand %xmm7, %xmm4 2747; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 2748; SSE2-NEXT: por %xmm4, %xmm3 2749; SSE2-NEXT: pand %xmm3, %xmm1 2750; SSE2-NEXT: pandn %xmm8, %xmm3 2751; SSE2-NEXT: por %xmm1, %xmm3 2752; SSE2-NEXT: movdqa %xmm0, %xmm1 2753; SSE2-NEXT: pxor %xmm2, %xmm1 2754; SSE2-NEXT: movdqa %xmm5, %xmm4 2755; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 2756; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 2757; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 2758; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2759; SSE2-NEXT: pand %xmm6, %xmm1 2760; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2761; SSE2-NEXT: por %xmm1, %xmm4 2762; SSE2-NEXT: pand %xmm4, %xmm0 2763; SSE2-NEXT: pandn %xmm8, %xmm4 2764; SSE2-NEXT: por %xmm0, %xmm4 2765; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 2766; SSE2-NEXT: movdqa %xmm4, %xmm0 2767; SSE2-NEXT: pxor %xmm2, %xmm0 2768; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840] 2769; SSE2-NEXT: movdqa %xmm0, %xmm6 2770; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 2771; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 2772; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 2773; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 2774; SSE2-NEXT: pand %xmm7, %xmm1 2775; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 2776; SSE2-NEXT: por %xmm1, %xmm0 2777; SSE2-NEXT: pand %xmm0, %xmm4 2778; SSE2-NEXT: pandn %xmm8, %xmm0 2779; SSE2-NEXT: por %xmm4, %xmm0 2780; SSE2-NEXT: pxor %xmm3, %xmm2 2781; SSE2-NEXT: movdqa %xmm2, %xmm1 2782; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 2783; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] 2784; SSE2-NEXT: pcmpeqd %xmm5, %xmm2 2785; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2786; SSE2-NEXT: pand %xmm4, %xmm2 2787; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 
2788; SSE2-NEXT: por %xmm2, %xmm1 2789; SSE2-NEXT: pand %xmm1, %xmm3 2790; SSE2-NEXT: pandn %xmm8, %xmm1 2791; SSE2-NEXT: por %xmm3, %xmm1 2792; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0] 2793; SSE2-NEXT: pand %xmm2, %xmm1 2794; SSE2-NEXT: pand %xmm2, %xmm0 2795; SSE2-NEXT: packuswb %xmm1, %xmm0 2796; SSE2-NEXT: packuswb %xmm0, %xmm0 2797; SSE2-NEXT: packuswb %xmm0, %xmm0 2798; SSE2-NEXT: retq 2799; 2800; SSSE3-LABEL: trunc_ssat_v4i64_v4i8: 2801; SSSE3: # %bb.0: 2802; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 2803; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 2804; SSSE3-NEXT: movdqa %xmm1, %xmm3 2805; SSSE3-NEXT: pxor %xmm2, %xmm3 2806; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483775,2147483775] 2807; SSSE3-NEXT: movdqa %xmm5, %xmm6 2808; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 2809; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 2810; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 2811; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2812; SSSE3-NEXT: pand %xmm7, %xmm4 2813; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 2814; SSSE3-NEXT: por %xmm4, %xmm3 2815; SSSE3-NEXT: pand %xmm3, %xmm1 2816; SSSE3-NEXT: pandn %xmm8, %xmm3 2817; SSSE3-NEXT: por %xmm1, %xmm3 2818; SSSE3-NEXT: movdqa %xmm0, %xmm1 2819; SSSE3-NEXT: pxor %xmm2, %xmm1 2820; SSSE3-NEXT: movdqa %xmm5, %xmm4 2821; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 2822; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 2823; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 2824; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2825; SSSE3-NEXT: pand %xmm6, %xmm1 2826; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2827; SSSE3-NEXT: por %xmm1, %xmm4 2828; SSSE3-NEXT: pand %xmm4, %xmm0 2829; SSSE3-NEXT: pandn %xmm8, %xmm4 2830; SSSE3-NEXT: por %xmm0, %xmm4 2831; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 2832; SSSE3-NEXT: movdqa %xmm4, %xmm0 2833; SSSE3-NEXT: pxor %xmm2, %xmm0 2834; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840] 2835; SSSE3-NEXT: movdqa 
%xmm0, %xmm6 2836; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 2837; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 2838; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0 2839; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 2840; SSSE3-NEXT: pand %xmm7, %xmm1 2841; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 2842; SSSE3-NEXT: por %xmm1, %xmm0 2843; SSSE3-NEXT: pand %xmm0, %xmm4 2844; SSSE3-NEXT: pandn %xmm8, %xmm0 2845; SSSE3-NEXT: por %xmm4, %xmm0 2846; SSSE3-NEXT: pxor %xmm3, %xmm2 2847; SSSE3-NEXT: movdqa %xmm2, %xmm1 2848; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1 2849; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] 2850; SSSE3-NEXT: pcmpeqd %xmm5, %xmm2 2851; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2852; SSSE3-NEXT: pand %xmm4, %xmm2 2853; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2854; SSSE3-NEXT: por %xmm2, %xmm1 2855; SSSE3-NEXT: pand %xmm1, %xmm3 2856; SSSE3-NEXT: pandn %xmm8, %xmm1 2857; SSSE3-NEXT: por %xmm3, %xmm1 2858; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> 2859; SSSE3-NEXT: pshufb %xmm2, %xmm1 2860; SSSE3-NEXT: pshufb %xmm2, %xmm0 2861; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2862; SSSE3-NEXT: retq 2863; 2864; SSE41-LABEL: trunc_ssat_v4i64_v4i8: 2865; SSE41: # %bb.0: 2866; SSE41-NEXT: movdqa %xmm0, %xmm2 2867; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] 2868; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 2869; SSE41-NEXT: movdqa %xmm1, %xmm0 2870; SSE41-NEXT: pxor %xmm3, %xmm0 2871; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483775,2147483775] 2872; SSE41-NEXT: movdqa %xmm6, %xmm5 2873; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 2874; SSE41-NEXT: movdqa %xmm6, %xmm7 2875; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 2876; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 2877; SSE41-NEXT: pand %xmm5, %xmm0 2878; SSE41-NEXT: por %xmm7, %xmm0 2879; SSE41-NEXT: movapd %xmm4, %xmm5 2880; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 2881; SSE41-NEXT: movdqa %xmm2, %xmm0 2882; SSE41-NEXT: 
pxor %xmm3, %xmm0 2883; SSE41-NEXT: movdqa %xmm6, %xmm1 2884; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 2885; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 2886; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 2887; SSE41-NEXT: pand %xmm1, %xmm0 2888; SSE41-NEXT: por %xmm6, %xmm0 2889; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 2890; SSE41-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551488,18446744073709551488] 2891; SSE41-NEXT: movapd %xmm4, %xmm1 2892; SSE41-NEXT: xorpd %xmm3, %xmm1 2893; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 2894; SSE41-NEXT: movapd %xmm1, %xmm7 2895; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 2896; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 2897; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 2898; SSE41-NEXT: pand %xmm7, %xmm0 2899; SSE41-NEXT: por %xmm1, %xmm0 2900; SSE41-NEXT: movapd %xmm2, %xmm1 2901; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 2902; SSE41-NEXT: xorpd %xmm5, %xmm3 2903; SSE41-NEXT: movapd %xmm3, %xmm4 2904; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 2905; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 2906; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 2907; SSE41-NEXT: pand %xmm4, %xmm0 2908; SSE41-NEXT: por %xmm3, %xmm0 2909; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 2910; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> 2911; SSE41-NEXT: pshufb %xmm0, %xmm2 2912; SSE41-NEXT: pshufb %xmm0, %xmm1 2913; SSE41-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2914; SSE41-NEXT: movdqa %xmm1, %xmm0 2915; SSE41-NEXT: retq 2916; 2917; AVX1-LABEL: trunc_ssat_v4i64_v4i8: 2918; AVX1: # %bb.0: 2919; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2920; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127] 2921; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 2922; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 2923; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 2924; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488] 2925; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 2926; AVX1-NEXT: vblendvpd 
%xmm3, %xmm1, %xmm2, %xmm1 2927; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 2928; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 2929; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> 2930; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2931; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 2932; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2933; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2934; AVX1-NEXT: vzeroupper 2935; AVX1-NEXT: retq 2936; 2937; AVX2-LABEL: trunc_ssat_v4i64_v4i8: 2938; AVX2: # %bb.0: 2939; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127] 2940; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 2941; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 2942; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 2943; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 2944; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 2945; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 2946; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> 2947; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2948; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2949; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2950; AVX2-NEXT: vzeroupper 2951; AVX2-NEXT: retq 2952; 2953; AVX512F-LABEL: trunc_ssat_v4i64_v4i8: 2954; AVX512F: # %bb.0: 2955; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2956; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 2957; AVX512F-NEXT: vzeroupper 2958; AVX512F-NEXT: retq 2959; 2960; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8: 2961; AVX512VL: # %bb.0: 2962; AVX512VL-NEXT: vpmovsqb %ymm0, %xmm0 2963; AVX512VL-NEXT: vzeroupper 2964; AVX512VL-NEXT: retq 2965; 2966; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8: 2967; AVX512BW: # %bb.0: 2968; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2969; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 2970; AVX512BW-NEXT: vzeroupper 2971; AVX512BW-NEXT: retq 2972; 2973; 
AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8: 2974; AVX512BWVL: # %bb.0: 2975; AVX512BWVL-NEXT: vpmovsqb %ymm0, %xmm0 2976; AVX512BWVL-NEXT: vzeroupper 2977; AVX512BWVL-NEXT: retq 2978; 2979; SKX-LABEL: trunc_ssat_v4i64_v4i8: 2980; SKX: # %bb.0: 2981; SKX-NEXT: vpmovsqb %ymm0, %xmm0 2982; SKX-NEXT: vzeroupper 2983; SKX-NEXT: retq 2984 %1 = icmp slt <4 x i64> %a0, <i64 127, i64 127, i64 127, i64 127> 2985 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 127, i64 127, i64 127, i64 127> 2986 %3 = icmp sgt <4 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128> 2987 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128> 2988 %5 = trunc <4 x i64> %4 to <4 x i8> 2989 ret <4 x i8> %5 2990} 2991 2992define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { 2993; SSE2-LABEL: trunc_ssat_v4i64_v4i8_store: 2994; SSE2: # %bb.0: 2995; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 2996; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 2997; SSE2-NEXT: movdqa %xmm1, %xmm3 2998; SSE2-NEXT: pxor %xmm2, %xmm3 2999; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483775,2147483775] 3000; SSE2-NEXT: movdqa %xmm5, %xmm6 3001; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 3002; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 3003; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 3004; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 3005; SSE2-NEXT: pand %xmm7, %xmm4 3006; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 3007; SSE2-NEXT: por %xmm4, %xmm3 3008; SSE2-NEXT: pand %xmm3, %xmm1 3009; SSE2-NEXT: pandn %xmm8, %xmm3 3010; SSE2-NEXT: por %xmm1, %xmm3 3011; SSE2-NEXT: movdqa %xmm0, %xmm1 3012; SSE2-NEXT: pxor %xmm2, %xmm1 3013; SSE2-NEXT: movdqa %xmm5, %xmm4 3014; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 3015; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 3016; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 3017; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3018; SSE2-NEXT: pand %xmm6, %xmm1 3019; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3020; SSE2-NEXT: por %xmm1, %xmm4 3021; 
SSE2-NEXT: pand %xmm4, %xmm0 3022; SSE2-NEXT: pandn %xmm8, %xmm4 3023; SSE2-NEXT: por %xmm0, %xmm4 3024; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 3025; SSE2-NEXT: movdqa %xmm4, %xmm0 3026; SSE2-NEXT: pxor %xmm2, %xmm0 3027; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840] 3028; SSE2-NEXT: movdqa %xmm0, %xmm6 3029; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 3030; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 3031; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 3032; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 3033; SSE2-NEXT: pand %xmm7, %xmm1 3034; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 3035; SSE2-NEXT: por %xmm1, %xmm0 3036; SSE2-NEXT: pand %xmm0, %xmm4 3037; SSE2-NEXT: pandn %xmm8, %xmm0 3038; SSE2-NEXT: por %xmm4, %xmm0 3039; SSE2-NEXT: pxor %xmm3, %xmm2 3040; SSE2-NEXT: movdqa %xmm2, %xmm1 3041; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 3042; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] 3043; SSE2-NEXT: pcmpeqd %xmm5, %xmm2 3044; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 3045; SSE2-NEXT: pand %xmm4, %xmm2 3046; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3047; SSE2-NEXT: por %xmm2, %xmm1 3048; SSE2-NEXT: pand %xmm1, %xmm3 3049; SSE2-NEXT: pandn %xmm8, %xmm1 3050; SSE2-NEXT: por %xmm3, %xmm1 3051; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0] 3052; SSE2-NEXT: pand %xmm2, %xmm1 3053; SSE2-NEXT: pand %xmm2, %xmm0 3054; SSE2-NEXT: packuswb %xmm1, %xmm0 3055; SSE2-NEXT: packuswb %xmm0, %xmm0 3056; SSE2-NEXT: packuswb %xmm0, %xmm0 3057; SSE2-NEXT: movd %xmm0, (%rdi) 3058; SSE2-NEXT: retq 3059; 3060; SSSE3-LABEL: trunc_ssat_v4i64_v4i8_store: 3061; SSSE3: # %bb.0: 3062; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 3063; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 3064; SSSE3-NEXT: movdqa %xmm1, %xmm3 3065; SSSE3-NEXT: pxor %xmm2, %xmm3 3066; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483775,2147483775] 3067; SSSE3-NEXT: movdqa %xmm5, %xmm6 3068; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 3069; 
SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 3070; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 3071; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 3072; SSSE3-NEXT: pand %xmm7, %xmm4 3073; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 3074; SSSE3-NEXT: por %xmm4, %xmm3 3075; SSSE3-NEXT: pand %xmm3, %xmm1 3076; SSSE3-NEXT: pandn %xmm8, %xmm3 3077; SSSE3-NEXT: por %xmm1, %xmm3 3078; SSSE3-NEXT: movdqa %xmm0, %xmm1 3079; SSSE3-NEXT: pxor %xmm2, %xmm1 3080; SSSE3-NEXT: movdqa %xmm5, %xmm4 3081; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 3082; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 3083; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 3084; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3085; SSSE3-NEXT: pand %xmm6, %xmm1 3086; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3087; SSSE3-NEXT: por %xmm1, %xmm4 3088; SSSE3-NEXT: pand %xmm4, %xmm0 3089; SSSE3-NEXT: pandn %xmm8, %xmm4 3090; SSSE3-NEXT: por %xmm0, %xmm4 3091; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 3092; SSSE3-NEXT: movdqa %xmm4, %xmm1 3093; SSSE3-NEXT: pxor %xmm2, %xmm1 3094; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840] 3095; SSSE3-NEXT: movdqa %xmm1, %xmm6 3096; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 3097; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 3098; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 3099; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 3100; SSSE3-NEXT: pand %xmm7, %xmm0 3101; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3] 3102; SSSE3-NEXT: por %xmm0, %xmm1 3103; SSSE3-NEXT: pand %xmm1, %xmm4 3104; SSSE3-NEXT: pandn %xmm8, %xmm1 3105; SSSE3-NEXT: por %xmm4, %xmm1 3106; SSSE3-NEXT: pxor %xmm3, %xmm2 3107; SSSE3-NEXT: movdqa %xmm2, %xmm0 3108; SSSE3-NEXT: pcmpgtd %xmm5, %xmm0 3109; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] 3110; SSSE3-NEXT: pcmpeqd %xmm5, %xmm2 3111; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 3112; SSSE3-NEXT: pand %xmm4, %xmm2 3113; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3114; SSSE3-NEXT: por %xmm2, 
%xmm0 3115; SSSE3-NEXT: pand %xmm0, %xmm3 3116; SSSE3-NEXT: pandn %xmm8, %xmm0 3117; SSSE3-NEXT: por %xmm3, %xmm0 3118; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> 3119; SSSE3-NEXT: pshufb %xmm2, %xmm0 3120; SSSE3-NEXT: pshufb %xmm2, %xmm1 3121; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3122; SSSE3-NEXT: movd %xmm1, (%rdi) 3123; SSSE3-NEXT: retq 3124; 3125; SSE41-LABEL: trunc_ssat_v4i64_v4i8_store: 3126; SSE41: # %bb.0: 3127; SSE41-NEXT: movdqa %xmm0, %xmm2 3128; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] 3129; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 3130; SSE41-NEXT: movdqa %xmm1, %xmm0 3131; SSE41-NEXT: pxor %xmm3, %xmm0 3132; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483775,2147483775] 3133; SSE41-NEXT: movdqa %xmm6, %xmm5 3134; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 3135; SSE41-NEXT: movdqa %xmm6, %xmm7 3136; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 3137; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 3138; SSE41-NEXT: pand %xmm5, %xmm0 3139; SSE41-NEXT: por %xmm7, %xmm0 3140; SSE41-NEXT: movapd %xmm4, %xmm5 3141; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 3142; SSE41-NEXT: movdqa %xmm2, %xmm0 3143; SSE41-NEXT: pxor %xmm3, %xmm0 3144; SSE41-NEXT: movdqa %xmm6, %xmm1 3145; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 3146; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 3147; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 3148; SSE41-NEXT: pand %xmm1, %xmm0 3149; SSE41-NEXT: por %xmm6, %xmm0 3150; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 3151; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 3152; SSE41-NEXT: movapd %xmm4, %xmm2 3153; SSE41-NEXT: xorpd %xmm3, %xmm2 3154; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 3155; SSE41-NEXT: movapd %xmm2, %xmm7 3156; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 3157; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 3158; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 3159; SSE41-NEXT: pand %xmm7, %xmm0 3160; SSE41-NEXT: 
por %xmm2, %xmm0 3161; SSE41-NEXT: movapd %xmm1, %xmm2 3162; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 3163; SSE41-NEXT: xorpd %xmm5, %xmm3 3164; SSE41-NEXT: movapd %xmm3, %xmm4 3165; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 3166; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 3167; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 3168; SSE41-NEXT: pand %xmm4, %xmm0 3169; SSE41-NEXT: por %xmm3, %xmm0 3170; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 3171; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> 3172; SSE41-NEXT: pshufb %xmm0, %xmm1 3173; SSE41-NEXT: pshufb %xmm0, %xmm2 3174; SSE41-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3175; SSE41-NEXT: movd %xmm2, (%rdi) 3176; SSE41-NEXT: retq 3177; 3178; AVX1-LABEL: trunc_ssat_v4i64_v4i8_store: 3179; AVX1: # %bb.0: 3180; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3181; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127] 3182; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 3183; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 3184; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 3185; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488] 3186; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 3187; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 3188; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 3189; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 3190; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> 3191; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3192; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 3193; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3194; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3195; AVX1-NEXT: vmovd %xmm0, (%rdi) 3196; AVX1-NEXT: vzeroupper 3197; AVX1-NEXT: retq 3198; 3199; AVX2-LABEL: trunc_ssat_v4i64_v4i8_store: 3200; AVX2: # %bb.0: 3201; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127] 3202; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 3203; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, 
%ymm0 3204; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 3205; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 3206; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 3207; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 3208; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> 3209; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3210; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3211; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3212; AVX2-NEXT: vmovd %xmm0, (%rdi) 3213; AVX2-NEXT: vzeroupper 3214; AVX2-NEXT: retq 3215; 3216; AVX512F-LABEL: trunc_ssat_v4i64_v4i8_store: 3217; AVX512F: # %bb.0: 3218; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3219; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 3220; AVX512F-NEXT: vmovd %xmm0, (%rdi) 3221; AVX512F-NEXT: vzeroupper 3222; AVX512F-NEXT: retq 3223; 3224; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8_store: 3225; AVX512VL: # %bb.0: 3226; AVX512VL-NEXT: vpmovsqb %ymm0, (%rdi) 3227; AVX512VL-NEXT: vzeroupper 3228; AVX512VL-NEXT: retq 3229; 3230; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8_store: 3231; AVX512BW: # %bb.0: 3232; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3233; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 3234; AVX512BW-NEXT: vmovd %xmm0, (%rdi) 3235; AVX512BW-NEXT: vzeroupper 3236; AVX512BW-NEXT: retq 3237; 3238; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8_store: 3239; AVX512BWVL: # %bb.0: 3240; AVX512BWVL-NEXT: vpmovsqb %ymm0, (%rdi) 3241; AVX512BWVL-NEXT: vzeroupper 3242; AVX512BWVL-NEXT: retq 3243; 3244; SKX-LABEL: trunc_ssat_v4i64_v4i8_store: 3245; SKX: # %bb.0: 3246; SKX-NEXT: vpmovsqb %ymm0, (%rdi) 3247; SKX-NEXT: vzeroupper 3248; SKX-NEXT: retq 3249 %1 = icmp slt <4 x i64> %a0, <i64 127, i64 127, i64 127, i64 127> 3250 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 127, i64 127, i64 127, i64 127> 3251 %3 = icmp sgt <4 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128> 3252 %4 = 
select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128> 3253 %5 = trunc <4 x i64> %4 to <4 x i8> 3254 store <4 x i8> %5, <4 x i8> *%p1 3255 ret void 3256} 3257 3258define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width"="256" { 3259; SSE2-LABEL: trunc_ssat_v8i64_v8i8: 3260; SSE2: # %bb.0: 3261; SSE2-NEXT: movdqa (%rdi), %xmm6 3262; SSE2-NEXT: movdqa 16(%rdi), %xmm9 3263; SSE2-NEXT: movdqa 32(%rdi), %xmm3 3264; SSE2-NEXT: movdqa 48(%rdi), %xmm5 3265; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 3266; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 3267; SSE2-NEXT: movdqa %xmm3, %xmm2 3268; SSE2-NEXT: pxor %xmm1, %xmm2 3269; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483775,2147483775] 3270; SSE2-NEXT: movdqa %xmm10, %xmm7 3271; SSE2-NEXT: pcmpgtd %xmm2, %xmm7 3272; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 3273; SSE2-NEXT: pcmpeqd %xmm10, %xmm2 3274; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 3275; SSE2-NEXT: pand %xmm0, %xmm4 3276; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3] 3277; SSE2-NEXT: por %xmm4, %xmm2 3278; SSE2-NEXT: pand %xmm2, %xmm3 3279; SSE2-NEXT: pandn %xmm8, %xmm2 3280; SSE2-NEXT: por %xmm3, %xmm2 3281; SSE2-NEXT: movdqa %xmm5, %xmm0 3282; SSE2-NEXT: pxor %xmm1, %xmm0 3283; SSE2-NEXT: movdqa %xmm10, %xmm3 3284; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3285; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 3286; SSE2-NEXT: pcmpeqd %xmm10, %xmm0 3287; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3288; SSE2-NEXT: pand %xmm4, %xmm0 3289; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3290; SSE2-NEXT: por %xmm0, %xmm3 3291; SSE2-NEXT: pand %xmm3, %xmm5 3292; SSE2-NEXT: pandn %xmm8, %xmm3 3293; SSE2-NEXT: por %xmm5, %xmm3 3294; SSE2-NEXT: movdqa %xmm6, %xmm0 3295; SSE2-NEXT: pxor %xmm1, %xmm0 3296; SSE2-NEXT: movdqa %xmm10, %xmm4 3297; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 3298; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 3299; SSE2-NEXT: pcmpeqd %xmm10, %xmm0 3300; SSE2-NEXT: pshufd {{.*#+}} 
xmm0 = xmm0[1,1,3,3] 3301; SSE2-NEXT: pand %xmm5, %xmm0 3302; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 3303; SSE2-NEXT: por %xmm0, %xmm5 3304; SSE2-NEXT: pand %xmm5, %xmm6 3305; SSE2-NEXT: pandn %xmm8, %xmm5 3306; SSE2-NEXT: por %xmm6, %xmm5 3307; SSE2-NEXT: movdqa %xmm9, %xmm0 3308; SSE2-NEXT: pxor %xmm1, %xmm0 3309; SSE2-NEXT: movdqa %xmm10, %xmm4 3310; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 3311; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 3312; SSE2-NEXT: pcmpeqd %xmm10, %xmm0 3313; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3314; SSE2-NEXT: pand %xmm6, %xmm0 3315; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3] 3316; SSE2-NEXT: por %xmm0, %xmm7 3317; SSE2-NEXT: pand %xmm7, %xmm9 3318; SSE2-NEXT: pandn %xmm8, %xmm7 3319; SSE2-NEXT: por %xmm9, %xmm7 3320; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 3321; SSE2-NEXT: movdqa %xmm7, %xmm0 3322; SSE2-NEXT: pxor %xmm1, %xmm0 3323; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840] 3324; SSE2-NEXT: movdqa %xmm0, %xmm4 3325; SSE2-NEXT: pcmpgtd %xmm9, %xmm4 3326; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 3327; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 3328; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3329; SSE2-NEXT: pand %xmm6, %xmm0 3330; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3331; SSE2-NEXT: por %xmm0, %xmm4 3332; SSE2-NEXT: pand %xmm4, %xmm7 3333; SSE2-NEXT: pandn %xmm8, %xmm4 3334; SSE2-NEXT: por %xmm7, %xmm4 3335; SSE2-NEXT: movdqa %xmm5, %xmm0 3336; SSE2-NEXT: pxor %xmm1, %xmm0 3337; SSE2-NEXT: movdqa %xmm0, %xmm6 3338; SSE2-NEXT: pcmpgtd %xmm9, %xmm6 3339; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] 3340; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 3341; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] 3342; SSE2-NEXT: pand %xmm10, %xmm7 3343; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 3344; SSE2-NEXT: por %xmm7, %xmm0 3345; SSE2-NEXT: pand %xmm0, %xmm5 3346; SSE2-NEXT: pandn %xmm8, %xmm0 3347; SSE2-NEXT: por %xmm5, %xmm0 3348; 
SSE2-NEXT: packssdw %xmm4, %xmm0 3349; SSE2-NEXT: movdqa %xmm3, %xmm4 3350; SSE2-NEXT: pxor %xmm1, %xmm4 3351; SSE2-NEXT: movdqa %xmm4, %xmm5 3352; SSE2-NEXT: pcmpgtd %xmm9, %xmm5 3353; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 3354; SSE2-NEXT: pcmpeqd %xmm9, %xmm4 3355; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3356; SSE2-NEXT: pand %xmm6, %xmm4 3357; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 3358; SSE2-NEXT: por %xmm4, %xmm5 3359; SSE2-NEXT: pand %xmm5, %xmm3 3360; SSE2-NEXT: pandn %xmm8, %xmm5 3361; SSE2-NEXT: por %xmm3, %xmm5 3362; SSE2-NEXT: pxor %xmm2, %xmm1 3363; SSE2-NEXT: movdqa %xmm1, %xmm3 3364; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 3365; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 3366; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 3367; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3368; SSE2-NEXT: pand %xmm4, %xmm1 3369; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3370; SSE2-NEXT: por %xmm1, %xmm3 3371; SSE2-NEXT: pand %xmm3, %xmm2 3372; SSE2-NEXT: pandn %xmm8, %xmm3 3373; SSE2-NEXT: por %xmm2, %xmm3 3374; SSE2-NEXT: packssdw %xmm5, %xmm3 3375; SSE2-NEXT: packssdw %xmm3, %xmm0 3376; SSE2-NEXT: packsswb %xmm0, %xmm0 3377; SSE2-NEXT: retq 3378; 3379; SSSE3-LABEL: trunc_ssat_v8i64_v8i8: 3380; SSSE3: # %bb.0: 3381; SSSE3-NEXT: movdqa (%rdi), %xmm6 3382; SSSE3-NEXT: movdqa 16(%rdi), %xmm9 3383; SSSE3-NEXT: movdqa 32(%rdi), %xmm3 3384; SSSE3-NEXT: movdqa 48(%rdi), %xmm5 3385; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 3386; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 3387; SSSE3-NEXT: movdqa %xmm3, %xmm2 3388; SSSE3-NEXT: pxor %xmm1, %xmm2 3389; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483775,2147483775] 3390; SSSE3-NEXT: movdqa %xmm10, %xmm7 3391; SSSE3-NEXT: pcmpgtd %xmm2, %xmm7 3392; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 3393; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2 3394; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 3395; SSSE3-NEXT: pand %xmm0, %xmm4 3396; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3] 3397; 
SSSE3-NEXT: por %xmm4, %xmm2 3398; SSSE3-NEXT: pand %xmm2, %xmm3 3399; SSSE3-NEXT: pandn %xmm8, %xmm2 3400; SSSE3-NEXT: por %xmm3, %xmm2 3401; SSSE3-NEXT: movdqa %xmm5, %xmm0 3402; SSSE3-NEXT: pxor %xmm1, %xmm0 3403; SSSE3-NEXT: movdqa %xmm10, %xmm3 3404; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 3405; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 3406; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0 3407; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3408; SSSE3-NEXT: pand %xmm4, %xmm0 3409; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3410; SSSE3-NEXT: por %xmm0, %xmm3 3411; SSSE3-NEXT: pand %xmm3, %xmm5 3412; SSSE3-NEXT: pandn %xmm8, %xmm3 3413; SSSE3-NEXT: por %xmm5, %xmm3 3414; SSSE3-NEXT: movdqa %xmm6, %xmm0 3415; SSSE3-NEXT: pxor %xmm1, %xmm0 3416; SSSE3-NEXT: movdqa %xmm10, %xmm4 3417; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 3418; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 3419; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0 3420; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3421; SSSE3-NEXT: pand %xmm5, %xmm0 3422; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 3423; SSSE3-NEXT: por %xmm0, %xmm5 3424; SSSE3-NEXT: pand %xmm5, %xmm6 3425; SSSE3-NEXT: pandn %xmm8, %xmm5 3426; SSSE3-NEXT: por %xmm6, %xmm5 3427; SSSE3-NEXT: movdqa %xmm9, %xmm0 3428; SSSE3-NEXT: pxor %xmm1, %xmm0 3429; SSSE3-NEXT: movdqa %xmm10, %xmm4 3430; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 3431; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 3432; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0 3433; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3434; SSSE3-NEXT: pand %xmm6, %xmm0 3435; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3] 3436; SSSE3-NEXT: por %xmm0, %xmm7 3437; SSSE3-NEXT: pand %xmm7, %xmm9 3438; SSSE3-NEXT: pandn %xmm8, %xmm7 3439; SSSE3-NEXT: por %xmm9, %xmm7 3440; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 3441; SSSE3-NEXT: movdqa %xmm7, %xmm0 3442; SSSE3-NEXT: pxor %xmm1, %xmm0 3443; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840] 3444; 
SSSE3-NEXT: movdqa %xmm0, %xmm4 3445; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4 3446; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 3447; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 3448; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3449; SSSE3-NEXT: pand %xmm6, %xmm0 3450; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3451; SSSE3-NEXT: por %xmm0, %xmm4 3452; SSSE3-NEXT: pand %xmm4, %xmm7 3453; SSSE3-NEXT: pandn %xmm8, %xmm4 3454; SSSE3-NEXT: por %xmm7, %xmm4 3455; SSSE3-NEXT: movdqa %xmm5, %xmm0 3456; SSSE3-NEXT: pxor %xmm1, %xmm0 3457; SSSE3-NEXT: movdqa %xmm0, %xmm6 3458; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6 3459; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] 3460; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 3461; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] 3462; SSSE3-NEXT: pand %xmm10, %xmm7 3463; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 3464; SSSE3-NEXT: por %xmm7, %xmm0 3465; SSSE3-NEXT: pand %xmm0, %xmm5 3466; SSSE3-NEXT: pandn %xmm8, %xmm0 3467; SSSE3-NEXT: por %xmm5, %xmm0 3468; SSSE3-NEXT: packssdw %xmm4, %xmm0 3469; SSSE3-NEXT: movdqa %xmm3, %xmm4 3470; SSSE3-NEXT: pxor %xmm1, %xmm4 3471; SSSE3-NEXT: movdqa %xmm4, %xmm5 3472; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5 3473; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 3474; SSSE3-NEXT: pcmpeqd %xmm9, %xmm4 3475; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3476; SSSE3-NEXT: pand %xmm6, %xmm4 3477; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 3478; SSSE3-NEXT: por %xmm4, %xmm5 3479; SSSE3-NEXT: pand %xmm5, %xmm3 3480; SSSE3-NEXT: pandn %xmm8, %xmm5 3481; SSSE3-NEXT: por %xmm3, %xmm5 3482; SSSE3-NEXT: pxor %xmm2, %xmm1 3483; SSSE3-NEXT: movdqa %xmm1, %xmm3 3484; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 3485; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 3486; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 3487; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3488; SSSE3-NEXT: pand %xmm4, %xmm1 3489; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3490; SSSE3-NEXT: por %xmm1, %xmm3 3491; SSSE3-NEXT: pand %xmm3, %xmm2 3492; 
SSSE3-NEXT: pandn %xmm8, %xmm3 3493; SSSE3-NEXT: por %xmm2, %xmm3 3494; SSSE3-NEXT: packssdw %xmm5, %xmm3 3495; SSSE3-NEXT: packssdw %xmm3, %xmm0 3496; SSSE3-NEXT: packsswb %xmm0, %xmm0 3497; SSSE3-NEXT: retq 3498; 3499; SSE41-LABEL: trunc_ssat_v8i64_v8i8: 3500; SSE41: # %bb.0: 3501; SSE41-NEXT: movdqa (%rdi), %xmm10 3502; SSE41-NEXT: movdqa 16(%rdi), %xmm9 3503; SSE41-NEXT: movdqa 32(%rdi), %xmm3 3504; SSE41-NEXT: movdqa 48(%rdi), %xmm5 3505; SSE41-NEXT: movapd {{.*#+}} xmm1 = [127,127] 3506; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 3507; SSE41-NEXT: movdqa %xmm3, %xmm0 3508; SSE41-NEXT: pxor %xmm2, %xmm0 3509; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147483775,2147483775] 3510; SSE41-NEXT: movdqa %xmm4, %xmm7 3511; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 3512; SSE41-NEXT: movdqa %xmm4, %xmm6 3513; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 3514; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 3515; SSE41-NEXT: pand %xmm7, %xmm0 3516; SSE41-NEXT: por %xmm6, %xmm0 3517; SSE41-NEXT: movapd %xmm1, %xmm8 3518; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm8 3519; SSE41-NEXT: movdqa %xmm5, %xmm0 3520; SSE41-NEXT: pxor %xmm2, %xmm0 3521; SSE41-NEXT: movdqa %xmm4, %xmm3 3522; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 3523; SSE41-NEXT: movdqa %xmm4, %xmm6 3524; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 3525; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 3526; SSE41-NEXT: pand %xmm3, %xmm0 3527; SSE41-NEXT: por %xmm6, %xmm0 3528; SSE41-NEXT: movapd %xmm1, %xmm11 3529; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm11 3530; SSE41-NEXT: movdqa %xmm10, %xmm0 3531; SSE41-NEXT: pxor %xmm2, %xmm0 3532; SSE41-NEXT: movdqa %xmm4, %xmm3 3533; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 3534; SSE41-NEXT: movdqa %xmm4, %xmm5 3535; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 3536; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 3537; SSE41-NEXT: pand %xmm3, %xmm0 3538; SSE41-NEXT: por %xmm5, %xmm0 3539; SSE41-NEXT: movapd %xmm1, %xmm3 3540; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm3 3541; SSE41-NEXT: movdqa %xmm9, %xmm0 3542; SSE41-NEXT: 
pxor %xmm2, %xmm0 3543; SSE41-NEXT: movdqa %xmm4, %xmm5 3544; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 3545; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 3546; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 3547; SSE41-NEXT: pand %xmm5, %xmm0 3548; SSE41-NEXT: por %xmm4, %xmm0 3549; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm1 3550; SSE41-NEXT: movapd {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488] 3551; SSE41-NEXT: movapd %xmm1, %xmm4 3552; SSE41-NEXT: xorpd %xmm2, %xmm4 3553; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 3554; SSE41-NEXT: movapd %xmm4, %xmm7 3555; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 3556; SSE41-NEXT: pcmpgtd %xmm6, %xmm4 3557; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 3558; SSE41-NEXT: pand %xmm7, %xmm0 3559; SSE41-NEXT: por %xmm4, %xmm0 3560; SSE41-NEXT: movapd %xmm5, %xmm4 3561; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 3562; SSE41-NEXT: movapd %xmm3, %xmm1 3563; SSE41-NEXT: xorpd %xmm2, %xmm1 3564; SSE41-NEXT: movapd %xmm1, %xmm7 3565; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 3566; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 3567; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 3568; SSE41-NEXT: pand %xmm7, %xmm0 3569; SSE41-NEXT: por %xmm1, %xmm0 3570; SSE41-NEXT: movapd %xmm5, %xmm1 3571; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 3572; SSE41-NEXT: packssdw %xmm4, %xmm1 3573; SSE41-NEXT: movapd %xmm11, %xmm3 3574; SSE41-NEXT: xorpd %xmm2, %xmm3 3575; SSE41-NEXT: movapd %xmm3, %xmm4 3576; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 3577; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 3578; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 3579; SSE41-NEXT: pand %xmm4, %xmm0 3580; SSE41-NEXT: por %xmm3, %xmm0 3581; SSE41-NEXT: movapd %xmm5, %xmm3 3582; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm3 3583; SSE41-NEXT: xorpd %xmm8, %xmm2 3584; SSE41-NEXT: movapd %xmm2, %xmm4 3585; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 3586; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 3587; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 3588; SSE41-NEXT: pand %xmm4, %xmm0 3589; SSE41-NEXT: por %xmm2, 
%xmm0 3590; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 3591; SSE41-NEXT: packssdw %xmm3, %xmm5 3592; SSE41-NEXT: packssdw %xmm5, %xmm1 3593; SSE41-NEXT: packsswb %xmm1, %xmm1 3594; SSE41-NEXT: movdqa %xmm1, %xmm0 3595; SSE41-NEXT: retq 3596; 3597; AVX1-LABEL: trunc_ssat_v8i64_v8i8: 3598; AVX1: # %bb.0: 3599; AVX1-NEXT: vmovdqa (%rdi), %xmm0 3600; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 3601; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 3602; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 3603; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [127,127] 3604; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm8 3605; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm6 3606; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm7 3607; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5 3608; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 3609; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488] 3610; AVX1-NEXT: vpcmpgtq %xmm5, %xmm0, %xmm9 3611; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm4, %xmm1 3612; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm7 3613; AVX1-NEXT: vblendvpd %xmm6, %xmm2, %xmm4, %xmm2 3614; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm6 3615; AVX1-NEXT: vblendvpd %xmm8, %xmm3, %xmm4, %xmm3 3616; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm4 3617; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm5, %xmm3 3618; AVX1-NEXT: vblendvpd %xmm6, %xmm2, %xmm5, %xmm2 3619; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 3620; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm5, %xmm1 3621; AVX1-NEXT: vblendvpd %xmm9, %xmm0, %xmm5, %xmm0 3622; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3623; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 3624; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3625; AVX1-NEXT: retq 3626; 3627; AVX2-LABEL: trunc_ssat_v8i64_v8i8: 3628; AVX2: # %bb.0: 3629; AVX2-NEXT: vmovdqa (%rdi), %ymm0 3630; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 3631; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127] 3632; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 3633; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 3634; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 3635; AVX2-NEXT: vblendvpd 
%ymm3, %ymm1, %ymm2, %ymm1 3636; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 3637; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 3638; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 3639; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 3640; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 3641; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 3642; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 3643; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3644; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3645; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3646; AVX2-NEXT: vzeroupper 3647; AVX2-NEXT: retq 3648; 3649; AVX512-LABEL: trunc_ssat_v8i64_v8i8: 3650; AVX512: # %bb.0: 3651; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 3652; AVX512-NEXT: vpmovsqb %zmm0, %xmm0 3653; AVX512-NEXT: vzeroupper 3654; AVX512-NEXT: retq 3655; 3656; SKX-LABEL: trunc_ssat_v8i64_v8i8: 3657; SKX: # %bb.0: 3658; SKX-NEXT: vmovdqa (%rdi), %ymm0 3659; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 3660; SKX-NEXT: vpmovsqb %ymm1, %xmm1 3661; SKX-NEXT: vpmovsqb %ymm0, %xmm0 3662; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3663; SKX-NEXT: vzeroupper 3664; SKX-NEXT: retq 3665 %a0 = load <8 x i64>, <8 x i64>* %p0 3666 %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 3667 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 3668 %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 3669 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 3670 %5 = trunc <8 x i64> %4 to <8 x i8> 3671 ret <8 x i8> %5 3672} 3673 3674; TODO: The AVX1 codegen shows a missed opportunity to narrow blendv+logic to 128-bit. 
3675 3676define void @trunc_ssat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-legal-vector-width"="256" { 3677; SSE2-LABEL: trunc_ssat_v8i64_v8i8_store: 3678; SSE2: # %bb.0: 3679; SSE2-NEXT: movdqa (%rdi), %xmm6 3680; SSE2-NEXT: movdqa 16(%rdi), %xmm9 3681; SSE2-NEXT: movdqa 32(%rdi), %xmm2 3682; SSE2-NEXT: movdqa 48(%rdi), %xmm5 3683; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 3684; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] 3685; SSE2-NEXT: movdqa %xmm2, %xmm1 3686; SSE2-NEXT: pxor %xmm0, %xmm1 3687; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483775,2147483775] 3688; SSE2-NEXT: movdqa %xmm10, %xmm7 3689; SSE2-NEXT: pcmpgtd %xmm1, %xmm7 3690; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2] 3691; SSE2-NEXT: pcmpeqd %xmm10, %xmm1 3692; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] 3693; SSE2-NEXT: pand %xmm3, %xmm4 3694; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3] 3695; SSE2-NEXT: por %xmm4, %xmm1 3696; SSE2-NEXT: pand %xmm1, %xmm2 3697; SSE2-NEXT: pandn %xmm8, %xmm1 3698; SSE2-NEXT: por %xmm2, %xmm1 3699; SSE2-NEXT: movdqa %xmm5, %xmm2 3700; SSE2-NEXT: pxor %xmm0, %xmm2 3701; SSE2-NEXT: movdqa %xmm10, %xmm3 3702; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 3703; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 3704; SSE2-NEXT: pcmpeqd %xmm10, %xmm2 3705; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] 3706; SSE2-NEXT: pand %xmm4, %xmm7 3707; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 3708; SSE2-NEXT: por %xmm7, %xmm2 3709; SSE2-NEXT: pand %xmm2, %xmm5 3710; SSE2-NEXT: pandn %xmm8, %xmm2 3711; SSE2-NEXT: por %xmm5, %xmm2 3712; SSE2-NEXT: movdqa %xmm6, %xmm3 3713; SSE2-NEXT: pxor %xmm0, %xmm3 3714; SSE2-NEXT: movdqa %xmm10, %xmm4 3715; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 3716; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 3717; SSE2-NEXT: pcmpeqd %xmm10, %xmm3 3718; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3719; SSE2-NEXT: pand %xmm5, %xmm3 3720; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 3721; SSE2-NEXT: por %xmm3, %xmm5 3722; SSE2-NEXT: 
pand %xmm5, %xmm6 3723; SSE2-NEXT: pandn %xmm8, %xmm5 3724; SSE2-NEXT: por %xmm6, %xmm5 3725; SSE2-NEXT: movdqa %xmm9, %xmm3 3726; SSE2-NEXT: pxor %xmm0, %xmm3 3727; SSE2-NEXT: movdqa %xmm10, %xmm4 3728; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 3729; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 3730; SSE2-NEXT: pcmpeqd %xmm10, %xmm3 3731; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3732; SSE2-NEXT: pand %xmm6, %xmm3 3733; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3] 3734; SSE2-NEXT: por %xmm3, %xmm6 3735; SSE2-NEXT: pand %xmm6, %xmm9 3736; SSE2-NEXT: pandn %xmm8, %xmm6 3737; SSE2-NEXT: por %xmm9, %xmm6 3738; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 3739; SSE2-NEXT: movdqa %xmm6, %xmm7 3740; SSE2-NEXT: pxor %xmm0, %xmm7 3741; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840] 3742; SSE2-NEXT: movdqa %xmm7, %xmm3 3743; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 3744; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 3745; SSE2-NEXT: pcmpeqd %xmm9, %xmm7 3746; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 3747; SSE2-NEXT: pand %xmm4, %xmm7 3748; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3749; SSE2-NEXT: por %xmm7, %xmm3 3750; SSE2-NEXT: pand %xmm3, %xmm6 3751; SSE2-NEXT: pandn %xmm8, %xmm3 3752; SSE2-NEXT: por %xmm6, %xmm3 3753; SSE2-NEXT: movdqa %xmm5, %xmm4 3754; SSE2-NEXT: pxor %xmm0, %xmm4 3755; SSE2-NEXT: movdqa %xmm4, %xmm6 3756; SSE2-NEXT: pcmpgtd %xmm9, %xmm6 3757; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 3758; SSE2-NEXT: pcmpeqd %xmm9, %xmm4 3759; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3760; SSE2-NEXT: pand %xmm7, %xmm4 3761; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 3762; SSE2-NEXT: por %xmm4, %xmm6 3763; SSE2-NEXT: pand %xmm6, %xmm5 3764; SSE2-NEXT: pandn %xmm8, %xmm6 3765; SSE2-NEXT: por %xmm5, %xmm6 3766; SSE2-NEXT: packssdw %xmm3, %xmm6 3767; SSE2-NEXT: movdqa %xmm2, %xmm3 3768; SSE2-NEXT: pxor %xmm0, %xmm3 3769; SSE2-NEXT: movdqa %xmm3, %xmm4 3770; SSE2-NEXT: pcmpgtd 
%xmm9, %xmm4 3771; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 3772; SSE2-NEXT: pcmpeqd %xmm9, %xmm3 3773; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3774; SSE2-NEXT: pand %xmm5, %xmm3 3775; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3776; SSE2-NEXT: por %xmm3, %xmm4 3777; SSE2-NEXT: pand %xmm4, %xmm2 3778; SSE2-NEXT: pandn %xmm8, %xmm4 3779; SSE2-NEXT: por %xmm2, %xmm4 3780; SSE2-NEXT: pxor %xmm1, %xmm0 3781; SSE2-NEXT: movdqa %xmm0, %xmm2 3782; SSE2-NEXT: pcmpgtd %xmm9, %xmm2 3783; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 3784; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 3785; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3786; SSE2-NEXT: pand %xmm3, %xmm0 3787; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 3788; SSE2-NEXT: por %xmm0, %xmm2 3789; SSE2-NEXT: pand %xmm2, %xmm1 3790; SSE2-NEXT: pandn %xmm8, %xmm2 3791; SSE2-NEXT: por %xmm1, %xmm2 3792; SSE2-NEXT: packssdw %xmm4, %xmm2 3793; SSE2-NEXT: packssdw %xmm2, %xmm6 3794; SSE2-NEXT: packsswb %xmm6, %xmm6 3795; SSE2-NEXT: movq %xmm6, (%rsi) 3796; SSE2-NEXT: retq 3797; 3798; SSSE3-LABEL: trunc_ssat_v8i64_v8i8_store: 3799; SSSE3: # %bb.0: 3800; SSSE3-NEXT: movdqa (%rdi), %xmm6 3801; SSSE3-NEXT: movdqa 16(%rdi), %xmm9 3802; SSSE3-NEXT: movdqa 32(%rdi), %xmm2 3803; SSSE3-NEXT: movdqa 48(%rdi), %xmm5 3804; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 3805; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] 3806; SSSE3-NEXT: movdqa %xmm2, %xmm1 3807; SSSE3-NEXT: pxor %xmm0, %xmm1 3808; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483775,2147483775] 3809; SSSE3-NEXT: movdqa %xmm10, %xmm7 3810; SSSE3-NEXT: pcmpgtd %xmm1, %xmm7 3811; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2] 3812; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1 3813; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] 3814; SSSE3-NEXT: pand %xmm3, %xmm4 3815; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3] 3816; SSSE3-NEXT: por %xmm4, %xmm1 3817; SSSE3-NEXT: pand %xmm1, %xmm2 3818; SSSE3-NEXT: pandn %xmm8, %xmm1 3819; SSSE3-NEXT: por %xmm2, 
%xmm1 3820; SSSE3-NEXT: movdqa %xmm5, %xmm2 3821; SSSE3-NEXT: pxor %xmm0, %xmm2 3822; SSSE3-NEXT: movdqa %xmm10, %xmm3 3823; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 3824; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 3825; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2 3826; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] 3827; SSSE3-NEXT: pand %xmm4, %xmm7 3828; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 3829; SSSE3-NEXT: por %xmm7, %xmm2 3830; SSSE3-NEXT: pand %xmm2, %xmm5 3831; SSSE3-NEXT: pandn %xmm8, %xmm2 3832; SSSE3-NEXT: por %xmm5, %xmm2 3833; SSSE3-NEXT: movdqa %xmm6, %xmm3 3834; SSSE3-NEXT: pxor %xmm0, %xmm3 3835; SSSE3-NEXT: movdqa %xmm10, %xmm4 3836; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 3837; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 3838; SSSE3-NEXT: pcmpeqd %xmm10, %xmm3 3839; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3840; SSSE3-NEXT: pand %xmm5, %xmm3 3841; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 3842; SSSE3-NEXT: por %xmm3, %xmm5 3843; SSSE3-NEXT: pand %xmm5, %xmm6 3844; SSSE3-NEXT: pandn %xmm8, %xmm5 3845; SSSE3-NEXT: por %xmm6, %xmm5 3846; SSSE3-NEXT: movdqa %xmm9, %xmm3 3847; SSSE3-NEXT: pxor %xmm0, %xmm3 3848; SSSE3-NEXT: movdqa %xmm10, %xmm4 3849; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 3850; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 3851; SSSE3-NEXT: pcmpeqd %xmm10, %xmm3 3852; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3853; SSSE3-NEXT: pand %xmm6, %xmm3 3854; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3] 3855; SSSE3-NEXT: por %xmm3, %xmm6 3856; SSSE3-NEXT: pand %xmm6, %xmm9 3857; SSSE3-NEXT: pandn %xmm8, %xmm6 3858; SSSE3-NEXT: por %xmm9, %xmm6 3859; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 3860; SSSE3-NEXT: movdqa %xmm6, %xmm7 3861; SSSE3-NEXT: pxor %xmm0, %xmm7 3862; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840] 3863; SSSE3-NEXT: movdqa %xmm7, %xmm3 3864; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 3865; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 3866; 
SSSE3-NEXT: pcmpeqd %xmm9, %xmm7 3867; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 3868; SSSE3-NEXT: pand %xmm4, %xmm7 3869; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3870; SSSE3-NEXT: por %xmm7, %xmm3 3871; SSSE3-NEXT: pand %xmm3, %xmm6 3872; SSSE3-NEXT: pandn %xmm8, %xmm3 3873; SSSE3-NEXT: por %xmm6, %xmm3 3874; SSSE3-NEXT: movdqa %xmm5, %xmm4 3875; SSSE3-NEXT: pxor %xmm0, %xmm4 3876; SSSE3-NEXT: movdqa %xmm4, %xmm6 3877; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6 3878; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 3879; SSSE3-NEXT: pcmpeqd %xmm9, %xmm4 3880; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3881; SSSE3-NEXT: pand %xmm7, %xmm4 3882; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 3883; SSSE3-NEXT: por %xmm4, %xmm6 3884; SSSE3-NEXT: pand %xmm6, %xmm5 3885; SSSE3-NEXT: pandn %xmm8, %xmm6 3886; SSSE3-NEXT: por %xmm5, %xmm6 3887; SSSE3-NEXT: packssdw %xmm3, %xmm6 3888; SSSE3-NEXT: movdqa %xmm2, %xmm3 3889; SSSE3-NEXT: pxor %xmm0, %xmm3 3890; SSSE3-NEXT: movdqa %xmm3, %xmm4 3891; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4 3892; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 3893; SSSE3-NEXT: pcmpeqd %xmm9, %xmm3 3894; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3895; SSSE3-NEXT: pand %xmm5, %xmm3 3896; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3897; SSSE3-NEXT: por %xmm3, %xmm4 3898; SSSE3-NEXT: pand %xmm4, %xmm2 3899; SSSE3-NEXT: pandn %xmm8, %xmm4 3900; SSSE3-NEXT: por %xmm2, %xmm4 3901; SSSE3-NEXT: pxor %xmm1, %xmm0 3902; SSSE3-NEXT: movdqa %xmm0, %xmm2 3903; SSSE3-NEXT: pcmpgtd %xmm9, %xmm2 3904; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 3905; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 3906; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3907; SSSE3-NEXT: pand %xmm3, %xmm0 3908; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 3909; SSSE3-NEXT: por %xmm0, %xmm2 3910; SSSE3-NEXT: pand %xmm2, %xmm1 3911; SSSE3-NEXT: pandn %xmm8, %xmm2 3912; SSSE3-NEXT: por %xmm1, %xmm2 3913; SSSE3-NEXT: packssdw %xmm4, %xmm2 3914; SSSE3-NEXT: packssdw %xmm2, 
%xmm6 3915; SSSE3-NEXT: packsswb %xmm6, %xmm6 3916; SSSE3-NEXT: movq %xmm6, (%rsi) 3917; SSSE3-NEXT: retq 3918; 3919; SSE41-LABEL: trunc_ssat_v8i64_v8i8_store: 3920; SSE41: # %bb.0: 3921; SSE41-NEXT: movdqa (%rdi), %xmm10 3922; SSE41-NEXT: movdqa 16(%rdi), %xmm9 3923; SSE41-NEXT: movdqa 32(%rdi), %xmm2 3924; SSE41-NEXT: movdqa 48(%rdi), %xmm5 3925; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] 3926; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 3927; SSE41-NEXT: movdqa %xmm2, %xmm0 3928; SSE41-NEXT: pxor %xmm1, %xmm0 3929; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 3930; SSE41-NEXT: movdqa %xmm3, %xmm7 3931; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 3932; SSE41-NEXT: movdqa %xmm3, %xmm6 3933; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 3934; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 3935; SSE41-NEXT: pand %xmm7, %xmm0 3936; SSE41-NEXT: por %xmm6, %xmm0 3937; SSE41-NEXT: movapd %xmm4, %xmm8 3938; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm8 3939; SSE41-NEXT: movdqa %xmm5, %xmm0 3940; SSE41-NEXT: pxor %xmm1, %xmm0 3941; SSE41-NEXT: movdqa %xmm3, %xmm2 3942; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 3943; SSE41-NEXT: movdqa %xmm3, %xmm6 3944; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 3945; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 3946; SSE41-NEXT: pand %xmm2, %xmm0 3947; SSE41-NEXT: por %xmm6, %xmm0 3948; SSE41-NEXT: movapd %xmm4, %xmm11 3949; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm11 3950; SSE41-NEXT: movdqa %xmm10, %xmm0 3951; SSE41-NEXT: pxor %xmm1, %xmm0 3952; SSE41-NEXT: movdqa %xmm3, %xmm2 3953; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 3954; SSE41-NEXT: movdqa %xmm3, %xmm5 3955; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 3956; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 3957; SSE41-NEXT: pand %xmm2, %xmm0 3958; SSE41-NEXT: por %xmm5, %xmm0 3959; SSE41-NEXT: movapd %xmm4, %xmm2 3960; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm2 3961; SSE41-NEXT: movdqa %xmm9, %xmm0 3962; SSE41-NEXT: pxor %xmm1, %xmm0 3963; SSE41-NEXT: movdqa %xmm3, %xmm5 3964; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 
3965; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 3966; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 3967; SSE41-NEXT: pand %xmm5, %xmm0 3968; SSE41-NEXT: por %xmm3, %xmm0 3969; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm4 3970; SSE41-NEXT: movapd {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488] 3971; SSE41-NEXT: movapd %xmm4, %xmm3 3972; SSE41-NEXT: xorpd %xmm1, %xmm3 3973; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 3974; SSE41-NEXT: movapd %xmm3, %xmm7 3975; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 3976; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 3977; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 3978; SSE41-NEXT: pand %xmm7, %xmm0 3979; SSE41-NEXT: por %xmm3, %xmm0 3980; SSE41-NEXT: movapd %xmm5, %xmm3 3981; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 3982; SSE41-NEXT: movapd %xmm2, %xmm4 3983; SSE41-NEXT: xorpd %xmm1, %xmm4 3984; SSE41-NEXT: movapd %xmm4, %xmm7 3985; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 3986; SSE41-NEXT: pcmpgtd %xmm6, %xmm4 3987; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 3988; SSE41-NEXT: pand %xmm7, %xmm0 3989; SSE41-NEXT: por %xmm4, %xmm0 3990; SSE41-NEXT: movapd %xmm5, %xmm4 3991; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 3992; SSE41-NEXT: packssdw %xmm3, %xmm4 3993; SSE41-NEXT: movapd %xmm11, %xmm2 3994; SSE41-NEXT: xorpd %xmm1, %xmm2 3995; SSE41-NEXT: movapd %xmm2, %xmm3 3996; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 3997; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 3998; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 3999; SSE41-NEXT: pand %xmm3, %xmm0 4000; SSE41-NEXT: por %xmm2, %xmm0 4001; SSE41-NEXT: movapd %xmm5, %xmm2 4002; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm2 4003; SSE41-NEXT: xorpd %xmm8, %xmm1 4004; SSE41-NEXT: movapd %xmm1, %xmm3 4005; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 4006; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 4007; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 4008; SSE41-NEXT: pand %xmm3, %xmm0 4009; SSE41-NEXT: por %xmm1, %xmm0 4010; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 4011; SSE41-NEXT: packssdw %xmm2, %xmm5 
4012; SSE41-NEXT: packssdw %xmm5, %xmm4 4013; SSE41-NEXT: packsswb %xmm4, %xmm4 4014; SSE41-NEXT: movq %xmm4, (%rsi) 4015; SSE41-NEXT: retq 4016; 4017; AVX1-LABEL: trunc_ssat_v8i64_v8i8_store: 4018; AVX1: # %bb.0: 4019; AVX1-NEXT: vmovdqa (%rdi), %xmm0 4020; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 4021; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 4022; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 4023; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [127,127] 4024; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm8 4025; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm6 4026; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm7 4027; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5 4028; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 4029; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488] 4030; AVX1-NEXT: vpcmpgtq %xmm5, %xmm0, %xmm9 4031; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm4, %xmm1 4032; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm7 4033; AVX1-NEXT: vblendvpd %xmm6, %xmm2, %xmm4, %xmm2 4034; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm6 4035; AVX1-NEXT: vblendvpd %xmm8, %xmm3, %xmm4, %xmm3 4036; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm4 4037; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm5, %xmm3 4038; AVX1-NEXT: vblendvpd %xmm6, %xmm2, %xmm5, %xmm2 4039; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 4040; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm5, %xmm1 4041; AVX1-NEXT: vblendvpd %xmm9, %xmm0, %xmm5, %xmm0 4042; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 4043; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 4044; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4045; AVX1-NEXT: vmovq %xmm0, (%rsi) 4046; AVX1-NEXT: retq 4047; 4048; AVX2-LABEL: trunc_ssat_v8i64_v8i8_store: 4049; AVX2: # %bb.0: 4050; AVX2-NEXT: vmovdqa (%rdi), %ymm0 4051; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 4052; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127] 4053; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 4054; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 4055; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 4056; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 4057; AVX2-NEXT: 
vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 4058; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 4059; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 4060; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 4061; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 4062; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 4063; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4064; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 4065; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 4066; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4067; AVX2-NEXT: vmovq %xmm0, (%rsi) 4068; AVX2-NEXT: vzeroupper 4069; AVX2-NEXT: retq 4070; 4071; AVX512-LABEL: trunc_ssat_v8i64_v8i8_store: 4072; AVX512: # %bb.0: 4073; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 4074; AVX512-NEXT: vpmovsqb %zmm0, (%rsi) 4075; AVX512-NEXT: vzeroupper 4076; AVX512-NEXT: retq 4077; 4078; SKX-LABEL: trunc_ssat_v8i64_v8i8_store: 4079; SKX: # %bb.0: 4080; SKX-NEXT: vmovdqa (%rdi), %ymm0 4081; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 4082; SKX-NEXT: vpmovsqb %ymm1, %xmm1 4083; SKX-NEXT: vpmovsqb %ymm0, %xmm0 4084; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4085; SKX-NEXT: vmovq %xmm0, (%rsi) 4086; SKX-NEXT: vzeroupper 4087; SKX-NEXT: retq 4088 %a0 = load <8 x i64>, <8 x i64>* %p0 4089 %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 4090 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 4091 %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 4092 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 4093 %5 = trunc <8 x i64> %4 to <8 x i8> 4094 store <8 x i8> %5, <8 x i8> *%p1 4095 ret void 4096} 4097 4098define <16 x i8> @trunc_ssat_v16i64_v16i8(<16 x i64>* %p0) "min-legal-vector-width"="256" { 4099; SSE2-LABEL: 
trunc_ssat_v16i64_v16i8: 4100; SSE2: # %bb.0: 4101; SSE2-NEXT: movdqa (%rdi), %xmm10 4102; SSE2-NEXT: movdqa 16(%rdi), %xmm9 4103; SSE2-NEXT: movdqa 32(%rdi), %xmm15 4104; SSE2-NEXT: movdqa 48(%rdi), %xmm13 4105; SSE2-NEXT: movdqa 80(%rdi), %xmm6 4106; SSE2-NEXT: movdqa 64(%rdi), %xmm3 4107; SSE2-NEXT: movdqa 112(%rdi), %xmm4 4108; SSE2-NEXT: movdqa 96(%rdi), %xmm7 4109; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 4110; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 4111; SSE2-NEXT: movdqa %xmm7, %xmm5 4112; SSE2-NEXT: pxor %xmm1, %xmm5 4113; SSE2-NEXT: movdqa {{.*#+}} xmm14 = [2147483775,2147483775] 4114; SSE2-NEXT: movdqa %xmm14, %xmm0 4115; SSE2-NEXT: pcmpgtd %xmm5, %xmm0 4116; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] 4117; SSE2-NEXT: pcmpeqd %xmm14, %xmm5 4118; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 4119; SSE2-NEXT: pand %xmm2, %xmm5 4120; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm0[1,1,3,3] 4121; SSE2-NEXT: por %xmm5, %xmm11 4122; SSE2-NEXT: pand %xmm11, %xmm7 4123; SSE2-NEXT: pandn %xmm8, %xmm11 4124; SSE2-NEXT: por %xmm7, %xmm11 4125; SSE2-NEXT: movdqa %xmm4, %xmm0 4126; SSE2-NEXT: pxor %xmm1, %xmm0 4127; SSE2-NEXT: movdqa %xmm14, %xmm2 4128; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 4129; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] 4130; SSE2-NEXT: pcmpeqd %xmm14, %xmm0 4131; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4132; SSE2-NEXT: pand %xmm5, %xmm0 4133; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm2[1,1,3,3] 4134; SSE2-NEXT: por %xmm0, %xmm12 4135; SSE2-NEXT: pand %xmm12, %xmm4 4136; SSE2-NEXT: pandn %xmm8, %xmm12 4137; SSE2-NEXT: por %xmm4, %xmm12 4138; SSE2-NEXT: movdqa %xmm3, %xmm0 4139; SSE2-NEXT: pxor %xmm1, %xmm0 4140; SSE2-NEXT: movdqa %xmm14, %xmm2 4141; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 4142; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 4143; SSE2-NEXT: pcmpeqd %xmm14, %xmm0 4144; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4145; SSE2-NEXT: pand %xmm4, %xmm0 4146; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 4147; 
SSE2-NEXT: por %xmm0, %xmm4 4148; SSE2-NEXT: pand %xmm4, %xmm3 4149; SSE2-NEXT: pandn %xmm8, %xmm4 4150; SSE2-NEXT: por %xmm3, %xmm4 4151; SSE2-NEXT: movdqa %xmm6, %xmm0 4152; SSE2-NEXT: pxor %xmm1, %xmm0 4153; SSE2-NEXT: movdqa %xmm14, %xmm2 4154; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 4155; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 4156; SSE2-NEXT: pcmpeqd %xmm14, %xmm0 4157; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4158; SSE2-NEXT: pand %xmm3, %xmm0 4159; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3] 4160; SSE2-NEXT: por %xmm0, %xmm5 4161; SSE2-NEXT: pand %xmm5, %xmm6 4162; SSE2-NEXT: pandn %xmm8, %xmm5 4163; SSE2-NEXT: por %xmm6, %xmm5 4164; SSE2-NEXT: movdqa %xmm15, %xmm0 4165; SSE2-NEXT: pxor %xmm1, %xmm0 4166; SSE2-NEXT: movdqa %xmm14, %xmm2 4167; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 4168; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 4169; SSE2-NEXT: pcmpeqd %xmm14, %xmm0 4170; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4171; SSE2-NEXT: pand %xmm3, %xmm0 4172; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3] 4173; SSE2-NEXT: por %xmm0, %xmm6 4174; SSE2-NEXT: pand %xmm6, %xmm15 4175; SSE2-NEXT: pandn %xmm8, %xmm6 4176; SSE2-NEXT: por %xmm15, %xmm6 4177; SSE2-NEXT: movdqa %xmm13, %xmm0 4178; SSE2-NEXT: pxor %xmm1, %xmm0 4179; SSE2-NEXT: movdqa %xmm14, %xmm2 4180; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 4181; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 4182; SSE2-NEXT: pcmpeqd %xmm14, %xmm0 4183; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4184; SSE2-NEXT: pand %xmm3, %xmm0 4185; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm2[1,1,3,3] 4186; SSE2-NEXT: por %xmm0, %xmm15 4187; SSE2-NEXT: pand %xmm15, %xmm13 4188; SSE2-NEXT: pandn %xmm8, %xmm15 4189; SSE2-NEXT: por %xmm13, %xmm15 4190; SSE2-NEXT: movdqa %xmm10, %xmm0 4191; SSE2-NEXT: pxor %xmm1, %xmm0 4192; SSE2-NEXT: movdqa %xmm14, %xmm3 4193; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 4194; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[0,0,2,2] 4195; SSE2-NEXT: pcmpeqd %xmm14, %xmm0 4196; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[1,1,3,3] 4197; SSE2-NEXT: pand %xmm7, %xmm0 4198; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3] 4199; SSE2-NEXT: por %xmm0, %xmm13 4200; SSE2-NEXT: pand %xmm13, %xmm10 4201; SSE2-NEXT: pandn %xmm8, %xmm13 4202; SSE2-NEXT: por %xmm10, %xmm13 4203; SSE2-NEXT: movdqa %xmm9, %xmm0 4204; SSE2-NEXT: pxor %xmm1, %xmm0 4205; SSE2-NEXT: movdqa %xmm14, %xmm7 4206; SSE2-NEXT: pcmpgtd %xmm0, %xmm7 4207; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 4208; SSE2-NEXT: pcmpeqd %xmm14, %xmm0 4209; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4210; SSE2-NEXT: pand %xmm10, %xmm0 4211; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 4212; SSE2-NEXT: por %xmm0, %xmm7 4213; SSE2-NEXT: pand %xmm7, %xmm9 4214; SSE2-NEXT: pandn %xmm8, %xmm7 4215; SSE2-NEXT: por %xmm9, %xmm7 4216; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 4217; SSE2-NEXT: movdqa %xmm7, %xmm0 4218; SSE2-NEXT: pxor %xmm1, %xmm0 4219; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840] 4220; SSE2-NEXT: movdqa %xmm0, %xmm2 4221; SSE2-NEXT: pcmpgtd %xmm9, %xmm2 4222; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm2[0,0,2,2] 4223; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 4224; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4225; SSE2-NEXT: pand %xmm10, %xmm0 4226; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 4227; SSE2-NEXT: por %xmm0, %xmm2 4228; SSE2-NEXT: pand %xmm2, %xmm7 4229; SSE2-NEXT: pandn %xmm8, %xmm2 4230; SSE2-NEXT: por %xmm7, %xmm2 4231; SSE2-NEXT: movdqa %xmm13, %xmm0 4232; SSE2-NEXT: pxor %xmm1, %xmm0 4233; SSE2-NEXT: movdqa %xmm0, %xmm7 4234; SSE2-NEXT: pcmpgtd %xmm9, %xmm7 4235; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 4236; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 4237; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 4238; SSE2-NEXT: pand %xmm10, %xmm3 4239; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3] 4240; SSE2-NEXT: por %xmm3, %xmm0 4241; SSE2-NEXT: pand %xmm0, %xmm13 4242; SSE2-NEXT: pandn %xmm8, %xmm0 4243; SSE2-NEXT: por %xmm13, %xmm0 4244; 
SSE2-NEXT: packssdw %xmm2, %xmm0 4245; SSE2-NEXT: movdqa %xmm15, %xmm2 4246; SSE2-NEXT: pxor %xmm1, %xmm2 4247; SSE2-NEXT: movdqa %xmm2, %xmm3 4248; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 4249; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[0,0,2,2] 4250; SSE2-NEXT: pcmpeqd %xmm9, %xmm2 4251; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 4252; SSE2-NEXT: pand %xmm7, %xmm2 4253; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 4254; SSE2-NEXT: por %xmm2, %xmm3 4255; SSE2-NEXT: pand %xmm3, %xmm15 4256; SSE2-NEXT: pandn %xmm8, %xmm3 4257; SSE2-NEXT: por %xmm15, %xmm3 4258; SSE2-NEXT: movdqa %xmm6, %xmm2 4259; SSE2-NEXT: pxor %xmm1, %xmm2 4260; SSE2-NEXT: movdqa %xmm2, %xmm7 4261; SSE2-NEXT: pcmpgtd %xmm9, %xmm7 4262; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 4263; SSE2-NEXT: pcmpeqd %xmm9, %xmm2 4264; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 4265; SSE2-NEXT: pand %xmm10, %xmm2 4266; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 4267; SSE2-NEXT: por %xmm2, %xmm7 4268; SSE2-NEXT: pand %xmm7, %xmm6 4269; SSE2-NEXT: pandn %xmm8, %xmm7 4270; SSE2-NEXT: por %xmm6, %xmm7 4271; SSE2-NEXT: packssdw %xmm3, %xmm7 4272; SSE2-NEXT: packssdw %xmm7, %xmm0 4273; SSE2-NEXT: movdqa %xmm5, %xmm2 4274; SSE2-NEXT: pxor %xmm1, %xmm2 4275; SSE2-NEXT: movdqa %xmm2, %xmm3 4276; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 4277; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 4278; SSE2-NEXT: pcmpeqd %xmm9, %xmm2 4279; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 4280; SSE2-NEXT: pand %xmm6, %xmm2 4281; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 4282; SSE2-NEXT: por %xmm2, %xmm3 4283; SSE2-NEXT: pand %xmm3, %xmm5 4284; SSE2-NEXT: pandn %xmm8, %xmm3 4285; SSE2-NEXT: por %xmm5, %xmm3 4286; SSE2-NEXT: movdqa %xmm4, %xmm2 4287; SSE2-NEXT: pxor %xmm1, %xmm2 4288; SSE2-NEXT: movdqa %xmm2, %xmm5 4289; SSE2-NEXT: pcmpgtd %xmm9, %xmm5 4290; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 4291; SSE2-NEXT: pcmpeqd %xmm9, %xmm2 4292; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] 4293; SSE2-NEXT: pand 
%xmm6, %xmm7 4294; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3] 4295; SSE2-NEXT: por %xmm7, %xmm2 4296; SSE2-NEXT: pand %xmm2, %xmm4 4297; SSE2-NEXT: pandn %xmm8, %xmm2 4298; SSE2-NEXT: por %xmm4, %xmm2 4299; SSE2-NEXT: packssdw %xmm3, %xmm2 4300; SSE2-NEXT: movdqa %xmm12, %xmm3 4301; SSE2-NEXT: pxor %xmm1, %xmm3 4302; SSE2-NEXT: movdqa %xmm3, %xmm4 4303; SSE2-NEXT: pcmpgtd %xmm9, %xmm4 4304; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 4305; SSE2-NEXT: pcmpeqd %xmm9, %xmm3 4306; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 4307; SSE2-NEXT: pand %xmm5, %xmm3 4308; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 4309; SSE2-NEXT: por %xmm3, %xmm4 4310; SSE2-NEXT: pand %xmm4, %xmm12 4311; SSE2-NEXT: pandn %xmm8, %xmm4 4312; SSE2-NEXT: por %xmm12, %xmm4 4313; SSE2-NEXT: pxor %xmm11, %xmm1 4314; SSE2-NEXT: movdqa %xmm1, %xmm3 4315; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 4316; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 4317; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 4318; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 4319; SSE2-NEXT: pand %xmm5, %xmm1 4320; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 4321; SSE2-NEXT: por %xmm1, %xmm3 4322; SSE2-NEXT: pand %xmm3, %xmm11 4323; SSE2-NEXT: pandn %xmm8, %xmm3 4324; SSE2-NEXT: por %xmm11, %xmm3 4325; SSE2-NEXT: packssdw %xmm4, %xmm3 4326; SSE2-NEXT: packssdw %xmm3, %xmm2 4327; SSE2-NEXT: packsswb %xmm2, %xmm0 4328; SSE2-NEXT: retq 4329; 4330; SSSE3-LABEL: trunc_ssat_v16i64_v16i8: 4331; SSSE3: # %bb.0: 4332; SSSE3-NEXT: movdqa (%rdi), %xmm10 4333; SSSE3-NEXT: movdqa 16(%rdi), %xmm9 4334; SSSE3-NEXT: movdqa 32(%rdi), %xmm15 4335; SSSE3-NEXT: movdqa 48(%rdi), %xmm13 4336; SSSE3-NEXT: movdqa 80(%rdi), %xmm6 4337; SSSE3-NEXT: movdqa 64(%rdi), %xmm3 4338; SSSE3-NEXT: movdqa 112(%rdi), %xmm4 4339; SSSE3-NEXT: movdqa 96(%rdi), %xmm7 4340; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [127,127] 4341; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 4342; SSSE3-NEXT: movdqa %xmm7, %xmm5 4343; SSSE3-NEXT: pxor %xmm1, %xmm5 4344; 
SSSE3-NEXT: movdqa {{.*#+}} xmm14 = [2147483775,2147483775] 4345; SSSE3-NEXT: movdqa %xmm14, %xmm0 4346; SSSE3-NEXT: pcmpgtd %xmm5, %xmm0 4347; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] 4348; SSSE3-NEXT: pcmpeqd %xmm14, %xmm5 4349; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 4350; SSSE3-NEXT: pand %xmm2, %xmm5 4351; SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm0[1,1,3,3] 4352; SSSE3-NEXT: por %xmm5, %xmm11 4353; SSSE3-NEXT: pand %xmm11, %xmm7 4354; SSSE3-NEXT: pandn %xmm8, %xmm11 4355; SSSE3-NEXT: por %xmm7, %xmm11 4356; SSSE3-NEXT: movdqa %xmm4, %xmm0 4357; SSSE3-NEXT: pxor %xmm1, %xmm0 4358; SSSE3-NEXT: movdqa %xmm14, %xmm2 4359; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 4360; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] 4361; SSSE3-NEXT: pcmpeqd %xmm14, %xmm0 4362; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4363; SSSE3-NEXT: pand %xmm5, %xmm0 4364; SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm2[1,1,3,3] 4365; SSSE3-NEXT: por %xmm0, %xmm12 4366; SSSE3-NEXT: pand %xmm12, %xmm4 4367; SSSE3-NEXT: pandn %xmm8, %xmm12 4368; SSSE3-NEXT: por %xmm4, %xmm12 4369; SSSE3-NEXT: movdqa %xmm3, %xmm0 4370; SSSE3-NEXT: pxor %xmm1, %xmm0 4371; SSSE3-NEXT: movdqa %xmm14, %xmm2 4372; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 4373; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 4374; SSSE3-NEXT: pcmpeqd %xmm14, %xmm0 4375; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4376; SSSE3-NEXT: pand %xmm4, %xmm0 4377; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 4378; SSSE3-NEXT: por %xmm0, %xmm4 4379; SSSE3-NEXT: pand %xmm4, %xmm3 4380; SSSE3-NEXT: pandn %xmm8, %xmm4 4381; SSSE3-NEXT: por %xmm3, %xmm4 4382; SSSE3-NEXT: movdqa %xmm6, %xmm0 4383; SSSE3-NEXT: pxor %xmm1, %xmm0 4384; SSSE3-NEXT: movdqa %xmm14, %xmm2 4385; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 4386; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 4387; SSSE3-NEXT: pcmpeqd %xmm14, %xmm0 4388; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4389; SSSE3-NEXT: pand %xmm3, %xmm0 4390; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3] 
4391; SSSE3-NEXT: por %xmm0, %xmm5 4392; SSSE3-NEXT: pand %xmm5, %xmm6 4393; SSSE3-NEXT: pandn %xmm8, %xmm5 4394; SSSE3-NEXT: por %xmm6, %xmm5 4395; SSSE3-NEXT: movdqa %xmm15, %xmm0 4396; SSSE3-NEXT: pxor %xmm1, %xmm0 4397; SSSE3-NEXT: movdqa %xmm14, %xmm2 4398; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 4399; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 4400; SSSE3-NEXT: pcmpeqd %xmm14, %xmm0 4401; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4402; SSSE3-NEXT: pand %xmm3, %xmm0 4403; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3] 4404; SSSE3-NEXT: por %xmm0, %xmm6 4405; SSSE3-NEXT: pand %xmm6, %xmm15 4406; SSSE3-NEXT: pandn %xmm8, %xmm6 4407; SSSE3-NEXT: por %xmm15, %xmm6 4408; SSSE3-NEXT: movdqa %xmm13, %xmm0 4409; SSSE3-NEXT: pxor %xmm1, %xmm0 4410; SSSE3-NEXT: movdqa %xmm14, %xmm2 4411; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 4412; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 4413; SSSE3-NEXT: pcmpeqd %xmm14, %xmm0 4414; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4415; SSSE3-NEXT: pand %xmm3, %xmm0 4416; SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm2[1,1,3,3] 4417; SSSE3-NEXT: por %xmm0, %xmm15 4418; SSSE3-NEXT: pand %xmm15, %xmm13 4419; SSSE3-NEXT: pandn %xmm8, %xmm15 4420; SSSE3-NEXT: por %xmm13, %xmm15 4421; SSSE3-NEXT: movdqa %xmm10, %xmm0 4422; SSSE3-NEXT: pxor %xmm1, %xmm0 4423; SSSE3-NEXT: movdqa %xmm14, %xmm3 4424; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 4425; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[0,0,2,2] 4426; SSSE3-NEXT: pcmpeqd %xmm14, %xmm0 4427; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4428; SSSE3-NEXT: pand %xmm7, %xmm0 4429; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3] 4430; SSSE3-NEXT: por %xmm0, %xmm13 4431; SSSE3-NEXT: pand %xmm13, %xmm10 4432; SSSE3-NEXT: pandn %xmm8, %xmm13 4433; SSSE3-NEXT: por %xmm10, %xmm13 4434; SSSE3-NEXT: movdqa %xmm9, %xmm0 4435; SSSE3-NEXT: pxor %xmm1, %xmm0 4436; SSSE3-NEXT: movdqa %xmm14, %xmm7 4437; SSSE3-NEXT: pcmpgtd %xmm0, %xmm7 4438; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 4439; SSSE3-NEXT: 
pcmpeqd %xmm14, %xmm0 4440; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4441; SSSE3-NEXT: pand %xmm10, %xmm0 4442; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 4443; SSSE3-NEXT: por %xmm0, %xmm7 4444; SSSE3-NEXT: pand %xmm7, %xmm9 4445; SSSE3-NEXT: pandn %xmm8, %xmm7 4446; SSSE3-NEXT: por %xmm9, %xmm7 4447; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] 4448; SSSE3-NEXT: movdqa %xmm7, %xmm0 4449; SSSE3-NEXT: pxor %xmm1, %xmm0 4450; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840] 4451; SSSE3-NEXT: movdqa %xmm0, %xmm2 4452; SSSE3-NEXT: pcmpgtd %xmm9, %xmm2 4453; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm2[0,0,2,2] 4454; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 4455; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 4456; SSSE3-NEXT: pand %xmm10, %xmm0 4457; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 4458; SSSE3-NEXT: por %xmm0, %xmm2 4459; SSSE3-NEXT: pand %xmm2, %xmm7 4460; SSSE3-NEXT: pandn %xmm8, %xmm2 4461; SSSE3-NEXT: por %xmm7, %xmm2 4462; SSSE3-NEXT: movdqa %xmm13, %xmm0 4463; SSSE3-NEXT: pxor %xmm1, %xmm0 4464; SSSE3-NEXT: movdqa %xmm0, %xmm7 4465; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7 4466; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 4467; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 4468; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 4469; SSSE3-NEXT: pand %xmm10, %xmm3 4470; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3] 4471; SSSE3-NEXT: por %xmm3, %xmm0 4472; SSSE3-NEXT: pand %xmm0, %xmm13 4473; SSSE3-NEXT: pandn %xmm8, %xmm0 4474; SSSE3-NEXT: por %xmm13, %xmm0 4475; SSSE3-NEXT: packssdw %xmm2, %xmm0 4476; SSSE3-NEXT: movdqa %xmm15, %xmm2 4477; SSSE3-NEXT: pxor %xmm1, %xmm2 4478; SSSE3-NEXT: movdqa %xmm2, %xmm3 4479; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 4480; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[0,0,2,2] 4481; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2 4482; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 4483; SSSE3-NEXT: pand %xmm7, %xmm2 4484; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 4485; 
SSSE3-NEXT: por %xmm2, %xmm3 4486; SSSE3-NEXT: pand %xmm3, %xmm15 4487; SSSE3-NEXT: pandn %xmm8, %xmm3 4488; SSSE3-NEXT: por %xmm15, %xmm3 4489; SSSE3-NEXT: movdqa %xmm6, %xmm2 4490; SSSE3-NEXT: pxor %xmm1, %xmm2 4491; SSSE3-NEXT: movdqa %xmm2, %xmm7 4492; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7 4493; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 4494; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2 4495; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 4496; SSSE3-NEXT: pand %xmm10, %xmm2 4497; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 4498; SSSE3-NEXT: por %xmm2, %xmm7 4499; SSSE3-NEXT: pand %xmm7, %xmm6 4500; SSSE3-NEXT: pandn %xmm8, %xmm7 4501; SSSE3-NEXT: por %xmm6, %xmm7 4502; SSSE3-NEXT: packssdw %xmm3, %xmm7 4503; SSSE3-NEXT: packssdw %xmm7, %xmm0 4504; SSSE3-NEXT: movdqa %xmm5, %xmm2 4505; SSSE3-NEXT: pxor %xmm1, %xmm2 4506; SSSE3-NEXT: movdqa %xmm2, %xmm3 4507; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 4508; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 4509; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2 4510; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 4511; SSSE3-NEXT: pand %xmm6, %xmm2 4512; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 4513; SSSE3-NEXT: por %xmm2, %xmm3 4514; SSSE3-NEXT: pand %xmm3, %xmm5 4515; SSSE3-NEXT: pandn %xmm8, %xmm3 4516; SSSE3-NEXT: por %xmm5, %xmm3 4517; SSSE3-NEXT: movdqa %xmm4, %xmm2 4518; SSSE3-NEXT: pxor %xmm1, %xmm2 4519; SSSE3-NEXT: movdqa %xmm2, %xmm5 4520; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5 4521; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 4522; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2 4523; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] 4524; SSSE3-NEXT: pand %xmm6, %xmm7 4525; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3] 4526; SSSE3-NEXT: por %xmm7, %xmm2 4527; SSSE3-NEXT: pand %xmm2, %xmm4 4528; SSSE3-NEXT: pandn %xmm8, %xmm2 4529; SSSE3-NEXT: por %xmm4, %xmm2 4530; SSSE3-NEXT: packssdw %xmm3, %xmm2 4531; SSSE3-NEXT: movdqa %xmm12, %xmm3 4532; SSSE3-NEXT: pxor %xmm1, %xmm3 4533; SSSE3-NEXT: movdqa %xmm3, %xmm4 4534; SSSE3-NEXT: 
pcmpgtd %xmm9, %xmm4 4535; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 4536; SSSE3-NEXT: pcmpeqd %xmm9, %xmm3 4537; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 4538; SSSE3-NEXT: pand %xmm5, %xmm3 4539; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 4540; SSSE3-NEXT: por %xmm3, %xmm4 4541; SSSE3-NEXT: pand %xmm4, %xmm12 4542; SSSE3-NEXT: pandn %xmm8, %xmm4 4543; SSSE3-NEXT: por %xmm12, %xmm4 4544; SSSE3-NEXT: pxor %xmm11, %xmm1 4545; SSSE3-NEXT: movdqa %xmm1, %xmm3 4546; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 4547; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 4548; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 4549; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 4550; SSSE3-NEXT: pand %xmm5, %xmm1 4551; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 4552; SSSE3-NEXT: por %xmm1, %xmm3 4553; SSSE3-NEXT: pand %xmm3, %xmm11 4554; SSSE3-NEXT: pandn %xmm8, %xmm3 4555; SSSE3-NEXT: por %xmm11, %xmm3 4556; SSSE3-NEXT: packssdw %xmm4, %xmm3 4557; SSSE3-NEXT: packssdw %xmm3, %xmm2 4558; SSSE3-NEXT: packsswb %xmm2, %xmm0 4559; SSSE3-NEXT: retq 4560; 4561; SSE41-LABEL: trunc_ssat_v16i64_v16i8: 4562; SSE41: # %bb.0: 4563; SSE41-NEXT: movdqa (%rdi), %xmm11 4564; SSE41-NEXT: movdqa 16(%rdi), %xmm9 4565; SSE41-NEXT: movdqa 32(%rdi), %xmm15 4566; SSE41-NEXT: movdqa 48(%rdi), %xmm12 4567; SSE41-NEXT: movdqa 80(%rdi), %xmm4 4568; SSE41-NEXT: movdqa 64(%rdi), %xmm14 4569; SSE41-NEXT: movdqa 112(%rdi), %xmm13 4570; SSE41-NEXT: movdqa 96(%rdi), %xmm3 4571; SSE41-NEXT: movapd {{.*#+}} xmm1 = [127,127] 4572; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 4573; SSE41-NEXT: movdqa %xmm3, %xmm0 4574; SSE41-NEXT: pxor %xmm2, %xmm0 4575; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [2147483775,2147483775] 4576; SSE41-NEXT: movdqa %xmm7, %xmm5 4577; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 4578; SSE41-NEXT: movdqa %xmm7, %xmm6 4579; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 4580; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 4581; SSE41-NEXT: pand %xmm5, %xmm0 4582; SSE41-NEXT: por %xmm6, %xmm0 4583; 
SSE41-NEXT: movapd %xmm1, %xmm8 4584; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm8 4585; SSE41-NEXT: movdqa %xmm13, %xmm0 4586; SSE41-NEXT: pxor %xmm2, %xmm0 4587; SSE41-NEXT: movdqa %xmm7, %xmm3 4588; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 4589; SSE41-NEXT: movdqa %xmm7, %xmm5 4590; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 4591; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 4592; SSE41-NEXT: pand %xmm3, %xmm0 4593; SSE41-NEXT: por %xmm5, %xmm0 4594; SSE41-NEXT: movapd %xmm1, %xmm10 4595; SSE41-NEXT: blendvpd %xmm0, %xmm13, %xmm10 4596; SSE41-NEXT: movdqa %xmm14, %xmm0 4597; SSE41-NEXT: pxor %xmm2, %xmm0 4598; SSE41-NEXT: movdqa %xmm7, %xmm3 4599; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 4600; SSE41-NEXT: movdqa %xmm7, %xmm5 4601; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 4602; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 4603; SSE41-NEXT: pand %xmm3, %xmm0 4604; SSE41-NEXT: por %xmm5, %xmm0 4605; SSE41-NEXT: movapd %xmm1, %xmm13 4606; SSE41-NEXT: blendvpd %xmm0, %xmm14, %xmm13 4607; SSE41-NEXT: movdqa %xmm4, %xmm0 4608; SSE41-NEXT: pxor %xmm2, %xmm0 4609; SSE41-NEXT: movdqa %xmm7, %xmm3 4610; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 4611; SSE41-NEXT: movdqa %xmm7, %xmm5 4612; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 4613; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 4614; SSE41-NEXT: pand %xmm3, %xmm0 4615; SSE41-NEXT: por %xmm5, %xmm0 4616; SSE41-NEXT: movapd %xmm1, %xmm14 4617; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm14 4618; SSE41-NEXT: movdqa %xmm15, %xmm0 4619; SSE41-NEXT: pxor %xmm2, %xmm0 4620; SSE41-NEXT: movdqa %xmm7, %xmm3 4621; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 4622; SSE41-NEXT: movdqa %xmm7, %xmm4 4623; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 4624; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 4625; SSE41-NEXT: pand %xmm3, %xmm0 4626; SSE41-NEXT: por %xmm4, %xmm0 4627; SSE41-NEXT: movapd %xmm1, %xmm4 4628; SSE41-NEXT: blendvpd %xmm0, %xmm15, %xmm4 4629; SSE41-NEXT: movdqa %xmm12, %xmm0 4630; SSE41-NEXT: pxor %xmm2, %xmm0 4631; SSE41-NEXT: movdqa %xmm7, %xmm3 4632; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 
4633; SSE41-NEXT: movdqa %xmm7, %xmm5 4634; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 4635; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 4636; SSE41-NEXT: pand %xmm3, %xmm0 4637; SSE41-NEXT: por %xmm5, %xmm0 4638; SSE41-NEXT: movapd %xmm1, %xmm15 4639; SSE41-NEXT: blendvpd %xmm0, %xmm12, %xmm15 4640; SSE41-NEXT: movdqa %xmm11, %xmm0 4641; SSE41-NEXT: pxor %xmm2, %xmm0 4642; SSE41-NEXT: movdqa %xmm7, %xmm3 4643; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 4644; SSE41-NEXT: movdqa %xmm7, %xmm6 4645; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 4646; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 4647; SSE41-NEXT: pand %xmm3, %xmm0 4648; SSE41-NEXT: por %xmm6, %xmm0 4649; SSE41-NEXT: movapd %xmm1, %xmm6 4650; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm6 4651; SSE41-NEXT: movdqa %xmm9, %xmm0 4652; SSE41-NEXT: pxor %xmm2, %xmm0 4653; SSE41-NEXT: movdqa %xmm7, %xmm3 4654; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 4655; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 4656; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 4657; SSE41-NEXT: pand %xmm3, %xmm0 4658; SSE41-NEXT: por %xmm7, %xmm0 4659; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm1 4660; SSE41-NEXT: movapd {{.*#+}} xmm7 = [18446744073709551488,18446744073709551488] 4661; SSE41-NEXT: movapd %xmm1, %xmm5 4662; SSE41-NEXT: xorpd %xmm2, %xmm5 4663; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840] 4664; SSE41-NEXT: movapd %xmm5, %xmm3 4665; SSE41-NEXT: pcmpeqd %xmm9, %xmm3 4666; SSE41-NEXT: pcmpgtd %xmm9, %xmm5 4667; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 4668; SSE41-NEXT: pand %xmm3, %xmm0 4669; SSE41-NEXT: por %xmm5, %xmm0 4670; SSE41-NEXT: movapd %xmm7, %xmm3 4671; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 4672; SSE41-NEXT: movapd %xmm6, %xmm1 4673; SSE41-NEXT: xorpd %xmm2, %xmm1 4674; SSE41-NEXT: movapd %xmm1, %xmm5 4675; SSE41-NEXT: pcmpeqd %xmm9, %xmm5 4676; SSE41-NEXT: pcmpgtd %xmm9, %xmm1 4677; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 4678; SSE41-NEXT: pand %xmm5, %xmm0 4679; SSE41-NEXT: por %xmm1, %xmm0 4680; 
SSE41-NEXT: movapd %xmm7, %xmm1 4681; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm1 4682; SSE41-NEXT: packssdw %xmm3, %xmm1 4683; SSE41-NEXT: movapd %xmm15, %xmm3 4684; SSE41-NEXT: xorpd %xmm2, %xmm3 4685; SSE41-NEXT: movapd %xmm3, %xmm5 4686; SSE41-NEXT: pcmpeqd %xmm9, %xmm5 4687; SSE41-NEXT: pcmpgtd %xmm9, %xmm3 4688; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 4689; SSE41-NEXT: pand %xmm5, %xmm0 4690; SSE41-NEXT: por %xmm3, %xmm0 4691; SSE41-NEXT: movapd %xmm7, %xmm3 4692; SSE41-NEXT: blendvpd %xmm0, %xmm15, %xmm3 4693; SSE41-NEXT: movapd %xmm4, %xmm5 4694; SSE41-NEXT: xorpd %xmm2, %xmm5 4695; SSE41-NEXT: movapd %xmm5, %xmm6 4696; SSE41-NEXT: pcmpeqd %xmm9, %xmm6 4697; SSE41-NEXT: pcmpgtd %xmm9, %xmm5 4698; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 4699; SSE41-NEXT: pand %xmm6, %xmm0 4700; SSE41-NEXT: por %xmm5, %xmm0 4701; SSE41-NEXT: movapd %xmm7, %xmm5 4702; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm5 4703; SSE41-NEXT: packssdw %xmm3, %xmm5 4704; SSE41-NEXT: packssdw %xmm5, %xmm1 4705; SSE41-NEXT: movapd %xmm14, %xmm3 4706; SSE41-NEXT: xorpd %xmm2, %xmm3 4707; SSE41-NEXT: movapd %xmm3, %xmm4 4708; SSE41-NEXT: pcmpeqd %xmm9, %xmm4 4709; SSE41-NEXT: pcmpgtd %xmm9, %xmm3 4710; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 4711; SSE41-NEXT: pand %xmm4, %xmm0 4712; SSE41-NEXT: por %xmm3, %xmm0 4713; SSE41-NEXT: movapd %xmm7, %xmm3 4714; SSE41-NEXT: blendvpd %xmm0, %xmm14, %xmm3 4715; SSE41-NEXT: movapd %xmm13, %xmm4 4716; SSE41-NEXT: xorpd %xmm2, %xmm4 4717; SSE41-NEXT: movapd %xmm4, %xmm5 4718; SSE41-NEXT: pcmpeqd %xmm9, %xmm5 4719; SSE41-NEXT: pcmpgtd %xmm9, %xmm4 4720; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 4721; SSE41-NEXT: pand %xmm5, %xmm0 4722; SSE41-NEXT: por %xmm4, %xmm0 4723; SSE41-NEXT: movapd %xmm7, %xmm4 4724; SSE41-NEXT: blendvpd %xmm0, %xmm13, %xmm4 4725; SSE41-NEXT: packssdw %xmm3, %xmm4 4726; SSE41-NEXT: movapd %xmm10, %xmm3 4727; SSE41-NEXT: xorpd %xmm2, %xmm3 4728; SSE41-NEXT: movapd %xmm3, %xmm5 4729; SSE41-NEXT: pcmpeqd %xmm9, 
%xmm5 4730; SSE41-NEXT: pcmpgtd %xmm9, %xmm3 4731; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 4732; SSE41-NEXT: pand %xmm5, %xmm0 4733; SSE41-NEXT: por %xmm3, %xmm0 4734; SSE41-NEXT: movapd %xmm7, %xmm3 4735; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm3 4736; SSE41-NEXT: xorpd %xmm8, %xmm2 4737; SSE41-NEXT: movapd %xmm2, %xmm5 4738; SSE41-NEXT: pcmpeqd %xmm9, %xmm5 4739; SSE41-NEXT: pcmpgtd %xmm9, %xmm2 4740; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 4741; SSE41-NEXT: pand %xmm5, %xmm0 4742; SSE41-NEXT: por %xmm2, %xmm0 4743; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm7 4744; SSE41-NEXT: packssdw %xmm3, %xmm7 4745; SSE41-NEXT: packssdw %xmm7, %xmm4 4746; SSE41-NEXT: packsswb %xmm4, %xmm1 4747; SSE41-NEXT: movdqa %xmm1, %xmm0 4748; SSE41-NEXT: retq 4749; 4750; AVX1-LABEL: trunc_ssat_v16i64_v16i8: 4751; AVX1: # %bb.0: 4752; AVX1-NEXT: vmovdqa 112(%rdi), %xmm8 4753; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] 4754; AVX1-NEXT: vmovdqa 96(%rdi), %xmm9 4755; AVX1-NEXT: vmovdqa 80(%rdi), %xmm3 4756; AVX1-NEXT: vmovdqa 64(%rdi), %xmm4 4757; AVX1-NEXT: vmovdqa (%rdi), %xmm5 4758; AVX1-NEXT: vmovdqa 16(%rdi), %xmm6 4759; AVX1-NEXT: vmovdqa 32(%rdi), %xmm7 4760; AVX1-NEXT: vmovdqa 48(%rdi), %xmm0 4761; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm2 4762; AVX1-NEXT: vblendvpd %xmm2, %xmm5, %xmm1, %xmm10 4763; AVX1-NEXT: vpcmpgtq %xmm6, %xmm1, %xmm5 4764; AVX1-NEXT: vblendvpd %xmm5, %xmm6, %xmm1, %xmm11 4765; AVX1-NEXT: vpcmpgtq %xmm7, %xmm1, %xmm6 4766; AVX1-NEXT: vblendvpd %xmm6, %xmm7, %xmm1, %xmm6 4767; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm7 4768; AVX1-NEXT: vblendvpd %xmm7, %xmm0, %xmm1, %xmm0 4769; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm7 4770; AVX1-NEXT: vblendvpd %xmm7, %xmm4, %xmm1, %xmm4 4771; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm7 4772; AVX1-NEXT: vblendvpd %xmm7, %xmm3, %xmm1, %xmm3 4773; AVX1-NEXT: vpcmpgtq %xmm9, %xmm1, %xmm7 4774; AVX1-NEXT: vblendvpd %xmm7, %xmm9, %xmm1, %xmm7 4775; AVX1-NEXT: vpcmpgtq %xmm8, %xmm1, %xmm2 4776; AVX1-NEXT: vblendvpd %xmm2, 
%xmm8, %xmm1, %xmm1 4777; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744073709551488,18446744073709551488] 4778; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm5 4779; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm2, %xmm8 4780; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm5 4781; AVX1-NEXT: vblendvpd %xmm5, %xmm7, %xmm2, %xmm5 4782; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm7 4783; AVX1-NEXT: vblendvpd %xmm7, %xmm3, %xmm2, %xmm3 4784; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm7 4785; AVX1-NEXT: vblendvpd %xmm7, %xmm4, %xmm2, %xmm4 4786; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm7 4787; AVX1-NEXT: vblendvpd %xmm7, %xmm0, %xmm2, %xmm0 4788; AVX1-NEXT: vpcmpgtq %xmm2, %xmm6, %xmm7 4789; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm2, %xmm6 4790; AVX1-NEXT: vpcmpgtq %xmm2, %xmm11, %xmm7 4791; AVX1-NEXT: vblendvpd %xmm7, %xmm11, %xmm2, %xmm7 4792; AVX1-NEXT: vpcmpgtq %xmm2, %xmm10, %xmm1 4793; AVX1-NEXT: vblendvpd %xmm1, %xmm10, %xmm2, %xmm1 4794; AVX1-NEXT: vpackssdw %xmm8, %xmm5, %xmm2 4795; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3 4796; AVX1-NEXT: vpackssdw %xmm2, %xmm3, %xmm2 4797; AVX1-NEXT: vpackssdw %xmm0, %xmm6, %xmm0 4798; AVX1-NEXT: vpackssdw %xmm7, %xmm1, %xmm1 4799; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 4800; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 4801; AVX1-NEXT: retq 4802; 4803; AVX2-LABEL: trunc_ssat_v16i64_v16i8: 4804; AVX2: # %bb.0: 4805; AVX2-NEXT: vmovdqa (%rdi), %ymm0 4806; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 4807; AVX2-NEXT: vmovdqa 64(%rdi), %ymm2 4808; AVX2-NEXT: vmovdqa 96(%rdi), %ymm3 4809; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [127,127,127,127] 4810; AVX2-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm5 4811; AVX2-NEXT: vblendvpd %ymm5, %ymm2, %ymm4, %ymm2 4812; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm5 4813; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm4, %ymm3 4814; AVX2-NEXT: vpcmpgtq %ymm0, %ymm4, %ymm5 4815; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0 4816; AVX2-NEXT: vpcmpgtq %ymm1, %ymm4, %ymm5 4817; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1 4818; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 
= [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 4819; AVX2-NEXT: vpcmpgtq %ymm4, %ymm1, %ymm5 4820; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1 4821; AVX2-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm5 4822; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0 4823; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 4824; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm1 4825; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm4, %ymm1 4826; AVX2-NEXT: vpcmpgtq %ymm4, %ymm2, %ymm3 4827; AVX2-NEXT: vblendvpd %ymm3, %ymm2, %ymm4, %ymm2 4828; AVX2-NEXT: vpackssdw %ymm1, %ymm2, %ymm1 4829; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] 4830; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4831; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 4832; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 4833; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4834; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 4835; AVX2-NEXT: vzeroupper 4836; AVX2-NEXT: retq 4837; 4838; AVX512-LABEL: trunc_ssat_v16i64_v16i8: 4839; AVX512: # %bb.0: 4840; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 4841; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1 4842; AVX512-NEXT: vpmovsqb %zmm1, %xmm1 4843; AVX512-NEXT: vpmovsqb %zmm0, %xmm0 4844; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4845; AVX512-NEXT: vzeroupper 4846; AVX512-NEXT: retq 4847; 4848; SKX-LABEL: trunc_ssat_v16i64_v16i8: 4849; SKX: # %bb.0: 4850; SKX-NEXT: vmovdqa (%rdi), %ymm0 4851; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 4852; SKX-NEXT: vmovdqa 64(%rdi), %ymm2 4853; SKX-NEXT: vmovdqa 96(%rdi), %ymm3 4854; SKX-NEXT: vpmovsqb %ymm3, %xmm3 4855; SKX-NEXT: vpmovsqb %ymm2, %xmm2 4856; SKX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 4857; SKX-NEXT: vpmovsqb %ymm1, %xmm1 4858; SKX-NEXT: vpmovsqb %ymm0, %xmm0 4859; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4860; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 4861; SKX-NEXT: vzeroupper 4862; SKX-NEXT: retq 4863 %a0 = load <16 x i64>, <16 x i64>* %p0 4864 %1 = icmp slt <16 
x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 4865 %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 4866 %3 = icmp sgt <16 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 4867 %4 = select <16 x i1> %3, <16 x i64> %2, <16 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 4868 %5 = trunc <16 x i64> %4 to <16 x i8> 4869 ret <16 x i8> %5 4870} 4871 4872define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) { 4873; SSE2-LABEL: trunc_ssat_v4i32_v4i8: 4874; SSE2: # %bb.0: 4875; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127] 4876; SSE2-NEXT: movdqa %xmm1, %xmm2 4877; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 4878; SSE2-NEXT: pand %xmm2, %xmm0 4879; SSE2-NEXT: pandn %xmm1, %xmm2 4880; SSE2-NEXT: por %xmm0, %xmm2 4881; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] 4882; SSE2-NEXT: movdqa %xmm2, %xmm0 4883; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 4884; SSE2-NEXT: pand %xmm0, %xmm2 4885; SSE2-NEXT: pandn %xmm1, %xmm0 4886; SSE2-NEXT: por %xmm2, %xmm0 4887; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 4888; SSE2-NEXT: packuswb %xmm0, %xmm0 4889; SSE2-NEXT: packuswb %xmm0, %xmm0 4890; SSE2-NEXT: retq 4891; 4892; SSSE3-LABEL: trunc_ssat_v4i32_v4i8: 4893; SSSE3: # %bb.0: 4894; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127] 4895; SSSE3-NEXT: movdqa %xmm1, %xmm2 4896; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 4897; SSSE3-NEXT: pand %xmm2, %xmm0 4898; SSSE3-NEXT: pandn %xmm1, %xmm2 4899; SSSE3-NEXT: por %xmm0, %xmm2 4900; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[4294967168,4294967168,4294967168,4294967168] 4901; SSSE3-NEXT: movdqa %xmm2, %xmm0 4902; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0 4903; SSSE3-NEXT: pand %xmm0, %xmm2 4904; SSSE3-NEXT: pandn %xmm1, %xmm0 4905; SSSE3-NEXT: por %xmm2, %xmm0 4906; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] 4907; SSSE3-NEXT: retq 4908; 4909; SSE41-LABEL: trunc_ssat_v4i32_v4i8: 4910; SSE41: # %bb.0: 4911; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 4912; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0 4913; SSE41-NEXT: packssdw %xmm0, %xmm0 4914; SSE41-NEXT: packsswb %xmm0, %xmm0 4915; SSE41-NEXT: retq 4916; 4917; AVX1-LABEL: trunc_ssat_v4i32_v4i8: 4918; AVX1: # %bb.0: 4919; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 4920; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 4921; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 4922; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4923; AVX1-NEXT: retq 4924; 4925; AVX2-LABEL: trunc_ssat_v4i32_v4i8: 4926; AVX2: # %bb.0: 4927; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] 4928; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 4929; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] 4930; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 4931; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 4932; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4933; AVX2-NEXT: retq 4934; 4935; AVX512F-LABEL: trunc_ssat_v4i32_v4i8: 4936; AVX512F: # %bb.0: 4937; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4938; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0 4939; AVX512F-NEXT: vzeroupper 4940; AVX512F-NEXT: retq 4941; 4942; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8: 4943; AVX512VL: # %bb.0: 4944; AVX512VL-NEXT: vpmovsdb %xmm0, %xmm0 4945; AVX512VL-NEXT: retq 4946; 4947; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8: 4948; AVX512BW: # %bb.0: 4949; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 4950; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0 4951; AVX512BW-NEXT: vzeroupper 4952; AVX512BW-NEXT: retq 4953; 4954; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8: 4955; 
AVX512BWVL: # %bb.0: 4956; AVX512BWVL-NEXT: vpmovsdb %xmm0, %xmm0 4957; AVX512BWVL-NEXT: retq 4958; 4959; SKX-LABEL: trunc_ssat_v4i32_v4i8: 4960; SKX: # %bb.0: 4961; SKX-NEXT: vpmovsdb %xmm0, %xmm0 4962; SKX-NEXT: retq 4963 %1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127> 4964 %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127> 4965 %3 = icmp sgt <4 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128> 4966 %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128> 4967 %5 = trunc <4 x i32> %4 to <4 x i8> 4968 ret <4 x i8> %5 4969} 4970 4971define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { 4972; SSE2-LABEL: trunc_ssat_v4i32_v4i8_store: 4973; SSE2: # %bb.0: 4974; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127] 4975; SSE2-NEXT: movdqa %xmm1, %xmm2 4976; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 4977; SSE2-NEXT: pand %xmm2, %xmm0 4978; SSE2-NEXT: pandn %xmm1, %xmm2 4979; SSE2-NEXT: por %xmm0, %xmm2 4980; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967168,4294967168,4294967168,4294967168] 4981; SSE2-NEXT: movdqa %xmm2, %xmm1 4982; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 4983; SSE2-NEXT: pand %xmm1, %xmm2 4984; SSE2-NEXT: pandn %xmm0, %xmm1 4985; SSE2-NEXT: por %xmm2, %xmm1 4986; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 4987; SSE2-NEXT: packuswb %xmm1, %xmm1 4988; SSE2-NEXT: packuswb %xmm1, %xmm1 4989; SSE2-NEXT: movd %xmm1, (%rdi) 4990; SSE2-NEXT: retq 4991; 4992; SSSE3-LABEL: trunc_ssat_v4i32_v4i8_store: 4993; SSSE3: # %bb.0: 4994; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127] 4995; SSSE3-NEXT: movdqa %xmm1, %xmm2 4996; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 4997; SSSE3-NEXT: pand %xmm2, %xmm0 4998; SSSE3-NEXT: pandn %xmm1, %xmm2 4999; SSSE3-NEXT: por %xmm0, %xmm2 5000; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4294967168,4294967168,4294967168,4294967168] 5001; SSSE3-NEXT: movdqa %xmm2, %xmm1 5002; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 5003; SSSE3-NEXT: pand %xmm1, %xmm2 5004; SSSE3-NEXT: pandn 
%xmm0, %xmm1 5005; SSSE3-NEXT: por %xmm2, %xmm1 5006; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] 5007; SSSE3-NEXT: movd %xmm1, (%rdi) 5008; SSSE3-NEXT: retq 5009; 5010; SSE41-LABEL: trunc_ssat_v4i32_v4i8_store: 5011; SSE41: # %bb.0: 5012; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 5013; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0 5014; SSE41-NEXT: packssdw %xmm0, %xmm0 5015; SSE41-NEXT: packsswb %xmm0, %xmm0 5016; SSE41-NEXT: movd %xmm0, (%rdi) 5017; SSE41-NEXT: retq 5018; 5019; AVX1-LABEL: trunc_ssat_v4i32_v4i8_store: 5020; AVX1: # %bb.0: 5021; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 5022; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 5023; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 5024; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5025; AVX1-NEXT: vmovd %xmm0, (%rdi) 5026; AVX1-NEXT: retq 5027; 5028; AVX2-LABEL: trunc_ssat_v4i32_v4i8_store: 5029; AVX2: # %bb.0: 5030; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] 5031; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 5032; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] 5033; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 5034; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 5035; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5036; AVX2-NEXT: vmovd %xmm0, (%rdi) 5037; AVX2-NEXT: retq 5038; 5039; AVX512F-LABEL: trunc_ssat_v4i32_v4i8_store: 5040; AVX512F: # %bb.0: 5041; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5042; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0 5043; AVX512F-NEXT: vmovd %xmm0, (%rdi) 5044; AVX512F-NEXT: vzeroupper 5045; AVX512F-NEXT: retq 5046; 5047; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8_store: 5048; AVX512VL: # %bb.0: 5049; AVX512VL-NEXT: vpmovsdb %xmm0, (%rdi) 5050; AVX512VL-NEXT: retq 5051; 5052; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8_store: 5053; AVX512BW: # %bb.0: 5054; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5055; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0 5056; AVX512BW-NEXT: vmovd %xmm0, (%rdi) 5057; AVX512BW-NEXT: vzeroupper 5058; 
AVX512BW-NEXT: retq 5059; 5060; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8_store: 5061; AVX512BWVL: # %bb.0: 5062; AVX512BWVL-NEXT: vpmovsdb %xmm0, (%rdi) 5063; AVX512BWVL-NEXT: retq 5064; 5065; SKX-LABEL: trunc_ssat_v4i32_v4i8_store: 5066; SKX: # %bb.0: 5067; SKX-NEXT: vpmovsdb %xmm0, (%rdi) 5068; SKX-NEXT: retq 5069 %1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127> 5070 %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127> 5071 %3 = icmp sgt <4 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128> 5072 %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128> 5073 %5 = trunc <4 x i32> %4 to <4 x i8> 5074 store <4 x i8> %5, <4 x i8> *%p1 5075 ret void 5076} 5077 5078define <8 x i8> @trunc_ssat_v8i32_v8i8(<8 x i32> %a0) { 5079; SSE-LABEL: trunc_ssat_v8i32_v8i8: 5080; SSE: # %bb.0: 5081; SSE-NEXT: packssdw %xmm1, %xmm0 5082; SSE-NEXT: packsswb %xmm0, %xmm0 5083; SSE-NEXT: retq 5084; 5085; AVX1-LABEL: trunc_ssat_v8i32_v8i8: 5086; AVX1: # %bb.0: 5087; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 5088; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 5089; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5090; AVX1-NEXT: vzeroupper 5091; AVX1-NEXT: retq 5092; 5093; AVX2-LABEL: trunc_ssat_v8i32_v8i8: 5094; AVX2: # %bb.0: 5095; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 5096; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 5097; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5098; AVX2-NEXT: vzeroupper 5099; AVX2-NEXT: retq 5100; 5101; AVX512F-LABEL: trunc_ssat_v8i32_v8i8: 5102; AVX512F: # %bb.0: 5103; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 5104; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 5105; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5106; AVX512F-NEXT: vzeroupper 5107; AVX512F-NEXT: retq 5108; 5109; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8: 5110; AVX512VL: # %bb.0: 5111; AVX512VL-NEXT: vpmovsdb %ymm0, %xmm0 5112; AVX512VL-NEXT: vzeroupper 5113; AVX512VL-NEXT: retq 5114; 5115; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8: 5116; 
AVX512BW: # %bb.0: 5117; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 5118; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 5119; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5120; AVX512BW-NEXT: vzeroupper 5121; AVX512BW-NEXT: retq 5122; 5123; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8: 5124; AVX512BWVL: # %bb.0: 5125; AVX512BWVL-NEXT: vpmovsdb %ymm0, %xmm0 5126; AVX512BWVL-NEXT: vzeroupper 5127; AVX512BWVL-NEXT: retq 5128; 5129; SKX-LABEL: trunc_ssat_v8i32_v8i8: 5130; SKX: # %bb.0: 5131; SKX-NEXT: vpmovsdb %ymm0, %xmm0 5132; SKX-NEXT: vzeroupper 5133; SKX-NEXT: retq 5134 %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5135 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5136 %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5137 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5138 %5 = trunc <8 x i32> %4 to <8 x i8> 5139 ret <8 x i8> %5 5140} 5141 5142define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { 5143; SSE-LABEL: trunc_ssat_v8i32_v8i8_store: 5144; SSE: # %bb.0: 5145; SSE-NEXT: packssdw %xmm1, %xmm0 5146; SSE-NEXT: packsswb %xmm0, %xmm0 5147; SSE-NEXT: movq %xmm0, (%rdi) 5148; SSE-NEXT: retq 5149; 5150; AVX1-LABEL: trunc_ssat_v8i32_v8i8_store: 5151; AVX1: # %bb.0: 5152; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 5153; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 5154; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5155; AVX1-NEXT: vmovq %xmm0, (%rdi) 5156; AVX1-NEXT: vzeroupper 5157; AVX1-NEXT: retq 5158; 5159; AVX2-LABEL: trunc_ssat_v8i32_v8i8_store: 5160; AVX2: # %bb.0: 5161; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 5162; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 5163; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5164; AVX2-NEXT: vmovq %xmm0, (%rdi) 5165; AVX2-NEXT: vzeroupper 5166; AVX2-NEXT: 
retq 5167; 5168; AVX512F-LABEL: trunc_ssat_v8i32_v8i8_store: 5169; AVX512F: # %bb.0: 5170; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 5171; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 5172; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5173; AVX512F-NEXT: vmovq %xmm0, (%rdi) 5174; AVX512F-NEXT: vzeroupper 5175; AVX512F-NEXT: retq 5176; 5177; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8_store: 5178; AVX512VL: # %bb.0: 5179; AVX512VL-NEXT: vpmovsdb %ymm0, (%rdi) 5180; AVX512VL-NEXT: vzeroupper 5181; AVX512VL-NEXT: retq 5182; 5183; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8_store: 5184; AVX512BW: # %bb.0: 5185; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 5186; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 5187; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5188; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 5189; AVX512BW-NEXT: vzeroupper 5190; AVX512BW-NEXT: retq 5191; 5192; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8_store: 5193; AVX512BWVL: # %bb.0: 5194; AVX512BWVL-NEXT: vpmovsdb %ymm0, (%rdi) 5195; AVX512BWVL-NEXT: vzeroupper 5196; AVX512BWVL-NEXT: retq 5197; 5198; SKX-LABEL: trunc_ssat_v8i32_v8i8_store: 5199; SKX: # %bb.0: 5200; SKX-NEXT: vpmovsdb %ymm0, (%rdi) 5201; SKX-NEXT: vzeroupper 5202; SKX-NEXT: retq 5203 %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5204 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5205 %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5206 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5207 %5 = trunc <8 x i32> %4 to <8 x i8> 5208 store <8 x i8> %5, <8 x i8> *%p1 5209 ret void 5210} 5211 5212define <16 x i8> @trunc_ssat_v16i32_v16i8(<16 x i32>* %p0) "min-legal-vector-width"="256" { 5213; SSE-LABEL: trunc_ssat_v16i32_v16i8: 5214; SSE: # %bb.0: 5215; SSE-NEXT: movdqa (%rdi), %xmm0 5216; 
SSE-NEXT: movdqa 32(%rdi), %xmm1 5217; SSE-NEXT: packssdw 48(%rdi), %xmm1 5218; SSE-NEXT: packssdw 16(%rdi), %xmm0 5219; SSE-NEXT: packsswb %xmm1, %xmm0 5220; SSE-NEXT: retq 5221; 5222; AVX1-LABEL: trunc_ssat_v16i32_v16i8: 5223; AVX1: # %bb.0: 5224; AVX1-NEXT: vmovdqa (%rdi), %xmm0 5225; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 5226; AVX1-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 5227; AVX1-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 5228; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5229; AVX1-NEXT: retq 5230; 5231; AVX2-LABEL: trunc_ssat_v16i32_v16i8: 5232; AVX2: # %bb.0: 5233; AVX2-NEXT: vmovdqa (%rdi), %ymm0 5234; AVX2-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 5235; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 5236; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5237; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 5238; AVX2-NEXT: vzeroupper 5239; AVX2-NEXT: retq 5240; 5241; AVX512-LABEL: trunc_ssat_v16i32_v16i8: 5242; AVX512: # %bb.0: 5243; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 5244; AVX512-NEXT: vpmovsdb %zmm0, %xmm0 5245; AVX512-NEXT: vzeroupper 5246; AVX512-NEXT: retq 5247; 5248; SKX-LABEL: trunc_ssat_v16i32_v16i8: 5249; SKX: # %bb.0: 5250; SKX-NEXT: vmovdqa (%rdi), %ymm0 5251; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 5252; SKX-NEXT: vpmovsdb %ymm1, %xmm1 5253; SKX-NEXT: vpmovsdb %ymm0, %xmm0 5254; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 5255; SKX-NEXT: vzeroupper 5256; SKX-NEXT: retq 5257 %a0 = load <16 x i32>, <16 x i32>* %p0 5258 %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5259 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5260 %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 
-128, i32 -128> 5261 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5262 %5 = trunc <16 x i32> %4 to <16 x i8> 5263 ret <16 x i8> %5 5264} 5265 5266define void @trunc_ssat_v16i32_v16i8_store(<16 x i32>* %p0, <16 x i8>* %p1) "min-legal-vector-width"="256" { 5267; SSE-LABEL: trunc_ssat_v16i32_v16i8_store: 5268; SSE: # %bb.0: 5269; SSE-NEXT: movdqa (%rdi), %xmm0 5270; SSE-NEXT: movdqa 32(%rdi), %xmm1 5271; SSE-NEXT: packssdw 48(%rdi), %xmm1 5272; SSE-NEXT: packssdw 16(%rdi), %xmm0 5273; SSE-NEXT: packsswb %xmm1, %xmm0 5274; SSE-NEXT: movdqa %xmm0, (%rsi) 5275; SSE-NEXT: retq 5276; 5277; AVX1-LABEL: trunc_ssat_v16i32_v16i8_store: 5278; AVX1: # %bb.0: 5279; AVX1-NEXT: vmovdqa (%rdi), %xmm0 5280; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 5281; AVX1-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 5282; AVX1-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 5283; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5284; AVX1-NEXT: vmovdqa %xmm0, (%rsi) 5285; AVX1-NEXT: retq 5286; 5287; AVX2-LABEL: trunc_ssat_v16i32_v16i8_store: 5288; AVX2: # %bb.0: 5289; AVX2-NEXT: vmovdqa (%rdi), %ymm0 5290; AVX2-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 5291; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 5292; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5293; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 5294; AVX2-NEXT: vmovdqa %xmm0, (%rsi) 5295; AVX2-NEXT: vzeroupper 5296; AVX2-NEXT: retq 5297; 5298; AVX512-LABEL: trunc_ssat_v16i32_v16i8_store: 5299; AVX512: # %bb.0: 5300; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 5301; AVX512-NEXT: vpmovsdb %zmm0, (%rsi) 5302; AVX512-NEXT: vzeroupper 5303; AVX512-NEXT: retq 5304; 5305; SKX-LABEL: trunc_ssat_v16i32_v16i8_store: 5306; SKX: # %bb.0: 5307; SKX-NEXT: vmovdqa (%rdi), %ymm0 5308; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 5309; SKX-NEXT: vpmovsdb %ymm1, %xmm1 5310; SKX-NEXT: vpmovsdb %ymm0, %xmm0 5311; SKX-NEXT: vpunpcklqdq {{.*#+}} 
xmm0 = xmm0[0],xmm1[0] 5312; SKX-NEXT: vmovdqa %xmm0, (%rsi) 5313; SKX-NEXT: vzeroupper 5314; SKX-NEXT: retq 5315 %a0 = load <16 x i32>, <16 x i32>* %p0 5316 %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5317 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5318 %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5319 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5320 %5 = trunc <16 x i32> %4 to <16 x i8> 5321 store <16 x i8> %5, <16 x i8>* %p1 5322 ret void 5323} 5324 5325define <8 x i8> @trunc_ssat_v8i16_v8i8(<8 x i16> %a0) { 5326; SSE-LABEL: trunc_ssat_v8i16_v8i8: 5327; SSE: # %bb.0: 5328; SSE-NEXT: packsswb %xmm0, %xmm0 5329; SSE-NEXT: retq 5330; 5331; AVX-LABEL: trunc_ssat_v8i16_v8i8: 5332; AVX: # %bb.0: 5333; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5334; AVX-NEXT: retq 5335; 5336; AVX512-LABEL: trunc_ssat_v8i16_v8i8: 5337; AVX512: # %bb.0: 5338; AVX512-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5339; AVX512-NEXT: retq 5340; 5341; SKX-LABEL: trunc_ssat_v8i16_v8i8: 5342; SKX: # %bb.0: 5343; SKX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5344; SKX-NEXT: retq 5345 %1 = icmp slt <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 5346 %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 5347 %3 = icmp sgt <8 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 
5348 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 5349 %5 = trunc <8 x i16> %4 to <8 x i8> 5350 ret <8 x i8> %5 5351} 5352 5353define void @trunc_ssat_v8i16_v8i8_store(<8 x i16> %a0, <8 x i8> *%p1) { 5354; SSE-LABEL: trunc_ssat_v8i16_v8i8_store: 5355; SSE: # %bb.0: 5356; SSE-NEXT: packsswb %xmm0, %xmm0 5357; SSE-NEXT: movq %xmm0, (%rdi) 5358; SSE-NEXT: retq 5359; 5360; AVX-LABEL: trunc_ssat_v8i16_v8i8_store: 5361; AVX: # %bb.0: 5362; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5363; AVX-NEXT: vmovq %xmm0, (%rdi) 5364; AVX-NEXT: retq 5365; 5366; AVX512F-LABEL: trunc_ssat_v8i16_v8i8_store: 5367; AVX512F: # %bb.0: 5368; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5369; AVX512F-NEXT: vmovq %xmm0, (%rdi) 5370; AVX512F-NEXT: retq 5371; 5372; AVX512VL-LABEL: trunc_ssat_v8i16_v8i8_store: 5373; AVX512VL: # %bb.0: 5374; AVX512VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5375; AVX512VL-NEXT: vmovq %xmm0, (%rdi) 5376; AVX512VL-NEXT: retq 5377; 5378; AVX512BW-LABEL: trunc_ssat_v8i16_v8i8_store: 5379; AVX512BW: # %bb.0: 5380; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 5381; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 5382; AVX512BW-NEXT: retq 5383; 5384; AVX512BWVL-LABEL: trunc_ssat_v8i16_v8i8_store: 5385; AVX512BWVL: # %bb.0: 5386; AVX512BWVL-NEXT: vpmovswb %xmm0, (%rdi) 5387; AVX512BWVL-NEXT: retq 5388; 5389; SKX-LABEL: trunc_ssat_v8i16_v8i8_store: 5390; SKX: # %bb.0: 5391; SKX-NEXT: vpmovswb %xmm0, (%rdi) 5392; SKX-NEXT: retq 5393 %1 = icmp slt <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 5394 %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 5395 %3 = icmp sgt <8 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 5396 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 5397 %5 
= trunc <8 x i16> %4 to <8 x i8> 5398 store <8 x i8> %5, <8 x i8> *%p1 5399 ret void 5400} 5401 5402define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) { 5403; SSE-LABEL: trunc_ssat_v16i16_v16i8: 5404; SSE: # %bb.0: 5405; SSE-NEXT: packsswb %xmm1, %xmm0 5406; SSE-NEXT: retq 5407; 5408; AVX1-LABEL: trunc_ssat_v16i16_v16i8: 5409; AVX1: # %bb.0: 5410; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 5411; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5412; AVX1-NEXT: vzeroupper 5413; AVX1-NEXT: retq 5414; 5415; AVX2-LABEL: trunc_ssat_v16i16_v16i8: 5416; AVX2: # %bb.0: 5417; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 5418; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5419; AVX2-NEXT: vzeroupper 5420; AVX2-NEXT: retq 5421; 5422; AVX512F-LABEL: trunc_ssat_v16i16_v16i8: 5423; AVX512F: # %bb.0: 5424; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 5425; AVX512F-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5426; AVX512F-NEXT: vzeroupper 5427; AVX512F-NEXT: retq 5428; 5429; AVX512VL-LABEL: trunc_ssat_v16i16_v16i8: 5430; AVX512VL: # %bb.0: 5431; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 5432; AVX512VL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5433; AVX512VL-NEXT: vzeroupper 5434; AVX512VL-NEXT: retq 5435; 5436; AVX512BW-LABEL: trunc_ssat_v16i16_v16i8: 5437; AVX512BW: # %bb.0: 5438; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 5439; AVX512BW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5440; AVX512BW-NEXT: vzeroupper 5441; AVX512BW-NEXT: retq 5442; 5443; AVX512BWVL-LABEL: trunc_ssat_v16i16_v16i8: 5444; AVX512BWVL: # %bb.0: 5445; AVX512BWVL-NEXT: vpmovswb %ymm0, %xmm0 5446; AVX512BWVL-NEXT: vzeroupper 5447; AVX512BWVL-NEXT: retq 5448; 5449; SKX-LABEL: trunc_ssat_v16i16_v16i8: 5450; SKX: # %bb.0: 5451; SKX-NEXT: vpmovswb %ymm0, %xmm0 5452; SKX-NEXT: vzeroupper 5453; SKX-NEXT: retq 5454 %1 = icmp slt <16 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 5455 %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> 
<i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 5456 %3 = icmp sgt <16 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 5457 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 5458 %5 = trunc <16 x i16> %4 to <16 x i8> 5459 ret <16 x i8> %5 5460} 5461 5462define <32 x i8> @trunc_ssat_v32i16_v32i8(<32 x i16>* %p0) "min-legal-vector-width"="256" { 5463; SSE-LABEL: trunc_ssat_v32i16_v32i8: 5464; SSE: # %bb.0: 5465; SSE-NEXT: movdqa (%rdi), %xmm0 5466; SSE-NEXT: movdqa 32(%rdi), %xmm1 5467; SSE-NEXT: packsswb 16(%rdi), %xmm0 5468; SSE-NEXT: packsswb 48(%rdi), %xmm1 5469; SSE-NEXT: retq 5470; 5471; AVX1-LABEL: trunc_ssat_v32i16_v32i8: 5472; AVX1: # %bb.0: 5473; AVX1-NEXT: vmovdqa (%rdi), %xmm0 5474; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 5475; AVX1-NEXT: vpacksswb 48(%rdi), %xmm1, %xmm1 5476; AVX1-NEXT: vpacksswb 16(%rdi), %xmm0, %xmm0 5477; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5478; AVX1-NEXT: retq 5479; 5480; AVX2-LABEL: trunc_ssat_v32i16_v32i8: 5481; AVX2: # %bb.0: 5482; AVX2-NEXT: vmovdqa (%rdi), %ymm0 5483; AVX2-NEXT: vpacksswb 32(%rdi), %ymm0, %ymm0 5484; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5485; AVX2-NEXT: retq 5486; 5487; AVX512F-LABEL: trunc_ssat_v32i16_v32i8: 5488; AVX512F: # %bb.0: 5489; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 5490; AVX512F-NEXT: vpacksswb 32(%rdi), %ymm0, %ymm0 5491; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5492; AVX512F-NEXT: retq 5493; 5494; AVX512VL-LABEL: trunc_ssat_v32i16_v32i8: 5495; AVX512VL: # %bb.0: 5496; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 5497; AVX512VL-NEXT: vpacksswb 32(%rdi), %ymm0, %ymm0 5498; AVX512VL-NEXT: vpermq {{.*#+}} 
ymm0 = ymm0[0,2,1,3] 5499; AVX512VL-NEXT: retq 5500; 5501; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8: 5502; AVX512BW: # %bb.0: 5503; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 5504; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0 5505; AVX512BW-NEXT: retq 5506; 5507; AVX512BWVL-LABEL: trunc_ssat_v32i16_v32i8: 5508; AVX512BWVL: # %bb.0: 5509; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0 5510; AVX512BWVL-NEXT: vpmovswb %zmm0, %ymm0 5511; AVX512BWVL-NEXT: retq 5512; 5513; SKX-LABEL: trunc_ssat_v32i16_v32i8: 5514; SKX: # %bb.0: 5515; SKX-NEXT: vmovdqa (%rdi), %ymm0 5516; SKX-NEXT: vpacksswb 32(%rdi), %ymm0, %ymm0 5517; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5518; SKX-NEXT: retq 5519 %a0 = load <32 x i16>, <32 x i16>* %p0 5520 %1 = icmp slt <32 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 5521 %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 5522 %3 = icmp sgt <32 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 5523 %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, 
i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 5524 %5 = trunc <32 x i16> %4 to <32 x i8> 5525 ret <32 x i8> %5 5526} 5527 5528define <32 x i8> @trunc_ssat_v32i32_v32i8(<32 x i32>* %p0) "min-legal-vector-width"="256" { 5529; SSE-LABEL: trunc_ssat_v32i32_v32i8: 5530; SSE: # %bb.0: 5531; SSE-NEXT: movdqa (%rdi), %xmm0 5532; SSE-NEXT: movdqa 32(%rdi), %xmm2 5533; SSE-NEXT: movdqa 64(%rdi), %xmm1 5534; SSE-NEXT: movdqa 96(%rdi), %xmm3 5535; SSE-NEXT: packssdw 48(%rdi), %xmm2 5536; SSE-NEXT: packssdw 16(%rdi), %xmm0 5537; SSE-NEXT: packsswb %xmm2, %xmm0 5538; SSE-NEXT: packssdw 112(%rdi), %xmm3 5539; SSE-NEXT: packssdw 80(%rdi), %xmm1 5540; SSE-NEXT: packsswb %xmm3, %xmm1 5541; SSE-NEXT: retq 5542; 5543; AVX1-LABEL: trunc_ssat_v32i32_v32i8: 5544; AVX1: # %bb.0: 5545; AVX1-NEXT: vmovdqa (%rdi), %xmm0 5546; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 5547; AVX1-NEXT: vmovdqa 64(%rdi), %xmm2 5548; AVX1-NEXT: vmovdqa 96(%rdi), %xmm3 5549; AVX1-NEXT: vpackssdw 112(%rdi), %xmm3, %xmm3 5550; AVX1-NEXT: vpackssdw 80(%rdi), %xmm2, %xmm2 5551; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2 5552; AVX1-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 5553; AVX1-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 5554; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 5555; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5556; AVX1-NEXT: retq 5557; 5558; AVX2-LABEL: trunc_ssat_v32i32_v32i8: 5559; AVX2: # %bb.0: 5560; AVX2-NEXT: vmovdqa (%rdi), %ymm0 5561; AVX2-NEXT: vmovdqa 64(%rdi), %ymm1 5562; AVX2-NEXT: vpackssdw 96(%rdi), %ymm1, %ymm1 5563; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] 5564; AVX2-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 5565; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5566; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 5567; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5568; AVX2-NEXT: retq 5569; 5570; AVX512-LABEL: trunc_ssat_v32i32_v32i8: 5571; AVX512: # %bb.0: 5572; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 5573; AVX512-NEXT: vmovdqa64 
64(%rdi), %zmm1 5574; AVX512-NEXT: vpmovsdb %zmm0, %xmm0 5575; AVX512-NEXT: vpmovsdb %zmm1, %xmm1 5576; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 5577; AVX512-NEXT: retq 5578; 5579; SKX-LABEL: trunc_ssat_v32i32_v32i8: 5580; SKX: # %bb.0: 5581; SKX-NEXT: vmovdqa (%rdi), %ymm0 5582; SKX-NEXT: vmovdqa 64(%rdi), %ymm1 5583; SKX-NEXT: vpackssdw 96(%rdi), %ymm1, %ymm1 5584; SKX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] 5585; SKX-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 5586; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5587; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 5588; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5589; SKX-NEXT: retq 5590 %a0 = load <32 x i32>, <32 x i32>* %p0 5591 %1 = icmp slt <32 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5592 %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 5593 %3 = icmp sgt <32 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5594 %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 
-128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 5595 %5 = trunc <32 x i32> %4 to <32 x i8> 5596 ret <32 x i8> %5 5597} 5598