1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ 9; 10; 32-bit tests to make sure we're not doing anything stupid. 11; RUN: llc < %s -mtriple=i686-unknown-unknown 12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse 13; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 14 15; 16; Double to Signed Integer 17; 18 19define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) { 20; SSE-LABEL: fptosi_2f64_to_2i64: 21; SSE: # %bb.0: 22; SSE-NEXT: cvttsd2si %xmm0, %rax 23; SSE-NEXT: movq %rax, %xmm1 24; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 25; SSE-NEXT: cvttsd2si %xmm0, %rax 26; SSE-NEXT: movq %rax, %xmm0 27; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 28; SSE-NEXT: movdqa %xmm1, %xmm0 29; SSE-NEXT: retq 30; 31; VEX-LABEL: fptosi_2f64_to_2i64: 32; VEX: # %bb.0: 33; VEX-NEXT: vcvttsd2si %xmm0, %rax 34; VEX-NEXT: vmovq %rax, %xmm1 35; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 36; VEX-NEXT: vcvttsd2si %xmm0, %rax 37; VEX-NEXT: vmovq %rax, %xmm0 38; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 39; VEX-NEXT: retq 40; 41; AVX512F-LABEL: fptosi_2f64_to_2i64: 42; AVX512F: # %bb.0: 43; AVX512F-NEXT: vcvttsd2si %xmm0, %rax 44; 
AVX512F-NEXT: vmovq %rax, %xmm1 45; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 46; AVX512F-NEXT: vcvttsd2si %xmm0, %rax 47; AVX512F-NEXT: vmovq %rax, %xmm0 48; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 49; AVX512F-NEXT: retq 50; 51; AVX512VL-LABEL: fptosi_2f64_to_2i64: 52; AVX512VL: # %bb.0: 53; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax 54; AVX512VL-NEXT: vmovq %rax, %xmm1 55; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 56; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax 57; AVX512VL-NEXT: vmovq %rax, %xmm0 58; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 59; AVX512VL-NEXT: retq 60; 61; AVX512DQ-LABEL: fptosi_2f64_to_2i64: 62; AVX512DQ: # %bb.0: 63; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 64; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 65; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 66; AVX512DQ-NEXT: vzeroupper 67; AVX512DQ-NEXT: retq 68; 69; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64: 70; AVX512VLDQ: # %bb.0: 71; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0 72; AVX512VLDQ-NEXT: retq 73 %cvt = fptosi <2 x double> %a to <2 x i64> 74 ret <2 x i64> %cvt 75} 76 77define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) { 78; SSE-LABEL: fptosi_2f64_to_4i32: 79; SSE: # %bb.0: 80; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 81; SSE-NEXT: retq 82; 83; AVX-LABEL: fptosi_2f64_to_4i32: 84; AVX: # %bb.0: 85; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 86; AVX-NEXT: retq 87 %cvt = fptosi <2 x double> %a to <2 x i32> 88 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 89 ret <4 x i32> %ext 90} 91 92define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) { 93; SSE-LABEL: fptosi_2f64_to_2i32: 94; SSE: # %bb.0: 95; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 96; SSE-NEXT: retq 97; 98; AVX-LABEL: fptosi_2f64_to_2i32: 99; AVX: # %bb.0: 100; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 101; AVX-NEXT: retq 102 %cvt = fptosi <2 x double> %a to <2 x i32> 103 ret <2 x i32> %cvt 104} 105 106define <4 x i32> @fptosi_4f64_to_2i32(<2 x 
double> %a) { 107; SSE-LABEL: fptosi_4f64_to_2i32: 108; SSE: # %bb.0: 109; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 110; SSE-NEXT: retq 111; 112; AVX-LABEL: fptosi_4f64_to_2i32: 113; AVX: # %bb.0: 114; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 115; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 116; AVX-NEXT: vzeroupper 117; AVX-NEXT: retq 118 %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 119 %cvt = fptosi <4 x double> %ext to <4 x i32> 120 ret <4 x i32> %cvt 121} 122 123define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) { 124; SSE-LABEL: fptosi_4f64_to_4i64: 125; SSE: # %bb.0: 126; SSE-NEXT: cvttsd2si %xmm0, %rax 127; SSE-NEXT: movq %rax, %xmm2 128; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 129; SSE-NEXT: cvttsd2si %xmm0, %rax 130; SSE-NEXT: movq %rax, %xmm0 131; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] 132; SSE-NEXT: cvttsd2si %xmm1, %rax 133; SSE-NEXT: movq %rax, %xmm3 134; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] 135; SSE-NEXT: cvttsd2si %xmm1, %rax 136; SSE-NEXT: movq %rax, %xmm0 137; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] 138; SSE-NEXT: movdqa %xmm2, %xmm0 139; SSE-NEXT: movdqa %xmm3, %xmm1 140; SSE-NEXT: retq 141; 142; AVX1-LABEL: fptosi_4f64_to_4i64: 143; AVX1: # %bb.0: 144; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 145; AVX1-NEXT: vcvttsd2si %xmm1, %rax 146; AVX1-NEXT: vmovq %rax, %xmm2 147; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 148; AVX1-NEXT: vcvttsd2si %xmm1, %rax 149; AVX1-NEXT: vmovq %rax, %xmm1 150; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 151; AVX1-NEXT: vcvttsd2si %xmm0, %rax 152; AVX1-NEXT: vmovq %rax, %xmm2 153; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 154; AVX1-NEXT: vcvttsd2si %xmm0, %rax 155; AVX1-NEXT: vmovq %rax, %xmm0 156; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 157; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 158; AVX1-NEXT: retq 159; 160; AVX2-LABEL: fptosi_4f64_to_4i64: 161; AVX2: # %bb.0: 162; AVX2-NEXT: 
vextractf128 $1, %ymm0, %xmm1 163; AVX2-NEXT: vcvttsd2si %xmm1, %rax 164; AVX2-NEXT: vmovq %rax, %xmm2 165; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 166; AVX2-NEXT: vcvttsd2si %xmm1, %rax 167; AVX2-NEXT: vmovq %rax, %xmm1 168; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 169; AVX2-NEXT: vcvttsd2si %xmm0, %rax 170; AVX2-NEXT: vmovq %rax, %xmm2 171; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 172; AVX2-NEXT: vcvttsd2si %xmm0, %rax 173; AVX2-NEXT: vmovq %rax, %xmm0 174; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 175; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 176; AVX2-NEXT: retq 177; 178; AVX512F-LABEL: fptosi_4f64_to_4i64: 179; AVX512F: # %bb.0: 180; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 181; AVX512F-NEXT: vcvttsd2si %xmm1, %rax 182; AVX512F-NEXT: vmovq %rax, %xmm2 183; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 184; AVX512F-NEXT: vcvttsd2si %xmm1, %rax 185; AVX512F-NEXT: vmovq %rax, %xmm1 186; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 187; AVX512F-NEXT: vcvttsd2si %xmm0, %rax 188; AVX512F-NEXT: vmovq %rax, %xmm2 189; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 190; AVX512F-NEXT: vcvttsd2si %xmm0, %rax 191; AVX512F-NEXT: vmovq %rax, %xmm0 192; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 193; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 194; AVX512F-NEXT: retq 195; 196; AVX512VL-LABEL: fptosi_4f64_to_4i64: 197; AVX512VL: # %bb.0: 198; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 199; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax 200; AVX512VL-NEXT: vmovq %rax, %xmm2 201; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 202; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax 203; AVX512VL-NEXT: vmovq %rax, %xmm1 204; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 205; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax 206; AVX512VL-NEXT: vmovq %rax, %xmm2 207; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 208; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax 209; AVX512VL-NEXT: vmovq %rax, %xmm0 210; 
AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 211; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 212; AVX512VL-NEXT: retq 213; 214; AVX512DQ-LABEL: fptosi_4f64_to_4i64: 215; AVX512DQ: # %bb.0: 216; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 217; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 218; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 219; AVX512DQ-NEXT: retq 220; 221; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64: 222; AVX512VLDQ: # %bb.0: 223; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0 224; AVX512VLDQ-NEXT: retq 225 %cvt = fptosi <4 x double> %a to <4 x i64> 226 ret <4 x i64> %cvt 227} 228 229define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) { 230; SSE-LABEL: fptosi_4f64_to_4i32: 231; SSE: # %bb.0: 232; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 233; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 234; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 235; SSE-NEXT: retq 236; 237; AVX-LABEL: fptosi_4f64_to_4i32: 238; AVX: # %bb.0: 239; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 240; AVX-NEXT: vzeroupper 241; AVX-NEXT: retq 242 %cvt = fptosi <4 x double> %a to <4 x i32> 243 ret <4 x i32> %cvt 244} 245 246; 247; Double to Unsigned Integer 248; 249 250define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) { 251; SSE-LABEL: fptoui_2f64_to_2i64: 252; SSE: # %bb.0: 253; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 254; SSE-NEXT: movapd %xmm0, %xmm1 255; SSE-NEXT: subsd %xmm2, %xmm1 256; SSE-NEXT: cvttsd2si %xmm1, %rax 257; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 258; SSE-NEXT: xorq %rcx, %rax 259; SSE-NEXT: cvttsd2si %xmm0, %rdx 260; SSE-NEXT: ucomisd %xmm2, %xmm0 261; SSE-NEXT: cmovaeq %rax, %rdx 262; SSE-NEXT: movq %rdx, %xmm1 263; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 264; SSE-NEXT: movapd %xmm0, %xmm3 265; SSE-NEXT: subsd %xmm2, %xmm3 266; SSE-NEXT: cvttsd2si %xmm3, %rax 267; SSE-NEXT: xorq %rcx, %rax 268; SSE-NEXT: cvttsd2si %xmm0, %rcx 269; SSE-NEXT: ucomisd %xmm2, %xmm0 270; SSE-NEXT: cmovaeq %rax, %rcx 271; SSE-NEXT: 
movq %rcx, %xmm0 272; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 273; SSE-NEXT: movdqa %xmm1, %xmm0 274; SSE-NEXT: retq 275; 276; VEX-LABEL: fptoui_2f64_to_2i64: 277; VEX: # %bb.0: 278; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 279; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 280; VEX-NEXT: vcvttsd2si %xmm2, %rax 281; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 282; VEX-NEXT: xorq %rcx, %rax 283; VEX-NEXT: vcvttsd2si %xmm0, %rdx 284; VEX-NEXT: vucomisd %xmm1, %xmm0 285; VEX-NEXT: cmovaeq %rax, %rdx 286; VEX-NEXT: vmovq %rdx, %xmm2 287; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 288; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 289; VEX-NEXT: vcvttsd2si %xmm3, %rax 290; VEX-NEXT: xorq %rcx, %rax 291; VEX-NEXT: vcvttsd2si %xmm0, %rcx 292; VEX-NEXT: vucomisd %xmm1, %xmm0 293; VEX-NEXT: cmovaeq %rax, %rcx 294; VEX-NEXT: vmovq %rcx, %xmm0 295; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 296; VEX-NEXT: retq 297; 298; AVX512F-LABEL: fptoui_2f64_to_2i64: 299; AVX512F: # %bb.0: 300; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax 301; AVX512F-NEXT: vmovq %rax, %xmm1 302; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 303; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax 304; AVX512F-NEXT: vmovq %rax, %xmm0 305; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 306; AVX512F-NEXT: retq 307; 308; AVX512VL-LABEL: fptoui_2f64_to_2i64: 309; AVX512VL: # %bb.0: 310; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax 311; AVX512VL-NEXT: vmovq %rax, %xmm1 312; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 313; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax 314; AVX512VL-NEXT: vmovq %rax, %xmm0 315; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 316; AVX512VL-NEXT: retq 317; 318; AVX512DQ-LABEL: fptoui_2f64_to_2i64: 319; AVX512DQ: # %bb.0: 320; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 321; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 322; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 323; AVX512DQ-NEXT: vzeroupper 324; AVX512DQ-NEXT: 
retq 325; 326; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64: 327; AVX512VLDQ: # %bb.0: 328; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0 329; AVX512VLDQ-NEXT: retq 330 %cvt = fptoui <2 x double> %a to <2 x i64> 331 ret <2 x i64> %cvt 332} 333 334define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) { 335; SSE-LABEL: fptoui_2f64_to_4i32: 336; SSE: # %bb.0: 337; SSE-NEXT: cvttsd2si %xmm0, %rax 338; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 339; SSE-NEXT: cvttsd2si %xmm0, %rcx 340; SSE-NEXT: movd %eax, %xmm0 341; SSE-NEXT: movd %ecx, %xmm1 342; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 343; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 344; SSE-NEXT: retq 345; 346; AVX1-LABEL: fptoui_2f64_to_4i32: 347; AVX1: # %bb.0: 348; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 349; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 350; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 351; AVX1-NEXT: vpackssdw %xmm2, %xmm2, %xmm2 352; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3 353; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 354; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 355; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0 356; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0 357; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 358; AVX1-NEXT: vzeroupper 359; AVX1-NEXT: retq 360; 361; AVX2-LABEL: fptoui_2f64_to_4i32: 362; AVX2: # %bb.0: 363; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 364; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 365; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 366; AVX2-NEXT: vpackssdw %xmm2, %xmm2, %xmm2 367; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1 368; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1 369; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] 370; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1 371; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0 372; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 373; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 
374; AVX2-NEXT: vzeroupper 375; AVX2-NEXT: retq 376; 377; AVX512F-LABEL: fptoui_2f64_to_4i32: 378; AVX512F: # %bb.0: 379; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 380; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 381; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 382; AVX512F-NEXT: vzeroupper 383; AVX512F-NEXT: retq 384; 385; AVX512VL-LABEL: fptoui_2f64_to_4i32: 386; AVX512VL: # %bb.0: 387; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 388; AVX512VL-NEXT: retq 389; 390; AVX512DQ-LABEL: fptoui_2f64_to_4i32: 391; AVX512DQ: # %bb.0: 392; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 393; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 394; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 395; AVX512DQ-NEXT: vzeroupper 396; AVX512DQ-NEXT: retq 397; 398; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32: 399; AVX512VLDQ: # %bb.0: 400; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 401; AVX512VLDQ-NEXT: retq 402 %cvt = fptoui <2 x double> %a to <2 x i32> 403 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 404 ret <4 x i32> %ext 405} 406 407define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) { 408; SSE-LABEL: fptoui_2f64_to_2i32: 409; SSE: # %bb.0: 410; SSE-NEXT: cvttsd2si %xmm0, %rax 411; SSE-NEXT: movd %eax, %xmm1 412; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 413; SSE-NEXT: cvttsd2si %xmm0, %rax 414; SSE-NEXT: movd %eax, %xmm0 415; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 416; SSE-NEXT: movdqa %xmm1, %xmm0 417; SSE-NEXT: retq 418; 419; AVX1-LABEL: fptoui_2f64_to_2i32: 420; AVX1: # %bb.0: 421; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 422; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 423; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 424; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 425; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 426; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3 427; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 428; AVX1-NEXT: vcvttpd2dq 
%ymm0, %xmm0 429; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0 430; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0 431; AVX1-NEXT: vzeroupper 432; AVX1-NEXT: retq 433; 434; AVX2-LABEL: fptoui_2f64_to_2i32: 435; AVX2: # %bb.0: 436; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 437; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 438; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 439; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 440; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 441; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1 442; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1 443; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] 444; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1 445; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0 446; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 447; AVX2-NEXT: vzeroupper 448; AVX2-NEXT: retq 449; 450; AVX512F-LABEL: fptoui_2f64_to_2i32: 451; AVX512F: # %bb.0: 452; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 453; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 454; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 455; AVX512F-NEXT: vzeroupper 456; AVX512F-NEXT: retq 457; 458; AVX512VL-LABEL: fptoui_2f64_to_2i32: 459; AVX512VL: # %bb.0: 460; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 461; AVX512VL-NEXT: retq 462; 463; AVX512DQ-LABEL: fptoui_2f64_to_2i32: 464; AVX512DQ: # %bb.0: 465; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 466; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 467; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 468; AVX512DQ-NEXT: vzeroupper 469; AVX512DQ-NEXT: retq 470; 471; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32: 472; AVX512VLDQ: # %bb.0: 473; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 474; AVX512VLDQ-NEXT: retq 475 %cvt = fptoui <2 x double> %a to <2 x i32> 476 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 477 ret <4 x i32> %ext 478} 479 480define <4 x i32> @fptoui_4f64_to_2i32(<2 x 
double> %a) { 481; SSE-LABEL: fptoui_4f64_to_2i32: 482; SSE: # %bb.0: 483; SSE-NEXT: cvttsd2si %xmm0, %rax 484; SSE-NEXT: movd %eax, %xmm1 485; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 486; SSE-NEXT: cvttsd2si %xmm0, %rax 487; SSE-NEXT: movd %eax, %xmm0 488; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 489; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 490; SSE-NEXT: retq 491; 492; AVX1-LABEL: fptoui_4f64_to_2i32: 493; AVX1: # %bb.0: 494; AVX1-NEXT: vmovapd %xmm0, %xmm0 495; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 496; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 497; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 498; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 499; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm1 500; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1 501; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1 502; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 503; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 504; AVX1-NEXT: vzeroupper 505; AVX1-NEXT: retq 506; 507; AVX2-LABEL: fptoui_4f64_to_2i32: 508; AVX2: # %bb.0: 509; AVX2-NEXT: vmovapd %xmm0, %xmm0 510; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 511; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 512; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 513; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 514; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1 515; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1 516; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] 517; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1 518; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0 519; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 520; AVX2-NEXT: vzeroupper 521; AVX2-NEXT: retq 522; 523; AVX512F-LABEL: fptoui_4f64_to_2i32: 524; AVX512F: # %bb.0: 525; AVX512F-NEXT: vmovaps %xmm0, %xmm0 526; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 527; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 528; AVX512F-NEXT: vzeroupper 529; AVX512F-NEXT: retq 530; 531; 
AVX512VL-LABEL: fptoui_4f64_to_2i32: 532; AVX512VL: # %bb.0: 533; AVX512VL-NEXT: vmovaps %xmm0, %xmm0 534; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0 535; AVX512VL-NEXT: vzeroupper 536; AVX512VL-NEXT: retq 537; 538; AVX512DQ-LABEL: fptoui_4f64_to_2i32: 539; AVX512DQ: # %bb.0: 540; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 541; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 542; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 543; AVX512DQ-NEXT: vzeroupper 544; AVX512DQ-NEXT: retq 545; 546; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32: 547; AVX512VLDQ: # %bb.0: 548; AVX512VLDQ-NEXT: vmovaps %xmm0, %xmm0 549; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0 550; AVX512VLDQ-NEXT: vzeroupper 551; AVX512VLDQ-NEXT: retq 552 %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 553 %cvt = fptoui <4 x double> %ext to <4 x i32> 554 ret <4 x i32> %cvt 555} 556 557define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) { 558; SSE-LABEL: fptoui_4f64_to_4i64: 559; SSE: # %bb.0: 560; SSE-NEXT: movapd %xmm0, %xmm2 561; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero 562; SSE-NEXT: subsd %xmm3, %xmm0 563; SSE-NEXT: cvttsd2si %xmm0, %rcx 564; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 565; SSE-NEXT: xorq %rax, %rcx 566; SSE-NEXT: cvttsd2si %xmm2, %rdx 567; SSE-NEXT: ucomisd %xmm3, %xmm2 568; SSE-NEXT: cmovaeq %rcx, %rdx 569; SSE-NEXT: movq %rdx, %xmm0 570; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] 571; SSE-NEXT: movapd %xmm2, %xmm4 572; SSE-NEXT: subsd %xmm3, %xmm4 573; SSE-NEXT: cvttsd2si %xmm4, %rcx 574; SSE-NEXT: xorq %rax, %rcx 575; SSE-NEXT: cvttsd2si %xmm2, %rdx 576; SSE-NEXT: ucomisd %xmm3, %xmm2 577; SSE-NEXT: cmovaeq %rcx, %rdx 578; SSE-NEXT: movq %rdx, %xmm2 579; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 580; SSE-NEXT: movapd %xmm1, %xmm2 581; SSE-NEXT: subsd %xmm3, %xmm2 582; SSE-NEXT: cvttsd2si %xmm2, %rcx 583; SSE-NEXT: xorq %rax, %rcx 584; SSE-NEXT: cvttsd2si %xmm1, %rdx 585; SSE-NEXT: ucomisd 
%xmm3, %xmm1 586; SSE-NEXT: cmovaeq %rcx, %rdx 587; SSE-NEXT: movq %rdx, %xmm2 588; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] 589; SSE-NEXT: movapd %xmm1, %xmm4 590; SSE-NEXT: subsd %xmm3, %xmm4 591; SSE-NEXT: cvttsd2si %xmm4, %rcx 592; SSE-NEXT: xorq %rax, %rcx 593; SSE-NEXT: cvttsd2si %xmm1, %rax 594; SSE-NEXT: ucomisd %xmm3, %xmm1 595; SSE-NEXT: cmovaeq %rcx, %rax 596; SSE-NEXT: movq %rax, %xmm1 597; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] 598; SSE-NEXT: movdqa %xmm2, %xmm1 599; SSE-NEXT: retq 600; 601; AVX1-LABEL: fptoui_4f64_to_4i64: 602; AVX1: # %bb.0: 603; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 604; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 605; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3 606; AVX1-NEXT: vcvttsd2si %xmm3, %rax 607; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 608; AVX1-NEXT: xorq %rcx, %rax 609; AVX1-NEXT: vcvttsd2si %xmm2, %rdx 610; AVX1-NEXT: vucomisd %xmm1, %xmm2 611; AVX1-NEXT: cmovaeq %rax, %rdx 612; AVX1-NEXT: vmovq %rdx, %xmm3 613; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 614; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4 615; AVX1-NEXT: vcvttsd2si %xmm4, %rax 616; AVX1-NEXT: xorq %rcx, %rax 617; AVX1-NEXT: vcvttsd2si %xmm2, %rdx 618; AVX1-NEXT: vucomisd %xmm1, %xmm2 619; AVX1-NEXT: cmovaeq %rax, %rdx 620; AVX1-NEXT: vmovq %rdx, %xmm2 621; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 622; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3 623; AVX1-NEXT: vcvttsd2si %xmm3, %rax 624; AVX1-NEXT: xorq %rcx, %rax 625; AVX1-NEXT: vcvttsd2si %xmm0, %rdx 626; AVX1-NEXT: vucomisd %xmm1, %xmm0 627; AVX1-NEXT: cmovaeq %rax, %rdx 628; AVX1-NEXT: vmovq %rdx, %xmm3 629; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 630; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm4 631; AVX1-NEXT: vcvttsd2si %xmm4, %rax 632; AVX1-NEXT: xorq %rcx, %rax 633; AVX1-NEXT: vcvttsd2si %xmm0, %rcx 634; AVX1-NEXT: vucomisd %xmm1, %xmm0 635; AVX1-NEXT: cmovaeq %rax, %rcx 636; AVX1-NEXT: vmovq %rcx, %xmm0 637; AVX1-NEXT: vpunpcklqdq {{.*#+}} 
xmm0 = xmm3[0],xmm0[0] 638; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 639; AVX1-NEXT: retq 640; 641; AVX2-LABEL: fptoui_4f64_to_4i64: 642; AVX2: # %bb.0: 643; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 644; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 645; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3 646; AVX2-NEXT: vcvttsd2si %xmm3, %rax 647; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 648; AVX2-NEXT: xorq %rcx, %rax 649; AVX2-NEXT: vcvttsd2si %xmm2, %rdx 650; AVX2-NEXT: vucomisd %xmm1, %xmm2 651; AVX2-NEXT: cmovaeq %rax, %rdx 652; AVX2-NEXT: vmovq %rdx, %xmm3 653; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 654; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4 655; AVX2-NEXT: vcvttsd2si %xmm4, %rax 656; AVX2-NEXT: xorq %rcx, %rax 657; AVX2-NEXT: vcvttsd2si %xmm2, %rdx 658; AVX2-NEXT: vucomisd %xmm1, %xmm2 659; AVX2-NEXT: cmovaeq %rax, %rdx 660; AVX2-NEXT: vmovq %rdx, %xmm2 661; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 662; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3 663; AVX2-NEXT: vcvttsd2si %xmm3, %rax 664; AVX2-NEXT: xorq %rcx, %rax 665; AVX2-NEXT: vcvttsd2si %xmm0, %rdx 666; AVX2-NEXT: vucomisd %xmm1, %xmm0 667; AVX2-NEXT: cmovaeq %rax, %rdx 668; AVX2-NEXT: vmovq %rdx, %xmm3 669; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 670; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm4 671; AVX2-NEXT: vcvttsd2si %xmm4, %rax 672; AVX2-NEXT: xorq %rcx, %rax 673; AVX2-NEXT: vcvttsd2si %xmm0, %rcx 674; AVX2-NEXT: vucomisd %xmm1, %xmm0 675; AVX2-NEXT: cmovaeq %rax, %rcx 676; AVX2-NEXT: vmovq %rcx, %xmm0 677; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 678; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 679; AVX2-NEXT: retq 680; 681; AVX512F-LABEL: fptoui_4f64_to_4i64: 682; AVX512F: # %bb.0: 683; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 684; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax 685; AVX512F-NEXT: vmovq %rax, %xmm2 686; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 687; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax 688; AVX512F-NEXT: vmovq %rax, 
%xmm1 689; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 690; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax 691; AVX512F-NEXT: vmovq %rax, %xmm2 692; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 693; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax 694; AVX512F-NEXT: vmovq %rax, %xmm0 695; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 696; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 697; AVX512F-NEXT: retq 698; 699; AVX512VL-LABEL: fptoui_4f64_to_4i64: 700; AVX512VL: # %bb.0: 701; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 702; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax 703; AVX512VL-NEXT: vmovq %rax, %xmm2 704; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 705; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax 706; AVX512VL-NEXT: vmovq %rax, %xmm1 707; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 708; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax 709; AVX512VL-NEXT: vmovq %rax, %xmm2 710; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 711; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax 712; AVX512VL-NEXT: vmovq %rax, %xmm0 713; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 714; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 715; AVX512VL-NEXT: retq 716; 717; AVX512DQ-LABEL: fptoui_4f64_to_4i64: 718; AVX512DQ: # %bb.0: 719; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 720; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 721; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 722; AVX512DQ-NEXT: retq 723; 724; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64: 725; AVX512VLDQ: # %bb.0: 726; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0 727; AVX512VLDQ-NEXT: retq 728 %cvt = fptoui <4 x double> %a to <4 x i64> 729 ret <4 x i64> %cvt 730} 731 732define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) { 733; SSE-LABEL: fptoui_4f64_to_4i32: 734; SSE: # %bb.0: 735; SSE-NEXT: cvttsd2si %xmm1, %rax 736; SSE-NEXT: movd %eax, %xmm2 737; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] 738; SSE-NEXT: cvttsd2si %xmm1, %rax 739; SSE-NEXT: movd %eax, %xmm1 740; 
SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 741; SSE-NEXT: cvttsd2si %xmm0, %rax 742; SSE-NEXT: movd %eax, %xmm1 743; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 744; SSE-NEXT: cvttsd2si %xmm0, %rax 745; SSE-NEXT: movd %eax, %xmm0 746; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 747; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 748; SSE-NEXT: movdqa %xmm1, %xmm0 749; SSE-NEXT: retq 750; 751; AVX1-LABEL: fptoui_4f64_to_4i32: 752; AVX1: # %bb.0: 753; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 754; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 755; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 756; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 757; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm1 758; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1 759; AVX1-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1 760; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 761; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 762; AVX1-NEXT: vzeroupper 763; AVX1-NEXT: retq 764; 765; AVX2-LABEL: fptoui_4f64_to_4i32: 766; AVX2: # %bb.0: 767; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 768; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 769; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 770; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 771; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1 772; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1 773; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] 774; AVX2-NEXT: vxorpd %xmm3, %xmm1, %xmm1 775; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0 776; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 777; AVX2-NEXT: vzeroupper 778; AVX2-NEXT: retq 779; 780; AVX512F-LABEL: fptoui_4f64_to_4i32: 781; AVX512F: # %bb.0: 782; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 783; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 784; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 785; AVX512F-NEXT: vzeroupper 786; AVX512F-NEXT: retq 787; 788; AVX512VL-LABEL: 
fptoui_4f64_to_4i32: 789; AVX512VL: # %bb.0: 790; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0 791; AVX512VL-NEXT: vzeroupper 792; AVX512VL-NEXT: retq 793; 794; AVX512DQ-LABEL: fptoui_4f64_to_4i32: 795; AVX512DQ: # %bb.0: 796; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 797; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 798; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 799; AVX512DQ-NEXT: vzeroupper 800; AVX512DQ-NEXT: retq 801; 802; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32: 803; AVX512VLDQ: # %bb.0: 804; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0 805; AVX512VLDQ-NEXT: vzeroupper 806; AVX512VLDQ-NEXT: retq 807 %cvt = fptoui <4 x double> %a to <4 x i32> 808 ret <4 x i32> %cvt 809} 810 811; 812; Float to Signed Integer 813; 814 815define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) { 816; SSE-LABEL: fptosi_2f32_to_2i32: 817; SSE: # %bb.0: 818; SSE-NEXT: cvttps2dq %xmm0, %xmm0 819; SSE-NEXT: retq 820; 821; AVX-LABEL: fptosi_2f32_to_2i32: 822; AVX: # %bb.0: 823; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 824; AVX-NEXT: retq 825 %cvt = fptosi <2 x float> %a to <2 x i32> 826 ret <2 x i32> %cvt 827} 828 829define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) { 830; SSE-LABEL: fptosi_4f32_to_4i32: 831; SSE: # %bb.0: 832; SSE-NEXT: cvttps2dq %xmm0, %xmm0 833; SSE-NEXT: retq 834; 835; AVX-LABEL: fptosi_4f32_to_4i32: 836; AVX: # %bb.0: 837; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 838; AVX-NEXT: retq 839 %cvt = fptosi <4 x float> %a to <4 x i32> 840 ret <4 x i32> %cvt 841} 842 843define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) { 844; SSE-LABEL: fptosi_2f32_to_2i64: 845; SSE: # %bb.0: 846; SSE-NEXT: cvttss2si %xmm0, %rax 847; SSE-NEXT: movq %rax, %xmm1 848; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 849; SSE-NEXT: cvttss2si %xmm0, %rax 850; SSE-NEXT: movq %rax, %xmm0 851; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 852; SSE-NEXT: movdqa %xmm1, %xmm0 853; SSE-NEXT: retq 854; 855; VEX-LABEL: fptosi_2f32_to_2i64: 856; VEX: # %bb.0: 857; VEX-NEXT: vcvttss2si 
%xmm0, %rax 858; VEX-NEXT: vmovq %rax, %xmm1 859; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 860; VEX-NEXT: vcvttss2si %xmm0, %rax 861; VEX-NEXT: vmovq %rax, %xmm0 862; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 863; VEX-NEXT: retq 864; 865; AVX512F-LABEL: fptosi_2f32_to_2i64: 866; AVX512F: # %bb.0: 867; AVX512F-NEXT: vcvttss2si %xmm0, %rax 868; AVX512F-NEXT: vmovq %rax, %xmm1 869; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 870; AVX512F-NEXT: vcvttss2si %xmm0, %rax 871; AVX512F-NEXT: vmovq %rax, %xmm0 872; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 873; AVX512F-NEXT: retq 874; 875; AVX512VL-LABEL: fptosi_2f32_to_2i64: 876; AVX512VL: # %bb.0: 877; AVX512VL-NEXT: vcvttss2si %xmm0, %rax 878; AVX512VL-NEXT: vmovq %rax, %xmm1 879; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 880; AVX512VL-NEXT: vcvttss2si %xmm0, %rax 881; AVX512VL-NEXT: vmovq %rax, %xmm0 882; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 883; AVX512VL-NEXT: retq 884; 885; AVX512DQ-LABEL: fptosi_2f32_to_2i64: 886; AVX512DQ: # %bb.0: 887; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 888; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 889; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 890; AVX512DQ-NEXT: vzeroupper 891; AVX512DQ-NEXT: retq 892; 893; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64: 894; AVX512VLDQ: # %bb.0: 895; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0 896; AVX512VLDQ-NEXT: retq 897 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> 898 %cvt = fptosi <2 x float> %shuf to <2 x i64> 899 ret <2 x i64> %cvt 900} 901 902define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) { 903; SSE-LABEL: fptosi_4f32_to_2i64: 904; SSE: # %bb.0: 905; SSE-NEXT: cvttss2si %xmm0, %rax 906; SSE-NEXT: movq %rax, %xmm1 907; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 908; SSE-NEXT: cvttss2si %xmm0, %rax 909; SSE-NEXT: movq %rax, %xmm0 910; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 911; SSE-NEXT: 
movdqa %xmm1, %xmm0 912; SSE-NEXT: retq 913; 914; VEX-LABEL: fptosi_4f32_to_2i64: 915; VEX: # %bb.0: 916; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 917; VEX-NEXT: vcvttss2si %xmm1, %rax 918; VEX-NEXT: vcvttss2si %xmm0, %rcx 919; VEX-NEXT: vmovq %rcx, %xmm0 920; VEX-NEXT: vmovq %rax, %xmm1 921; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 922; VEX-NEXT: retq 923; 924; AVX512F-LABEL: fptosi_4f32_to_2i64: 925; AVX512F: # %bb.0: 926; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 927; AVX512F-NEXT: vcvttss2si %xmm1, %rax 928; AVX512F-NEXT: vcvttss2si %xmm0, %rcx 929; AVX512F-NEXT: vmovq %rcx, %xmm0 930; AVX512F-NEXT: vmovq %rax, %xmm1 931; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 932; AVX512F-NEXT: retq 933; 934; AVX512VL-LABEL: fptosi_4f32_to_2i64: 935; AVX512VL: # %bb.0: 936; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 937; AVX512VL-NEXT: vcvttss2si %xmm1, %rax 938; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx 939; AVX512VL-NEXT: vmovq %rcx, %xmm0 940; AVX512VL-NEXT: vmovq %rax, %xmm1 941; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 942; AVX512VL-NEXT: retq 943; 944; AVX512DQ-LABEL: fptosi_4f32_to_2i64: 945; AVX512DQ: # %bb.0: 946; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 947; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 948; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 949; AVX512DQ-NEXT: vzeroupper 950; AVX512DQ-NEXT: retq 951; 952; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64: 953; AVX512VLDQ: # %bb.0: 954; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0 955; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 956; AVX512VLDQ-NEXT: vzeroupper 957; AVX512VLDQ-NEXT: retq 958 %cvt = fptosi <4 x float> %a to <4 x i64> 959 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1> 960 ret <2 x i64> %shuf 961} 962 963define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) { 964; SSE-LABEL: fptosi_8f32_to_8i32: 965; SSE: # %bb.0: 966; SSE-NEXT: cvttps2dq %xmm0, %xmm0 967; 
SSE-NEXT: cvttps2dq %xmm1, %xmm1 968; SSE-NEXT: retq 969; 970; AVX-LABEL: fptosi_8f32_to_8i32: 971; AVX: # %bb.0: 972; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 973; AVX-NEXT: retq 974 %cvt = fptosi <8 x float> %a to <8 x i32> 975 ret <8 x i32> %cvt 976} 977 978define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) { 979; SSE-LABEL: fptosi_4f32_to_4i64: 980; SSE: # %bb.0: 981; SSE-NEXT: cvttss2si %xmm0, %rax 982; SSE-NEXT: movq %rax, %xmm2 983; SSE-NEXT: movaps %xmm0, %xmm1 984; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] 985; SSE-NEXT: cvttss2si %xmm1, %rax 986; SSE-NEXT: movq %rax, %xmm1 987; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] 988; SSE-NEXT: movaps %xmm0, %xmm1 989; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] 990; SSE-NEXT: cvttss2si %xmm1, %rax 991; SSE-NEXT: movq %rax, %xmm3 992; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 993; SSE-NEXT: cvttss2si %xmm0, %rax 994; SSE-NEXT: movq %rax, %xmm1 995; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 996; SSE-NEXT: movdqa %xmm2, %xmm0 997; SSE-NEXT: retq 998; 999; AVX1-LABEL: fptosi_4f32_to_4i64: 1000; AVX1: # %bb.0: 1001; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1002; AVX1-NEXT: vcvttss2si %xmm1, %rax 1003; AVX1-NEXT: vmovq %rax, %xmm1 1004; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1005; AVX1-NEXT: vcvttss2si %xmm2, %rax 1006; AVX1-NEXT: vmovq %rax, %xmm2 1007; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1008; AVX1-NEXT: vcvttss2si %xmm0, %rax 1009; AVX1-NEXT: vmovq %rax, %xmm2 1010; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1011; AVX1-NEXT: vcvttss2si %xmm0, %rax 1012; AVX1-NEXT: vmovq %rax, %xmm0 1013; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1014; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1015; AVX1-NEXT: retq 1016; 1017; AVX2-LABEL: fptosi_4f32_to_4i64: 1018; AVX2: # %bb.0: 1019; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1020; AVX2-NEXT: vcvttss2si %xmm1, %rax 1021; AVX2-NEXT: vmovq %rax, %xmm1 1022; AVX2-NEXT: 
vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1023; AVX2-NEXT: vcvttss2si %xmm2, %rax 1024; AVX2-NEXT: vmovq %rax, %xmm2 1025; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1026; AVX2-NEXT: vcvttss2si %xmm0, %rax 1027; AVX2-NEXT: vmovq %rax, %xmm2 1028; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1029; AVX2-NEXT: vcvttss2si %xmm0, %rax 1030; AVX2-NEXT: vmovq %rax, %xmm0 1031; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1032; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1033; AVX2-NEXT: retq 1034; 1035; AVX512F-LABEL: fptosi_4f32_to_4i64: 1036; AVX512F: # %bb.0: 1037; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1038; AVX512F-NEXT: vcvttss2si %xmm1, %rax 1039; AVX512F-NEXT: vmovq %rax, %xmm1 1040; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1041; AVX512F-NEXT: vcvttss2si %xmm2, %rax 1042; AVX512F-NEXT: vmovq %rax, %xmm2 1043; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1044; AVX512F-NEXT: vcvttss2si %xmm0, %rax 1045; AVX512F-NEXT: vmovq %rax, %xmm2 1046; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1047; AVX512F-NEXT: vcvttss2si %xmm0, %rax 1048; AVX512F-NEXT: vmovq %rax, %xmm0 1049; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1050; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1051; AVX512F-NEXT: retq 1052; 1053; AVX512VL-LABEL: fptosi_4f32_to_4i64: 1054; AVX512VL: # %bb.0: 1055; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1056; AVX512VL-NEXT: vcvttss2si %xmm1, %rax 1057; AVX512VL-NEXT: vmovq %rax, %xmm1 1058; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1059; AVX512VL-NEXT: vcvttss2si %xmm2, %rax 1060; AVX512VL-NEXT: vmovq %rax, %xmm2 1061; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1062; AVX512VL-NEXT: vcvttss2si %xmm0, %rax 1063; AVX512VL-NEXT: vmovq %rax, %xmm2 1064; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1065; AVX512VL-NEXT: vcvttss2si %xmm0, %rax 1066; AVX512VL-NEXT: vmovq %rax, %xmm0 1067; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = 
xmm2[0],xmm0[0] 1068; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1069; AVX512VL-NEXT: retq 1070; 1071; AVX512DQ-LABEL: fptosi_4f32_to_4i64: 1072; AVX512DQ: # %bb.0: 1073; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 1074; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1075; AVX512DQ-NEXT: retq 1076; 1077; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64: 1078; AVX512VLDQ: # %bb.0: 1079; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0 1080; AVX512VLDQ-NEXT: retq 1081 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1082 %cvt = fptosi <4 x float> %shuf to <4 x i64> 1083 ret <4 x i64> %cvt 1084} 1085 1086define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) { 1087; SSE-LABEL: fptosi_8f32_to_4i64: 1088; SSE: # %bb.0: 1089; SSE-NEXT: cvttss2si %xmm0, %rax 1090; SSE-NEXT: movq %rax, %xmm2 1091; SSE-NEXT: movaps %xmm0, %xmm1 1092; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] 1093; SSE-NEXT: cvttss2si %xmm1, %rax 1094; SSE-NEXT: movq %rax, %xmm1 1095; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] 1096; SSE-NEXT: movaps %xmm0, %xmm1 1097; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] 1098; SSE-NEXT: cvttss2si %xmm1, %rax 1099; SSE-NEXT: movq %rax, %xmm3 1100; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1101; SSE-NEXT: cvttss2si %xmm0, %rax 1102; SSE-NEXT: movq %rax, %xmm1 1103; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 1104; SSE-NEXT: movdqa %xmm2, %xmm0 1105; SSE-NEXT: retq 1106; 1107; AVX1-LABEL: fptosi_8f32_to_4i64: 1108; AVX1: # %bb.0: 1109; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1110; AVX1-NEXT: vcvttss2si %xmm1, %rax 1111; AVX1-NEXT: vmovq %rax, %xmm1 1112; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1113; AVX1-NEXT: vcvttss2si %xmm2, %rax 1114; AVX1-NEXT: vmovq %rax, %xmm2 1115; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1116; AVX1-NEXT: vcvttss2si %xmm0, %rax 1117; AVX1-NEXT: vmovq %rax, %xmm2 1118; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1119; 
AVX1-NEXT: vcvttss2si %xmm0, %rax 1120; AVX1-NEXT: vmovq %rax, %xmm0 1121; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1122; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1123; AVX1-NEXT: retq 1124; 1125; AVX2-LABEL: fptosi_8f32_to_4i64: 1126; AVX2: # %bb.0: 1127; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1128; AVX2-NEXT: vcvttss2si %xmm1, %rax 1129; AVX2-NEXT: vmovq %rax, %xmm1 1130; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1131; AVX2-NEXT: vcvttss2si %xmm2, %rax 1132; AVX2-NEXT: vmovq %rax, %xmm2 1133; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1134; AVX2-NEXT: vcvttss2si %xmm0, %rax 1135; AVX2-NEXT: vmovq %rax, %xmm2 1136; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1137; AVX2-NEXT: vcvttss2si %xmm0, %rax 1138; AVX2-NEXT: vmovq %rax, %xmm0 1139; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1140; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1141; AVX2-NEXT: retq 1142; 1143; AVX512F-LABEL: fptosi_8f32_to_4i64: 1144; AVX512F: # %bb.0: 1145; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1146; AVX512F-NEXT: vcvttss2si %xmm1, %rax 1147; AVX512F-NEXT: vcvttss2si %xmm0, %rcx 1148; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1149; AVX512F-NEXT: vcvttss2si %xmm1, %rdx 1150; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1151; AVX512F-NEXT: vcvttss2si %xmm0, %rsi 1152; AVX512F-NEXT: vmovq %rsi, %xmm0 1153; AVX512F-NEXT: vmovq %rdx, %xmm1 1154; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1155; AVX512F-NEXT: vmovq %rcx, %xmm1 1156; AVX512F-NEXT: vmovq %rax, %xmm2 1157; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1158; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1159; AVX512F-NEXT: retq 1160; 1161; AVX512VL-LABEL: fptosi_8f32_to_4i64: 1162; AVX512VL: # %bb.0: 1163; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1164; AVX512VL-NEXT: vcvttss2si %xmm1, %rax 1165; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx 1166; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 
1167; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx 1168; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1169; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi 1170; AVX512VL-NEXT: vmovq %rsi, %xmm0 1171; AVX512VL-NEXT: vmovq %rdx, %xmm1 1172; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1173; AVX512VL-NEXT: vmovq %rcx, %xmm1 1174; AVX512VL-NEXT: vmovq %rax, %xmm2 1175; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1176; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1177; AVX512VL-NEXT: retq 1178; 1179; AVX512DQ-LABEL: fptosi_8f32_to_4i64: 1180; AVX512DQ: # %bb.0: 1181; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 1182; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1183; AVX512DQ-NEXT: retq 1184; 1185; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64: 1186; AVX512VLDQ: # %bb.0: 1187; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0 1188; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1189; AVX512VLDQ-NEXT: retq 1190 %cvt = fptosi <8 x float> %a to <8 x i64> 1191 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1192 ret <4 x i64> %shuf 1193} 1194 1195; 1196; Float to Unsigned Integer 1197; 1198 1199define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) { 1200; SSE-LABEL: fptoui_2f32_to_2i32: 1201; SSE: # %bb.0: 1202; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1203; SSE-NEXT: movaps %xmm0, %xmm1 1204; SSE-NEXT: cmpltps %xmm2, %xmm1 1205; SSE-NEXT: cvttps2dq %xmm0, %xmm3 1206; SSE-NEXT: subps %xmm2, %xmm0 1207; SSE-NEXT: cvttps2dq %xmm0, %xmm0 1208; SSE-NEXT: xorps {{.*}}(%rip), %xmm0 1209; SSE-NEXT: andps %xmm1, %xmm3 1210; SSE-NEXT: andnps %xmm0, %xmm1 1211; SSE-NEXT: orps %xmm3, %xmm1 1212; SSE-NEXT: movaps %xmm1, %xmm0 1213; SSE-NEXT: retq 1214; 1215; AVX1-LABEL: fptoui_2f32_to_2i32: 1216; AVX1: # %bb.0: 1217; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1218; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 
1219; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1 1220; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 1221; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1 1222; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 1223; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 1224; AVX1-NEXT: retq 1225; 1226; AVX2-LABEL: fptoui_2f32_to_2i32: 1227; AVX2: # %bb.0: 1228; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1229; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 1230; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1 1231; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1 1232; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] 1233; AVX2-NEXT: vxorps %xmm3, %xmm1, %xmm1 1234; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0 1235; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 1236; AVX2-NEXT: retq 1237; 1238; AVX512F-LABEL: fptoui_2f32_to_2i32: 1239; AVX512F: # %bb.0: 1240; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1241; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 1242; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1243; AVX512F-NEXT: vzeroupper 1244; AVX512F-NEXT: retq 1245; 1246; AVX512VL-LABEL: fptoui_2f32_to_2i32: 1247; AVX512VL: # %bb.0: 1248; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0 1249; AVX512VL-NEXT: retq 1250; 1251; AVX512DQ-LABEL: fptoui_2f32_to_2i32: 1252; AVX512DQ: # %bb.0: 1253; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1254; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 1255; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1256; AVX512DQ-NEXT: vzeroupper 1257; AVX512DQ-NEXT: retq 1258; 1259; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32: 1260; AVX512VLDQ: # %bb.0: 1261; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0 1262; AVX512VLDQ-NEXT: retq 1263 %cvt = fptoui <2 x float> %a to <2 x i32> 1264 ret <2 x i32> %cvt 1265} 1266 1267define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) { 1268; SSE-LABEL: fptoui_4f32_to_4i32: 1269; SSE: # %bb.0: 1270; SSE-NEXT: movaps {{.*#+}} xmm2 = 
[2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1271; SSE-NEXT: movaps %xmm0, %xmm1 1272; SSE-NEXT: cmpltps %xmm2, %xmm1 1273; SSE-NEXT: cvttps2dq %xmm0, %xmm3 1274; SSE-NEXT: subps %xmm2, %xmm0 1275; SSE-NEXT: cvttps2dq %xmm0, %xmm0 1276; SSE-NEXT: xorps {{.*}}(%rip), %xmm0 1277; SSE-NEXT: andps %xmm1, %xmm3 1278; SSE-NEXT: andnps %xmm0, %xmm1 1279; SSE-NEXT: orps %xmm3, %xmm1 1280; SSE-NEXT: movaps %xmm1, %xmm0 1281; SSE-NEXT: retq 1282; 1283; AVX1-LABEL: fptoui_4f32_to_4i32: 1284; AVX1: # %bb.0: 1285; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1286; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 1287; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1 1288; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 1289; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm1, %xmm1 1290; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 1291; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 1292; AVX1-NEXT: retq 1293; 1294; AVX2-LABEL: fptoui_4f32_to_4i32: 1295; AVX2: # %bb.0: 1296; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1297; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 1298; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1 1299; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1 1300; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] 1301; AVX2-NEXT: vxorps %xmm3, %xmm1, %xmm1 1302; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0 1303; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 1304; AVX2-NEXT: retq 1305; 1306; AVX512F-LABEL: fptoui_4f32_to_4i32: 1307; AVX512F: # %bb.0: 1308; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1309; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 1310; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1311; AVX512F-NEXT: vzeroupper 1312; AVX512F-NEXT: retq 1313; 1314; AVX512VL-LABEL: fptoui_4f32_to_4i32: 1315; AVX512VL: # %bb.0: 1316; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0 1317; AVX512VL-NEXT: retq 1318; 1319; AVX512DQ-LABEL: fptoui_4f32_to_4i32: 1320; AVX512DQ: # %bb.0: 1321; 
AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1322; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 1323; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1324; AVX512DQ-NEXT: vzeroupper 1325; AVX512DQ-NEXT: retq 1326; 1327; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32: 1328; AVX512VLDQ: # %bb.0: 1329; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0 1330; AVX512VLDQ-NEXT: retq 1331 %cvt = fptoui <4 x float> %a to <4 x i32> 1332 ret <4 x i32> %cvt 1333} 1334 1335define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) { 1336; SSE-LABEL: fptoui_2f32_to_2i64: 1337; SSE: # %bb.0: 1338; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 1339; SSE-NEXT: movaps %xmm0, %xmm1 1340; SSE-NEXT: subss %xmm2, %xmm1 1341; SSE-NEXT: cvttss2si %xmm1, %rax 1342; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 1343; SSE-NEXT: xorq %rcx, %rax 1344; SSE-NEXT: cvttss2si %xmm0, %rdx 1345; SSE-NEXT: ucomiss %xmm2, %xmm0 1346; SSE-NEXT: cmovaeq %rax, %rdx 1347; SSE-NEXT: movq %rdx, %xmm1 1348; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1349; SSE-NEXT: movaps %xmm0, %xmm3 1350; SSE-NEXT: subss %xmm2, %xmm3 1351; SSE-NEXT: cvttss2si %xmm3, %rax 1352; SSE-NEXT: xorq %rcx, %rax 1353; SSE-NEXT: cvttss2si %xmm0, %rcx 1354; SSE-NEXT: ucomiss %xmm2, %xmm0 1355; SSE-NEXT: cmovaeq %rax, %rcx 1356; SSE-NEXT: movq %rcx, %xmm0 1357; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1358; SSE-NEXT: movdqa %xmm1, %xmm0 1359; SSE-NEXT: retq 1360; 1361; VEX-LABEL: fptoui_2f32_to_2i64: 1362; VEX: # %bb.0: 1363; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1364; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2 1365; VEX-NEXT: vcvttss2si %xmm2, %rax 1366; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 1367; VEX-NEXT: xorq %rcx, %rax 1368; VEX-NEXT: vcvttss2si %xmm0, %rdx 1369; VEX-NEXT: vucomiss %xmm1, %xmm0 1370; VEX-NEXT: cmovaeq %rax, %rdx 1371; VEX-NEXT: vmovq %rdx, %xmm2 1372; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1373; VEX-NEXT: vsubss 
%xmm1, %xmm0, %xmm3 1374; VEX-NEXT: vcvttss2si %xmm3, %rax 1375; VEX-NEXT: xorq %rcx, %rax 1376; VEX-NEXT: vcvttss2si %xmm0, %rcx 1377; VEX-NEXT: vucomiss %xmm1, %xmm0 1378; VEX-NEXT: cmovaeq %rax, %rcx 1379; VEX-NEXT: vmovq %rcx, %xmm0 1380; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1381; VEX-NEXT: retq 1382; 1383; AVX512F-LABEL: fptoui_2f32_to_2i64: 1384; AVX512F: # %bb.0: 1385; AVX512F-NEXT: vcvttss2usi %xmm0, %rax 1386; AVX512F-NEXT: vmovq %rax, %xmm1 1387; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1388; AVX512F-NEXT: vcvttss2usi %xmm0, %rax 1389; AVX512F-NEXT: vmovq %rax, %xmm0 1390; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1391; AVX512F-NEXT: retq 1392; 1393; AVX512VL-LABEL: fptoui_2f32_to_2i64: 1394; AVX512VL: # %bb.0: 1395; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax 1396; AVX512VL-NEXT: vmovq %rax, %xmm1 1397; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1398; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax 1399; AVX512VL-NEXT: vmovq %rax, %xmm0 1400; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1401; AVX512VL-NEXT: retq 1402; 1403; AVX512DQ-LABEL: fptoui_2f32_to_2i64: 1404; AVX512DQ: # %bb.0: 1405; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1406; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 1407; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1408; AVX512DQ-NEXT: vzeroupper 1409; AVX512DQ-NEXT: retq 1410; 1411; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64: 1412; AVX512VLDQ: # %bb.0: 1413; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0 1414; AVX512VLDQ-NEXT: retq 1415 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> 1416 %cvt = fptoui <2 x float> %shuf to <2 x i64> 1417 ret <2 x i64> %cvt 1418} 1419 1420define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) { 1421; SSE-LABEL: fptoui_4f32_to_2i64: 1422; SSE: # %bb.0: 1423; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 1424; SSE-NEXT: movaps %xmm0, %xmm1 1425; SSE-NEXT: subss %xmm2, %xmm1 1426; SSE-NEXT: 
cvttss2si %xmm1, %rax 1427; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 1428; SSE-NEXT: xorq %rcx, %rax 1429; SSE-NEXT: cvttss2si %xmm0, %rdx 1430; SSE-NEXT: ucomiss %xmm2, %xmm0 1431; SSE-NEXT: cmovaeq %rax, %rdx 1432; SSE-NEXT: movq %rdx, %xmm1 1433; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1434; SSE-NEXT: movaps %xmm0, %xmm3 1435; SSE-NEXT: subss %xmm2, %xmm3 1436; SSE-NEXT: cvttss2si %xmm3, %rax 1437; SSE-NEXT: xorq %rcx, %rax 1438; SSE-NEXT: cvttss2si %xmm0, %rcx 1439; SSE-NEXT: ucomiss %xmm2, %xmm0 1440; SSE-NEXT: cmovaeq %rax, %rcx 1441; SSE-NEXT: movq %rcx, %xmm0 1442; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1443; SSE-NEXT: movdqa %xmm1, %xmm0 1444; SSE-NEXT: retq 1445; 1446; VEX-LABEL: fptoui_4f32_to_2i64: 1447; VEX: # %bb.0: 1448; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1449; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 1450; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3 1451; VEX-NEXT: vcvttss2si %xmm3, %rax 1452; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 1453; VEX-NEXT: xorq %rcx, %rax 1454; VEX-NEXT: vcvttss2si %xmm1, %rdx 1455; VEX-NEXT: vucomiss %xmm2, %xmm1 1456; VEX-NEXT: cmovaeq %rax, %rdx 1457; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1 1458; VEX-NEXT: vcvttss2si %xmm1, %rax 1459; VEX-NEXT: xorq %rcx, %rax 1460; VEX-NEXT: vcvttss2si %xmm0, %rcx 1461; VEX-NEXT: vucomiss %xmm2, %xmm0 1462; VEX-NEXT: cmovaeq %rax, %rcx 1463; VEX-NEXT: vmovq %rcx, %xmm0 1464; VEX-NEXT: vmovq %rdx, %xmm1 1465; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1466; VEX-NEXT: retq 1467; 1468; AVX512F-LABEL: fptoui_4f32_to_2i64: 1469; AVX512F: # %bb.0: 1470; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1471; AVX512F-NEXT: vcvttss2usi %xmm1, %rax 1472; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx 1473; AVX512F-NEXT: vmovq %rcx, %xmm0 1474; AVX512F-NEXT: vmovq %rax, %xmm1 1475; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1476; AVX512F-NEXT: retq 1477; 1478; 
AVX512VL-LABEL: fptoui_4f32_to_2i64: 1479; AVX512VL: # %bb.0: 1480; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1481; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax 1482; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx 1483; AVX512VL-NEXT: vmovq %rcx, %xmm0 1484; AVX512VL-NEXT: vmovq %rax, %xmm1 1485; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1486; AVX512VL-NEXT: retq 1487; 1488; AVX512DQ-LABEL: fptoui_4f32_to_2i64: 1489; AVX512DQ: # %bb.0: 1490; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1491; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 1492; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1493; AVX512DQ-NEXT: vzeroupper 1494; AVX512DQ-NEXT: retq 1495; 1496; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64: 1497; AVX512VLDQ: # %bb.0: 1498; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0 1499; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1500; AVX512VLDQ-NEXT: vzeroupper 1501; AVX512VLDQ-NEXT: retq 1502 %cvt = fptoui <4 x float> %a to <4 x i64> 1503 %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1> 1504 ret <2 x i64> %shuf 1505} 1506 1507define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) { 1508; SSE-LABEL: fptoui_8f32_to_8i32: 1509; SSE: # %bb.0: 1510; SSE-NEXT: movaps {{.*#+}} xmm4 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1511; SSE-NEXT: movaps %xmm0, %xmm2 1512; SSE-NEXT: cmpltps %xmm4, %xmm2 1513; SSE-NEXT: cvttps2dq %xmm0, %xmm3 1514; SSE-NEXT: subps %xmm4, %xmm0 1515; SSE-NEXT: cvttps2dq %xmm0, %xmm0 1516; SSE-NEXT: movaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] 1517; SSE-NEXT: xorps %xmm5, %xmm0 1518; SSE-NEXT: andps %xmm2, %xmm3 1519; SSE-NEXT: andnps %xmm0, %xmm2 1520; SSE-NEXT: orps %xmm3, %xmm2 1521; SSE-NEXT: movaps %xmm1, %xmm3 1522; SSE-NEXT: cmpltps %xmm4, %xmm3 1523; SSE-NEXT: cvttps2dq %xmm1, %xmm0 1524; SSE-NEXT: subps %xmm4, %xmm1 1525; SSE-NEXT: cvttps2dq %xmm1, %xmm1 1526; SSE-NEXT: xorps %xmm5, %xmm1 1527; SSE-NEXT: andps %xmm3, %xmm0 1528; 
SSE-NEXT: andnps %xmm1, %xmm3 1529; SSE-NEXT: orps %xmm0, %xmm3 1530; SSE-NEXT: movaps %xmm2, %xmm0 1531; SSE-NEXT: movaps %xmm3, %xmm1 1532; SSE-NEXT: retq 1533; 1534; AVX1-LABEL: fptoui_8f32_to_8i32: 1535; AVX1: # %bb.0: 1536; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1537; AVX1-NEXT: vcmpltps %ymm1, %ymm0, %ymm2 1538; AVX1-NEXT: vsubps %ymm1, %ymm0, %ymm1 1539; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1 1540; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm1, %ymm1 1541; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0 1542; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 1543; AVX1-NEXT: retq 1544; 1545; AVX2-LABEL: fptoui_8f32_to_8i32: 1546; AVX2: # %bb.0: 1547; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1548; AVX2-NEXT: vcmpltps %ymm1, %ymm0, %ymm2 1549; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm1 1550; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1 1551; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648] 1552; AVX2-NEXT: vxorps %ymm3, %ymm1, %ymm1 1553; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0 1554; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 1555; AVX2-NEXT: retq 1556; 1557; AVX512F-LABEL: fptoui_8f32_to_8i32: 1558; AVX512F: # %bb.0: 1559; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1560; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 1561; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1562; AVX512F-NEXT: retq 1563; 1564; AVX512VL-LABEL: fptoui_8f32_to_8i32: 1565; AVX512VL: # %bb.0: 1566; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0 1567; AVX512VL-NEXT: retq 1568; 1569; AVX512DQ-LABEL: fptoui_8f32_to_8i32: 1570; AVX512DQ: # %bb.0: 1571; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1572; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 1573; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1574; 
AVX512DQ-NEXT: retq 1575; 1576; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32: 1577; AVX512VLDQ: # %bb.0: 1578; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0 1579; AVX512VLDQ-NEXT: retq 1580 %cvt = fptoui <8 x float> %a to <8 x i32> 1581 ret <8 x i32> %cvt 1582} 1583 1584define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) { 1585; SSE-LABEL: fptoui_4f32_to_4i64: 1586; SSE: # %bb.0: 1587; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1588; SSE-NEXT: movaps %xmm0, %xmm2 1589; SSE-NEXT: subss %xmm1, %xmm2 1590; SSE-NEXT: cvttss2si %xmm2, %rcx 1591; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 1592; SSE-NEXT: xorq %rax, %rcx 1593; SSE-NEXT: cvttss2si %xmm0, %rdx 1594; SSE-NEXT: ucomiss %xmm1, %xmm0 1595; SSE-NEXT: cmovaeq %rcx, %rdx 1596; SSE-NEXT: movq %rdx, %xmm2 1597; SSE-NEXT: movaps %xmm0, %xmm3 1598; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1] 1599; SSE-NEXT: movaps %xmm3, %xmm4 1600; SSE-NEXT: subss %xmm1, %xmm4 1601; SSE-NEXT: cvttss2si %xmm4, %rcx 1602; SSE-NEXT: xorq %rax, %rcx 1603; SSE-NEXT: cvttss2si %xmm3, %rdx 1604; SSE-NEXT: ucomiss %xmm1, %xmm3 1605; SSE-NEXT: cmovaeq %rcx, %rdx 1606; SSE-NEXT: movq %rdx, %xmm3 1607; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1608; SSE-NEXT: movaps %xmm0, %xmm3 1609; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3] 1610; SSE-NEXT: movaps %xmm3, %xmm4 1611; SSE-NEXT: subss %xmm1, %xmm4 1612; SSE-NEXT: cvttss2si %xmm4, %rcx 1613; SSE-NEXT: xorq %rax, %rcx 1614; SSE-NEXT: cvttss2si %xmm3, %rdx 1615; SSE-NEXT: ucomiss %xmm1, %xmm3 1616; SSE-NEXT: cmovaeq %rcx, %rdx 1617; SSE-NEXT: movq %rdx, %xmm3 1618; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1619; SSE-NEXT: movaps %xmm0, %xmm4 1620; SSE-NEXT: subss %xmm1, %xmm4 1621; SSE-NEXT: cvttss2si %xmm4, %rcx 1622; SSE-NEXT: xorq %rax, %rcx 1623; SSE-NEXT: cvttss2si %xmm0, %rax 1624; SSE-NEXT: ucomiss %xmm1, %xmm0 1625; SSE-NEXT: cmovaeq %rcx, %rax 1626; SSE-NEXT: movq %rax, %xmm1 1627; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = 
xmm1[0],xmm3[0] 1628; SSE-NEXT: movdqa %xmm2, %xmm0 1629; SSE-NEXT: retq 1630; 1631; AVX1-LABEL: fptoui_4f32_to_4i64: 1632; AVX1: # %bb.0: 1633; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] 1634; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1635; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3 1636; AVX1-NEXT: vcvttss2si %xmm3, %rax 1637; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 1638; AVX1-NEXT: xorq %rcx, %rax 1639; AVX1-NEXT: vcvttss2si %xmm2, %rdx 1640; AVX1-NEXT: vucomiss %xmm1, %xmm2 1641; AVX1-NEXT: cmovaeq %rax, %rdx 1642; AVX1-NEXT: vmovq %rdx, %xmm2 1643; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 1644; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4 1645; AVX1-NEXT: vcvttss2si %xmm4, %rax 1646; AVX1-NEXT: xorq %rcx, %rax 1647; AVX1-NEXT: vcvttss2si %xmm3, %rdx 1648; AVX1-NEXT: vucomiss %xmm1, %xmm3 1649; AVX1-NEXT: cmovaeq %rax, %rdx 1650; AVX1-NEXT: vmovq %rdx, %xmm3 1651; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 1652; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3 1653; AVX1-NEXT: vcvttss2si %xmm3, %rax 1654; AVX1-NEXT: xorq %rcx, %rax 1655; AVX1-NEXT: vcvttss2si %xmm0, %rdx 1656; AVX1-NEXT: vucomiss %xmm1, %xmm0 1657; AVX1-NEXT: cmovaeq %rax, %rdx 1658; AVX1-NEXT: vmovq %rdx, %xmm3 1659; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1660; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4 1661; AVX1-NEXT: vcvttss2si %xmm4, %rax 1662; AVX1-NEXT: xorq %rcx, %rax 1663; AVX1-NEXT: vcvttss2si %xmm0, %rcx 1664; AVX1-NEXT: vucomiss %xmm1, %xmm0 1665; AVX1-NEXT: cmovaeq %rax, %rcx 1666; AVX1-NEXT: vmovq %rcx, %xmm0 1667; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 1668; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1669; AVX1-NEXT: retq 1670; 1671; AVX2-LABEL: fptoui_4f32_to_4i64: 1672; AVX2: # %bb.0: 1673; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] 1674; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1675; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3 1676; AVX2-NEXT: vcvttss2si %xmm3, %rax 1677; AVX2-NEXT: 
movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 1678; AVX2-NEXT: xorq %rcx, %rax 1679; AVX2-NEXT: vcvttss2si %xmm2, %rdx 1680; AVX2-NEXT: vucomiss %xmm1, %xmm2 1681; AVX2-NEXT: cmovaeq %rax, %rdx 1682; AVX2-NEXT: vmovq %rdx, %xmm2 1683; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 1684; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4 1685; AVX2-NEXT: vcvttss2si %xmm4, %rax 1686; AVX2-NEXT: xorq %rcx, %rax 1687; AVX2-NEXT: vcvttss2si %xmm3, %rdx 1688; AVX2-NEXT: vucomiss %xmm1, %xmm3 1689; AVX2-NEXT: cmovaeq %rax, %rdx 1690; AVX2-NEXT: vmovq %rdx, %xmm3 1691; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 1692; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3 1693; AVX2-NEXT: vcvttss2si %xmm3, %rax 1694; AVX2-NEXT: xorq %rcx, %rax 1695; AVX2-NEXT: vcvttss2si %xmm0, %rdx 1696; AVX2-NEXT: vucomiss %xmm1, %xmm0 1697; AVX2-NEXT: cmovaeq %rax, %rdx 1698; AVX2-NEXT: vmovq %rdx, %xmm3 1699; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1700; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4 1701; AVX2-NEXT: vcvttss2si %xmm4, %rax 1702; AVX2-NEXT: xorq %rcx, %rax 1703; AVX2-NEXT: vcvttss2si %xmm0, %rcx 1704; AVX2-NEXT: vucomiss %xmm1, %xmm0 1705; AVX2-NEXT: cmovaeq %rax, %rcx 1706; AVX2-NEXT: vmovq %rcx, %xmm0 1707; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 1708; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1709; AVX2-NEXT: retq 1710; 1711; AVX512F-LABEL: fptoui_4f32_to_4i64: 1712; AVX512F: # %bb.0: 1713; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1714; AVX512F-NEXT: vcvttss2usi %xmm1, %rax 1715; AVX512F-NEXT: vmovq %rax, %xmm1 1716; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1717; AVX512F-NEXT: vcvttss2usi %xmm2, %rax 1718; AVX512F-NEXT: vmovq %rax, %xmm2 1719; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1720; AVX512F-NEXT: vcvttss2usi %xmm0, %rax 1721; AVX512F-NEXT: vmovq %rax, %xmm2 1722; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1723; AVX512F-NEXT: vcvttss2usi %xmm0, %rax 1724; AVX512F-NEXT: vmovq %rax, %xmm0 1725; 
AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1726; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1727; AVX512F-NEXT: retq 1728; 1729; AVX512VL-LABEL: fptoui_4f32_to_4i64: 1730; AVX512VL: # %bb.0: 1731; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1732; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax 1733; AVX512VL-NEXT: vmovq %rax, %xmm1 1734; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1735; AVX512VL-NEXT: vcvttss2usi %xmm2, %rax 1736; AVX512VL-NEXT: vmovq %rax, %xmm2 1737; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1738; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax 1739; AVX512VL-NEXT: vmovq %rax, %xmm2 1740; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1741; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax 1742; AVX512VL-NEXT: vmovq %rax, %xmm0 1743; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1744; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1745; AVX512VL-NEXT: retq 1746; 1747; AVX512DQ-LABEL: fptoui_4f32_to_4i64: 1748; AVX512DQ: # %bb.0: 1749; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 1750; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1751; AVX512DQ-NEXT: retq 1752; 1753; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64: 1754; AVX512VLDQ: # %bb.0: 1755; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0 1756; AVX512VLDQ-NEXT: retq 1757 %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1758 %cvt = fptoui <4 x float> %shuf to <4 x i64> 1759 ret <4 x i64> %cvt 1760} 1761 1762define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) { 1763; SSE-LABEL: fptoui_8f32_to_4i64: 1764; SSE: # %bb.0: 1765; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1766; SSE-NEXT: movaps %xmm0, %xmm2 1767; SSE-NEXT: subss %xmm1, %xmm2 1768; SSE-NEXT: cvttss2si %xmm2, %rcx 1769; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 1770; SSE-NEXT: xorq %rax, %rcx 1771; SSE-NEXT: cvttss2si %xmm0, %rdx 1772; SSE-NEXT: ucomiss %xmm1, %xmm0 1773; SSE-NEXT: cmovaeq %rcx, %rdx 
1774; SSE-NEXT: movq %rdx, %xmm2 1775; SSE-NEXT: movaps %xmm0, %xmm3 1776; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1] 1777; SSE-NEXT: movaps %xmm3, %xmm4 1778; SSE-NEXT: subss %xmm1, %xmm4 1779; SSE-NEXT: cvttss2si %xmm4, %rcx 1780; SSE-NEXT: xorq %rax, %rcx 1781; SSE-NEXT: cvttss2si %xmm3, %rdx 1782; SSE-NEXT: ucomiss %xmm1, %xmm3 1783; SSE-NEXT: cmovaeq %rcx, %rdx 1784; SSE-NEXT: movq %rdx, %xmm3 1785; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1786; SSE-NEXT: movaps %xmm0, %xmm3 1787; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3] 1788; SSE-NEXT: movaps %xmm3, %xmm4 1789; SSE-NEXT: subss %xmm1, %xmm4 1790; SSE-NEXT: cvttss2si %xmm4, %rcx 1791; SSE-NEXT: xorq %rax, %rcx 1792; SSE-NEXT: cvttss2si %xmm3, %rdx 1793; SSE-NEXT: ucomiss %xmm1, %xmm3 1794; SSE-NEXT: cmovaeq %rcx, %rdx 1795; SSE-NEXT: movq %rdx, %xmm3 1796; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1797; SSE-NEXT: movaps %xmm0, %xmm4 1798; SSE-NEXT: subss %xmm1, %xmm4 1799; SSE-NEXT: cvttss2si %xmm4, %rcx 1800; SSE-NEXT: xorq %rax, %rcx 1801; SSE-NEXT: cvttss2si %xmm0, %rax 1802; SSE-NEXT: ucomiss %xmm1, %xmm0 1803; SSE-NEXT: cmovaeq %rcx, %rax 1804; SSE-NEXT: movq %rax, %xmm1 1805; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 1806; SSE-NEXT: movdqa %xmm2, %xmm0 1807; SSE-NEXT: retq 1808; 1809; AVX1-LABEL: fptoui_8f32_to_4i64: 1810; AVX1: # %bb.0: 1811; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] 1812; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1813; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3 1814; AVX1-NEXT: vcvttss2si %xmm3, %rax 1815; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 1816; AVX1-NEXT: xorq %rcx, %rax 1817; AVX1-NEXT: vcvttss2si %xmm2, %rdx 1818; AVX1-NEXT: vucomiss %xmm1, %xmm2 1819; AVX1-NEXT: cmovaeq %rax, %rdx 1820; AVX1-NEXT: vmovq %rdx, %xmm2 1821; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 1822; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4 1823; AVX1-NEXT: vcvttss2si %xmm4, %rax 1824; AVX1-NEXT: xorq %rcx, 
%rax 1825; AVX1-NEXT: vcvttss2si %xmm3, %rdx 1826; AVX1-NEXT: vucomiss %xmm1, %xmm3 1827; AVX1-NEXT: cmovaeq %rax, %rdx 1828; AVX1-NEXT: vmovq %rdx, %xmm3 1829; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 1830; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3 1831; AVX1-NEXT: vcvttss2si %xmm3, %rax 1832; AVX1-NEXT: xorq %rcx, %rax 1833; AVX1-NEXT: vcvttss2si %xmm0, %rdx 1834; AVX1-NEXT: vucomiss %xmm1, %xmm0 1835; AVX1-NEXT: cmovaeq %rax, %rdx 1836; AVX1-NEXT: vmovq %rdx, %xmm3 1837; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1838; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4 1839; AVX1-NEXT: vcvttss2si %xmm4, %rax 1840; AVX1-NEXT: xorq %rcx, %rax 1841; AVX1-NEXT: vcvttss2si %xmm0, %rcx 1842; AVX1-NEXT: vucomiss %xmm1, %xmm0 1843; AVX1-NEXT: cmovaeq %rax, %rcx 1844; AVX1-NEXT: vmovq %rcx, %xmm0 1845; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 1846; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1847; AVX1-NEXT: retq 1848; 1849; AVX2-LABEL: fptoui_8f32_to_4i64: 1850; AVX2: # %bb.0: 1851; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] 1852; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1853; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3 1854; AVX2-NEXT: vcvttss2si %xmm3, %rax 1855; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 1856; AVX2-NEXT: xorq %rcx, %rax 1857; AVX2-NEXT: vcvttss2si %xmm2, %rdx 1858; AVX2-NEXT: vucomiss %xmm1, %xmm2 1859; AVX2-NEXT: cmovaeq %rax, %rdx 1860; AVX2-NEXT: vmovq %rdx, %xmm2 1861; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 1862; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4 1863; AVX2-NEXT: vcvttss2si %xmm4, %rax 1864; AVX2-NEXT: xorq %rcx, %rax 1865; AVX2-NEXT: vcvttss2si %xmm3, %rdx 1866; AVX2-NEXT: vucomiss %xmm1, %xmm3 1867; AVX2-NEXT: cmovaeq %rax, %rdx 1868; AVX2-NEXT: vmovq %rdx, %xmm3 1869; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 1870; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3 1871; AVX2-NEXT: vcvttss2si %xmm3, %rax 1872; AVX2-NEXT: xorq %rcx, %rax 1873; AVX2-NEXT: 
vcvttss2si %xmm0, %rdx 1874; AVX2-NEXT: vucomiss %xmm1, %xmm0 1875; AVX2-NEXT: cmovaeq %rax, %rdx 1876; AVX2-NEXT: vmovq %rdx, %xmm3 1877; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1878; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4 1879; AVX2-NEXT: vcvttss2si %xmm4, %rax 1880; AVX2-NEXT: xorq %rcx, %rax 1881; AVX2-NEXT: vcvttss2si %xmm0, %rcx 1882; AVX2-NEXT: vucomiss %xmm1, %xmm0 1883; AVX2-NEXT: cmovaeq %rax, %rcx 1884; AVX2-NEXT: vmovq %rcx, %xmm0 1885; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 1886; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1887; AVX2-NEXT: retq 1888; 1889; AVX512F-LABEL: fptoui_8f32_to_4i64: 1890; AVX512F: # %bb.0: 1891; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1892; AVX512F-NEXT: vcvttss2usi %xmm1, %rax 1893; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx 1894; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1895; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx 1896; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1897; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi 1898; AVX512F-NEXT: vmovq %rsi, %xmm0 1899; AVX512F-NEXT: vmovq %rdx, %xmm1 1900; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1901; AVX512F-NEXT: vmovq %rcx, %xmm1 1902; AVX512F-NEXT: vmovq %rax, %xmm2 1903; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1904; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1905; AVX512F-NEXT: retq 1906; 1907; AVX512VL-LABEL: fptoui_8f32_to_4i64: 1908; AVX512VL: # %bb.0: 1909; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1910; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax 1911; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx 1912; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1913; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx 1914; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1915; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi 1916; AVX512VL-NEXT: vmovq %rsi, %xmm0 1917; AVX512VL-NEXT: vmovq %rdx, %xmm1 1918; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1919; AVX512VL-NEXT: vmovq %rcx, %xmm1 1920; 
AVX512VL-NEXT: vmovq %rax, %xmm2 1921; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1922; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1923; AVX512VL-NEXT: retq 1924; 1925; AVX512DQ-LABEL: fptoui_8f32_to_4i64: 1926; AVX512DQ: # %bb.0: 1927; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 1928; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1929; AVX512DQ-NEXT: retq 1930; 1931; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64: 1932; AVX512VLDQ: # %bb.0: 1933; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0 1934; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1935; AVX512VLDQ-NEXT: retq 1936 %cvt = fptoui <8 x float> %a to <8 x i64> 1937 %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1938 ret <4 x i64> %shuf 1939} 1940 1941; 1942; Constant Folding 1943; 1944 1945define <2 x i64> @fptosi_2f64_to_2i64_const() { 1946; SSE-LABEL: fptosi_2f64_to_2i64_const: 1947; SSE: # %bb.0: 1948; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615] 1949; SSE-NEXT: retq 1950; 1951; AVX-LABEL: fptosi_2f64_to_2i64_const: 1952; AVX: # %bb.0: 1953; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615] 1954; AVX-NEXT: retq 1955 %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64> 1956 ret <2 x i64> %cvt 1957} 1958 1959define <4 x i32> @fptosi_2f64_to_2i32_const() { 1960; SSE-LABEL: fptosi_2f64_to_2i32_const: 1961; SSE: # %bb.0: 1962; SSE-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u> 1963; SSE-NEXT: retq 1964; 1965; AVX-LABEL: fptosi_2f64_to_2i32_const: 1966; AVX: # %bb.0: 1967; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u> 1968; AVX-NEXT: retq 1969 %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32> 1970 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1971 ret <4 x i32> %ext 1972} 1973 1974define <4 x i64> @fptosi_4f64_to_4i64_const() { 1975; SSE-LABEL: fptosi_4f64_to_4i64_const: 1976; SSE: # %bb.0: 1977; SSE-NEXT: movaps 
{{.*#+}} xmm0 = [1,18446744073709551615] 1978; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613] 1979; SSE-NEXT: retq 1980; 1981; AVX-LABEL: fptosi_4f64_to_4i64_const: 1982; AVX: # %bb.0: 1983; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613] 1984; AVX-NEXT: retq 1985 %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64> 1986 ret <4 x i64> %cvt 1987} 1988 1989define <4 x i32> @fptosi_4f64_to_4i32_const() { 1990; SSE-LABEL: fptosi_4f64_to_4i32_const: 1991; SSE: # %bb.0: 1992; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3] 1993; SSE-NEXT: retq 1994; 1995; AVX-LABEL: fptosi_4f64_to_4i32_const: 1996; AVX: # %bb.0: 1997; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3] 1998; AVX-NEXT: retq 1999 %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32> 2000 ret <4 x i32> %cvt 2001} 2002 2003define <2 x i64> @fptoui_2f64_to_2i64_const() { 2004; SSE-LABEL: fptoui_2f64_to_2i64_const: 2005; SSE: # %bb.0: 2006; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4] 2007; SSE-NEXT: retq 2008; 2009; AVX-LABEL: fptoui_2f64_to_2i64_const: 2010; AVX: # %bb.0: 2011; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4] 2012; AVX-NEXT: retq 2013 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64> 2014 ret <2 x i64> %cvt 2015} 2016 2017define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) { 2018; SSE-LABEL: fptoui_2f64_to_2i32_const: 2019; SSE: # %bb.0: 2020; SSE-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u> 2021; SSE-NEXT: retq 2022; 2023; AVX-LABEL: fptoui_2f64_to_2i32_const: 2024; AVX: # %bb.0: 2025; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u> 2026; AVX-NEXT: retq 2027 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32> 2028 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2029 ret <4 x i32> %ext 2030} 2031 2032define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) { 2033; SSE-LABEL: 
fptoui_4f64_to_4i64_const: 2034; SSE: # %bb.0: 2035; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4] 2036; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8] 2037; SSE-NEXT: retq 2038; 2039; AVX-LABEL: fptoui_4f64_to_4i64_const: 2040; AVX: # %bb.0: 2041; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8] 2042; AVX-NEXT: retq 2043 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64> 2044 ret <4 x i64> %cvt 2045} 2046 2047define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) { 2048; SSE-LABEL: fptoui_4f64_to_4i32_const: 2049; SSE: # %bb.0: 2050; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8] 2051; SSE-NEXT: retq 2052; 2053; AVX-LABEL: fptoui_4f64_to_4i32_const: 2054; AVX: # %bb.0: 2055; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8] 2056; AVX-NEXT: retq 2057 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32> 2058 ret <4 x i32> %cvt 2059} 2060 2061define <4 x i32> @fptosi_4f32_to_4i32_const() { 2062; SSE-LABEL: fptosi_4f32_to_4i32_const: 2063; SSE: # %bb.0: 2064; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3] 2065; SSE-NEXT: retq 2066; 2067; AVX-LABEL: fptosi_4f32_to_4i32_const: 2068; AVX: # %bb.0: 2069; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3] 2070; AVX-NEXT: retq 2071 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32> 2072 ret <4 x i32> %cvt 2073} 2074 2075define <4 x i64> @fptosi_4f32_to_4i64_const() { 2076; SSE-LABEL: fptosi_4f32_to_4i64_const: 2077; SSE: # %bb.0: 2078; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615] 2079; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3] 2080; SSE-NEXT: retq 2081; 2082; AVX-LABEL: fptosi_4f32_to_4i64_const: 2083; AVX: # %bb.0: 2084; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3] 2085; AVX-NEXT: retq 2086 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64> 2087 ret <4 x i64> %cvt 2088} 2089 2090define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) { 2091; SSE-LABEL: 
fptosi_8f32_to_8i32_const: 2092; SSE: # %bb.0: 2093; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3] 2094; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295] 2095; SSE-NEXT: retq 2096; 2097; AVX-LABEL: fptosi_8f32_to_8i32_const: 2098; AVX: # %bb.0: 2099; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295] 2100; AVX-NEXT: retq 2101 %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32> 2102 ret <8 x i32> %cvt 2103} 2104 2105define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) { 2106; SSE-LABEL: fptoui_4f32_to_4i32_const: 2107; SSE: # %bb.0: 2108; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6] 2109; SSE-NEXT: retq 2110; 2111; AVX-LABEL: fptoui_4f32_to_4i32_const: 2112; AVX: # %bb.0: 2113; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6] 2114; AVX-NEXT: retq 2115 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32> 2116 ret <4 x i32> %cvt 2117} 2118 2119define <4 x i64> @fptoui_4f32_to_4i64_const() { 2120; SSE-LABEL: fptoui_4f32_to_4i64_const: 2121; SSE: # %bb.0: 2122; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2] 2123; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8] 2124; SSE-NEXT: retq 2125; 2126; AVX-LABEL: fptoui_4f32_to_4i64_const: 2127; AVX: # %bb.0: 2128; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8] 2129; AVX-NEXT: retq 2130 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64> 2131 ret <4 x i64> %cvt 2132} 2133 2134define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) { 2135; SSE-LABEL: fptoui_8f32_to_8i32_const: 2136; SSE: # %bb.0: 2137; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6] 2138; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1] 2139; SSE-NEXT: retq 2140; 2141; AVX-LABEL: fptoui_8f32_to_8i32_const: 2142; AVX: # %bb.0: 2143; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1] 2144; AVX-NEXT: retq 2145 %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 
4.0, float 1.0> to <8 x i32> 2146 ret <8 x i32> %cvt 2147} 2148 2149; 2150; Special Cases 2151; 2152 2153define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind { 2154; SSE-LABEL: fptosi_2f16_to_4i32: 2155; SSE: # %bb.0: 2156; SSE-NEXT: pushq %rbp 2157; SSE-NEXT: pushq %rbx 2158; SSE-NEXT: pushq %rax 2159; SSE-NEXT: movl %esi, %ebx 2160; SSE-NEXT: movzwl %di, %edi 2161; SSE-NEXT: callq __gnu_h2f_ieee 2162; SSE-NEXT: cvttss2si %xmm0, %ebp 2163; SSE-NEXT: movzwl %bx, %edi 2164; SSE-NEXT: callq __gnu_h2f_ieee 2165; SSE-NEXT: cvttss2si %xmm0, %eax 2166; SSE-NEXT: movd %eax, %xmm0 2167; SSE-NEXT: movd %ebp, %xmm1 2168; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2169; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 2170; SSE-NEXT: addq $8, %rsp 2171; SSE-NEXT: popq %rbx 2172; SSE-NEXT: popq %rbp 2173; SSE-NEXT: retq 2174; 2175; VEX-LABEL: fptosi_2f16_to_4i32: 2176; VEX: # %bb.0: 2177; VEX-NEXT: pushq %rbp 2178; VEX-NEXT: pushq %rbx 2179; VEX-NEXT: pushq %rax 2180; VEX-NEXT: movl %esi, %ebx 2181; VEX-NEXT: movzwl %di, %edi 2182; VEX-NEXT: callq __gnu_h2f_ieee 2183; VEX-NEXT: vcvttss2si %xmm0, %ebp 2184; VEX-NEXT: movzwl %bx, %edi 2185; VEX-NEXT: callq __gnu_h2f_ieee 2186; VEX-NEXT: vcvttss2si %xmm0, %eax 2187; VEX-NEXT: vmovd %eax, %xmm0 2188; VEX-NEXT: vmovd %ebp, %xmm1 2189; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2190; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2191; VEX-NEXT: addq $8, %rsp 2192; VEX-NEXT: popq %rbx 2193; VEX-NEXT: popq %rbp 2194; VEX-NEXT: retq 2195; 2196; AVX512-LABEL: fptosi_2f16_to_4i32: 2197; AVX512: # %bb.0: 2198; AVX512-NEXT: movzwl %di, %eax 2199; AVX512-NEXT: vmovd %eax, %xmm0 2200; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 2201; AVX512-NEXT: vcvttss2si %xmm0, %eax 2202; AVX512-NEXT: movzwl %si, %ecx 2203; AVX512-NEXT: vmovd %ecx, %xmm0 2204; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 2205; AVX512-NEXT: vcvttss2si %xmm0, %ecx 2206; AVX512-NEXT: vmovd %ecx, %xmm0 2207; AVX512-NEXT: vmovd %eax, %xmm1 
2208; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2209; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2210; AVX512-NEXT: retq 2211 %cvt = fptosi <2 x half> %a to <2 x i32> 2212 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2213 ret <4 x i32> %ext 2214} 2215 2216define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind { 2217; SSE-LABEL: fptosi_2f80_to_4i32: 2218; SSE: # %bb.0: 2219; SSE-NEXT: fldt {{[0-9]+}}(%rsp) 2220; SSE-NEXT: fldt {{[0-9]+}}(%rsp) 2221; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) 2222; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax 2223; SSE-NEXT: orl $3072, %eax # imm = 0xC00 2224; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp) 2225; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp) 2226; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp) 2227; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp) 2228; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) 2229; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax 2230; SSE-NEXT: orl $3072, %eax # imm = 0xC00 2231; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp) 2232; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp) 2233; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp) 2234; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp) 2235; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2236; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2237; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2238; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 2239; SSE-NEXT: retq 2240; 2241; AVX-LABEL: fptosi_2f80_to_4i32: 2242; AVX: # %bb.0: 2243; AVX-NEXT: fldt {{[0-9]+}}(%rsp) 2244; AVX-NEXT: fldt {{[0-9]+}}(%rsp) 2245; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp) 2246; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp) 2247; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2248; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2249; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2250; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2251; AVX-NEXT: retq 2252 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32> 2253 %ext = shufflevector <2 x i32> %cvt, <2 x 
i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2254 ret <4 x i32> %ext 2255} 2256 2257define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind { 2258; SSE-LABEL: fptosi_2f128_to_4i32: 2259; SSE: # %bb.0: 2260; SSE-NEXT: pushq %rbx 2261; SSE-NEXT: subq $16, %rsp 2262; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill 2263; SSE-NEXT: callq __fixtfsi 2264; SSE-NEXT: movl %eax, %ebx 2265; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 2266; SSE-NEXT: callq __fixtfsi 2267; SSE-NEXT: movd %eax, %xmm0 2268; SSE-NEXT: movd %ebx, %xmm1 2269; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2270; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 2271; SSE-NEXT: addq $16, %rsp 2272; SSE-NEXT: popq %rbx 2273; SSE-NEXT: retq 2274; 2275; AVX-LABEL: fptosi_2f128_to_4i32: 2276; AVX: # %bb.0: 2277; AVX-NEXT: pushq %rbx 2278; AVX-NEXT: subq $16, %rsp 2279; AVX-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill 2280; AVX-NEXT: callq __fixtfsi 2281; AVX-NEXT: movl %eax, %ebx 2282; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 2283; AVX-NEXT: callq __fixtfsi 2284; AVX-NEXT: vmovd %eax, %xmm0 2285; AVX-NEXT: vmovd %ebx, %xmm1 2286; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2287; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2288; AVX-NEXT: addq $16, %rsp 2289; AVX-NEXT: popq %rbx 2290; AVX-NEXT: retq 2291 %cvt = fptosi <2 x fp128> %a to <2 x i32> 2292 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2293 ret <4 x i32> %ext 2294} 2295 2296define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) { 2297; SSE-LABEL: fptosi_2f32_to_2i8: 2298; SSE: # %bb.0: 2299; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2300; SSE-NEXT: pand {{.*}}(%rip), %xmm0 2301; SSE-NEXT: packuswb %xmm0, %xmm0 2302; SSE-NEXT: packuswb %xmm0, %xmm0 2303; SSE-NEXT: retq 2304; 2305; VEX-LABEL: fptosi_2f32_to_2i8: 2306; VEX: # %bb.0: 2307; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 2308; VEX-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2309; VEX-NEXT: retq 2310; 2311; AVX512F-LABEL: fptosi_2f32_to_2i8: 2312; AVX512F: # %bb.0: 2313; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 2314; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2315; AVX512F-NEXT: retq 2316; 2317; AVX512VL-LABEL: fptosi_2f32_to_2i8: 2318; AVX512VL: # %bb.0: 2319; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 2320; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2321; AVX512VL-NEXT: retq 2322; 2323; AVX512DQ-LABEL: fptosi_2f32_to_2i8: 2324; AVX512DQ: # %bb.0: 2325; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 2326; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2327; AVX512DQ-NEXT: retq 2328; 2329; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8: 2330; AVX512VLDQ: # %bb.0: 2331; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2332; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2333; AVX512VLDQ-NEXT: retq 2334 %cvt = fptosi <2 x float> %a to <2 x i8> 2335 ret <2 x i8> %cvt 2336} 2337 2338define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) { 2339; SSE-LABEL: fptosi_2f32_to_2i16: 2340; SSE: # %bb.0: 2341; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2342; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2343; SSE-NEXT: retq 2344; 2345; AVX-LABEL: fptosi_2f32_to_2i16: 2346; AVX: # %bb.0: 2347; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 2348; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2349; AVX-NEXT: retq 2350 %cvt = fptosi <2 x float> %a to <2 x i16> 2351 ret <2 x i16> %cvt 2352} 2353 2354define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) { 2355; SSE-LABEL: fptoui_2f32_to_2i8: 2356; SSE: # %bb.0: 2357; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2358; SSE-NEXT: pand {{.*}}(%rip), %xmm0 2359; SSE-NEXT: packuswb %xmm0, %xmm0 2360; SSE-NEXT: packuswb %xmm0, %xmm0 2361; SSE-NEXT: retq 2362; 2363; VEX-LABEL: fptoui_2f32_to_2i8: 2364; VEX: # %bb.0: 2365; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 2366; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2367; VEX-NEXT: retq 2368; 2369; 
AVX512F-LABEL: fptoui_2f32_to_2i8: 2370; AVX512F: # %bb.0: 2371; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 2372; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2373; AVX512F-NEXT: retq 2374; 2375; AVX512VL-LABEL: fptoui_2f32_to_2i8: 2376; AVX512VL: # %bb.0: 2377; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 2378; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2379; AVX512VL-NEXT: retq 2380; 2381; AVX512DQ-LABEL: fptoui_2f32_to_2i8: 2382; AVX512DQ: # %bb.0: 2383; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 2384; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2385; AVX512DQ-NEXT: retq 2386; 2387; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8: 2388; AVX512VLDQ: # %bb.0: 2389; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2390; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2391; AVX512VLDQ-NEXT: retq 2392 %cvt = fptoui <2 x float> %a to <2 x i8> 2393 ret <2 x i8> %cvt 2394} 2395 2396define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) { 2397; SSE-LABEL: fptoui_2f32_to_2i16: 2398; SSE: # %bb.0: 2399; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2400; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2401; SSE-NEXT: retq 2402; 2403; AVX-LABEL: fptoui_2f32_to_2i16: 2404; AVX: # %bb.0: 2405; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 2406; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2407; AVX-NEXT: retq 2408 %cvt = fptoui <2 x float> %a to <2 x i16> 2409 ret <2 x i16> %cvt 2410} 2411 2412define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) { 2413; SSE-LABEL: fptosi_2f64_to_2i8: 2414; SSE: # %bb.0: 2415; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2416; SSE-NEXT: andpd {{.*}}(%rip), %xmm0 2417; SSE-NEXT: packuswb %xmm0, %xmm0 2418; SSE-NEXT: packuswb %xmm0, %xmm0 2419; SSE-NEXT: retq 2420; 2421; VEX-LABEL: fptosi_2f64_to_2i8: 2422; VEX: # %bb.0: 2423; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 2424; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2425; VEX-NEXT: retq 2426; 2427; AVX512F-LABEL: fptosi_2f64_to_2i8: 2428; AVX512F: # %bb.0: 2429; AVX512F-NEXT: 
vcvttpd2dq %xmm0, %xmm0 2430; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2431; AVX512F-NEXT: retq 2432; 2433; AVX512VL-LABEL: fptosi_2f64_to_2i8: 2434; AVX512VL: # %bb.0: 2435; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 2436; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2437; AVX512VL-NEXT: retq 2438; 2439; AVX512DQ-LABEL: fptosi_2f64_to_2i8: 2440; AVX512DQ: # %bb.0: 2441; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2442; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2443; AVX512DQ-NEXT: retq 2444; 2445; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8: 2446; AVX512VLDQ: # %bb.0: 2447; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2448; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2449; AVX512VLDQ-NEXT: retq 2450 %cvt = fptosi <2 x double> %a to <2 x i8> 2451 ret <2 x i8> %cvt 2452} 2453 2454define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) { 2455; SSE-LABEL: fptosi_2f64_to_2i16: 2456; SSE: # %bb.0: 2457; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2458; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2459; SSE-NEXT: retq 2460; 2461; AVX-LABEL: fptosi_2f64_to_2i16: 2462; AVX: # %bb.0: 2463; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 2464; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2465; AVX-NEXT: retq 2466 %cvt = fptosi <2 x double> %a to <2 x i16> 2467 ret <2 x i16> %cvt 2468} 2469 2470define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) { 2471; SSE-LABEL: fptoui_2f64_to_2i8: 2472; SSE: # %bb.0: 2473; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2474; SSE-NEXT: andpd {{.*}}(%rip), %xmm0 2475; SSE-NEXT: packuswb %xmm0, %xmm0 2476; SSE-NEXT: packuswb %xmm0, %xmm0 2477; SSE-NEXT: retq 2478; 2479; VEX-LABEL: fptoui_2f64_to_2i8: 2480; VEX: # %bb.0: 2481; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 2482; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2483; VEX-NEXT: retq 2484; 2485; AVX512F-LABEL: fptoui_2f64_to_2i8: 2486; AVX512F: # %bb.0: 2487; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 2488; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2489; AVX512F-NEXT: retq 2490; 2491; AVX512VL-LABEL: fptoui_2f64_to_2i8: 2492; AVX512VL: # %bb.0: 2493; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 2494; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2495; AVX512VL-NEXT: retq 2496; 2497; AVX512DQ-LABEL: fptoui_2f64_to_2i8: 2498; AVX512DQ: # %bb.0: 2499; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2500; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2501; AVX512DQ-NEXT: retq 2502; 2503; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8: 2504; AVX512VLDQ: # %bb.0: 2505; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2506; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2507; AVX512VLDQ-NEXT: retq 2508 %cvt = fptoui <2 x double> %a to <2 x i8> 2509 ret <2 x i8> %cvt 2510} 2511 2512define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) { 2513; SSE-LABEL: fptoui_2f64_to_2i16: 2514; SSE: # %bb.0: 2515; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2516; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2517; SSE-NEXT: retq 2518; 2519; AVX-LABEL: fptoui_2f64_to_2i16: 2520; AVX: # %bb.0: 2521; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 2522; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2523; AVX-NEXT: retq 2524 %cvt = fptoui <2 x double> %a to <2 x i16> 2525 ret <2 x i16> %cvt 2526} 2527 2528define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) { 2529; SSE-LABEL: fptosi_8f64_to_8i16: 2530; SSE: # %bb.0: 2531; SSE-NEXT: cvttpd2dq %xmm3, %xmm3 2532; SSE-NEXT: cvttpd2dq %xmm2, %xmm2 2533; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2534; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 2535; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2536; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2537; SSE-NEXT: packssdw %xmm2, %xmm0 2538; SSE-NEXT: retq 2539; 2540; VEX-LABEL: fptosi_8f64_to_8i16: 2541; VEX: # %bb.0: 2542; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1 2543; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0 2544; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2545; VEX-NEXT: vzeroupper 2546; VEX-NEXT: retq 2547; 2548; AVX512F-LABEL: fptosi_8f64_to_8i16: 
2549; AVX512F: # %bb.0: 2550; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0 2551; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2552; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2553; AVX512F-NEXT: vzeroupper 2554; AVX512F-NEXT: retq 2555; 2556; AVX512VL-LABEL: fptosi_8f64_to_8i16: 2557; AVX512VL: # %bb.0: 2558; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0 2559; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 2560; AVX512VL-NEXT: vzeroupper 2561; AVX512VL-NEXT: retq 2562; 2563; AVX512DQ-LABEL: fptosi_8f64_to_8i16: 2564; AVX512DQ: # %bb.0: 2565; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2566; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 2567; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2568; AVX512DQ-NEXT: vzeroupper 2569; AVX512DQ-NEXT: retq 2570; 2571; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16: 2572; AVX512VLDQ: # %bb.0: 2573; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2574; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0 2575; AVX512VLDQ-NEXT: vzeroupper 2576; AVX512VLDQ-NEXT: retq 2577 %cvt = fptosi <8 x double> %a to <8 x i16> 2578 ret <8 x i16> %cvt 2579} 2580 2581define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) { 2582; SSE-LABEL: fptoui_8f64_to_8i16: 2583; SSE: # %bb.0: 2584; SSE-NEXT: cvttpd2dq %xmm3, %xmm3 2585; SSE-NEXT: cvttpd2dq %xmm2, %xmm2 2586; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2587; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7] 2588; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7] 2589; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2590; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 2591; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2592; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2593; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2594; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 2595; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2596; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2597; SSE-NEXT: retq 2598; 2599; VEX-LABEL: fptoui_8f64_to_8i16: 2600; VEX: # %bb.0: 2601; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1 2602; VEX-NEXT: vcvttpd2dq 
%ymm0, %xmm0 2603; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2604; VEX-NEXT: vzeroupper 2605; VEX-NEXT: retq 2606; 2607; AVX512F-LABEL: fptoui_8f64_to_8i16: 2608; AVX512F: # %bb.0: 2609; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0 2610; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2611; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2612; AVX512F-NEXT: vzeroupper 2613; AVX512F-NEXT: retq 2614; 2615; AVX512VL-LABEL: fptoui_8f64_to_8i16: 2616; AVX512VL: # %bb.0: 2617; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0 2618; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 2619; AVX512VL-NEXT: vzeroupper 2620; AVX512VL-NEXT: retq 2621; 2622; AVX512DQ-LABEL: fptoui_8f64_to_8i16: 2623; AVX512DQ: # %bb.0: 2624; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2625; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 2626; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2627; AVX512DQ-NEXT: vzeroupper 2628; AVX512DQ-NEXT: retq 2629; 2630; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16: 2631; AVX512VLDQ: # %bb.0: 2632; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2633; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0 2634; AVX512VLDQ-NEXT: vzeroupper 2635; AVX512VLDQ-NEXT: retq 2636 %cvt = fptoui <8 x double> %a to <8 x i16> 2637 ret <8 x i16> %cvt 2638} 2639 2640define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) { 2641; SSE-LABEL: fptosi_16f32_to_16i8: 2642; SSE: # %bb.0: 2643; SSE-NEXT: cvttps2dq %xmm3, %xmm3 2644; SSE-NEXT: cvttps2dq %xmm2, %xmm2 2645; SSE-NEXT: packssdw %xmm3, %xmm2 2646; SSE-NEXT: cvttps2dq %xmm1, %xmm1 2647; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2648; SSE-NEXT: packssdw %xmm1, %xmm0 2649; SSE-NEXT: packsswb %xmm2, %xmm0 2650; SSE-NEXT: retq 2651; 2652; AVX1-LABEL: fptosi_16f32_to_16i8: 2653; AVX1: # %bb.0: 2654; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1 2655; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2656; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 2657; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0 2658; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2659; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 2660; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptosi_16f32_to_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: fptosi_16f32_to_16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %cvt = fptosi <16 x float> %a to <16 x i8>
  ret <16 x i8> %cvt
}

; Converts a <16 x float> argument to <16 x i8> with fptoui and returns it.
; Every checked sequence converts with the signed (v)cvttps2dq instruction and
; then narrows the 32-bit lanes to bytes, using pack instructions on the sse2,
; avx and avx2 runs and a single vpmovdb on the avx512 runs.
define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
; SSE-LABEL: fptoui_16f32_to_16i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_16f32_to_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_16f32_to_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: fptoui_16f32_to_16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %cvt = fptoui <16 x float> %a to <16 x i8>
  ret <16 x i8> %cvt
}

; Loads a <2 x float> through the pointer argument and converts it to
; <2 x i64> with fptosi. Only the run with both avx512dq and avx512vl folds
; the load into the conversion, as vcvttps2qq (%rdi). The other runs first
; load the pair with (v)movsd. Runs without avx512dq then convert each
; element separately with (v)cvttss2si and recombine the two results.
define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) {
; SSE-LABEL: fptosi_2f32_to_2i64_load:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f32_to_2i64_load:
; VEX: # %bb.0:
; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; VEX-NEXT: vcvttss2si %xmm0, %rax
; VEX-NEXT: vmovq %rax, %xmm1
; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; VEX-NEXT: vcvttss2si %xmm0, %rax
; VEX-NEXT: vmovq %rax, %xmm0
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f32_to_2i64_load:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f32_to_2i64_load:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2qq (%rdi), %xmm0
; AVX512VLDQ-NEXT: retq
  %a = load <2 x float>, <2 x float>* %x
  %b = fptosi <2 x float> %a to <2 x i64>
  ret <2 x i64> %b
}

; Loads a <2 x float> through the pointer argument and converts it to
; <2 x i64> with fptoui. On the sse2, avx and avx2 runs each lane is built
; from two signed (v)cvttss2si conversions, one of the element itself and one
; of the element minus a second scalar loaded from memory. The latter result
; is xored with 0x8000000000000000, and cmovae picks between the two after a
; (v)ucomiss compare. The avx512f runs without avx512dq convert each element
; with vcvttss2usi. The avx512dq runs use the packed vcvttps2uqq, and only the
; run that also has avx512vl folds the load into it, as vcvttps2uqq (%rdi).
define <2 x i64> @fptoui_2f32_to_2i64_load(<2 x float>* %x) {
; SSE-LABEL: fptoui_2f32_to_2i64_load:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: subss %xmm2, %xmm0
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttss2si %xmm1, %rdx
; SSE-NEXT: ucomiss %xmm2, %xmm1
; SSE-NEXT: cmovaeq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: subss %xmm2, %xmm3
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttss2si %xmm1, %rcx
; SSE-NEXT: ucomiss %xmm2, %xmm1
; SSE-NEXT: cmovaeq %rax, %rcx
; SSE-NEXT: movq %rcx, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_2f32_to_2i64_load:
; VEX: # %bb.0:
; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
; VEX-NEXT: vcvttss2si %xmm2, %rax
; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; VEX-NEXT: xorq %rcx, %rax
; VEX-NEXT: vcvttss2si %xmm0, %rdx
; VEX-NEXT: vucomiss %xmm1, %xmm0
; VEX-NEXT: cmovaeq %rax, %rdx
; VEX-NEXT: vmovq %rdx, %xmm2
; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3
; VEX-NEXT: vcvttss2si %xmm3, %rax
; VEX-NEXT: xorq %rcx, %rax
; VEX-NEXT: vcvttss2si %xmm0, %rcx
; VEX-NEXT: vucomiss %xmm1, %xmm0
; VEX-NEXT: cmovaeq %rax, %rcx
; VEX-NEXT: vmovq %rcx, %xmm0
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f32_to_2i64_load:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f32_to_2i64_load:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq (%rdi), %xmm0
; AVX512VLDQ-NEXT: retq
  %a = load <2 x float>, <2 x float>* %x
  %b = fptoui <2 x float> %a to <2 x i64>
  ret <2 x i64> %b
}
