1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64 4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64 6; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64 8; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ 10; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL 12 13 14declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata) 15declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata) 16declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata) 17declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata) 18declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata) 19declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata) 20declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f64(<4 x double>, metadata) 21declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f64(<4 x double>, metadata) 22declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f64(<4 x double>, metadata) 23declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f64(<4 x double>, metadata) 24declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float>, metadata) 25declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float>, metadata) 26declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double>, metadata) 27declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double>, metadata) 28declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f32(<8 x float>, metadata) 29declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float>, metadata) 30declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float>, metadata) 31declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float>, metadata) 32declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float>, metadata) 33declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float>, metadata) 34declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float>, metadata) 35declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float>, metadata) 36 37define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 { 38; AVX-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64: 39; AVX-32: # %bb.0: 40; AVX-32-NEXT: pushl %ebp 41; AVX-32-NEXT: .cfi_def_cfa_offset 8 42; AVX-32-NEXT: .cfi_offset %ebp, -8 43; AVX-32-NEXT: movl %esp, %ebp 44; AVX-32-NEXT: .cfi_def_cfa_register %ebp 45; AVX-32-NEXT: andl $-8, %esp 46; AVX-32-NEXT: subl $32, %esp 47; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 48; AVX-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) 49; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0 50; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 51; AVX-32-NEXT: vmovhps %xmm0, (%esp) 52; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 53; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 54; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 55; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 56; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 57; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 58; AVX-32-NEXT: fldl (%esp) 59; AVX-32-NEXT: fisttpll (%esp) 60; AVX-32-NEXT: wait 61; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 62; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 63; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 64; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 65; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 66; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 67; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1 68; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 69; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 70; AVX-32-NEXT: movl %ebp, %esp 71; AVX-32-NEXT: popl %ebp 72; AVX-32-NEXT: .cfi_def_cfa %esp, 4 73; AVX-32-NEXT: retl 74; 75; AVX-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64: 76; AVX-64: # %bb.0: 77; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 78; AVX-64-NEXT: vcvttsd2si %xmm1, %rax 79; AVX-64-NEXT: vmovq %rax, %xmm2 80; AVX-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 81; AVX-64-NEXT: vcvttsd2si %xmm1, %rax 82; AVX-64-NEXT: vmovq %rax, %xmm1 83; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 84; AVX-64-NEXT: vcvttsd2si %xmm0, %rax 85; AVX-64-NEXT: vmovq %rax, %xmm2 86; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 87; AVX-64-NEXT: vcvttsd2si %xmm0, %rax 88; AVX-64-NEXT: vmovq %rax, %xmm0 89; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 90; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 91; AVX-64-NEXT: retq 92; 93; AVX512F-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64: 94; AVX512F-32: # %bb.0: 95; AVX512F-32-NEXT: pushl %ebp 96; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 97; AVX512F-32-NEXT: .cfi_offset %ebp, -8 98; AVX512F-32-NEXT: movl %esp, %ebp 99; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 100; AVX512F-32-NEXT: andl $-8, %esp 101; AVX512F-32-NEXT: subl $32, %esp 102; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 103; AVX512F-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) 104; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm0 105; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 106; AVX512F-32-NEXT: vmovhps %xmm0, (%esp) 107; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) 108; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 109; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) 110; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 111; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) 112; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 113; AVX512F-32-NEXT: fldl (%esp) 114; AVX512F-32-NEXT: fisttpll (%esp) 115; AVX512F-32-NEXT: wait 116; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 117; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 118; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 119; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 120; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 121; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 122; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1 123; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 124; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 125; AVX512F-32-NEXT: movl %ebp, %esp 126; AVX512F-32-NEXT: popl %ebp 127; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 128; AVX512F-32-NEXT: retl 129; 130; AVX512F-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64: 131; AVX512F-64: # %bb.0: 132; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1 133; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax 134; AVX512F-64-NEXT: vmovq %rax, %xmm2 135; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 136; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax 137; AVX512F-64-NEXT: vmovq %rax, %xmm1 138; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 139; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax 140; AVX512F-64-NEXT: vmovq %rax, %xmm2 141; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 142; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax 143; AVX512F-64-NEXT: vmovq %rax, %xmm0 144; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 145; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 146; AVX512F-64-NEXT: retq 147; 148; AVX512VL-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64: 149; AVX512VL-32: # %bb.0: 150; AVX512VL-32-NEXT: pushl %ebp 151; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 152; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 153; AVX512VL-32-NEXT: movl %esp, %ebp 154; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 155; AVX512VL-32-NEXT: andl $-8, %esp 156; AVX512VL-32-NEXT: subl $32, %esp 157; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 158; AVX512VL-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) 159; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm0 160; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 161; AVX512VL-32-NEXT: vmovhps %xmm0, (%esp) 162; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 163; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 164; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 165; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 166; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 167; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 168; AVX512VL-32-NEXT: fldl (%esp) 169; AVX512VL-32-NEXT: fisttpll (%esp) 170; AVX512VL-32-NEXT: wait 171; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 172; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 173; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 174; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 175; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 176; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 177; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1 178; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 179; AVX512VL-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 180; AVX512VL-32-NEXT: movl %ebp, %esp 181; AVX512VL-32-NEXT: popl %ebp 182; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 183; AVX512VL-32-NEXT: retl 184; 185; AVX512VL-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64: 186; AVX512VL-64: # %bb.0: 187; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1 188; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax 189; AVX512VL-64-NEXT: vmovq %rax, %xmm2 190; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 191; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax 192; AVX512VL-64-NEXT: vmovq %rax, %xmm1 193; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 194; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax 195; AVX512VL-64-NEXT: vmovq %rax, %xmm2 196; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 197; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax 198; AVX512VL-64-NEXT: vmovq %rax, %xmm0 199; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 200; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 201; AVX512VL-64-NEXT: retq 202; 203; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i64: 204; AVX512DQ: # %bb.0: 205; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 206; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 207; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 208; AVX512DQ-NEXT: ret{{[l|q]}} 209; 210; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i64: 211; AVX512DQVL: # %bb.0: 212; AVX512DQVL-NEXT: vcvttpd2qq %ymm0, %ymm0 213; AVX512DQVL-NEXT: ret{{[l|q]}} 214 %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double> %a, 215 metadata !"fpexcept.strict") #0 216 ret <4 x i64> %ret 217} 218 219define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { 220; AVX-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64: 221; AVX-32: # %bb.0: 222; AVX-32-NEXT: pushl %ebp 223; AVX-32-NEXT: .cfi_def_cfa_offset 8 224; AVX-32-NEXT: .cfi_offset %ebp, -8 225; AVX-32-NEXT: movl %esp, %ebp 226; AVX-32-NEXT: .cfi_def_cfa_register %ebp 227; AVX-32-NEXT: andl $-8, %esp 228; AVX-32-NEXT: subl $32, %esp 229; AVX-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 230; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 231; AVX-32-NEXT: vcomisd %xmm1, %xmm2 232; AVX-32-NEXT: vmovapd %xmm1, %xmm3 233; AVX-32-NEXT: jae .LBB1_2 234; AVX-32-NEXT: # %bb.1: 235; AVX-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3 236; AVX-32-NEXT: .LBB1_2: 237; AVX-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2 238; AVX-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 239; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 240; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 241; AVX-32-NEXT: wait 242; AVX-32-NEXT: setae %al 243; AVX-32-NEXT: movzbl %al, %eax 244; AVX-32-NEXT: shll $31, %eax 245; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 246; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm2 247; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] 248; AVX-32-NEXT: vcomisd %xmm1, %xmm3 249; AVX-32-NEXT: vmovapd %xmm1, %xmm4 250; AVX-32-NEXT: jae .LBB1_4 251; AVX-32-NEXT: # %bb.3: 252; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4 253; AVX-32-NEXT: .LBB1_4: 254; AVX-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3 255; AVX-32-NEXT: vmovsd %xmm3, (%esp) 256; AVX-32-NEXT: fldl (%esp) 257; AVX-32-NEXT: fisttpll (%esp) 258; AVX-32-NEXT: wait 259; AVX-32-NEXT: setae %cl 260; AVX-32-NEXT: movzbl %cl, %ecx 261; AVX-32-NEXT: shll $31, %ecx 262; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 263; AVX-32-NEXT: vcomisd %xmm1, %xmm2 264; AVX-32-NEXT: vmovapd %xmm1, %xmm3 265; AVX-32-NEXT: jae .LBB1_6 266; AVX-32-NEXT: # %bb.5: 267; AVX-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3 268; AVX-32-NEXT: .LBB1_6: 269; AVX-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2 270; AVX-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 271; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 272; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 273; AVX-32-NEXT: wait 274; AVX-32-NEXT: setae %dl 275; AVX-32-NEXT: movzbl %dl, %edx 276; AVX-32-NEXT: shll $31, %edx 277; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 278; AVX-32-NEXT: vcomisd %xmm1, %xmm0 279; AVX-32-NEXT: jae .LBB1_8 280; AVX-32-NEXT: # %bb.7: 281; AVX-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 282; AVX-32-NEXT: .LBB1_8: 283; AVX-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0 284; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 285; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 286; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 287; AVX-32-NEXT: wait 288; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 289; AVX-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 290; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 291; AVX-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 292; AVX-32-NEXT: setae %cl 293; AVX-32-NEXT: movzbl %cl, %ecx 294; AVX-32-NEXT: shll $31, %ecx 295; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 296; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 297; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 298; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 299; AVX-32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 300; AVX-32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 301; AVX-32-NEXT: movl %ebp, %esp 302; AVX-32-NEXT: popl %ebp 303; AVX-32-NEXT: .cfi_def_cfa %esp, 4 304; AVX-32-NEXT: retl 305; 306; AVX-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64: 307; AVX-64: # %bb.0: 308; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 309; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 310; AVX-64-NEXT: vcomisd %xmm1, %xmm3 311; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2 312; AVX-64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 313; AVX-64-NEXT: jb .LBB1_2 314; AVX-64-NEXT: # %bb.1: 315; AVX-64-NEXT: vmovapd %xmm1, %xmm4 316; AVX-64-NEXT: .LBB1_2: 317; AVX-64-NEXT: vsubsd %xmm4, %xmm3, %xmm4 318; AVX-64-NEXT: vcvttsd2si %xmm4, %rcx 319; AVX-64-NEXT: setae %al 320; AVX-64-NEXT: movzbl %al, %eax 321; AVX-64-NEXT: shlq $63, %rax 322; AVX-64-NEXT: xorq %rcx, %rax 323; AVX-64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] 324; AVX-64-NEXT: vcomisd %xmm1, %xmm4 325; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5 326; AVX-64-NEXT: jb .LBB1_4 327; AVX-64-NEXT: # %bb.3: 328; AVX-64-NEXT: vmovapd %xmm1, %xmm5 329; AVX-64-NEXT: .LBB1_4: 330; AVX-64-NEXT: vmovq %rax, %xmm3 331; AVX-64-NEXT: vsubsd %xmm5, %xmm4, %xmm4 332; AVX-64-NEXT: vcvttsd2si %xmm4, %rax 333; AVX-64-NEXT: setae %cl 334; AVX-64-NEXT: movzbl %cl, %ecx 335; AVX-64-NEXT: shlq $63, %rcx 336; AVX-64-NEXT: xorq %rax, %rcx 337; AVX-64-NEXT: vmovq %rcx, %xmm4 338; AVX-64-NEXT: vcomisd %xmm1, %xmm0 339; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5 340; AVX-64-NEXT: jb .LBB1_6 341; AVX-64-NEXT: # %bb.5: 342; AVX-64-NEXT: vmovapd %xmm1, %xmm5 343; AVX-64-NEXT: .LBB1_6: 344; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] 345; AVX-64-NEXT: vsubsd %xmm5, %xmm0, %xmm4 346; AVX-64-NEXT: vcvttsd2si %xmm4, %rax 347; AVX-64-NEXT: setae %cl 348; AVX-64-NEXT: movzbl %cl, %ecx 349; AVX-64-NEXT: shlq $63, %rcx 350; AVX-64-NEXT: xorq %rax, %rcx 351; AVX-64-NEXT: vmovq %rcx, %xmm4 352; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 353; AVX-64-NEXT: vcomisd %xmm1, %xmm0 354; AVX-64-NEXT: jb .LBB1_8 355; AVX-64-NEXT: # %bb.7: 356; AVX-64-NEXT: vmovapd %xmm1, %xmm2 357; AVX-64-NEXT: .LBB1_8: 358; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0 359; AVX-64-NEXT: vcvttsd2si %xmm0, %rax 360; AVX-64-NEXT: setae %cl 361; AVX-64-NEXT: movzbl %cl, %ecx 362; AVX-64-NEXT: shlq $63, %rcx 363; AVX-64-NEXT: xorq %rax, %rcx 364; AVX-64-NEXT: vmovq %rcx, %xmm0 365; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0] 366; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 367; AVX-64-NEXT: retq 368; 369; AVX512F-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64: 370; AVX512F-32: # %bb.0: 371; AVX512F-32-NEXT: pushl %ebp 372; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 373; AVX512F-32-NEXT: .cfi_offset %ebp, -8 374; AVX512F-32-NEXT: movl %esp, %ebp 375; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 376; AVX512F-32-NEXT: pushl %ebx 377; AVX512F-32-NEXT: andl $-8, %esp 378; AVX512F-32-NEXT: subl $40, %esp 379; AVX512F-32-NEXT: .cfi_offset %ebx, -12 380; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm1 381; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 382; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 383; AVX512F-32-NEXT: xorl %eax, %eax 384; AVX512F-32-NEXT: vcomisd %xmm3, %xmm2 385; AVX512F-32-NEXT: setae %al 386; AVX512F-32-NEXT: kmovw %eax, %k1 387; AVX512F-32-NEXT: vmovsd %xmm3, %xmm3, %xmm4 {%k1} {z} 388; AVX512F-32-NEXT: vsubsd %xmm4, %xmm2, %xmm2 389; AVX512F-32-NEXT: vmovsd %xmm2, (%esp) 390; AVX512F-32-NEXT: xorl %edx, %edx 391; AVX512F-32-NEXT: vcomisd %xmm3, %xmm1 392; AVX512F-32-NEXT: setae %dl 393; AVX512F-32-NEXT: kmovw %edx, %k1 394; AVX512F-32-NEXT: vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z} 395; AVX512F-32-NEXT: vsubsd %xmm2, %xmm1, %xmm1 396; AVX512F-32-NEXT: vmovsd %xmm1, {{[0-9]+}}(%esp) 397; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 398; AVX512F-32-NEXT: xorl %ecx, %ecx 399; AVX512F-32-NEXT: vcomisd %xmm3, %xmm1 400; AVX512F-32-NEXT: setae %cl 401; AVX512F-32-NEXT: kmovw %ecx, %k1 402; AVX512F-32-NEXT: vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z} 403; AVX512F-32-NEXT: vsubsd %xmm2, %xmm1, %xmm1 404; AVX512F-32-NEXT: vmovsd %xmm1, {{[0-9]+}}(%esp) 405; AVX512F-32-NEXT: xorl %ebx, %ebx 406; AVX512F-32-NEXT: vcomisd %xmm3, %xmm0 407; AVX512F-32-NEXT: setae %bl 408; AVX512F-32-NEXT: kmovw %ebx, %k1 409; AVX512F-32-NEXT: vmovsd %xmm3, %xmm3, %xmm1 {%k1} {z} 410; AVX512F-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0 411; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 412; AVX512F-32-NEXT: fldl (%esp) 413; AVX512F-32-NEXT: fisttpll (%esp) 414; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) 415; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 416; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) 417; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 418; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) 419; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 420; AVX512F-32-NEXT: wait 421; AVX512F-32-NEXT: shll $31, %eax 422; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 423; AVX512F-32-NEXT: shll $31, %edx 424; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 425; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 426; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 427; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 428; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 429; AVX512F-32-NEXT: shll $31, %ecx 430; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 431; AVX512F-32-NEXT: shll $31, %ebx 432; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx 433; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 434; AVX512F-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1 435; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 436; AVX512F-32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1 437; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 438; AVX512F-32-NEXT: leal -4(%ebp), %esp 439; AVX512F-32-NEXT: popl %ebx 440; AVX512F-32-NEXT: popl %ebp 441; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 442; AVX512F-32-NEXT: retl 443; 444; AVX512F-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64: 445; AVX512F-64: # %bb.0: 446; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1 447; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax 448; AVX512F-64-NEXT: vmovq %rax, %xmm2 449; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 450; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax 451; AVX512F-64-NEXT: vmovq %rax, %xmm1 452; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 453; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax 454; AVX512F-64-NEXT: vmovq %rax, %xmm2 455; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 456; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax 457; AVX512F-64-NEXT: vmovq %rax, %xmm0 458; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 459; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 460; AVX512F-64-NEXT: retq 461; 462; AVX512VL-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64: 463; AVX512VL-32: # %bb.0: 464; AVX512VL-32-NEXT: pushl %ebp 465; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 466; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 467; AVX512VL-32-NEXT: movl %esp, %ebp 468; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 469; AVX512VL-32-NEXT: pushl %ebx 470; AVX512VL-32-NEXT: andl $-8, %esp 471; AVX512VL-32-NEXT: subl $40, %esp 472; AVX512VL-32-NEXT: .cfi_offset %ebx, -12 473; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm1 474; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 475; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 476; AVX512VL-32-NEXT: xorl %eax, %eax 477; AVX512VL-32-NEXT: vcomisd %xmm3, %xmm2 478; AVX512VL-32-NEXT: setae %al 479; AVX512VL-32-NEXT: kmovw %eax, %k1 480; AVX512VL-32-NEXT: vmovsd %xmm3, %xmm3, %xmm4 {%k1} {z} 481; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm2, %xmm2 482; AVX512VL-32-NEXT: vmovsd %xmm2, (%esp) 483; AVX512VL-32-NEXT: xorl %edx, %edx 484; AVX512VL-32-NEXT: vcomisd %xmm3, %xmm1 485; AVX512VL-32-NEXT: setae %dl 486; AVX512VL-32-NEXT: kmovw %edx, %k1 487; AVX512VL-32-NEXT: vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z} 488; AVX512VL-32-NEXT: vsubsd %xmm2, %xmm1, %xmm1 489; AVX512VL-32-NEXT: vmovsd %xmm1, {{[0-9]+}}(%esp) 490; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 491; AVX512VL-32-NEXT: xorl %ecx, %ecx 492; AVX512VL-32-NEXT: vcomisd %xmm3, %xmm1 493; AVX512VL-32-NEXT: setae %cl 494; AVX512VL-32-NEXT: kmovw %ecx, %k1 495; AVX512VL-32-NEXT: vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z} 496; AVX512VL-32-NEXT: vsubsd %xmm2, %xmm1, %xmm1 497; AVX512VL-32-NEXT: vmovsd %xmm1, {{[0-9]+}}(%esp) 498; AVX512VL-32-NEXT: xorl %ebx, %ebx 499; AVX512VL-32-NEXT: vcomisd %xmm3, %xmm0 500; AVX512VL-32-NEXT: setae %bl 501; AVX512VL-32-NEXT: kmovw %ebx, %k1 502; AVX512VL-32-NEXT: vmovsd %xmm3, %xmm3, %xmm1 {%k1} {z} 503; AVX512VL-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0 504; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 505; AVX512VL-32-NEXT: fldl (%esp) 506; AVX512VL-32-NEXT: fisttpll (%esp) 507; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 508; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 509; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 510; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 511; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 512; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 513; AVX512VL-32-NEXT: wait 514; AVX512VL-32-NEXT: shll $31, %eax 515; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 516; AVX512VL-32-NEXT: shll $31, %edx 517; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 518; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 519; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 520; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 521; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 522; AVX512VL-32-NEXT: shll $31, %ecx 523; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 524; AVX512VL-32-NEXT: shll $31, %ebx 525; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx 526; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 527; AVX512VL-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1 528; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 529; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1 530; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 531; AVX512VL-32-NEXT: leal -4(%ebp), %esp 532; AVX512VL-32-NEXT: popl %ebx 533; AVX512VL-32-NEXT: popl %ebp 534; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 535; AVX512VL-32-NEXT: retl 536; 537; AVX512VL-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64: 538; AVX512VL-64: # %bb.0: 539; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1 540; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax 541; AVX512VL-64-NEXT: vmovq %rax, %xmm2 542; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 543; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax 544; AVX512VL-64-NEXT: vmovq %rax, %xmm1 545; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 546; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax 547; AVX512VL-64-NEXT: vmovq %rax, %xmm2 548; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 549; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax 550; AVX512VL-64-NEXT: vmovq %rax, %xmm0 551; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 552; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 553; AVX512VL-64-NEXT: retq 554; 555; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i64: 556; AVX512DQ: # %bb.0: 557; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 558; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 559; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 560; AVX512DQ-NEXT: ret{{[l|q]}} 561; 562; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i64: 563; AVX512DQVL: # %bb.0: 564; AVX512DQVL-NEXT: vcvttpd2uqq %ymm0, %ymm0 565; AVX512DQVL-NEXT: ret{{[l|q]}} 566 %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double> %a, 567 metadata !"fpexcept.strict") #0 568 ret <4 x i64> %ret 569} 570 571define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 { 572; AVX-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64: 573; AVX-32: # %bb.0: 574; AVX-32-NEXT: pushl %ebp 575; AVX-32-NEXT: .cfi_def_cfa_offset 8 576; AVX-32-NEXT: .cfi_offset %ebp, -8 577; AVX-32-NEXT: movl %esp, %ebp 578; AVX-32-NEXT: .cfi_def_cfa_register %ebp 579; AVX-32-NEXT: andl $-8, %esp 580; AVX-32-NEXT: subl $32, %esp 581; AVX-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) 582; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 583; AVX-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) 584; AVX-32-NEXT: vextractps $3, %xmm0, (%esp) 585; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 586; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 587; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 588; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 589; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 590; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 591; AVX-32-NEXT: flds (%esp) 592; AVX-32-NEXT: fisttpll (%esp) 593; AVX-32-NEXT: wait 594; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 595; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 596; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 597; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 598; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 599; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 600; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1 601; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 602; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 603; AVX-32-NEXT: movl %ebp, %esp 604; AVX-32-NEXT: popl %ebp 605; AVX-32-NEXT: .cfi_def_cfa %esp, 4 606; AVX-32-NEXT: retl 607; 608; AVX-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64: 609; AVX-64: # %bb.0: 610; AVX-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 611; AVX-64-NEXT: vcvttss2si %xmm1, %rax 612; AVX-64-NEXT: vmovq %rax, %xmm1 613; AVX-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 614; AVX-64-NEXT: vcvttss2si %xmm2, %rax 615; AVX-64-NEXT: vmovq %rax, %xmm2 616; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 617; AVX-64-NEXT: vcvttss2si %xmm0, %rax 618; AVX-64-NEXT: vmovq %rax, %xmm2 619; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 620; AVX-64-NEXT: vcvttss2si %xmm0, %rax 621; AVX-64-NEXT: vmovq %rax, %xmm0 622; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 623; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 624; AVX-64-NEXT: retq 625; 626; AVX512F-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64: 627; AVX512F-32: # %bb.0: 628; AVX512F-32-NEXT: pushl %ebp 629; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 630; AVX512F-32-NEXT: .cfi_offset %ebp, -8 631; AVX512F-32-NEXT: movl %esp, %ebp 632; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 633; AVX512F-32-NEXT: andl $-8, %esp 634; AVX512F-32-NEXT: subl $32, %esp 635; AVX512F-32-NEXT: vmovd %xmm0, {{[0-9]+}}(%esp) 636; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 637; AVX512F-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) 638; AVX512F-32-NEXT: vextractps $3, %xmm0, (%esp) 639; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 640; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 641; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 642; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 643; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 644; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 645; AVX512F-32-NEXT: flds (%esp) 646; AVX512F-32-NEXT: fisttpll (%esp) 647; AVX512F-32-NEXT: wait 648; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 649; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 650; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 651; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 652; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 653; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 654; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1 655; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 656; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 657; AVX512F-32-NEXT: movl %ebp, %esp 658; AVX512F-32-NEXT: popl %ebp 659; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 660; AVX512F-32-NEXT: retl 661; 662; AVX512F-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64: 663; AVX512F-64: # %bb.0: 664; AVX512F-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 665; AVX512F-64-NEXT: vcvttss2si %xmm1, %rax 666; AVX512F-64-NEXT: vmovq %rax, %xmm1 667; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 668; AVX512F-64-NEXT: vcvttss2si %xmm2, %rax 669; AVX512F-64-NEXT: vmovq %rax, %xmm2 670; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 671; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax 672; AVX512F-64-NEXT: vmovq %rax, %xmm2 673; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 674; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax 675; AVX512F-64-NEXT: vmovq %rax, %xmm0 676; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 677; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 678; AVX512F-64-NEXT: retq 679; 680; AVX512VL-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64: 681; AVX512VL-32: # %bb.0: 682; AVX512VL-32-NEXT: pushl %ebp 683; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 684; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 685; AVX512VL-32-NEXT: movl %esp, %ebp 686; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 687; AVX512VL-32-NEXT: andl $-8, %esp 688; AVX512VL-32-NEXT: subl $32, %esp 689; AVX512VL-32-NEXT: vmovd %xmm0, {{[0-9]+}}(%esp) 690; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 691; AVX512VL-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) 692; AVX512VL-32-NEXT: vextractps $3, %xmm0, (%esp) 693; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 694; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 695; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 696; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 697; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 698; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 699; AVX512VL-32-NEXT: flds (%esp) 700; AVX512VL-32-NEXT: fisttpll (%esp) 701; AVX512VL-32-NEXT: wait 702; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 703; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 704; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 705; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 706; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 707; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 708; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1 709; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 710; AVX512VL-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 711; AVX512VL-32-NEXT: movl %ebp, %esp 712; AVX512VL-32-NEXT: popl %ebp 713; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 714; AVX512VL-32-NEXT: retl 715; 716; AVX512VL-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64: 717; AVX512VL-64: # %bb.0: 718; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 719; AVX512VL-64-NEXT: vcvttss2si %xmm1, %rax 720; AVX512VL-64-NEXT: vmovq %rax, %xmm1 721; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 722; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax 723; AVX512VL-64-NEXT: vmovq %rax, %xmm2 724; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 725; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax 726; AVX512VL-64-NEXT: vmovq %rax, %xmm2 727; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 728; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax 729; AVX512VL-64-NEXT: vmovq %rax, %xmm0 730; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 731; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 732; AVX512VL-64-NEXT: retq 733; 734; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i64: 735; AVX512DQ: # %bb.0: 736; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 737; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 738; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 739; AVX512DQ-NEXT: ret{{[l|q]}} 740; 741; AVX512DQVL-LABEL: strict_vector_fptosi_v4f32_to_v4i64: 742; AVX512DQVL: # %bb.0: 743; AVX512DQVL-NEXT: vcvttps2qq %xmm0, %ymm0 744; AVX512DQVL-NEXT: ret{{[l|q]}} 745 %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %a, 746 metadata !"fpexcept.strict") #0 747 ret <4 x i64> %ret 748} 749 750define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { 751; AVX-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64: 752; AVX-32: # %bb.0: 753; AVX-32-NEXT: pushl %ebp 754; AVX-32-NEXT: .cfi_def_cfa_offset 8 755; AVX-32-NEXT: .cfi_offset %ebp, -8 756; AVX-32-NEXT: movl %esp, %ebp 757; AVX-32-NEXT: .cfi_def_cfa_register %ebp 758; AVX-32-NEXT: andl $-8, %esp 759; AVX-32-NEXT: subl $32, %esp 760; AVX-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 761; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 762; AVX-32-NEXT: vcomiss %xmm1, %xmm2 763; AVX-32-NEXT: vmovaps %xmm1, %xmm3 764; AVX-32-NEXT: jae .LBB3_2 765; AVX-32-NEXT: # %bb.1: 766; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3 767; AVX-32-NEXT: .LBB3_2: 768; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 769; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 770; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 771; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 772; AVX-32-NEXT: wait 773; AVX-32-NEXT: setae %al 774; AVX-32-NEXT: movzbl %al, %eax 775; AVX-32-NEXT: shll $31, %eax 776; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 777; AVX-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3] 778; AVX-32-NEXT: vcomiss %xmm1, %xmm2 779; AVX-32-NEXT: vmovaps %xmm1, %xmm3 780; AVX-32-NEXT: jae .LBB3_4 781; AVX-32-NEXT: # %bb.3: 782; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3 783; AVX-32-NEXT: .LBB3_4: 784; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 785; AVX-32-NEXT: vmovss %xmm2, (%esp) 786; AVX-32-NEXT: flds (%esp) 787; AVX-32-NEXT: fisttpll (%esp) 788; AVX-32-NEXT: wait 789; AVX-32-NEXT: setae %cl 790; AVX-32-NEXT: movzbl %cl, %ecx 791; AVX-32-NEXT: shll $31, %ecx 792; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 793; AVX-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 794; AVX-32-NEXT: vcomiss %xmm1, %xmm2 795; AVX-32-NEXT: vmovaps %xmm1, %xmm3 796; AVX-32-NEXT: jae .LBB3_6 797; AVX-32-NEXT: # %bb.5: 798; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3 799; AVX-32-NEXT: .LBB3_6: 800; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 801; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 802; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 803; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 804; AVX-32-NEXT: wait 805; AVX-32-NEXT: setae %dl 806; AVX-32-NEXT: movzbl %dl, %edx 807; AVX-32-NEXT: shll $31, %edx 808; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 809; AVX-32-NEXT: vcomiss %xmm1, %xmm0 810; AVX-32-NEXT: jae .LBB3_8 811; AVX-32-NEXT: # %bb.7: 812; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1 813; AVX-32-NEXT: .LBB3_8: 814; AVX-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 815; AVX-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) 816; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 817; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 818; AVX-32-NEXT: wait 819; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 820; AVX-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 821; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 822; AVX-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 823; AVX-32-NEXT: setae %cl 824; AVX-32-NEXT: movzbl %cl, %ecx 825; AVX-32-NEXT: shll $31, %ecx 826; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 827; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 828; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 829; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 830; AVX-32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 831; AVX-32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 832; AVX-32-NEXT: movl %ebp, %esp 833; AVX-32-NEXT: popl %ebp 834; AVX-32-NEXT: .cfi_def_cfa %esp, 4 835; AVX-32-NEXT: retl 836; 837; AVX-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64: 838; AVX-64: # %bb.0: 839; AVX-64-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3] 840; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 841; AVX-64-NEXT: vcomiss %xmm1, %xmm3 842; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2 843; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4 844; AVX-64-NEXT: jb .LBB3_2 845; AVX-64-NEXT: # %bb.1: 846; AVX-64-NEXT: vmovaps %xmm1, %xmm4 847; AVX-64-NEXT: .LBB3_2: 848; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3 849; AVX-64-NEXT: vcvttss2si %xmm3, %rcx 850; AVX-64-NEXT: setae %al 851; AVX-64-NEXT: movzbl %al, %eax 852; AVX-64-NEXT: shlq $63, %rax 853; AVX-64-NEXT: xorq %rcx, %rax 854; AVX-64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] 855; AVX-64-NEXT: vcomiss %xmm1, %xmm4 856; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5 857; AVX-64-NEXT: jb .LBB3_4 858; AVX-64-NEXT: # %bb.3: 859; AVX-64-NEXT: vmovaps %xmm1, %xmm5 860; AVX-64-NEXT: .LBB3_4: 861; AVX-64-NEXT: vmovq %rax, %xmm3 862; AVX-64-NEXT: vsubss %xmm5, %xmm4, %xmm4 863; AVX-64-NEXT: vcvttss2si %xmm4, %rax 864; AVX-64-NEXT: setae %cl 865; AVX-64-NEXT: movzbl %cl, %ecx 866; AVX-64-NEXT: shlq $63, %rcx 867; AVX-64-NEXT: xorq %rax, %rcx 868; AVX-64-NEXT: vmovq %rcx, %xmm4 869; AVX-64-NEXT: vcomiss %xmm1, %xmm0 870; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5 871; AVX-64-NEXT: jb .LBB3_6 872; AVX-64-NEXT: # %bb.5: 873; AVX-64-NEXT: vmovaps %xmm1, %xmm5 874; AVX-64-NEXT: .LBB3_6: 875; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] 876; AVX-64-NEXT: vsubss %xmm5, %xmm0, %xmm4 877; AVX-64-NEXT: vcvttss2si %xmm4, %rax 878; AVX-64-NEXT: setae %cl 879; AVX-64-NEXT: movzbl %cl, %ecx 880; AVX-64-NEXT: shlq $63, %rcx 881; AVX-64-NEXT: xorq %rax, %rcx 882; AVX-64-NEXT: vmovq %rcx, %xmm4 883; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 884; AVX-64-NEXT: vcomiss %xmm1, %xmm0 885; AVX-64-NEXT: jb .LBB3_8 886; AVX-64-NEXT: # %bb.7: 887; AVX-64-NEXT: vmovaps %xmm1, %xmm2 888; AVX-64-NEXT: .LBB3_8: 889; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0 890; AVX-64-NEXT: vcvttss2si %xmm0, %rax 891; AVX-64-NEXT: setae %cl 892; AVX-64-NEXT: movzbl %cl, %ecx 893; AVX-64-NEXT: shlq $63, %rcx 894; AVX-64-NEXT: xorq %rax, %rcx 895; AVX-64-NEXT: vmovq %rcx, %xmm0 896; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0] 897; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 898; AVX-64-NEXT: retq 899; 900; AVX512F-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64: 901; AVX512F-32: # %bb.0: 902; AVX512F-32-NEXT: pushl %ebp 903; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 904; AVX512F-32-NEXT: .cfi_offset %ebp, -8 905; AVX512F-32-NEXT: movl %esp, %ebp 906; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 907; AVX512F-32-NEXT: pushl %ebx 908; AVX512F-32-NEXT: andl $-8, %esp 909; AVX512F-32-NEXT: subl $40, %esp 910; AVX512F-32-NEXT: .cfi_offset %ebx, -12 911; AVX512F-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 912; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 913; AVX512F-32-NEXT: xorl %eax, %eax 914; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1 915; AVX512F-32-NEXT: setae %al 916; AVX512F-32-NEXT: kmovw %eax, %k1 917; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 918; AVX512F-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 919; AVX512F-32-NEXT: vmovss %xmm1, (%esp) 920; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 921; AVX512F-32-NEXT: xorl %edx, %edx 922; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1 923; AVX512F-32-NEXT: setae %dl 924; AVX512F-32-NEXT: kmovw %edx, %k1 925; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 926; AVX512F-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 927; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) 928; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 929; AVX512F-32-NEXT: xorl %ecx, %ecx 930; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1 931; AVX512F-32-NEXT: setae %cl 932; AVX512F-32-NEXT: kmovw %ecx, %k1 933; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 934; AVX512F-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 935; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) 936; AVX512F-32-NEXT: xorl %ebx, %ebx 937; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0 938; AVX512F-32-NEXT: setae %bl 939; AVX512F-32-NEXT: kmovw %ebx, %k1 940; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z} 941; AVX512F-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 942; AVX512F-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) 943; AVX512F-32-NEXT: flds (%esp) 944; AVX512F-32-NEXT: fisttpll (%esp) 945; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 946; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 947; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 948; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 949; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 950; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 951; AVX512F-32-NEXT: wait 952; AVX512F-32-NEXT: shll $31, %eax 953; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 954; AVX512F-32-NEXT: shll $31, %edx 955; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 956; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 957; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 958; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 959; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 960; AVX512F-32-NEXT: shll $31, %ecx 961; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 962; AVX512F-32-NEXT: shll $31, %ebx 963; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx 964; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 965; AVX512F-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1 966; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 967; AVX512F-32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1 968; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 969; AVX512F-32-NEXT: leal -4(%ebp), %esp 970; AVX512F-32-NEXT: popl %ebx 971; AVX512F-32-NEXT: popl %ebp 972; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 973; AVX512F-32-NEXT: retl 974; 975; AVX512F-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64: 976; AVX512F-64: # %bb.0: 977; AVX512F-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 978; AVX512F-64-NEXT: vcvttss2usi %xmm1, %rax 979; AVX512F-64-NEXT: vmovq %rax, %xmm1 980; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 981; AVX512F-64-NEXT: vcvttss2usi %xmm2, %rax 982; AVX512F-64-NEXT: vmovq %rax, %xmm2 983; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 984; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax 985; AVX512F-64-NEXT: vmovq %rax, %xmm2 986; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 987; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax 988; AVX512F-64-NEXT: vmovq %rax, %xmm0 989; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 990; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 991; AVX512F-64-NEXT: retq 992; 993; AVX512VL-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64: 994; AVX512VL-32: # %bb.0: 995; AVX512VL-32-NEXT: pushl %ebp 996; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 997; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 998; AVX512VL-32-NEXT: movl %esp, %ebp 999; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 1000; AVX512VL-32-NEXT: pushl %ebx 1001; AVX512VL-32-NEXT: andl $-8, %esp 1002; AVX512VL-32-NEXT: subl $40, %esp 1003; AVX512VL-32-NEXT: .cfi_offset %ebx, -12 1004; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1005; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 1006; AVX512VL-32-NEXT: xorl %eax, %eax 1007; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1 1008; AVX512VL-32-NEXT: setae %al 1009; AVX512VL-32-NEXT: kmovw %eax, %k1 1010; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 1011; AVX512VL-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 1012; AVX512VL-32-NEXT: vmovss %xmm1, (%esp) 1013; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 1014; AVX512VL-32-NEXT: xorl %edx, %edx 1015; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1 1016; AVX512VL-32-NEXT: setae %dl 1017; AVX512VL-32-NEXT: kmovw %edx, %k1 1018; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 1019; AVX512VL-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 1020; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) 1021; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1022; AVX512VL-32-NEXT: xorl %ecx, %ecx 1023; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1 1024; AVX512VL-32-NEXT: setae %cl 1025; AVX512VL-32-NEXT: kmovw %ecx, %k1 1026; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 1027; AVX512VL-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 1028; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) 1029; AVX512VL-32-NEXT: xorl %ebx, %ebx 1030; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0 1031; AVX512VL-32-NEXT: setae %bl 1032; AVX512VL-32-NEXT: kmovw %ebx, %k1 1033; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z} 1034; AVX512VL-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 1035; AVX512VL-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) 1036; AVX512VL-32-NEXT: flds (%esp) 1037; AVX512VL-32-NEXT: fisttpll (%esp) 1038; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 1039; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1040; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 1041; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1042; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 1043; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1044; AVX512VL-32-NEXT: wait 1045; AVX512VL-32-NEXT: shll $31, %eax 1046; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1047; AVX512VL-32-NEXT: shll $31, %edx 1048; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 1049; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1050; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 1051; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 1052; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1053; AVX512VL-32-NEXT: shll $31, %ecx 1054; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 1055; AVX512VL-32-NEXT: shll $31, %ebx 1056; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx 1057; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1058; AVX512VL-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1 1059; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 1060; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1 1061; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1062; AVX512VL-32-NEXT: leal -4(%ebp), %esp 1063; AVX512VL-32-NEXT: popl %ebx 1064; AVX512VL-32-NEXT: popl %ebp 1065; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 1066; AVX512VL-32-NEXT: retl 1067; 1068; AVX512VL-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64: 1069; AVX512VL-64: # %bb.0: 1070; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 1071; AVX512VL-64-NEXT: vcvttss2usi %xmm1, %rax 1072; AVX512VL-64-NEXT: vmovq %rax, %xmm1 1073; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 1074; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax 1075; AVX512VL-64-NEXT: vmovq %rax, %xmm2 1076; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1077; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax 1078; AVX512VL-64-NEXT: vmovq %rax, %xmm2 1079; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1080; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax 1081; AVX512VL-64-NEXT: vmovq %rax, %xmm0 1082; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 1083; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1084; AVX512VL-64-NEXT: retq 1085; 1086; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i64: 1087; AVX512DQ: # %bb.0: 1088; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 1089; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 1090; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1091; AVX512DQ-NEXT: ret{{[l|q]}} 1092; 1093; AVX512DQVL-LABEL: strict_vector_fptoui_v4f32_to_v4i64: 1094; AVX512DQVL: # %bb.0: 1095; AVX512DQVL-NEXT: vcvttps2uqq %xmm0, %ymm0 1096; AVX512DQVL-NEXT: ret{{[l|q]}} 1097 %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %a, 1098 metadata !"fpexcept.strict") #0 1099 ret <4 x i64> %ret 1100} 1101 1102define <4 x i32> @strict_vector_fptosi_v4f64_to_v4i32(<4 x double> %a) #0 { 1103; CHECK-LABEL: strict_vector_fptosi_v4f64_to_v4i32: 1104; CHECK: # %bb.0: 1105; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 1106; CHECK-NEXT: vzeroupper 1107; CHECK-NEXT: ret{{[l|q]}} 1108 %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double> %a, 1109 metadata !"fpexcept.strict") #0 1110 ret <4 x i32> %ret 1111} 1112 1113define <4 x i32> @strict_vector_fptoui_v4f64_to_v4i32(<4 x double> %a) #0 { 1114; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i32: 1115; AVX: # %bb.0: 1116; AVX-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 1117; AVX-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 1118; AVX-NEXT: vextractf128 $1, %ymm2, %xmm3 1119; AVX-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2] 1120; AVX-NEXT: vxorps %xmm4, %xmm4, %xmm4 1121; AVX-NEXT: vmovaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] 1122; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3 1123; AVX-NEXT: vxorps %xmm4, %xmm4, %xmm4 1124; AVX-NEXT: vblendvpd %ymm2, %ymm4, %ymm1, %ymm1 1125; AVX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 1126; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 1127; AVX-NEXT: vxorpd %xmm3, %xmm0, %xmm0 1128; AVX-NEXT: vzeroupper 1129; AVX-NEXT: ret{{[l|q]}} 1130; 1131; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i32: 1132; AVX512F: # %bb.0: 1133; AVX512F-NEXT: vmovaps %ymm0, %ymm0 1134; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 1135; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1136; AVX512F-NEXT: vzeroupper 1137; AVX512F-NEXT: ret{{[l|q]}} 1138; 1139; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i32: 1140; AVX512VL: # %bb.0: 1141; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0 1142; AVX512VL-NEXT: vzeroupper 1143; AVX512VL-NEXT: ret{{[l|q]}} 1144; 1145; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i32: 1146; AVX512DQ: # %bb.0: 1147; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 1148; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 1149; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1150; AVX512DQ-NEXT: vzeroupper 1151; AVX512DQ-NEXT: ret{{[l|q]}} 1152; 1153; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i32: 1154; AVX512DQVL: # %bb.0: 1155; AVX512DQVL-NEXT: vcvttpd2udq %ymm0, %xmm0 1156; AVX512DQVL-NEXT: vzeroupper 1157; AVX512DQVL-NEXT: ret{{[l|q]}} 1158 %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double> %a, 1159 metadata !"fpexcept.strict") #0 1160 ret <4 x i32> %ret 1161} 1162 1163define <4 x i16> @strict_vector_fptosi_v4f64_to_v4i16(<4 x double> %a) #0 { 1164; CHECK-LABEL: strict_vector_fptosi_v4f64_to_v4i16: 1165; CHECK: # %bb.0: 1166; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 1167; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1168; CHECK-NEXT: vzeroupper 1169; CHECK-NEXT: ret{{[l|q]}} 1170 %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f64(<4 x double> %a, 1171 metadata !"fpexcept.strict") #0 1172 ret <4 x i16> %ret 1173} 1174 1175define <4 x i16> @strict_vector_fptoui_v4f64_to_v4i16(<4 x double> %a) #0 { 1176; CHECK-LABEL: strict_vector_fptoui_v4f64_to_v4i16: 1177; CHECK: # %bb.0: 1178; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 1179; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1180; CHECK-NEXT: vzeroupper 1181; CHECK-NEXT: ret{{[l|q]}} 1182 %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f64(<4 x double> %a, 1183 metadata !"fpexcept.strict") #0 1184 ret <4 x i16> %ret 1185} 1186 1187define <4 x i8> @strict_vector_fptosi_v4f64_to_v4i8(<4 x double> %a) #0 { 1188; AVX-LABEL: strict_vector_fptosi_v4f64_to_v4i8: 1189; AVX: # %bb.0: 1190; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 1191; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1192; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 1193; AVX-NEXT: vzeroupper 1194; AVX-NEXT: ret{{[l|q]}} 1195; 1196; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i8: 1197; AVX512F: # %bb.0: 1198; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0 1199; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1200; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 1201; AVX512F-NEXT: vzeroupper 1202; AVX512F-NEXT: ret{{[l|q]}} 1203; 1204; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i8: 1205; AVX512VL: # %bb.0: 1206; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 1207; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 1208; AVX512VL-NEXT: vzeroupper 1209; AVX512VL-NEXT: ret{{[l|q]}} 1210; 1211; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i8: 1212; AVX512DQ: # %bb.0: 1213; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0 1214; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1215; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 1216; AVX512DQ-NEXT: vzeroupper 1217; AVX512DQ-NEXT: ret{{[l|q]}} 1218; 1219; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i8: 1220; AVX512DQVL: # %bb.0: 1221; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0 1222; AVX512DQVL-NEXT: vpmovdb %xmm0, %xmm0 1223; AVX512DQVL-NEXT: vzeroupper 1224; AVX512DQVL-NEXT: ret{{[l|q]}} 1225 %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f64(<4 x double> %a, 1226 metadata !"fpexcept.strict") #0 1227 ret <4 x i8> %ret 1228} 1229 1230define <4 x i8> @strict_vector_fptoui_v4f64_to_v4i8(<4 x double> %a) #0 { 1231; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i8: 1232; AVX: # %bb.0: 1233; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 1234; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1235; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 1236; AVX-NEXT: vzeroupper 1237; AVX-NEXT: ret{{[l|q]}} 1238; 1239; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i8: 1240; AVX512F: # %bb.0: 1241; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0 1242; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1243; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 1244; AVX512F-NEXT: vzeroupper 1245; AVX512F-NEXT: ret{{[l|q]}} 1246; 1247; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i8: 1248; AVX512VL: # %bb.0: 1249; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 1250; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 1251; AVX512VL-NEXT: vzeroupper 1252; AVX512VL-NEXT: ret{{[l|q]}} 1253; 1254; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i8: 1255; AVX512DQ: # %bb.0: 1256; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0 1257; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1258; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 1259; AVX512DQ-NEXT: vzeroupper 1260; AVX512DQ-NEXT: ret{{[l|q]}} 1261; 1262; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i8: 1263; AVX512DQVL: # %bb.0: 1264; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0 1265; AVX512DQVL-NEXT: vpmovdb %xmm0, %xmm0 1266; AVX512DQVL-NEXT: vzeroupper 1267; AVX512DQVL-NEXT: ret{{[l|q]}} 1268 %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f64(<4 x double> %a, 1269 metadata !"fpexcept.strict") #0 1270 ret <4 x i8> %ret 1271} 1272 1273define <4 x i1> @strict_vector_fptosi_v4f64_to_v4i1(<4 x double> %a) #0 { 1274; AVX-LABEL: strict_vector_fptosi_v4f64_to_v4i1: 1275; AVX: # %bb.0: 1276; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 1277; AVX-NEXT: vzeroupper 1278; AVX-NEXT: ret{{[l|q]}} 1279; 1280; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i1: 1281; AVX512F: # %bb.0: 1282; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0 1283; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 1284; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1285; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1286; AVX512F-NEXT: vzeroupper 1287; AVX512F-NEXT: ret{{[l|q]}} 1288; 1289; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i1: 1290; AVX512VL: # %bb.0: 1291; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 1292; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 1293; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 1294; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 1295; AVX512VL-NEXT: vzeroupper 1296; AVX512VL-NEXT: ret{{[l|q]}} 1297; 1298; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i1: 1299; AVX512DQ: # %bb.0: 1300; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0 1301; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 1302; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1303; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1304; AVX512DQ-NEXT: vzeroupper 1305; AVX512DQ-NEXT: ret{{[l|q]}} 1306; 1307; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i1: 1308; AVX512DQVL: # %bb.0: 1309; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0 1310; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0 1311; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0 1312; AVX512DQVL-NEXT: vzeroupper 1313; AVX512DQVL-NEXT: ret{{[l|q]}} 1314 %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double> %a, 1315 metadata !"fpexcept.strict") #0 1316 ret <4 x i1> %ret 1317} 1318 1319define <4 x i1> @strict_vector_fptoui_v4f64_to_v4i1(<4 x double> %a) #0 { 1320; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i1: 1321; AVX: # %bb.0: 1322; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 1323; AVX-NEXT: vzeroupper 1324; AVX-NEXT: ret{{[l|q]}} 1325; 1326; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i1: 1327; AVX512F: # %bb.0: 1328; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0 1329; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 1330; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 1331; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1332; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1333; AVX512F-NEXT: vzeroupper 1334; AVX512F-NEXT: ret{{[l|q]}} 1335; 1336; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i1: 1337; AVX512VL: # %bb.0: 1338; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 1339; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 1340; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 1341; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 1342; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 1343; AVX512VL-NEXT: vzeroupper 1344; AVX512VL-NEXT: ret{{[l|q]}} 1345; 1346; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i1: 1347; AVX512DQ: # %bb.0: 1348; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0 1349; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1350; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 1351; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1352; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1353; AVX512DQ-NEXT: vzeroupper 1354; AVX512DQ-NEXT: ret{{[l|q]}} 1355; 1356; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i1: 1357; AVX512DQVL: # %bb.0: 1358; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0 1359; AVX512DQVL-NEXT: vpslld $31, %xmm0, %xmm0 1360; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0 1361; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0 1362; AVX512DQVL-NEXT: vzeroupper 1363; AVX512DQVL-NEXT: ret{{[l|q]}} 1364 %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double> %a, 1365 metadata !"fpexcept.strict") #0 1366 ret <4 x i1> %ret 1367} 1368 1369define <8 x i32> @strict_vector_fptosi_v8f32_to_v8i32(<8 x float> %a) #0 { 1370; CHECK-LABEL: strict_vector_fptosi_v8f32_to_v8i32: 1371; CHECK: # %bb.0: 1372; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 1373; CHECK-NEXT: ret{{[l|q]}} 1374 %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f32(<8 x float> %a, 1375 metadata !"fpexcept.strict") #0 1376 ret <8 x i32> %ret 1377} 1378 1379define <8 x i32> @strict_vector_fptoui_v8f32_to_v8i32(<8 x float> %a) #0 { 1380; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i32: 1381; AVX: # %bb.0: 1382; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 1383; AVX-NEXT: vcmpltps %ymm1, %ymm0, %ymm2 1384; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3 1385; AVX-NEXT: vmovaps {{.*#+}} ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648] 1386; AVX-NEXT: vblendvps %ymm2, %ymm3, %ymm4, %ymm4 1387; AVX-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1 1388; AVX-NEXT: vsubps %ymm1, %ymm0, %ymm0 1389; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 1390; AVX-NEXT: vxorps %ymm4, %ymm0, %ymm0 1391; AVX-NEXT: ret{{[l|q]}} 1392; 1393; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i32: 1394; AVX512F: # %bb.0: 1395; AVX512F-NEXT: vmovaps %ymm0, %ymm0 1396; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 1397; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1398; AVX512F-NEXT: ret{{[l|q]}} 1399; 1400; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i32: 1401; AVX512VL: # %bb.0: 1402; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0 1403; AVX512VL-NEXT: ret{{[l|q]}} 1404; 1405; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i32: 1406; AVX512DQ: # %bb.0: 1407; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 1408; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 1409; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1410; AVX512DQ-NEXT: ret{{[l|q]}} 1411; 1412; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i32: 1413; AVX512DQVL: # %bb.0: 1414; AVX512DQVL-NEXT: vcvttps2udq %ymm0, %ymm0 1415; AVX512DQVL-NEXT: ret{{[l|q]}} 1416 %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float> %a, 1417 metadata !"fpexcept.strict") #0 1418 ret <8 x i32> %ret 1419} 1420 1421define <8 x i16> @strict_vector_fptosi_v8f32_to_v8i16(<8 x float> %a) #0 { 1422; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i16: 1423; AVX: # %bb.0: 1424; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 1425; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 1426; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1427; AVX-NEXT: vzeroupper 1428; AVX-NEXT: ret{{[l|q]}} 1429; 1430; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i16: 1431; AVX512F: # %bb.0: 1432; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0 1433; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 1434; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1435; AVX512F-NEXT: vzeroupper 1436; AVX512F-NEXT: ret{{[l|q]}} 1437; 1438; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i16: 1439; AVX512VL: # %bb.0: 1440; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 1441; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 1442; AVX512VL-NEXT: vzeroupper 1443; AVX512VL-NEXT: ret{{[l|q]}} 1444; 1445; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i16: 1446; AVX512DQ: # %bb.0: 1447; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0 1448; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 1449; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1450; AVX512DQ-NEXT: vzeroupper 1451; AVX512DQ-NEXT: ret{{[l|q]}} 1452; 1453; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i16: 1454; AVX512DQVL: # %bb.0: 1455; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0 1456; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0 1457; AVX512DQVL-NEXT: vzeroupper 1458; AVX512DQVL-NEXT: ret{{[l|q]}} 1459 %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float> %a, 1460 metadata !"fpexcept.strict") #0 1461 ret <8 x i16> %ret 1462} 1463 1464define <8 x i16> @strict_vector_fptoui_v8f32_to_v8i16(<8 x float> %a) #0 { 1465; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i16: 1466; AVX: # %bb.0: 1467; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 1468; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 1469; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1470; AVX-NEXT: vzeroupper 1471; AVX-NEXT: ret{{[l|q]}} 1472; 1473; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i16: 1474; AVX512F: # %bb.0: 1475; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0 1476; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 1477; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1478; AVX512F-NEXT: vzeroupper 1479; AVX512F-NEXT: ret{{[l|q]}} 1480; 1481; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i16: 1482; AVX512VL: # %bb.0: 1483; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 1484; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 1485; AVX512VL-NEXT: vzeroupper 1486; AVX512VL-NEXT: ret{{[l|q]}} 1487; 1488; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i16: 1489; AVX512DQ: # %bb.0: 1490; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0 1491; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 1492; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1493; AVX512DQ-NEXT: vzeroupper 1494; AVX512DQ-NEXT: ret{{[l|q]}} 1495; 1496; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i16: 1497; AVX512DQVL: # %bb.0: 1498; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0 1499; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0 1500; AVX512DQVL-NEXT: vzeroupper 1501; AVX512DQVL-NEXT: ret{{[l|q]}} 1502 %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float> %a, 1503 metadata !"fpexcept.strict") #0 1504 ret <8 x i16> %ret 1505} 1506 1507define <8 x i8> @strict_vector_fptosi_v8f32_to_v8i8(<8 x float> %a) #0 { 1508; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i8: 1509; AVX: # %bb.0: 1510; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 1511; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 1512; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1513; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 1514; AVX-NEXT: vzeroupper 1515; AVX-NEXT: ret{{[l|q]}} 1516; 1517; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i8: 1518; AVX512F: # %bb.0: 1519; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0 1520; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1521; AVX512F-NEXT: vzeroupper 1522; AVX512F-NEXT: ret{{[l|q]}} 1523; 1524; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i8: 1525; AVX512VL: # %bb.0: 1526; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 1527; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0 1528; AVX512VL-NEXT: vzeroupper 1529; AVX512VL-NEXT: ret{{[l|q]}} 1530; 1531; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i8: 1532; AVX512DQ: # %bb.0: 1533; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0 1534; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1535; AVX512DQ-NEXT: vzeroupper 1536; AVX512DQ-NEXT: ret{{[l|q]}} 1537; 1538; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i8: 1539; AVX512DQVL: # %bb.0: 1540; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0 1541; AVX512DQVL-NEXT: vpmovdb %ymm0, %xmm0 1542; AVX512DQVL-NEXT: vzeroupper 1543; AVX512DQVL-NEXT: ret{{[l|q]}} 1544 %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float> %a, 1545 metadata !"fpexcept.strict") #0 1546 ret <8 x i8> %ret 1547} 1548 1549define <8 x i8> @strict_vector_fptoui_v8f32_to_v8i8(<8 x float> %a) #0 { 1550; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i8: 1551; AVX: # %bb.0: 1552; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 1553; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 1554; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1555; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 1556; AVX-NEXT: vzeroupper 1557; AVX-NEXT: ret{{[l|q]}} 1558; 1559; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i8: 1560; AVX512F: # %bb.0: 1561; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0 1562; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1563; AVX512F-NEXT: vzeroupper 1564; AVX512F-NEXT: ret{{[l|q]}} 1565; 1566; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i8: 1567; AVX512VL: # %bb.0: 1568; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 1569; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0 1570; AVX512VL-NEXT: vzeroupper 1571; AVX512VL-NEXT: ret{{[l|q]}} 1572; 1573; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i8: 1574; AVX512DQ: # %bb.0: 1575; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0 1576; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1577; AVX512DQ-NEXT: vzeroupper 1578; AVX512DQ-NEXT: ret{{[l|q]}} 1579; 1580; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i8: 1581; AVX512DQVL: # %bb.0: 1582; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0 1583; AVX512DQVL-NEXT: vpmovdb %ymm0, %xmm0 1584; AVX512DQVL-NEXT: vzeroupper 1585; AVX512DQVL-NEXT: ret{{[l|q]}} 1586 %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float> %a, 1587 metadata !"fpexcept.strict") #0 1588 ret <8 x i8> %ret 1589} 1590 1591define <8 x i1> @strict_vector_fptosi_v8f32_to_v8i1(<8 x float> %a) #0 { 1592; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i1: 1593; AVX: # %bb.0: 1594; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 1595; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 1596; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1597; AVX-NEXT: vzeroupper 1598; AVX-NEXT: ret{{[l|q]}} 1599; 1600; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i1: 1601; AVX512F: # %bb.0: 1602; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0 1603; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 1604; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1605; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 1606; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1607; AVX512F-NEXT: vzeroupper 1608; AVX512F-NEXT: ret{{[l|q]}} 1609; 1610; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i1: 1611; AVX512VL: # %bb.0: 1612; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 1613; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 1614; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 1615; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} 1616; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 1617; AVX512VL-NEXT: vzeroupper 1618; AVX512VL-NEXT: ret{{[l|q]}} 1619; 1620; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i1: 1621; AVX512DQ: # %bb.0: 1622; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0 1623; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 1624; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1625; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 1626; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1627; AVX512DQ-NEXT: vzeroupper 1628; AVX512DQ-NEXT: ret{{[l|q]}} 1629; 1630; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i1: 1631; AVX512DQVL: # %bb.0: 1632; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0 1633; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0 1634; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0 1635; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0 1636; AVX512DQVL-NEXT: vzeroupper 1637; AVX512DQVL-NEXT: ret{{[l|q]}} 1638 %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float> %a, 1639 metadata !"fpexcept.strict") #0 1640 ret <8 x i1> %ret 1641} 1642 1643define <8 x i1> @strict_vector_fptoui_v8f32_to_v8i1(<8 x float> %a) #0 { 1644; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i1: 1645; AVX: # %bb.0: 1646; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 1647; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 1648; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1649; AVX-NEXT: vzeroupper 1650; AVX-NEXT: ret{{[l|q]}} 1651; 1652; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i1: 1653; AVX512F: # %bb.0: 1654; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0 1655; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0 1656; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 1657; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1658; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 1659; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1660; AVX512F-NEXT: vzeroupper 1661; AVX512F-NEXT: ret{{[l|q]}} 1662; 1663; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i1: 1664; AVX512VL: # %bb.0: 1665; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 1666; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 1667; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 1668; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 1669; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} 1670; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 1671; AVX512VL-NEXT: vzeroupper 1672; AVX512VL-NEXT: ret{{[l|q]}} 1673; 1674; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i1: 1675; AVX512DQ: # %bb.0: 1676; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0 1677; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 1678; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 1679; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1680; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 1681; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1682; AVX512DQ-NEXT: vzeroupper 1683; AVX512DQ-NEXT: ret{{[l|q]}} 1684; 1685; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i1: 1686; AVX512DQVL: # %bb.0: 1687; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0 1688; AVX512DQVL-NEXT: vpslld $31, %ymm0, %ymm0 1689; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0 1690; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0 1691; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0 1692; AVX512DQVL-NEXT: vzeroupper 1693; AVX512DQVL-NEXT: ret{{[l|q]}} 1694 %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float> %a, 1695 metadata !"fpexcept.strict") #0 1696 ret <8 x i1> %ret 1697} 1698 1699attributes #0 = { strictfp } 1700