; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512dq < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ

; This file tests x86 lowering of the strict (constrained) floating-point
; vector intrinsics (@llvm.experimental.constrained.*) on constant operands,
; under the dynamic rounding mode and strict exception semantics, across
; SSE (CHECK), AVX (AVX/AVX1) and AVX-512 (AVX512*) configurations.
; The CHECK lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; Strict fdiv of a 1-element float vector: <1.0> / <10.0>.
define <1 x float> @constrained_vector_fdiv_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
           <1 x float> <float 1.000000e+00>,
           <1 x float> <float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %div
}

; Strict fdiv of a full 2 x double vector: <1.0, 2.0> / <10.0, 10.0>.
define <2 x double> @constrained_vector_fdiv_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; CHECK-NEXT:    divpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; AVX-NEXT:    vdivpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %div
}

49define <3 x float> @constrained_vector_fdiv_v3f32() #0 { 50; CHECK-LABEL: constrained_vector_fdiv_v3f32: 51; CHECK: # %bb.0: # %entry 52; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 53; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 54; CHECK-NEXT: divss %xmm1, %xmm2 55; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 56; CHECK-NEXT: divss %xmm1, %xmm0 57; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero 58; CHECK-NEXT: divss %xmm1, %xmm3 59; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 60; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] 61; CHECK-NEXT: retq 62; 63; AVX-LABEL: constrained_vector_fdiv_v3f32: 64; AVX: # %bb.0: # %entry 65; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 66; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 67; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm1 68; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 69; AVX-NEXT: vdivss %xmm0, %xmm2, %xmm2 70; AVX-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 71; AVX-NEXT: vdivss %xmm0, %xmm3, %xmm0 72; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] 73; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 74; AVX-NEXT: retq 75entry: 76 %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32( 77 <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, 78 <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, 79 metadata !"round.dynamic", 80 metadata !"fpexcept.strict") #0 81 ret <3 x float> %div 82} 83 84define <3 x double> @constrained_vector_fdiv_v3f64() #0 { 85; CHECK-LABEL: constrained_vector_fdiv_v3f64: 86; CHECK: # %bb.0: # %entry 87; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] 88; CHECK-NEXT: divpd {{.*}}(%rip), %xmm0 89; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 90; CHECK-NEXT: divsd {{.*}}(%rip), %xmm1 91; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 92; CHECK-NEXT: movapd %xmm0, %xmm1 93; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = 
xmm1[1],xmm0[1] 94; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 95; CHECK-NEXT: wait 96; CHECK-NEXT: retq 97; 98; AVX-LABEL: constrained_vector_fdiv_v3f64: 99; AVX: # %bb.0: # %entry 100; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 101; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0 102; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] 103; AVX-NEXT: vdivpd {{.*}}(%rip), %xmm1, %xmm1 104; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 105; AVX-NEXT: retq 106entry: 107 %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64( 108 <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>, 109 <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>, 110 metadata !"round.dynamic", 111 metadata !"fpexcept.strict") #0 112 ret <3 x double> %div 113} 114 115define <4 x double> @constrained_vector_fdiv_v4f64() #0 { 116; CHECK-LABEL: constrained_vector_fdiv_v4f64: 117; CHECK: # %bb.0: # %entry 118; CHECK-NEXT: movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1] 119; CHECK-NEXT: movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0] 120; CHECK-NEXT: divpd %xmm2, %xmm1 121; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] 122; CHECK-NEXT: divpd %xmm2, %xmm0 123; CHECK-NEXT: retq 124; 125; AVX1-LABEL: constrained_vector_fdiv_v4f64: 126; AVX1: # %bb.0: # %entry 127; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0] 128; AVX1-NEXT: vdivpd {{.*}}(%rip), %ymm0, %ymm0 129; AVX1-NEXT: retq 130; 131; AVX512-LABEL: constrained_vector_fdiv_v4f64: 132; AVX512: # %bb.0: # %entry 133; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.0E+1,1.0E+1,1.0E+1,1.0E+1] 134; AVX512-NEXT: vmovapd {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0] 135; AVX512-NEXT: vdivpd %ymm0, %ymm1, %ymm0 136; AVX512-NEXT: retq 137entry: 138 %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64( 139 <4 x double> <double 1.000000e+00, double 2.000000e+00, 140 double 3.000000e+00, double 4.000000e+00>, 141 <4 x double> <double 1.000000e+01, double 1.000000e+01, 142 double 1.000000e+01, 
double 1.000000e+01>, 143 metadata !"round.dynamic", 144 metadata !"fpexcept.strict") #0 145 ret <4 x double> %div 146} 147 148define <1 x float> @constrained_vector_frem_v1f32() #0 { 149; CHECK-LABEL: constrained_vector_frem_v1f32: 150; CHECK: # %bb.0: # %entry 151; CHECK-NEXT: pushq %rax 152; CHECK-NEXT: .cfi_def_cfa_offset 16 153; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 154; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 155; CHECK-NEXT: callq fmodf 156; CHECK-NEXT: popq %rax 157; CHECK-NEXT: .cfi_def_cfa_offset 8 158; CHECK-NEXT: retq 159; 160; AVX-LABEL: constrained_vector_frem_v1f32: 161; AVX: # %bb.0: # %entry 162; AVX-NEXT: pushq %rax 163; AVX-NEXT: .cfi_def_cfa_offset 16 164; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 165; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 166; AVX-NEXT: callq fmodf 167; AVX-NEXT: popq %rax 168; AVX-NEXT: .cfi_def_cfa_offset 8 169; AVX-NEXT: retq 170entry: 171 %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32( 172 <1 x float> <float 1.000000e+00>, 173 <1 x float> <float 1.000000e+01>, 174 metadata !"round.dynamic", 175 metadata !"fpexcept.strict") #0 176 ret <1 x float> %rem 177} 178 179define <2 x double> @constrained_vector_frem_v2f64() #0 { 180; CHECK-LABEL: constrained_vector_frem_v2f64: 181; CHECK: # %bb.0: # %entry 182; CHECK-NEXT: subq $24, %rsp 183; CHECK-NEXT: .cfi_def_cfa_offset 32 184; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 185; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 186; CHECK-NEXT: callq fmod 187; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 188; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 189; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 190; CHECK-NEXT: callq fmod 191; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 192; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 193; CHECK-NEXT: addq $24, %rsp 194; CHECK-NEXT: .cfi_def_cfa_offset 8 195; CHECK-NEXT: retq 196; 197; AVX-LABEL: constrained_vector_frem_v2f64: 198; AVX: # %bb.0: # 
%entry 199; AVX-NEXT: subq $24, %rsp 200; AVX-NEXT: .cfi_def_cfa_offset 32 201; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 202; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 203; AVX-NEXT: callq fmod 204; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 205; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 206; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 207; AVX-NEXT: callq fmod 208; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 209; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 210; AVX-NEXT: addq $24, %rsp 211; AVX-NEXT: .cfi_def_cfa_offset 8 212; AVX-NEXT: retq 213entry: 214 %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64( 215 <2 x double> <double 1.000000e+00, double 2.000000e+00>, 216 <2 x double> <double 1.000000e+01, double 1.000000e+01>, 217 metadata !"round.dynamic", 218 metadata !"fpexcept.strict") #0 219 ret <2 x double> %rem 220} 221 222define <3 x float> @constrained_vector_frem_v3f32() #0 { 223; CHECK-LABEL: constrained_vector_frem_v3f32: 224; CHECK: # %bb.0: # %entry 225; CHECK-NEXT: subq $40, %rsp 226; CHECK-NEXT: .cfi_def_cfa_offset 48 227; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 228; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 229; CHECK-NEXT: callq fmodf 230; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 231; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 232; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 233; CHECK-NEXT: callq fmodf 234; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 235; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 236; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 237; CHECK-NEXT: callq fmodf 238; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 239; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 240; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 241; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 242; CHECK-NEXT: movaps %xmm1, %xmm0 243; CHECK-NEXT: addq $40, %rsp 244; 
CHECK-NEXT: .cfi_def_cfa_offset 8 245; CHECK-NEXT: retq 246; 247; AVX-LABEL: constrained_vector_frem_v3f32: 248; AVX: # %bb.0: # %entry 249; AVX-NEXT: subq $40, %rsp 250; AVX-NEXT: .cfi_def_cfa_offset 48 251; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 252; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 253; AVX-NEXT: callq fmodf 254; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 255; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 256; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 257; AVX-NEXT: callq fmodf 258; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 259; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 260; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 261; AVX-NEXT: callq fmodf 262; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 263; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 264; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 265; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 266; AVX-NEXT: addq $40, %rsp 267; AVX-NEXT: .cfi_def_cfa_offset 8 268; AVX-NEXT: retq 269entry: 270 %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32( 271 <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, 272 <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, 273 metadata !"round.dynamic", 274 metadata !"fpexcept.strict") #0 275 ret <3 x float> %rem 276} 277 278define <3 x double> @constrained_vector_frem_v3f64() #0 { 279; CHECK-LABEL: constrained_vector_frem_v3f64: 280; CHECK: # %bb.0: # %entry 281; CHECK-NEXT: subq $24, %rsp 282; CHECK-NEXT: .cfi_def_cfa_offset 32 283; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 284; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 285; CHECK-NEXT: callq fmod 286; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 287; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 288; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 289; CHECK-NEXT: callq fmod 
290; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 291; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 292; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 293; CHECK-NEXT: callq fmod 294; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 295; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 296; CHECK-NEXT: wait 297; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 298; CHECK-NEXT: # xmm0 = mem[0],zero 299; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 300; CHECK-NEXT: # xmm1 = mem[0],zero 301; CHECK-NEXT: addq $24, %rsp 302; CHECK-NEXT: .cfi_def_cfa_offset 8 303; CHECK-NEXT: retq 304; 305; AVX-LABEL: constrained_vector_frem_v3f64: 306; AVX: # %bb.0: # %entry 307; AVX-NEXT: subq $56, %rsp 308; AVX-NEXT: .cfi_def_cfa_offset 64 309; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 310; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 311; AVX-NEXT: callq fmod 312; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 313; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 314; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 315; AVX-NEXT: callq fmod 316; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 317; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 318; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 319; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 320; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 321; AVX-NEXT: vzeroupper 322; AVX-NEXT: callq fmod 323; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 324; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 325; AVX-NEXT: addq $56, %rsp 326; AVX-NEXT: .cfi_def_cfa_offset 8 327; AVX-NEXT: retq 328entry: 329 %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64( 330 <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>, 331 <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>, 332 metadata !"round.dynamic", 333 metadata !"fpexcept.strict") #0 334 ret <3 x double> %rem 335} 336 337define <4 x double> @constrained_vector_frem_v4f64() #0 { 338; CHECK-LABEL: constrained_vector_frem_v4f64: 
339; CHECK: # %bb.0: 340; CHECK-NEXT: subq $40, %rsp 341; CHECK-NEXT: .cfi_def_cfa_offset 48 342; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 343; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 344; CHECK-NEXT: callq fmod 345; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 346; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 347; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 348; CHECK-NEXT: callq fmod 349; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 350; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 351; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 352; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 353; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 354; CHECK-NEXT: callq fmod 355; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 356; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 357; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 358; CHECK-NEXT: callq fmod 359; CHECK-NEXT: movaps %xmm0, %xmm1 360; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 361; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 362; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 363; CHECK-NEXT: addq $40, %rsp 364; CHECK-NEXT: .cfi_def_cfa_offset 8 365; CHECK-NEXT: retq 366; 367; AVX-LABEL: constrained_vector_frem_v4f64: 368; AVX: # %bb.0: 369; AVX-NEXT: subq $40, %rsp 370; AVX-NEXT: .cfi_def_cfa_offset 48 371; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 372; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 373; AVX-NEXT: callq fmod 374; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 375; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 376; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 377; AVX-NEXT: callq fmod 378; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 379; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 380; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 381; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 382; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 383; AVX-NEXT: callq fmod 384; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 
16-byte Spill 385; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 386; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 387; AVX-NEXT: callq fmod 388; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 389; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 390; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 391; AVX-NEXT: addq $40, %rsp 392; AVX-NEXT: .cfi_def_cfa_offset 8 393; AVX-NEXT: retq 394 %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64( 395 <4 x double> <double 1.000000e+00, double 2.000000e+00, 396 double 3.000000e+00, double 4.000000e+00>, 397 <4 x double> <double 1.000000e+01, double 1.000000e+01, 398 double 1.000000e+01, double 1.000000e+01>, 399 metadata !"round.dynamic", 400 metadata !"fpexcept.strict") #0 401 ret <4 x double> %rem 402} 403 404define <1 x float> @constrained_vector_fmul_v1f32() #0 { 405; CHECK-LABEL: constrained_vector_fmul_v1f32: 406; CHECK: # %bb.0: # %entry 407; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 408; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0 409; CHECK-NEXT: retq 410; 411; AVX-LABEL: constrained_vector_fmul_v1f32: 412; AVX: # %bb.0: # %entry 413; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 414; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 415; AVX-NEXT: retq 416entry: 417 %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32( 418 <1 x float> <float 0x7FF0000000000000>, 419 <1 x float> <float 2.000000e+00>, 420 metadata !"round.dynamic", 421 metadata !"fpexcept.strict") #0 422 ret <1 x float> %mul 423} 424 425define <2 x double> @constrained_vector_fmul_v2f64() #0 { 426; CHECK-LABEL: constrained_vector_fmul_v2f64: 427; CHECK: # %bb.0: # %entry 428; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 429; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 430; CHECK-NEXT: retq 431; 432; AVX-LABEL: constrained_vector_fmul_v2f64: 433; AVX: # %bb.0: # %entry 434; AVX-NEXT: vmovapd {{.*#+}} xmm0 = 
[1.7976931348623157E+308,1.7976931348623157E+308] 435; AVX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0 436; AVX-NEXT: retq 437entry: 438 %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( 439 <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>, 440 <2 x double> <double 2.000000e+00, double 3.000000e+00>, 441 metadata !"round.dynamic", 442 metadata !"fpexcept.strict") #0 443 ret <2 x double> %mul 444} 445 446define <3 x float> @constrained_vector_fmul_v3f32() #0 { 447; CHECK-LABEL: constrained_vector_fmul_v3f32: 448; CHECK: # %bb.0: # %entry 449; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 450; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 451; CHECK-NEXT: mulss %xmm1, %xmm2 452; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 453; CHECK-NEXT: mulss %xmm1, %xmm0 454; CHECK-NEXT: mulss {{.*}}(%rip), %xmm1 455; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 456; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] 457; CHECK-NEXT: retq 458; 459; AVX-LABEL: constrained_vector_fmul_v3f32: 460; AVX: # %bb.0: # %entry 461; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 462; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 463; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm2 464; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 465; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] 466; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 467; AVX-NEXT: retq 468entry: 469 %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32( 470 <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, 471 float 0x7FF0000000000000>, 472 <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>, 473 metadata !"round.dynamic", 474 metadata !"fpexcept.strict") #0 475 ret <3 x float> %mul 476} 477 478define <3 x double> @constrained_vector_fmul_v3f64() #0 { 479; CHECK-LABEL: constrained_vector_fmul_v3f64: 480; CHECK: # %bb.0: # %entry 481; CHECK-NEXT: movapd 
{{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 482; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 483; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 484; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm1 485; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 486; CHECK-NEXT: movapd %xmm0, %xmm1 487; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 488; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 489; CHECK-NEXT: wait 490; CHECK-NEXT: retq 491; 492; AVX-LABEL: constrained_vector_fmul_v3f64: 493; AVX: # %bb.0: # %entry 494; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 495; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0 496; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] 497; AVX-NEXT: vmulpd {{.*}}(%rip), %xmm1, %xmm1 498; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 499; AVX-NEXT: retq 500entry: 501 %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64( 502 <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, 503 double 0x7FEFFFFFFFFFFFFF>, 504 <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>, 505 metadata !"round.dynamic", 506 metadata !"fpexcept.strict") #0 507 ret <3 x double> %mul 508} 509 510define <4 x double> @constrained_vector_fmul_v4f64() #0 { 511; CHECK-LABEL: constrained_vector_fmul_v4f64: 512; CHECK: # %bb.0: # %entry 513; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 514; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0] 515; CHECK-NEXT: mulpd %xmm0, %xmm1 516; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 517; CHECK-NEXT: retq 518; 519; AVX1-LABEL: constrained_vector_fmul_v4f64: 520; AVX1: # %bb.0: # %entry 521; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] 522; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0 523; AVX1-NEXT: retq 524; 525; AVX512-LABEL: constrained_vector_fmul_v4f64: 526; AVX512: # %bb.0: # %entry 527; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = 
[1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] 528; AVX512-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0 529; AVX512-NEXT: retq 530entry: 531 %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64( 532 <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, 533 double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>, 534 <4 x double> <double 2.000000e+00, double 3.000000e+00, 535 double 4.000000e+00, double 5.000000e+00>, 536 metadata !"round.dynamic", 537 metadata !"fpexcept.strict") #0 538 ret <4 x double> %mul 539} 540 541define <1 x float> @constrained_vector_fadd_v1f32() #0 { 542; CHECK-LABEL: constrained_vector_fadd_v1f32: 543; CHECK: # %bb.0: # %entry 544; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 545; CHECK-NEXT: addss {{.*}}(%rip), %xmm0 546; CHECK-NEXT: retq 547; 548; AVX-LABEL: constrained_vector_fadd_v1f32: 549; AVX: # %bb.0: # %entry 550; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 551; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 552; AVX-NEXT: retq 553entry: 554 %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32( 555 <1 x float> <float 0x7FF0000000000000>, 556 <1 x float> <float 1.0>, 557 metadata !"round.dynamic", 558 metadata !"fpexcept.strict") #0 559 ret <1 x float> %add 560} 561 562define <2 x double> @constrained_vector_fadd_v2f64() #0 { 563; CHECK-LABEL: constrained_vector_fadd_v2f64: 564; CHECK: # %bb.0: # %entry 565; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 566; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 567; CHECK-NEXT: retq 568; 569; AVX-LABEL: constrained_vector_fadd_v2f64: 570; AVX: # %bb.0: # %entry 571; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 572; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0 573; AVX-NEXT: retq 574entry: 575 %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( 576 <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 
0x7FEFFFFFFFFFFFFF>, 577 <2 x double> <double 1.000000e+00, double 1.000000e-01>, 578 metadata !"round.dynamic", 579 metadata !"fpexcept.strict") #0 580 ret <2 x double> %add 581} 582 583define <3 x float> @constrained_vector_fadd_v3f32() #0 { 584; CHECK-LABEL: constrained_vector_fadd_v3f32: 585; CHECK: # %bb.0: # %entry 586; CHECK-NEXT: xorps %xmm1, %xmm1 587; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 588; CHECK-NEXT: addss %xmm2, %xmm1 589; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 590; CHECK-NEXT: addss %xmm2, %xmm0 591; CHECK-NEXT: addss {{.*}}(%rip), %xmm2 592; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 593; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 594; CHECK-NEXT: retq 595; 596; AVX-LABEL: constrained_vector_fadd_v3f32: 597; AVX: # %bb.0: # %entry 598; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 599; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 600; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 601; AVX-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm2 602; AVX-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 603; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 604; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 605; AVX-NEXT: retq 606entry: 607 %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32( 608 <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, 609 float 0xFFFFFFFFE0000000>, 610 <3 x float> <float 2.0, float 1.0, float 0.0>, 611 metadata !"round.dynamic", 612 metadata !"fpexcept.strict") #0 613 ret <3 x float> %add 614} 615 616define <3 x double> @constrained_vector_fadd_v3f64() #0 { 617; CHECK-LABEL: constrained_vector_fadd_v3f64: 618; CHECK: # %bb.0: # %entry 619; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 620; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 621; CHECK-NEXT: xorpd %xmm1, %xmm1 622; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 623; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 624; CHECK-NEXT: movapd %xmm0, %xmm1 625; 
CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 626; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 627; CHECK-NEXT: wait 628; CHECK-NEXT: retq 629; 630; AVX-LABEL: constrained_vector_fadd_v3f64: 631; AVX: # %bb.0: # %entry 632; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 633; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 634; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] 635; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1 636; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 637; AVX-NEXT: retq 638entry: 639 %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64( 640 <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, 641 double 0x7FEFFFFFFFFFFFFF>, 642 <3 x double> <double 2.0, double 1.0, double 0.0>, 643 metadata !"round.dynamic", 644 metadata !"fpexcept.strict") #0 645 ret <3 x double> %add 646} 647 648define <4 x double> @constrained_vector_fadd_v4f64() #0 { 649; CHECK-LABEL: constrained_vector_fadd_v4f64: 650; CHECK: # %bb.0: # %entry 651; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 652; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1] 653; CHECK-NEXT: addpd %xmm0, %xmm1 654; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 655; CHECK-NEXT: retq 656; 657; AVX1-LABEL: constrained_vector_fadd_v4f64: 658; AVX1: # %bb.0: # %entry 659; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] 660; AVX1-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 661; AVX1-NEXT: retq 662; 663; AVX512-LABEL: constrained_vector_fadd_v4f64: 664; AVX512: # %bb.0: # %entry 665; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] 666; AVX512-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 667; AVX512-NEXT: retq 668entry: 669 %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64( 670 <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 
0x7FEFFFFFFFFFFFFF, 671 double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>, 672 <4 x double> <double 1.000000e+00, double 1.000000e-01, 673 double 2.000000e+00, double 2.000000e-01>, 674 metadata !"round.dynamic", 675 metadata !"fpexcept.strict") #0 676 ret <4 x double> %add 677} 678 679define <1 x float> @constrained_vector_fsub_v1f32() #0 { 680; CHECK-LABEL: constrained_vector_fsub_v1f32: 681; CHECK: # %bb.0: # %entry 682; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 683; CHECK-NEXT: subss {{.*}}(%rip), %xmm0 684; CHECK-NEXT: retq 685; 686; AVX-LABEL: constrained_vector_fsub_v1f32: 687; AVX: # %bb.0: # %entry 688; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 689; AVX-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0 690; AVX-NEXT: retq 691entry: 692 %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32( 693 <1 x float> <float 0x7FF0000000000000>, 694 <1 x float> <float 1.000000e+00>, 695 metadata !"round.dynamic", 696 metadata !"fpexcept.strict") #0 697 ret <1 x float> %sub 698} 699 700define <2 x double> @constrained_vector_fsub_v2f64() #0 { 701; CHECK-LABEL: constrained_vector_fsub_v2f64: 702; CHECK: # %bb.0: # %entry 703; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 704; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 705; CHECK-NEXT: retq 706; 707; AVX-LABEL: constrained_vector_fsub_v2f64: 708; AVX: # %bb.0: # %entry 709; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 710; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 711; AVX-NEXT: retq 712entry: 713 %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( 714 <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>, 715 <2 x double> <double 1.000000e+00, double 1.000000e-01>, 716 metadata !"round.dynamic", 717 metadata !"fpexcept.strict") #0 718 ret <2 x double> %sub 719} 720 721define <3 x float> @constrained_vector_fsub_v3f32() #0 { 722; CHECK-LABEL: constrained_vector_fsub_v3f32: 723; 
CHECK: # %bb.0: # %entry 724; CHECK-NEXT: xorps %xmm0, %xmm0 725; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 726; CHECK-NEXT: movaps %xmm1, %xmm2 727; CHECK-NEXT: subss %xmm0, %xmm2 728; CHECK-NEXT: movaps %xmm1, %xmm0 729; CHECK-NEXT: subss {{.*}}(%rip), %xmm0 730; CHECK-NEXT: subss {{.*}}(%rip), %xmm1 731; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 732; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] 733; CHECK-NEXT: retq 734; 735; AVX-LABEL: constrained_vector_fsub_v3f32: 736; AVX: # %bb.0: # %entry 737; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 738; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 739; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 740; AVX-NEXT: vsubss {{.*}}(%rip), %xmm1, %xmm2 741; AVX-NEXT: vsubss {{.*}}(%rip), %xmm1, %xmm1 742; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 743; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 744; AVX-NEXT: retq 745entry: 746 %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32( 747 <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, 748 float 0xFFFFFFFFE0000000>, 749 <3 x float> <float 2.0, float 1.0, float 0.0>, 750 metadata !"round.dynamic", 751 metadata !"fpexcept.strict") #0 752 ret <3 x float> %sub 753} 754 755define <3 x double> @constrained_vector_fsub_v3f64() #0 { 756; CHECK-LABEL: constrained_vector_fsub_v3f64: 757; CHECK: # %bb.0: # %entry 758; CHECK-NEXT: xorpd %xmm0, %xmm0 759; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 760; CHECK-NEXT: subsd %xmm0, %xmm1 761; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 762; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 763; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 764; CHECK-NEXT: movapd %xmm0, %xmm1 765; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 766; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 767; CHECK-NEXT: wait 768; CHECK-NEXT: retq 769; 770; AVX-LABEL: constrained_vector_fsub_v3f64: 771; AVX: # %bb.0: # %entry 772; AVX-NEXT: 
vxorpd %xmm0, %xmm0, %xmm0 773; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 774; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 775; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 776; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 777; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 778; AVX-NEXT: retq 779entry: 780 %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64( 781 <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF, 782 double 0xFFEFFFFFFFFFFFFF>, 783 <3 x double> <double 2.0, double 1.0, double 0.0>, 784 metadata !"round.dynamic", 785 metadata !"fpexcept.strict") #0 786 ret <3 x double> %sub 787} 788 789define <4 x double> @constrained_vector_fsub_v4f64() #0 { 790; CHECK-LABEL: constrained_vector_fsub_v4f64: 791; CHECK: # %bb.0: # %entry 792; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 793; CHECK-NEXT: movapd %xmm0, %xmm1 794; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1 795; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 796; CHECK-NEXT: retq 797; 798; AVX1-LABEL: constrained_vector_fsub_v4f64: 799; AVX1: # %bb.0: # %entry 800; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] 801; AVX1-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 802; AVX1-NEXT: retq 803; 804; AVX512-LABEL: constrained_vector_fsub_v4f64: 805; AVX512: # %bb.0: # %entry 806; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] 807; AVX512-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 808; AVX512-NEXT: retq 809entry: 810 %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64( 811 <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF, 812 double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>, 813 <4 x double> <double 1.000000e+00, double 1.000000e-01, 814 double 2.000000e+00, double 2.000000e-01>, 815 metadata 
!"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %sub
}

; NOTE(review): All CHECK/AVX assertion lines in this file are autogenerated
; by utils/update_llc_test_checks.py (see the NOTE at the top of the file).
; Regenerate them with that script rather than hand-editing.

; Strict sqrt of <1 x float>: stays scalar (sqrtss on SSE, vsqrtss on AVX).
define <1 x float> @constrained_vector_sqrt_v1f32() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
            <1 x float> <float 42.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <1 x float> %sqrt
}

; Strict sqrt of <2 x double>: maps directly onto the 128-bit sqrtpd with a
; folded constant-pool load.
define <2 x double> @constrained_vector_sqrt_v2f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vsqrtpd {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
            <2 x double> <double 42.0, double 42.1>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <2 x double> %sqrt
}

; Strict sqrt of <3 x float>: the odd-sized vector is scalarized into three
; sqrtss operations and the results repacked (unpcklps/movlhps or vinsertps).
define <3 x float> @constrained_vector_sqrt_v3f32() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    sqrtss %xmm0, %xmm1
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    sqrtss %xmm2, %xmm2
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f32:
; AVX:       #
%bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vsqrtss %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
            <3 x float> <float 42.0, float 43.0, float 44.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <3 x float> %sqrt
}

; Strict sqrt of <3 x double>: a scalar sqrtsd for the odd lane plus one
; 128-bit sqrtpd; on SSE the third element is returned via the x87 stack
; (movsd to stack, fldl + wait), on AVX via vinsertf128 into a ymm.
define <3 x double> @constrained_vector_sqrt_v3f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    sqrtsd %xmm0, %xmm1
; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vsqrtpd {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
            <3 x double> <double 42.0, double 42.1, double 42.2>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <3 x double> %sqrt
}

; Strict sqrt of <4 x double>: split into two 128-bit sqrtpd on SSE
; (single 256-bit vsqrtpd on AVX, asserted below).
define <4 x double> @constrained_vector_sqrt_v4f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vsqrtpd {{.*}}(%rip), %ymm0
; AVX-NEXT:    retq
entry:
  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
            <4 x double> <double 42.0, double 42.1,
                          double 42.2, double 42.3>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <4 x double> %sqrt
}

; Strict pow has no instruction form: every element becomes a libcall
; (powf for f32), with the usual stack-alignment push/pop around the call.
define <1 x float> @constrained_vector_pow_v1f32() #0 {
; CHECK-LABEL: constrained_vector_pow_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq powf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq powf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
           <1 x float> <float 42.0>,
           <1 x float> <float 3.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %pow
}

; Strict pow of <2 x double>: two pow libcalls, first result spilled to the
; stack across the second call and the pair repacked with unpcklpd.
define <2 x double> @constrained_vector_pow_v2f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:
unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 982; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 983; CHECK-NEXT: addq $24, %rsp 984; CHECK-NEXT: .cfi_def_cfa_offset 8 985; CHECK-NEXT: retq 986; 987; AVX-LABEL: constrained_vector_pow_v2f64: 988; AVX: # %bb.0: # %entry 989; AVX-NEXT: subq $24, %rsp 990; AVX-NEXT: .cfi_def_cfa_offset 32 991; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 992; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 993; AVX-NEXT: callq pow 994; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 995; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 996; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 997; AVX-NEXT: callq pow 998; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 999; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1000; AVX-NEXT: addq $24, %rsp 1001; AVX-NEXT: .cfi_def_cfa_offset 8 1002; AVX-NEXT: retq 1003entry: 1004 %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64( 1005 <2 x double> <double 42.1, double 42.2>, 1006 <2 x double> <double 3.0, double 3.0>, 1007 metadata !"round.dynamic", 1008 metadata !"fpexcept.strict") #0 1009 ret <2 x double> %pow 1010} 1011 1012define <3 x float> @constrained_vector_pow_v3f32() #0 { 1013; CHECK-LABEL: constrained_vector_pow_v3f32: 1014; CHECK: # %bb.0: # %entry 1015; CHECK-NEXT: subq $40, %rsp 1016; CHECK-NEXT: .cfi_def_cfa_offset 48 1017; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1018; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1019; CHECK-NEXT: callq powf 1020; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1021; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1022; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1023; CHECK-NEXT: callq powf 1024; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1025; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1026; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1027; CHECK-NEXT: callq powf 1028; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1029; CHECK-NEXT: unpcklps 
{{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1030; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1031; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1032; CHECK-NEXT: movaps %xmm1, %xmm0 1033; CHECK-NEXT: addq $40, %rsp 1034; CHECK-NEXT: .cfi_def_cfa_offset 8 1035; CHECK-NEXT: retq 1036; 1037; AVX-LABEL: constrained_vector_pow_v3f32: 1038; AVX: # %bb.0: # %entry 1039; AVX-NEXT: subq $40, %rsp 1040; AVX-NEXT: .cfi_def_cfa_offset 48 1041; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1042; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1043; AVX-NEXT: callq powf 1044; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1045; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1046; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1047; AVX-NEXT: callq powf 1048; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1049; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1050; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1051; AVX-NEXT: callq powf 1052; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1053; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1054; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1055; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1056; AVX-NEXT: addq $40, %rsp 1057; AVX-NEXT: .cfi_def_cfa_offset 8 1058; AVX-NEXT: retq 1059entry: 1060 %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32( 1061 <3 x float> <float 42.0, float 43.0, float 44.0>, 1062 <3 x float> <float 3.0, float 3.0, float 3.0>, 1063 metadata !"round.dynamic", 1064 metadata !"fpexcept.strict") #0 1065 ret <3 x float> %pow 1066} 1067 1068define <3 x double> @constrained_vector_pow_v3f64() #0 { 1069; CHECK-LABEL: constrained_vector_pow_v3f64: 1070; CHECK: # %bb.0: # %entry 1071; CHECK-NEXT: subq $24, %rsp 1072; CHECK-NEXT: .cfi_def_cfa_offset 32 1073; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1074; CHECK-NEXT: movsd {{.*#+}} xmm1 = 
mem[0],zero 1075; CHECK-NEXT: callq pow 1076; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1077; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1078; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 1079; CHECK-NEXT: callq pow 1080; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 1081; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1082; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 1083; CHECK-NEXT: callq pow 1084; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 1085; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 1086; CHECK-NEXT: wait 1087; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 1088; CHECK-NEXT: # xmm0 = mem[0],zero 1089; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 1090; CHECK-NEXT: # xmm1 = mem[0],zero 1091; CHECK-NEXT: addq $24, %rsp 1092; CHECK-NEXT: .cfi_def_cfa_offset 8 1093; CHECK-NEXT: retq 1094; 1095; AVX-LABEL: constrained_vector_pow_v3f64: 1096; AVX: # %bb.0: # %entry 1097; AVX-NEXT: subq $56, %rsp 1098; AVX-NEXT: .cfi_def_cfa_offset 64 1099; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1100; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1101; AVX-NEXT: callq pow 1102; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1103; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1104; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1105; AVX-NEXT: callq pow 1106; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1107; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1108; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1109; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1110; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1111; AVX-NEXT: vzeroupper 1112; AVX-NEXT: callq pow 1113; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1114; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1115; AVX-NEXT: addq $56, %rsp 1116; AVX-NEXT: .cfi_def_cfa_offset 8 1117; AVX-NEXT: retq 1118entry: 1119 %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64( 1120 <3 x double> <double 42.0, double 42.1, double 42.2>, 1121 <3 x double> <double 3.0, double 
3.0, double 3.0>, 1122 metadata !"round.dynamic", 1123 metadata !"fpexcept.strict") #0 1124 ret <3 x double> %pow 1125} 1126 1127define <4 x double> @constrained_vector_pow_v4f64() #0 { 1128; CHECK-LABEL: constrained_vector_pow_v4f64: 1129; CHECK: # %bb.0: # %entry 1130; CHECK-NEXT: subq $40, %rsp 1131; CHECK-NEXT: .cfi_def_cfa_offset 48 1132; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1133; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 1134; CHECK-NEXT: callq pow 1135; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1136; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1137; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 1138; CHECK-NEXT: callq pow 1139; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1140; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1141; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1142; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1143; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 1144; CHECK-NEXT: callq pow 1145; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1146; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1147; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 1148; CHECK-NEXT: callq pow 1149; CHECK-NEXT: movaps %xmm0, %xmm1 1150; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1151; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1152; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1153; CHECK-NEXT: addq $40, %rsp 1154; CHECK-NEXT: .cfi_def_cfa_offset 8 1155; CHECK-NEXT: retq 1156; 1157; AVX-LABEL: constrained_vector_pow_v4f64: 1158; AVX: # %bb.0: # %entry 1159; AVX-NEXT: subq $40, %rsp 1160; AVX-NEXT: .cfi_def_cfa_offset 48 1161; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1162; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1163; AVX-NEXT: callq pow 1164; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1165; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1166; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1167; AVX-NEXT: callq pow 1168; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded 
Reload 1169; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1170; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1171; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1172; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1173; AVX-NEXT: callq pow 1174; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1175; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1176; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1177; AVX-NEXT: callq pow 1178; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1179; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1180; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1181; AVX-NEXT: addq $40, %rsp 1182; AVX-NEXT: .cfi_def_cfa_offset 8 1183; AVX-NEXT: retq 1184entry: 1185 %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64( 1186 <4 x double> <double 42.1, double 42.2, 1187 double 42.3, double 42.4>, 1188 <4 x double> <double 3.0, double 3.0, 1189 double 3.0, double 3.0>, 1190 metadata !"round.dynamic", 1191 metadata !"fpexcept.strict") #0 1192 ret <4 x double> %pow 1193} 1194 1195define <1 x float> @constrained_vector_powi_v1f32() #0 { 1196; CHECK-LABEL: constrained_vector_powi_v1f32: 1197; CHECK: # %bb.0: # %entry 1198; CHECK-NEXT: pushq %rax 1199; CHECK-NEXT: .cfi_def_cfa_offset 16 1200; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1201; CHECK-NEXT: movl $3, %edi 1202; CHECK-NEXT: callq __powisf2 1203; CHECK-NEXT: popq %rax 1204; CHECK-NEXT: .cfi_def_cfa_offset 8 1205; CHECK-NEXT: retq 1206; 1207; AVX-LABEL: constrained_vector_powi_v1f32: 1208; AVX: # %bb.0: # %entry 1209; AVX-NEXT: pushq %rax 1210; AVX-NEXT: .cfi_def_cfa_offset 16 1211; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1212; AVX-NEXT: movl $3, %edi 1213; AVX-NEXT: callq __powisf2 1214; AVX-NEXT: popq %rax 1215; AVX-NEXT: .cfi_def_cfa_offset 8 1216; AVX-NEXT: retq 1217entry: 1218 %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32( 1219 <1 x float> <float 42.0>, 1220 i32 3, 1221 metadata 
!"round.dynamic", 1222 metadata !"fpexcept.strict") #0 1223 ret <1 x float> %powi 1224} 1225 1226define <2 x double> @constrained_vector_powi_v2f64() #0 { 1227; CHECK-LABEL: constrained_vector_powi_v2f64: 1228; CHECK: # %bb.0: # %entry 1229; CHECK-NEXT: subq $24, %rsp 1230; CHECK-NEXT: .cfi_def_cfa_offset 32 1231; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1232; CHECK-NEXT: movl $3, %edi 1233; CHECK-NEXT: callq __powidf2 1234; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1235; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1236; CHECK-NEXT: movl $3, %edi 1237; CHECK-NEXT: callq __powidf2 1238; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1239; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1240; CHECK-NEXT: addq $24, %rsp 1241; CHECK-NEXT: .cfi_def_cfa_offset 8 1242; CHECK-NEXT: retq 1243; 1244; AVX-LABEL: constrained_vector_powi_v2f64: 1245; AVX: # %bb.0: # %entry 1246; AVX-NEXT: subq $24, %rsp 1247; AVX-NEXT: .cfi_def_cfa_offset 32 1248; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1249; AVX-NEXT: movl $3, %edi 1250; AVX-NEXT: callq __powidf2 1251; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1252; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1253; AVX-NEXT: movl $3, %edi 1254; AVX-NEXT: callq __powidf2 1255; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1256; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1257; AVX-NEXT: addq $24, %rsp 1258; AVX-NEXT: .cfi_def_cfa_offset 8 1259; AVX-NEXT: retq 1260entry: 1261 %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64( 1262 <2 x double> <double 42.1, double 42.2>, 1263 i32 3, 1264 metadata !"round.dynamic", 1265 metadata !"fpexcept.strict") #0 1266 ret <2 x double> %powi 1267} 1268 1269define <3 x float> @constrained_vector_powi_v3f32() #0 { 1270; CHECK-LABEL: constrained_vector_powi_v3f32: 1271; CHECK: # %bb.0: # %entry 1272; CHECK-NEXT: subq $40, %rsp 1273; CHECK-NEXT: .cfi_def_cfa_offset 48 1274; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1275; CHECK-NEXT: movl $3, %edi 
1276; CHECK-NEXT: callq __powisf2 1277; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1278; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1279; CHECK-NEXT: movl $3, %edi 1280; CHECK-NEXT: callq __powisf2 1281; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1282; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1283; CHECK-NEXT: movl $3, %edi 1284; CHECK-NEXT: callq __powisf2 1285; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1286; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1287; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1288; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1289; CHECK-NEXT: movaps %xmm1, %xmm0 1290; CHECK-NEXT: addq $40, %rsp 1291; CHECK-NEXT: .cfi_def_cfa_offset 8 1292; CHECK-NEXT: retq 1293; 1294; AVX-LABEL: constrained_vector_powi_v3f32: 1295; AVX: # %bb.0: # %entry 1296; AVX-NEXT: subq $40, %rsp 1297; AVX-NEXT: .cfi_def_cfa_offset 48 1298; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1299; AVX-NEXT: movl $3, %edi 1300; AVX-NEXT: callq __powisf2 1301; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1302; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1303; AVX-NEXT: movl $3, %edi 1304; AVX-NEXT: callq __powisf2 1305; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1306; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1307; AVX-NEXT: movl $3, %edi 1308; AVX-NEXT: callq __powisf2 1309; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1310; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1311; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1312; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1313; AVX-NEXT: addq $40, %rsp 1314; AVX-NEXT: .cfi_def_cfa_offset 8 1315; AVX-NEXT: retq 1316entry: 1317 %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32( 1318 <3 x float> <float 42.0, float 43.0, float 44.0>, 1319 i32 3, 1320 metadata !"round.dynamic", 
1321 metadata !"fpexcept.strict") #0 1322 ret <3 x float> %powi 1323} 1324 1325define <3 x double> @constrained_vector_powi_v3f64() #0 { 1326; CHECK-LABEL: constrained_vector_powi_v3f64: 1327; CHECK: # %bb.0: # %entry 1328; CHECK-NEXT: subq $24, %rsp 1329; CHECK-NEXT: .cfi_def_cfa_offset 32 1330; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1331; CHECK-NEXT: movl $3, %edi 1332; CHECK-NEXT: callq __powidf2 1333; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1334; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1335; CHECK-NEXT: movl $3, %edi 1336; CHECK-NEXT: callq __powidf2 1337; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 1338; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1339; CHECK-NEXT: movl $3, %edi 1340; CHECK-NEXT: callq __powidf2 1341; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 1342; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 1343; CHECK-NEXT: wait 1344; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 1345; CHECK-NEXT: # xmm0 = mem[0],zero 1346; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 1347; CHECK-NEXT: # xmm1 = mem[0],zero 1348; CHECK-NEXT: addq $24, %rsp 1349; CHECK-NEXT: .cfi_def_cfa_offset 8 1350; CHECK-NEXT: retq 1351; 1352; AVX-LABEL: constrained_vector_powi_v3f64: 1353; AVX: # %bb.0: # %entry 1354; AVX-NEXT: subq $56, %rsp 1355; AVX-NEXT: .cfi_def_cfa_offset 64 1356; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1357; AVX-NEXT: movl $3, %edi 1358; AVX-NEXT: callq __powidf2 1359; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1360; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1361; AVX-NEXT: movl $3, %edi 1362; AVX-NEXT: callq __powidf2 1363; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1364; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1365; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1366; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1367; AVX-NEXT: movl $3, %edi 1368; AVX-NEXT: vzeroupper 1369; AVX-NEXT: callq __powidf2 1370; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1371; AVX-NEXT: 
vinsertf128 $1, %xmm0, %ymm1, %ymm0 1372; AVX-NEXT: addq $56, %rsp 1373; AVX-NEXT: .cfi_def_cfa_offset 8 1374; AVX-NEXT: retq 1375entry: 1376 %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64( 1377 <3 x double> <double 42.0, double 42.1, double 42.2>, 1378 i32 3, 1379 metadata !"round.dynamic", 1380 metadata !"fpexcept.strict") #0 1381 ret <3 x double> %powi 1382} 1383 1384define <4 x double> @constrained_vector_powi_v4f64() #0 { 1385; CHECK-LABEL: constrained_vector_powi_v4f64: 1386; CHECK: # %bb.0: # %entry 1387; CHECK-NEXT: subq $40, %rsp 1388; CHECK-NEXT: .cfi_def_cfa_offset 48 1389; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1390; CHECK-NEXT: movl $3, %edi 1391; CHECK-NEXT: callq __powidf2 1392; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1393; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1394; CHECK-NEXT: movl $3, %edi 1395; CHECK-NEXT: callq __powidf2 1396; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1397; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1398; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1399; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1400; CHECK-NEXT: movl $3, %edi 1401; CHECK-NEXT: callq __powidf2 1402; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1403; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1404; CHECK-NEXT: movl $3, %edi 1405; CHECK-NEXT: callq __powidf2 1406; CHECK-NEXT: movaps %xmm0, %xmm1 1407; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1408; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1409; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1410; CHECK-NEXT: addq $40, %rsp 1411; CHECK-NEXT: .cfi_def_cfa_offset 8 1412; CHECK-NEXT: retq 1413; 1414; AVX-LABEL: constrained_vector_powi_v4f64: 1415; AVX: # %bb.0: # %entry 1416; AVX-NEXT: subq $40, %rsp 1417; AVX-NEXT: .cfi_def_cfa_offset 48 1418; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1419; AVX-NEXT: movl $3, %edi 1420; AVX-NEXT: callq __powidf2 1421; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte 
Spill 1422; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1423; AVX-NEXT: movl $3, %edi 1424; AVX-NEXT: callq __powidf2 1425; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1426; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1427; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1428; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1429; AVX-NEXT: movl $3, %edi 1430; AVX-NEXT: callq __powidf2 1431; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1432; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1433; AVX-NEXT: movl $3, %edi 1434; AVX-NEXT: callq __powidf2 1435; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1436; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1437; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1438; AVX-NEXT: addq $40, %rsp 1439; AVX-NEXT: .cfi_def_cfa_offset 8 1440; AVX-NEXT: retq 1441entry: 1442 %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64( 1443 <4 x double> <double 42.1, double 42.2, 1444 double 42.3, double 42.4>, 1445 i32 3, 1446 metadata !"round.dynamic", 1447 metadata !"fpexcept.strict") #0 1448 ret <4 x double> %powi 1449} 1450 1451define <1 x float> @constrained_vector_sin_v1f32() #0 { 1452; CHECK-LABEL: constrained_vector_sin_v1f32: 1453; CHECK: # %bb.0: # %entry 1454; CHECK-NEXT: pushq %rax 1455; CHECK-NEXT: .cfi_def_cfa_offset 16 1456; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1457; CHECK-NEXT: callq sinf 1458; CHECK-NEXT: popq %rax 1459; CHECK-NEXT: .cfi_def_cfa_offset 8 1460; CHECK-NEXT: retq 1461; 1462; AVX-LABEL: constrained_vector_sin_v1f32: 1463; AVX: # %bb.0: # %entry 1464; AVX-NEXT: pushq %rax 1465; AVX-NEXT: .cfi_def_cfa_offset 16 1466; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1467; AVX-NEXT: callq sinf 1468; AVX-NEXT: popq %rax 1469; AVX-NEXT: .cfi_def_cfa_offset 8 1470; AVX-NEXT: retq 1471entry: 1472 %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32( 1473 <1 x float> <float 42.0>, 1474 metadata 
!"round.dynamic", 1475 metadata !"fpexcept.strict") #0 1476 ret <1 x float> %sin 1477} 1478 1479define <2 x double> @constrained_vector_sin_v2f64() #0 { 1480; CHECK-LABEL: constrained_vector_sin_v2f64: 1481; CHECK: # %bb.0: # %entry 1482; CHECK-NEXT: subq $24, %rsp 1483; CHECK-NEXT: .cfi_def_cfa_offset 32 1484; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1485; CHECK-NEXT: callq sin 1486; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1487; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1488; CHECK-NEXT: callq sin 1489; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1490; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1491; CHECK-NEXT: addq $24, %rsp 1492; CHECK-NEXT: .cfi_def_cfa_offset 8 1493; CHECK-NEXT: retq 1494; 1495; AVX-LABEL: constrained_vector_sin_v2f64: 1496; AVX: # %bb.0: # %entry 1497; AVX-NEXT: subq $24, %rsp 1498; AVX-NEXT: .cfi_def_cfa_offset 32 1499; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1500; AVX-NEXT: callq sin 1501; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1502; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1503; AVX-NEXT: callq sin 1504; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1505; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1506; AVX-NEXT: addq $24, %rsp 1507; AVX-NEXT: .cfi_def_cfa_offset 8 1508; AVX-NEXT: retq 1509entry: 1510 %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64( 1511 <2 x double> <double 42.0, double 42.1>, 1512 metadata !"round.dynamic", 1513 metadata !"fpexcept.strict") #0 1514 ret <2 x double> %sin 1515} 1516 1517define <3 x float> @constrained_vector_sin_v3f32() #0 { 1518; CHECK-LABEL: constrained_vector_sin_v3f32: 1519; CHECK: # %bb.0: # %entry 1520; CHECK-NEXT: subq $40, %rsp 1521; CHECK-NEXT: .cfi_def_cfa_offset 48 1522; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1523; CHECK-NEXT: callq sinf 1524; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1525; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1526; CHECK-NEXT: callq sinf 1527; 
CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1528; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1529; CHECK-NEXT: callq sinf 1530; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1531; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1532; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1533; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1534; CHECK-NEXT: movaps %xmm1, %xmm0 1535; CHECK-NEXT: addq $40, %rsp 1536; CHECK-NEXT: .cfi_def_cfa_offset 8 1537; CHECK-NEXT: retq 1538; 1539; AVX-LABEL: constrained_vector_sin_v3f32: 1540; AVX: # %bb.0: # %entry 1541; AVX-NEXT: subq $40, %rsp 1542; AVX-NEXT: .cfi_def_cfa_offset 48 1543; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1544; AVX-NEXT: callq sinf 1545; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1546; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1547; AVX-NEXT: callq sinf 1548; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1549; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1550; AVX-NEXT: callq sinf 1551; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1552; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1553; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1554; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1555; AVX-NEXT: addq $40, %rsp 1556; AVX-NEXT: .cfi_def_cfa_offset 8 1557; AVX-NEXT: retq 1558entry: 1559 %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32( 1560 <3 x float> <float 42.0, float 43.0, float 44.0>, 1561 metadata !"round.dynamic", 1562 metadata !"fpexcept.strict") #0 1563 ret <3 x float> %sin 1564} 1565 1566define <3 x double> @constrained_vector_sin_v3f64() #0 { 1567; CHECK-LABEL: constrained_vector_sin_v3f64: 1568; CHECK: # %bb.0: # %entry 1569; CHECK-NEXT: subq $24, %rsp 1570; CHECK-NEXT: .cfi_def_cfa_offset 32 1571; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1572; CHECK-NEXT: callq sin 1573; CHECK-NEXT: movsd %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1574; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1575; CHECK-NEXT: callq sin 1576; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 1577; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1578; CHECK-NEXT: callq sin 1579; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 1580; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 1581; CHECK-NEXT: wait 1582; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 1583; CHECK-NEXT: # xmm0 = mem[0],zero 1584; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 1585; CHECK-NEXT: # xmm1 = mem[0],zero 1586; CHECK-NEXT: addq $24, %rsp 1587; CHECK-NEXT: .cfi_def_cfa_offset 8 1588; CHECK-NEXT: retq 1589; 1590; AVX-LABEL: constrained_vector_sin_v3f64: 1591; AVX: # %bb.0: # %entry 1592; AVX-NEXT: subq $56, %rsp 1593; AVX-NEXT: .cfi_def_cfa_offset 64 1594; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1595; AVX-NEXT: callq sin 1596; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1597; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1598; AVX-NEXT: callq sin 1599; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1600; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1601; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1602; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1603; AVX-NEXT: vzeroupper 1604; AVX-NEXT: callq sin 1605; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1606; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1607; AVX-NEXT: addq $56, %rsp 1608; AVX-NEXT: .cfi_def_cfa_offset 8 1609; AVX-NEXT: retq 1610entry: 1611 %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64( 1612 <3 x double> <double 42.0, double 42.1, double 42.2>, 1613 metadata !"round.dynamic", 1614 metadata !"fpexcept.strict") #0 1615 ret <3 x double> %sin 1616} 1617 1618define <4 x double> @constrained_vector_sin_v4f64() #0 { 1619; CHECK-LABEL: constrained_vector_sin_v4f64: 1620; CHECK: # %bb.0: # %entry 1621; CHECK-NEXT: subq $40, %rsp 1622; CHECK-NEXT: .cfi_def_cfa_offset 48 1623; CHECK-NEXT: movsd {{.*#+}} xmm0 = 
mem[0],zero 1624; CHECK-NEXT: callq sin 1625; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1626; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1627; CHECK-NEXT: callq sin 1628; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1629; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1630; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1631; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1632; CHECK-NEXT: callq sin 1633; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1634; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1635; CHECK-NEXT: callq sin 1636; CHECK-NEXT: movaps %xmm0, %xmm1 1637; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1638; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1639; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1640; CHECK-NEXT: addq $40, %rsp 1641; CHECK-NEXT: .cfi_def_cfa_offset 8 1642; CHECK-NEXT: retq 1643; 1644; AVX-LABEL: constrained_vector_sin_v4f64: 1645; AVX: # %bb.0: # %entry 1646; AVX-NEXT: subq $40, %rsp 1647; AVX-NEXT: .cfi_def_cfa_offset 48 1648; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1649; AVX-NEXT: callq sin 1650; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1651; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1652; AVX-NEXT: callq sin 1653; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1654; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1655; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1656; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1657; AVX-NEXT: callq sin 1658; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1659; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1660; AVX-NEXT: callq sin 1661; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1662; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1663; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1664; AVX-NEXT: addq $40, %rsp 1665; AVX-NEXT: .cfi_def_cfa_offset 8 1666; AVX-NEXT: retq 1667entry: 1668 %sin = call <4 x double> 
@llvm.experimental.constrained.sin.v4f64( 1669 <4 x double> <double 42.0, double 42.1, 1670 double 42.2, double 42.3>, 1671 metadata !"round.dynamic", 1672 metadata !"fpexcept.strict") #0 1673 ret <4 x double> %sin 1674} 1675 1676define <1 x float> @constrained_vector_cos_v1f32() #0 { 1677; CHECK-LABEL: constrained_vector_cos_v1f32: 1678; CHECK: # %bb.0: # %entry 1679; CHECK-NEXT: pushq %rax 1680; CHECK-NEXT: .cfi_def_cfa_offset 16 1681; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1682; CHECK-NEXT: callq cosf 1683; CHECK-NEXT: popq %rax 1684; CHECK-NEXT: .cfi_def_cfa_offset 8 1685; CHECK-NEXT: retq 1686; 1687; AVX-LABEL: constrained_vector_cos_v1f32: 1688; AVX: # %bb.0: # %entry 1689; AVX-NEXT: pushq %rax 1690; AVX-NEXT: .cfi_def_cfa_offset 16 1691; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1692; AVX-NEXT: callq cosf 1693; AVX-NEXT: popq %rax 1694; AVX-NEXT: .cfi_def_cfa_offset 8 1695; AVX-NEXT: retq 1696entry: 1697 %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32( 1698 <1 x float> <float 42.0>, 1699 metadata !"round.dynamic", 1700 metadata !"fpexcept.strict") #0 1701 ret <1 x float> %cos 1702} 1703 1704define <2 x double> @constrained_vector_cos_v2f64() #0 { 1705; CHECK-LABEL: constrained_vector_cos_v2f64: 1706; CHECK: # %bb.0: # %entry 1707; CHECK-NEXT: subq $24, %rsp 1708; CHECK-NEXT: .cfi_def_cfa_offset 32 1709; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1710; CHECK-NEXT: callq cos 1711; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1712; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1713; CHECK-NEXT: callq cos 1714; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1715; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1716; CHECK-NEXT: addq $24, %rsp 1717; CHECK-NEXT: .cfi_def_cfa_offset 8 1718; CHECK-NEXT: retq 1719; 1720; AVX-LABEL: constrained_vector_cos_v2f64: 1721; AVX: # %bb.0: # %entry 1722; AVX-NEXT: subq $24, %rsp 1723; AVX-NEXT: .cfi_def_cfa_offset 32 1724; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1725; 
AVX-NEXT: callq cos 1726; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1727; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1728; AVX-NEXT: callq cos 1729; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1730; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1731; AVX-NEXT: addq $24, %rsp 1732; AVX-NEXT: .cfi_def_cfa_offset 8 1733; AVX-NEXT: retq 1734entry: 1735 %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64( 1736 <2 x double> <double 42.0, double 42.1>, 1737 metadata !"round.dynamic", 1738 metadata !"fpexcept.strict") #0 1739 ret <2 x double> %cos 1740} 1741 1742define <3 x float> @constrained_vector_cos_v3f32() #0 { 1743; CHECK-LABEL: constrained_vector_cos_v3f32: 1744; CHECK: # %bb.0: # %entry 1745; CHECK-NEXT: subq $40, %rsp 1746; CHECK-NEXT: .cfi_def_cfa_offset 48 1747; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1748; CHECK-NEXT: callq cosf 1749; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1750; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1751; CHECK-NEXT: callq cosf 1752; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1753; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1754; CHECK-NEXT: callq cosf 1755; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1756; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1757; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1758; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1759; CHECK-NEXT: movaps %xmm1, %xmm0 1760; CHECK-NEXT: addq $40, %rsp 1761; CHECK-NEXT: .cfi_def_cfa_offset 8 1762; CHECK-NEXT: retq 1763; 1764; AVX-LABEL: constrained_vector_cos_v3f32: 1765; AVX: # %bb.0: # %entry 1766; AVX-NEXT: subq $40, %rsp 1767; AVX-NEXT: .cfi_def_cfa_offset 48 1768; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1769; AVX-NEXT: callq cosf 1770; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1771; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1772; AVX-NEXT: callq cosf 1773; 
AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1774; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1775; AVX-NEXT: callq cosf 1776; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1777; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1778; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1779; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1780; AVX-NEXT: addq $40, %rsp 1781; AVX-NEXT: .cfi_def_cfa_offset 8 1782; AVX-NEXT: retq 1783entry: 1784 %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32( 1785 <3 x float> <float 42.0, float 43.0, float 44.0>, 1786 metadata !"round.dynamic", 1787 metadata !"fpexcept.strict") #0 1788 ret <3 x float> %cos 1789} 1790 1791define <3 x double> @constrained_vector_cos_v3f64() #0 { 1792; CHECK-LABEL: constrained_vector_cos_v3f64: 1793; CHECK: # %bb.0: # %entry 1794; CHECK-NEXT: subq $24, %rsp 1795; CHECK-NEXT: .cfi_def_cfa_offset 32 1796; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1797; CHECK-NEXT: callq cos 1798; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1799; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1800; CHECK-NEXT: callq cos 1801; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 1802; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1803; CHECK-NEXT: callq cos 1804; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 1805; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 1806; CHECK-NEXT: wait 1807; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 1808; CHECK-NEXT: # xmm0 = mem[0],zero 1809; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 1810; CHECK-NEXT: # xmm1 = mem[0],zero 1811; CHECK-NEXT: addq $24, %rsp 1812; CHECK-NEXT: .cfi_def_cfa_offset 8 1813; CHECK-NEXT: retq 1814; 1815; AVX-LABEL: constrained_vector_cos_v3f64: 1816; AVX: # %bb.0: # %entry 1817; AVX-NEXT: subq $56, %rsp 1818; AVX-NEXT: .cfi_def_cfa_offset 64 1819; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1820; AVX-NEXT: callq cos 1821; AVX-NEXT: vmovaps %xmm0, (%rsp) # 
16-byte Spill 1822; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1823; AVX-NEXT: callq cos 1824; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1825; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1826; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1827; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1828; AVX-NEXT: vzeroupper 1829; AVX-NEXT: callq cos 1830; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1831; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1832; AVX-NEXT: addq $56, %rsp 1833; AVX-NEXT: .cfi_def_cfa_offset 8 1834; AVX-NEXT: retq 1835entry: 1836 %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64( 1837 <3 x double> <double 42.0, double 42.1, double 42.2>, 1838 metadata !"round.dynamic", 1839 metadata !"fpexcept.strict") #0 1840 ret <3 x double> %cos 1841} 1842 1843define <4 x double> @constrained_vector_cos_v4f64() #0 { 1844; CHECK-LABEL: constrained_vector_cos_v4f64: 1845; CHECK: # %bb.0: # %entry 1846; CHECK-NEXT: subq $40, %rsp 1847; CHECK-NEXT: .cfi_def_cfa_offset 48 1848; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1849; CHECK-NEXT: callq cos 1850; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1851; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1852; CHECK-NEXT: callq cos 1853; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1854; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1855; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1856; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1857; CHECK-NEXT: callq cos 1858; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1859; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1860; CHECK-NEXT: callq cos 1861; CHECK-NEXT: movaps %xmm0, %xmm1 1862; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1863; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1864; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1865; CHECK-NEXT: addq $40, %rsp 1866; CHECK-NEXT: .cfi_def_cfa_offset 8 1867; CHECK-NEXT: retq 1868; 1869; AVX-LABEL: constrained_vector_cos_v4f64: 
1870; AVX: # %bb.0: # %entry 1871; AVX-NEXT: subq $40, %rsp 1872; AVX-NEXT: .cfi_def_cfa_offset 48 1873; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1874; AVX-NEXT: callq cos 1875; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1876; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1877; AVX-NEXT: callq cos 1878; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1879; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1880; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1881; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1882; AVX-NEXT: callq cos 1883; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1884; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1885; AVX-NEXT: callq cos 1886; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1887; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1888; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1889; AVX-NEXT: addq $40, %rsp 1890; AVX-NEXT: .cfi_def_cfa_offset 8 1891; AVX-NEXT: retq 1892entry: 1893 %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64( 1894 <4 x double> <double 42.0, double 42.1, 1895 double 42.2, double 42.3>, 1896 metadata !"round.dynamic", 1897 metadata !"fpexcept.strict") #0 1898 ret <4 x double> %cos 1899} 1900 1901define <1 x float> @constrained_vector_exp_v1f32() #0 { 1902; CHECK-LABEL: constrained_vector_exp_v1f32: 1903; CHECK: # %bb.0: # %entry 1904; CHECK-NEXT: pushq %rax 1905; CHECK-NEXT: .cfi_def_cfa_offset 16 1906; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1907; CHECK-NEXT: callq expf 1908; CHECK-NEXT: popq %rax 1909; CHECK-NEXT: .cfi_def_cfa_offset 8 1910; CHECK-NEXT: retq 1911; 1912; AVX-LABEL: constrained_vector_exp_v1f32: 1913; AVX: # %bb.0: # %entry 1914; AVX-NEXT: pushq %rax 1915; AVX-NEXT: .cfi_def_cfa_offset 16 1916; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1917; AVX-NEXT: callq expf 1918; AVX-NEXT: popq %rax 1919; AVX-NEXT: .cfi_def_cfa_offset 8 1920; AVX-NEXT: retq 1921entry: 1922 %exp = 
call <1 x float> @llvm.experimental.constrained.exp.v1f32( 1923 <1 x float> <float 42.0>, 1924 metadata !"round.dynamic", 1925 metadata !"fpexcept.strict") #0 1926 ret <1 x float> %exp 1927} 1928 1929define <2 x double> @constrained_vector_exp_v2f64() #0 { 1930; CHECK-LABEL: constrained_vector_exp_v2f64: 1931; CHECK: # %bb.0: # %entry 1932; CHECK-NEXT: subq $24, %rsp 1933; CHECK-NEXT: .cfi_def_cfa_offset 32 1934; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1935; CHECK-NEXT: callq exp 1936; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1937; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1938; CHECK-NEXT: callq exp 1939; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1940; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1941; CHECK-NEXT: addq $24, %rsp 1942; CHECK-NEXT: .cfi_def_cfa_offset 8 1943; CHECK-NEXT: retq 1944; 1945; AVX-LABEL: constrained_vector_exp_v2f64: 1946; AVX: # %bb.0: # %entry 1947; AVX-NEXT: subq $24, %rsp 1948; AVX-NEXT: .cfi_def_cfa_offset 32 1949; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1950; AVX-NEXT: callq exp 1951; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1952; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1953; AVX-NEXT: callq exp 1954; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1955; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1956; AVX-NEXT: addq $24, %rsp 1957; AVX-NEXT: .cfi_def_cfa_offset 8 1958; AVX-NEXT: retq 1959entry: 1960 %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64( 1961 <2 x double> <double 42.0, double 42.1>, 1962 metadata !"round.dynamic", 1963 metadata !"fpexcept.strict") #0 1964 ret <2 x double> %exp 1965} 1966 1967define <3 x float> @constrained_vector_exp_v3f32() #0 { 1968; CHECK-LABEL: constrained_vector_exp_v3f32: 1969; CHECK: # %bb.0: # %entry 1970; CHECK-NEXT: subq $40, %rsp 1971; CHECK-NEXT: .cfi_def_cfa_offset 48 1972; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1973; CHECK-NEXT: callq expf 1974; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte 
Spill 1975; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1976; CHECK-NEXT: callq expf 1977; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1978; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1979; CHECK-NEXT: callq expf 1980; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1981; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1982; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1983; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1984; CHECK-NEXT: movaps %xmm1, %xmm0 1985; CHECK-NEXT: addq $40, %rsp 1986; CHECK-NEXT: .cfi_def_cfa_offset 8 1987; CHECK-NEXT: retq 1988; 1989; AVX-LABEL: constrained_vector_exp_v3f32: 1990; AVX: # %bb.0: # %entry 1991; AVX-NEXT: subq $40, %rsp 1992; AVX-NEXT: .cfi_def_cfa_offset 48 1993; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1994; AVX-NEXT: callq expf 1995; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1996; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1997; AVX-NEXT: callq expf 1998; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1999; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2000; AVX-NEXT: callq expf 2001; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 2002; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 2003; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2004; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 2005; AVX-NEXT: addq $40, %rsp 2006; AVX-NEXT: .cfi_def_cfa_offset 8 2007; AVX-NEXT: retq 2008entry: 2009 %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32( 2010 <3 x float> <float 42.0, float 43.0, float 44.0>, 2011 metadata !"round.dynamic", 2012 metadata !"fpexcept.strict") #0 2013 ret <3 x float> %exp 2014} 2015 2016define <3 x double> @constrained_vector_exp_v3f64() #0 { 2017; CHECK-LABEL: constrained_vector_exp_v3f64: 2018; CHECK: # %bb.0: # %entry 2019; CHECK-NEXT: subq $24, %rsp 2020; CHECK-NEXT: .cfi_def_cfa_offset 32 2021; 
CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2022; CHECK-NEXT: callq exp 2023; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2024; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2025; CHECK-NEXT: callq exp 2026; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 2027; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2028; CHECK-NEXT: callq exp 2029; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 2030; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 2031; CHECK-NEXT: wait 2032; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 2033; CHECK-NEXT: # xmm0 = mem[0],zero 2034; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 2035; CHECK-NEXT: # xmm1 = mem[0],zero 2036; CHECK-NEXT: addq $24, %rsp 2037; CHECK-NEXT: .cfi_def_cfa_offset 8 2038; CHECK-NEXT: retq 2039; 2040; AVX-LABEL: constrained_vector_exp_v3f64: 2041; AVX: # %bb.0: # %entry 2042; AVX-NEXT: subq $56, %rsp 2043; AVX-NEXT: .cfi_def_cfa_offset 64 2044; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2045; AVX-NEXT: callq exp 2046; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2047; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2048; AVX-NEXT: callq exp 2049; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2050; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2051; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 2052; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2053; AVX-NEXT: vzeroupper 2054; AVX-NEXT: callq exp 2055; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 2056; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2057; AVX-NEXT: addq $56, %rsp 2058; AVX-NEXT: .cfi_def_cfa_offset 8 2059; AVX-NEXT: retq 2060entry: 2061 %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64( 2062 <3 x double> <double 42.0, double 42.1, double 42.2>, 2063 metadata !"round.dynamic", 2064 metadata !"fpexcept.strict") #0 2065 ret <3 x double> %exp 2066} 2067 2068define <4 x double> @constrained_vector_exp_v4f64() #0 { 2069; CHECK-LABEL: constrained_vector_exp_v4f64: 2070; CHECK: # %bb.0: # %entry 2071; 
CHECK-NEXT: subq $40, %rsp 2072; CHECK-NEXT: .cfi_def_cfa_offset 48 2073; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2074; CHECK-NEXT: callq exp 2075; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2076; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2077; CHECK-NEXT: callq exp 2078; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2079; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2080; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2081; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2082; CHECK-NEXT: callq exp 2083; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2084; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2085; CHECK-NEXT: callq exp 2086; CHECK-NEXT: movaps %xmm0, %xmm1 2087; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2088; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2089; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 2090; CHECK-NEXT: addq $40, %rsp 2091; CHECK-NEXT: .cfi_def_cfa_offset 8 2092; CHECK-NEXT: retq 2093; 2094; AVX-LABEL: constrained_vector_exp_v4f64: 2095; AVX: # %bb.0: # %entry 2096; AVX-NEXT: subq $40, %rsp 2097; AVX-NEXT: .cfi_def_cfa_offset 48 2098; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2099; AVX-NEXT: callq exp 2100; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2101; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2102; AVX-NEXT: callq exp 2103; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2104; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2105; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2106; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2107; AVX-NEXT: callq exp 2108; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2109; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2110; AVX-NEXT: callq exp 2111; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2112; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2113; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 2114; AVX-NEXT: addq $40, %rsp 2115; AVX-NEXT: 
.cfi_def_cfa_offset 8 2116; AVX-NEXT: retq 2117entry: 2118 %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64( 2119 <4 x double> <double 42.0, double 42.1, 2120 double 42.2, double 42.3>, 2121 metadata !"round.dynamic", 2122 metadata !"fpexcept.strict") #0 2123 ret <4 x double> %exp 2124} 2125 2126define <1 x float> @constrained_vector_exp2_v1f32() #0 { 2127; CHECK-LABEL: constrained_vector_exp2_v1f32: 2128; CHECK: # %bb.0: # %entry 2129; CHECK-NEXT: pushq %rax 2130; CHECK-NEXT: .cfi_def_cfa_offset 16 2131; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2132; CHECK-NEXT: callq exp2f 2133; CHECK-NEXT: popq %rax 2134; CHECK-NEXT: .cfi_def_cfa_offset 8 2135; CHECK-NEXT: retq 2136; 2137; AVX-LABEL: constrained_vector_exp2_v1f32: 2138; AVX: # %bb.0: # %entry 2139; AVX-NEXT: pushq %rax 2140; AVX-NEXT: .cfi_def_cfa_offset 16 2141; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2142; AVX-NEXT: callq exp2f 2143; AVX-NEXT: popq %rax 2144; AVX-NEXT: .cfi_def_cfa_offset 8 2145; AVX-NEXT: retq 2146entry: 2147 %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32( 2148 <1 x float> <float 42.0>, 2149 metadata !"round.dynamic", 2150 metadata !"fpexcept.strict") #0 2151 ret <1 x float> %exp2 2152} 2153 2154define <2 x double> @constrained_vector_exp2_v2f64() #0 { 2155; CHECK-LABEL: constrained_vector_exp2_v2f64: 2156; CHECK: # %bb.0: # %entry 2157; CHECK-NEXT: subq $24, %rsp 2158; CHECK-NEXT: .cfi_def_cfa_offset 32 2159; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2160; CHECK-NEXT: callq exp2 2161; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2162; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2163; CHECK-NEXT: callq exp2 2164; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2165; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2166; CHECK-NEXT: addq $24, %rsp 2167; CHECK-NEXT: .cfi_def_cfa_offset 8 2168; CHECK-NEXT: retq 2169; 2170; AVX-LABEL: constrained_vector_exp2_v2f64: 2171; AVX: # %bb.0: # %entry 2172; AVX-NEXT: subq $24, 
%rsp 2173; AVX-NEXT: .cfi_def_cfa_offset 32 2174; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2175; AVX-NEXT: callq exp2 2176; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2177; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2178; AVX-NEXT: callq exp2 2179; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2180; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2181; AVX-NEXT: addq $24, %rsp 2182; AVX-NEXT: .cfi_def_cfa_offset 8 2183; AVX-NEXT: retq 2184entry: 2185 %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64( 2186 <2 x double> <double 42.1, double 42.0>, 2187 metadata !"round.dynamic", 2188 metadata !"fpexcept.strict") #0 2189 ret <2 x double> %exp2 2190} 2191 2192define <3 x float> @constrained_vector_exp2_v3f32() #0 { 2193; CHECK-LABEL: constrained_vector_exp2_v3f32: 2194; CHECK: # %bb.0: # %entry 2195; CHECK-NEXT: subq $40, %rsp 2196; CHECK-NEXT: .cfi_def_cfa_offset 48 2197; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2198; CHECK-NEXT: callq exp2f 2199; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2200; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2201; CHECK-NEXT: callq exp2f 2202; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2203; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2204; CHECK-NEXT: callq exp2f 2205; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 2206; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2207; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2208; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2209; CHECK-NEXT: movaps %xmm1, %xmm0 2210; CHECK-NEXT: addq $40, %rsp 2211; CHECK-NEXT: .cfi_def_cfa_offset 8 2212; CHECK-NEXT: retq 2213; 2214; AVX-LABEL: constrained_vector_exp2_v3f32: 2215; AVX: # %bb.0: # %entry 2216; AVX-NEXT: subq $40, %rsp 2217; AVX-NEXT: .cfi_def_cfa_offset 48 2218; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2219; AVX-NEXT: callq exp2f 2220; AVX-NEXT: vmovaps %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2221; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2222; AVX-NEXT: callq exp2f 2223; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2224; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2225; AVX-NEXT: callq exp2f 2226; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 2227; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 2228; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2229; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 2230; AVX-NEXT: addq $40, %rsp 2231; AVX-NEXT: .cfi_def_cfa_offset 8 2232; AVX-NEXT: retq 2233entry: 2234 %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32( 2235 <3 x float> <float 42.0, float 43.0, float 44.0>, 2236 metadata !"round.dynamic", 2237 metadata !"fpexcept.strict") #0 2238 ret <3 x float> %exp2 2239} 2240 2241define <3 x double> @constrained_vector_exp2_v3f64() #0 { 2242; CHECK-LABEL: constrained_vector_exp2_v3f64: 2243; CHECK: # %bb.0: # %entry 2244; CHECK-NEXT: subq $24, %rsp 2245; CHECK-NEXT: .cfi_def_cfa_offset 32 2246; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2247; CHECK-NEXT: callq exp2 2248; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2249; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2250; CHECK-NEXT: callq exp2 2251; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 2252; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2253; CHECK-NEXT: callq exp2 2254; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 2255; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 2256; CHECK-NEXT: wait 2257; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 2258; CHECK-NEXT: # xmm0 = mem[0],zero 2259; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 2260; CHECK-NEXT: # xmm1 = mem[0],zero 2261; CHECK-NEXT: addq $24, %rsp 2262; CHECK-NEXT: .cfi_def_cfa_offset 8 2263; CHECK-NEXT: retq 2264; 2265; AVX-LABEL: constrained_vector_exp2_v3f64: 2266; AVX: # %bb.0: # %entry 2267; AVX-NEXT: subq $56, %rsp 2268; 
AVX-NEXT: .cfi_def_cfa_offset 64 2269; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2270; AVX-NEXT: callq exp2 2271; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2272; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2273; AVX-NEXT: callq exp2 2274; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2275; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2276; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 2277; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2278; AVX-NEXT: vzeroupper 2279; AVX-NEXT: callq exp2 2280; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 2281; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2282; AVX-NEXT: addq $56, %rsp 2283; AVX-NEXT: .cfi_def_cfa_offset 8 2284; AVX-NEXT: retq 2285entry: 2286 %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64( 2287 <3 x double> <double 42.0, double 42.1, double 42.2>, 2288 metadata !"round.dynamic", 2289 metadata !"fpexcept.strict") #0 2290 ret <3 x double> %exp2 2291} 2292 2293define <4 x double> @constrained_vector_exp2_v4f64() #0 { 2294; CHECK-LABEL: constrained_vector_exp2_v4f64: 2295; CHECK: # %bb.0: # %entry 2296; CHECK-NEXT: subq $40, %rsp 2297; CHECK-NEXT: .cfi_def_cfa_offset 48 2298; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2299; CHECK-NEXT: callq exp2 2300; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2301; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2302; CHECK-NEXT: callq exp2 2303; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2304; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2305; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2306; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2307; CHECK-NEXT: callq exp2 2308; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2309; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2310; CHECK-NEXT: callq exp2 2311; CHECK-NEXT: movaps %xmm0, %xmm1 2312; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2313; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2314; CHECK-NEXT: movaps (%rsp), %xmm0 # 
16-byte Reload 2315; CHECK-NEXT: addq $40, %rsp 2316; CHECK-NEXT: .cfi_def_cfa_offset 8 2317; CHECK-NEXT: retq 2318; 2319; AVX-LABEL: constrained_vector_exp2_v4f64: 2320; AVX: # %bb.0: # %entry 2321; AVX-NEXT: subq $40, %rsp 2322; AVX-NEXT: .cfi_def_cfa_offset 48 2323; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2324; AVX-NEXT: callq exp2 2325; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2326; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2327; AVX-NEXT: callq exp2 2328; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2329; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2330; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2331; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2332; AVX-NEXT: callq exp2 2333; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2334; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2335; AVX-NEXT: callq exp2 2336; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2337; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2338; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 2339; AVX-NEXT: addq $40, %rsp 2340; AVX-NEXT: .cfi_def_cfa_offset 8 2341; AVX-NEXT: retq 2342entry: 2343 %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64( 2344 <4 x double> <double 42.1, double 42.2, 2345 double 42.3, double 42.4>, 2346 metadata !"round.dynamic", 2347 metadata !"fpexcept.strict") #0 2348 ret <4 x double> %exp2 2349} 2350 2351define <1 x float> @constrained_vector_log_v1f32() #0 { 2352; CHECK-LABEL: constrained_vector_log_v1f32: 2353; CHECK: # %bb.0: # %entry 2354; CHECK-NEXT: pushq %rax 2355; CHECK-NEXT: .cfi_def_cfa_offset 16 2356; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2357; CHECK-NEXT: callq logf 2358; CHECK-NEXT: popq %rax 2359; CHECK-NEXT: .cfi_def_cfa_offset 8 2360; CHECK-NEXT: retq 2361; 2362; AVX-LABEL: constrained_vector_log_v1f32: 2363; AVX: # %bb.0: # %entry 2364; AVX-NEXT: pushq %rax 2365; AVX-NEXT: .cfi_def_cfa_offset 16 2366; AVX-NEXT: vmovss 
{{.*#+}} xmm0 = mem[0],zero,zero,zero 2367; AVX-NEXT: callq logf 2368; AVX-NEXT: popq %rax 2369; AVX-NEXT: .cfi_def_cfa_offset 8 2370; AVX-NEXT: retq 2371entry: 2372 %log = call <1 x float> @llvm.experimental.constrained.log.v1f32( 2373 <1 x float> <float 42.0>, 2374 metadata !"round.dynamic", 2375 metadata !"fpexcept.strict") #0 2376 ret <1 x float> %log 2377} 2378 2379define <2 x double> @constrained_vector_log_v2f64() #0 { 2380; CHECK-LABEL: constrained_vector_log_v2f64: 2381; CHECK: # %bb.0: # %entry 2382; CHECK-NEXT: subq $24, %rsp 2383; CHECK-NEXT: .cfi_def_cfa_offset 32 2384; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2385; CHECK-NEXT: callq log 2386; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2387; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2388; CHECK-NEXT: callq log 2389; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2390; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2391; CHECK-NEXT: addq $24, %rsp 2392; CHECK-NEXT: .cfi_def_cfa_offset 8 2393; CHECK-NEXT: retq 2394; 2395; AVX-LABEL: constrained_vector_log_v2f64: 2396; AVX: # %bb.0: # %entry 2397; AVX-NEXT: subq $24, %rsp 2398; AVX-NEXT: .cfi_def_cfa_offset 32 2399; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2400; AVX-NEXT: callq log 2401; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2402; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2403; AVX-NEXT: callq log 2404; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2405; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2406; AVX-NEXT: addq $24, %rsp 2407; AVX-NEXT: .cfi_def_cfa_offset 8 2408; AVX-NEXT: retq 2409entry: 2410 %log = call <2 x double> @llvm.experimental.constrained.log.v2f64( 2411 <2 x double> <double 42.0, double 42.1>, 2412 metadata !"round.dynamic", 2413 metadata !"fpexcept.strict") #0 2414 ret <2 x double> %log 2415} 2416 2417define <3 x float> @constrained_vector_log_v3f32() #0 { 2418; CHECK-LABEL: constrained_vector_log_v3f32: 2419; CHECK: # %bb.0: # %entry 2420; CHECK-NEXT: subq $40, %rsp 2421; CHECK-NEXT: 
.cfi_def_cfa_offset 48 2422; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2423; CHECK-NEXT: callq logf 2424; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2425; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2426; CHECK-NEXT: callq logf 2427; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2428; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2429; CHECK-NEXT: callq logf 2430; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 2431; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2432; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2433; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2434; CHECK-NEXT: movaps %xmm1, %xmm0 2435; CHECK-NEXT: addq $40, %rsp 2436; CHECK-NEXT: .cfi_def_cfa_offset 8 2437; CHECK-NEXT: retq 2438; 2439; AVX-LABEL: constrained_vector_log_v3f32: 2440; AVX: # %bb.0: # %entry 2441; AVX-NEXT: subq $40, %rsp 2442; AVX-NEXT: .cfi_def_cfa_offset 48 2443; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2444; AVX-NEXT: callq logf 2445; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2446; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2447; AVX-NEXT: callq logf 2448; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2449; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2450; AVX-NEXT: callq logf 2451; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 2452; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 2453; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2454; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 2455; AVX-NEXT: addq $40, %rsp 2456; AVX-NEXT: .cfi_def_cfa_offset 8 2457; AVX-NEXT: retq 2458entry: 2459 %log = call <3 x float> @llvm.experimental.constrained.log.v3f32( 2460 <3 x float> <float 42.0, float 43.0, float 44.0>, 2461 metadata !"round.dynamic", 2462 metadata !"fpexcept.strict") #0 2463 ret <3 x float> %log 2464} 2465 2466define <3 x double> 
@constrained_vector_log_v3f64() #0 {
; CHECK-LABEL: constrained_vector_log_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq log
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
           <3 x double> <double 42.0, double 42.1, double 42.2>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %log
}

define <4 x double> @constrained_vector_log_v4f64() #0 {
; CHECK-LABEL: constrained_vector_log_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
           <4 x double> <double 42.0, double 42.1,
                         double 42.2, double 42.3>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %log
}

define <1 x float> @constrained_vector_log10_v1f32() #0 {
; CHECK-LABEL: constrained_vector_log10_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log10f
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log10f
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
             <1 x float> <float 42.0>,
             metadata !"round.dynamic",
             metadata !"fpexcept.strict") #0
  ret <1 x float> %log10
}

define <2 x double> @constrained_vector_log10_v2f64() #0 {
; CHECK-LABEL: constrained_vector_log10_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log10
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log10
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
             <2 x double> <double 42.0, double 42.1>,
             metadata !"round.dynamic",
             metadata !"fpexcept.strict") #0
  ret <2 x double> %log10
}

define <3 x float> @constrained_vector_log10_v3f32() #0 {
; CHECK-LABEL: constrained_vector_log10_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log10f
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log10f
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log10f
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log10f
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log10f
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log10f
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
             <3 x float> <float 42.0, float 43.0, float 44.0>,
             metadata !"round.dynamic",
             metadata !"fpexcept.strict") #0
  ret <3 x float> %log10
}

define <3 x double> @constrained_vector_log10_v3f64() #0 {
; CHECK-LABEL: constrained_vector_log10_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log10
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log10
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq log10
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
             <3 x double> <double 42.0, double 42.1, double 42.2>,
             metadata !"round.dynamic",
             metadata !"fpexcept.strict") #0
  ret <3 x double> %log10
}

define <4 x double> @constrained_vector_log10_v4f64() #0 {
; CHECK-LABEL: constrained_vector_log10_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log10
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log10
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log10
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log10
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log10
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
             <4 x double> <double 42.0, double 42.1,
                           double 42.2, double 42.3>,
             metadata !"round.dynamic",
             metadata !"fpexcept.strict") #0
  ret <4 x double> %log10
}

define <1 x float> @constrained_vector_log2_v1f32() #0 {
; CHECK-LABEL: constrained_vector_log2_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log2f
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log2_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log2f
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
            <1 x float> <float 42.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <1 x float> %log2
}

define <2 x double> @constrained_vector_log2_v2f64() #0 {
; CHECK-LABEL: constrained_vector_log2_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log2_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
            <2 x double> <double 42.0, double 42.1>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <2 x double> %log2
}

define <3 x float> @constrained_vector_log2_v3f32() #0 {
; CHECK-LABEL: constrained_vector_log2_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log2f
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log2f
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log2f
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log2_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log2f
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log2f
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log2f
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
            <3 x float> <float 42.0, float 43.0, float 44.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <3 x float> %log2
}

define <3 x double> @constrained_vector_log2_v3f64() #0 {
; CHECK-LABEL: constrained_vector_log2_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log2_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
            <3 x double> <double 42.0, double 42.1, double 42.2>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <3 x double> %log2
}

define <4 x double> @constrained_vector_log2_v4f64() #0 {
; CHECK-LABEL: constrained_vector_log2_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log2_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
            <4 x double> <double 42.0, double 42.1,
                          double 42.2, double 42.3>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <4 x double> %log2
}

define <1 x float> @constrained_vector_rint_v1f32() #0 {
; CHECK-LABEL: constrained_vector_rint_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq rintf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
            <1 x float> <float 42.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <1 x float> %rint
}

define <2 x double> @constrained_vector_rint_v2f64() #0 {
; CHECK-LABEL: constrained_vector_rint_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $4, {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
            <2 x double> <double 42.1, double 42.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <2 x double> %rint
}

define <3 x float> @constrained_vector_rint_v3f32() #0 {
; CHECK-LABEL: constrained_vector_rint_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq rintf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq rintf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq rintf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $4, %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $4, %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
            <3 x float> <float 42.0, float 43.0, float 44.0>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <3 x float> %rint
}

define <3 x double> @constrained_vector_rint_v3f64() #0 {
; CHECK-LABEL: constrained_vector_rint_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vroundpd $4, {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
            <3 x double> <double 42.0, double 42.1, double 42.2>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <3 x double> %rint
}

define <4 x double> @constrained_vector_rint_v4f64() #0 {
; CHECK-LABEL: constrained_vector_rint_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $4, {{.*}}(%rip), %ymm0
; AVX-NEXT:    retq
entry:
  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
            <4 x double> <double 42.1, double 42.2,
                          double 42.3, double 42.4>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <4 x double> %rint
}

define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq nearbyintf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearbyint_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
              <1 x float> <float 42.0>,
              metadata !"round.dynamic",
              metadata !"fpexcept.strict") #0
  ret <1 x float> %nearby
}

define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearbyint_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $12, {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
              <2 x double> <double 42.1, double 42.0>,
              metadata !"round.dynamic",
              metadata !"fpexcept.strict") #0
  ret <2 x double> %nearby
}

define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq nearbyintf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq nearbyintf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq nearbyintf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearbyint_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $12, %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $12, %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
              <3 x float> <float 42.0, float 43.0, float 44.0>,
              metadata !"round.dynamic",
              metadata !"fpexcept.strict") #0
  ret <3 x float> %nearby
}

define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; CHECK-LABEL: constrained_vector_nearby_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearby_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vroundpd $12, {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
              <3 x double> <double 42.0, double 42.1, double 42.2>,
              metadata !"round.dynamic",
              metadata !"fpexcept.strict") #0
  ret <3 x double> %nearby
}

define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearbyint_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $12, {{.*}}(%rip), %ymm0
; AVX-NEXT:    retq
entry:
  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
              <4 x double> <double 42.1, double 42.2,
                            double 42.3, double 42.4>,
              metadata !"round.dynamic",
              metadata !"fpexcept.strict") #0
  ret <4 x double> %nearby
}

define <1 x float> @constrained_vector_maxnum_v1f32() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmaxf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_maxnum_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmaxf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
           <1 x float> <float 42.0>, <1 x float> <float 41.0>,
           metadata !"fpexcept.strict") #0
  ret <1 x float> %max
}

define <2 x double> @constrained_vector_maxnum_v2f64() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; 
CHECK-NEXT: callq fmax 3405; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3406; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3407; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3408; CHECK-NEXT: callq fmax 3409; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3410; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3411; CHECK-NEXT: addq $24, %rsp 3412; CHECK-NEXT: .cfi_def_cfa_offset 8 3413; CHECK-NEXT: retq 3414; 3415; AVX-LABEL: constrained_vector_maxnum_v2f64: 3416; AVX: # %bb.0: # %entry 3417; AVX-NEXT: subq $24, %rsp 3418; AVX-NEXT: .cfi_def_cfa_offset 32 3419; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3420; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3421; AVX-NEXT: callq fmax 3422; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3423; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3424; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3425; AVX-NEXT: callq fmax 3426; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3427; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3428; AVX-NEXT: addq $24, %rsp 3429; AVX-NEXT: .cfi_def_cfa_offset 8 3430; AVX-NEXT: retq 3431entry: 3432 %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64( 3433 <2 x double> <double 43.0, double 42.0>, 3434 <2 x double> <double 41.0, double 40.0>, 3435 metadata !"fpexcept.strict") #0 3436 ret <2 x double> %max 3437} 3438 3439define <3 x float> @constrained_vector_maxnum_v3f32() #0 { 3440; CHECK-LABEL: constrained_vector_maxnum_v3f32: 3441; CHECK: # %bb.0: # %entry 3442; CHECK-NEXT: subq $40, %rsp 3443; CHECK-NEXT: .cfi_def_cfa_offset 48 3444; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3445; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3446; CHECK-NEXT: callq fmaxf 3447; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3448; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3449; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3450; CHECK-NEXT: callq fmaxf 3451; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3452; 
CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3453; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3454; CHECK-NEXT: callq fmaxf 3455; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 3456; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3457; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 3458; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 3459; CHECK-NEXT: movaps %xmm1, %xmm0 3460; CHECK-NEXT: addq $40, %rsp 3461; CHECK-NEXT: .cfi_def_cfa_offset 8 3462; CHECK-NEXT: retq 3463; 3464; AVX-LABEL: constrained_vector_maxnum_v3f32: 3465; AVX: # %bb.0: # %entry 3466; AVX-NEXT: subq $40, %rsp 3467; AVX-NEXT: .cfi_def_cfa_offset 48 3468; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3469; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3470; AVX-NEXT: callq fmaxf 3471; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3472; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3473; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3474; AVX-NEXT: callq fmaxf 3475; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3476; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3477; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3478; AVX-NEXT: callq fmaxf 3479; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 3480; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3481; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 3482; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 3483; AVX-NEXT: addq $40, %rsp 3484; AVX-NEXT: .cfi_def_cfa_offset 8 3485; AVX-NEXT: retq 3486entry: 3487 %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32( 3488 <3 x float> <float 43.0, float 44.0, float 45.0>, 3489 <3 x float> <float 41.0, float 42.0, float 43.0>, 3490 metadata !"fpexcept.strict") #0 3491 ret <3 x float> %max 3492} 3493 3494define <3 x double> @constrained_vector_max_v3f64() #0 { 3495; CHECK-LABEL: 
constrained_vector_max_v3f64: 3496; CHECK: # %bb.0: # %entry 3497; CHECK-NEXT: subq $24, %rsp 3498; CHECK-NEXT: .cfi_def_cfa_offset 32 3499; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3500; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3501; CHECK-NEXT: callq fmax 3502; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3503; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3504; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3505; CHECK-NEXT: callq fmax 3506; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 3507; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3508; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3509; CHECK-NEXT: callq fmax 3510; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 3511; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 3512; CHECK-NEXT: wait 3513; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 3514; CHECK-NEXT: # xmm0 = mem[0],zero 3515; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 3516; CHECK-NEXT: # xmm1 = mem[0],zero 3517; CHECK-NEXT: addq $24, %rsp 3518; CHECK-NEXT: .cfi_def_cfa_offset 8 3519; CHECK-NEXT: retq 3520; 3521; AVX-LABEL: constrained_vector_max_v3f64: 3522; AVX: # %bb.0: # %entry 3523; AVX-NEXT: subq $56, %rsp 3524; AVX-NEXT: .cfi_def_cfa_offset 64 3525; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3526; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3527; AVX-NEXT: callq fmax 3528; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3529; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3530; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3531; AVX-NEXT: callq fmax 3532; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3533; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3534; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 3535; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3536; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3537; AVX-NEXT: vzeroupper 3538; AVX-NEXT: callq fmax 3539; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 3540; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3541; AVX-NEXT: addq $56, %rsp 3542; 
AVX-NEXT: .cfi_def_cfa_offset 8 3543; AVX-NEXT: retq 3544entry: 3545 %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64( 3546 <3 x double> <double 43.0, double 44.0, double 45.0>, 3547 <3 x double> <double 40.0, double 41.0, double 42.0>, 3548 metadata !"fpexcept.strict") #0 3549 ret <3 x double> %max 3550} 3551 3552define <4 x double> @constrained_vector_maxnum_v4f64() #0 { 3553; CHECK-LABEL: constrained_vector_maxnum_v4f64: 3554; CHECK: # %bb.0: # %entry 3555; CHECK-NEXT: subq $40, %rsp 3556; CHECK-NEXT: .cfi_def_cfa_offset 48 3557; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3558; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3559; CHECK-NEXT: callq fmax 3560; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3561; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3562; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3563; CHECK-NEXT: callq fmax 3564; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3565; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3566; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3567; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3568; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3569; CHECK-NEXT: callq fmax 3570; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3571; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3572; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3573; CHECK-NEXT: callq fmax 3574; CHECK-NEXT: movaps %xmm0, %xmm1 3575; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 3576; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 3577; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3578; CHECK-NEXT: addq $40, %rsp 3579; CHECK-NEXT: .cfi_def_cfa_offset 8 3580; CHECK-NEXT: retq 3581; 3582; AVX-LABEL: constrained_vector_maxnum_v4f64: 3583; AVX: # %bb.0: # %entry 3584; AVX-NEXT: subq $40, %rsp 3585; AVX-NEXT: .cfi_def_cfa_offset 48 3586; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3587; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3588; AVX-NEXT: callq fmax 3589; AVX-NEXT: vmovaps 
%xmm0, (%rsp) # 16-byte Spill 3590; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3591; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3592; AVX-NEXT: callq fmax 3593; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3594; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3595; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3596; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3597; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3598; AVX-NEXT: callq fmax 3599; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3600; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3601; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3602; AVX-NEXT: callq fmax 3603; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 3604; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3605; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 3606; AVX-NEXT: addq $40, %rsp 3607; AVX-NEXT: .cfi_def_cfa_offset 8 3608; AVX-NEXT: retq 3609entry: 3610 %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64( 3611 <4 x double> <double 44.0, double 45.0, 3612 double 46.0, double 47.0>, 3613 <4 x double> <double 40.0, double 41.0, 3614 double 42.0, double 43.0>, 3615 metadata !"fpexcept.strict") #0 3616 ret <4 x double> %max 3617} 3618 3619define <1 x float> @constrained_vector_minnum_v1f32() #0 { 3620; CHECK-LABEL: constrained_vector_minnum_v1f32: 3621; CHECK: # %bb.0: # %entry 3622; CHECK-NEXT: pushq %rax 3623; CHECK-NEXT: .cfi_def_cfa_offset 16 3624; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3625; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3626; CHECK-NEXT: callq fminf 3627; CHECK-NEXT: popq %rax 3628; CHECK-NEXT: .cfi_def_cfa_offset 8 3629; CHECK-NEXT: retq 3630; 3631; AVX-LABEL: constrained_vector_minnum_v1f32: 3632; AVX: # %bb.0: # %entry 3633; AVX-NEXT: pushq %rax 3634; AVX-NEXT: .cfi_def_cfa_offset 16 3635; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3636; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 
3637; AVX-NEXT: callq fminf 3638; AVX-NEXT: popq %rax 3639; AVX-NEXT: .cfi_def_cfa_offset 8 3640; AVX-NEXT: retq 3641 entry: 3642 %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32( 3643 <1 x float> <float 42.0>, <1 x float> <float 41.0>, 3644 metadata !"fpexcept.strict") #0 3645 ret <1 x float> %min 3646} 3647 3648define <2 x double> @constrained_vector_minnum_v2f64() #0 { 3649; CHECK-LABEL: constrained_vector_minnum_v2f64: 3650; CHECK: # %bb.0: # %entry 3651; CHECK-NEXT: subq $24, %rsp 3652; CHECK-NEXT: .cfi_def_cfa_offset 32 3653; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3654; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3655; CHECK-NEXT: callq fmin 3656; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3657; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3658; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3659; CHECK-NEXT: callq fmin 3660; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3661; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3662; CHECK-NEXT: addq $24, %rsp 3663; CHECK-NEXT: .cfi_def_cfa_offset 8 3664; CHECK-NEXT: retq 3665; 3666; AVX-LABEL: constrained_vector_minnum_v2f64: 3667; AVX: # %bb.0: # %entry 3668; AVX-NEXT: subq $24, %rsp 3669; AVX-NEXT: .cfi_def_cfa_offset 32 3670; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3671; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3672; AVX-NEXT: callq fmin 3673; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3674; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3675; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3676; AVX-NEXT: callq fmin 3677; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3678; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3679; AVX-NEXT: addq $24, %rsp 3680; AVX-NEXT: .cfi_def_cfa_offset 8 3681; AVX-NEXT: retq 3682entry: 3683 %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64( 3684 <2 x double> <double 43.0, double 42.0>, 3685 <2 x double> <double 41.0, double 40.0>, 3686 metadata !"fpexcept.strict") #0 3687 ret <2 x double> %min 3688} 3689 
3690define <3 x float> @constrained_vector_minnum_v3f32() #0 { 3691; CHECK-LABEL: constrained_vector_minnum_v3f32: 3692; CHECK: # %bb.0: # %entry 3693; CHECK-NEXT: subq $40, %rsp 3694; CHECK-NEXT: .cfi_def_cfa_offset 48 3695; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3696; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3697; CHECK-NEXT: callq fminf 3698; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3699; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3700; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3701; CHECK-NEXT: callq fminf 3702; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3703; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3704; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3705; CHECK-NEXT: callq fminf 3706; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 3707; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3708; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 3709; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 3710; CHECK-NEXT: movaps %xmm1, %xmm0 3711; CHECK-NEXT: addq $40, %rsp 3712; CHECK-NEXT: .cfi_def_cfa_offset 8 3713; CHECK-NEXT: retq 3714; 3715; AVX-LABEL: constrained_vector_minnum_v3f32: 3716; AVX: # %bb.0: # %entry 3717; AVX-NEXT: subq $40, %rsp 3718; AVX-NEXT: .cfi_def_cfa_offset 48 3719; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3720; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3721; AVX-NEXT: callq fminf 3722; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3723; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3724; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3725; AVX-NEXT: callq fminf 3726; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3727; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3728; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3729; AVX-NEXT: callq fminf 3730; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 3731; 
AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3732; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 3733; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 3734; AVX-NEXT: addq $40, %rsp 3735; AVX-NEXT: .cfi_def_cfa_offset 8 3736; AVX-NEXT: retq 3737entry: 3738 %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32( 3739 <3 x float> <float 43.0, float 44.0, float 45.0>, 3740 <3 x float> <float 41.0, float 42.0, float 43.0>, 3741 metadata !"fpexcept.strict") #0 3742 ret <3 x float> %min 3743} 3744 3745define <3 x double> @constrained_vector_min_v3f64() #0 { 3746; CHECK-LABEL: constrained_vector_min_v3f64: 3747; CHECK: # %bb.0: # %entry 3748; CHECK-NEXT: subq $24, %rsp 3749; CHECK-NEXT: .cfi_def_cfa_offset 32 3750; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3751; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3752; CHECK-NEXT: callq fmin 3753; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3754; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3755; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3756; CHECK-NEXT: callq fmin 3757; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 3758; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3759; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3760; CHECK-NEXT: callq fmin 3761; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 3762; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 3763; CHECK-NEXT: wait 3764; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 3765; CHECK-NEXT: # xmm0 = mem[0],zero 3766; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 3767; CHECK-NEXT: # xmm1 = mem[0],zero 3768; CHECK-NEXT: addq $24, %rsp 3769; CHECK-NEXT: .cfi_def_cfa_offset 8 3770; CHECK-NEXT: retq 3771; 3772; AVX-LABEL: constrained_vector_min_v3f64: 3773; AVX: # %bb.0: # %entry 3774; AVX-NEXT: subq $56, %rsp 3775; AVX-NEXT: .cfi_def_cfa_offset 64 3776; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3777; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3778; AVX-NEXT: callq fmin 
3779; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3780; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3781; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3782; AVX-NEXT: callq fmin 3783; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3784; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3785; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 3786; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3787; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3788; AVX-NEXT: vzeroupper 3789; AVX-NEXT: callq fmin 3790; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 3791; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3792; AVX-NEXT: addq $56, %rsp 3793; AVX-NEXT: .cfi_def_cfa_offset 8 3794; AVX-NEXT: retq 3795entry: 3796 %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64( 3797 <3 x double> <double 43.0, double 44.0, double 45.0>, 3798 <3 x double> <double 40.0, double 41.0, double 42.0>, 3799 metadata !"fpexcept.strict") #0 3800 ret <3 x double> %min 3801} 3802 3803define <4 x double> @constrained_vector_minnum_v4f64() #0 { 3804; CHECK-LABEL: constrained_vector_minnum_v4f64: 3805; CHECK: # %bb.0: # %entry 3806; CHECK-NEXT: subq $40, %rsp 3807; CHECK-NEXT: .cfi_def_cfa_offset 48 3808; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3809; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3810; CHECK-NEXT: callq fmin 3811; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3812; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3813; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3814; CHECK-NEXT: callq fmin 3815; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3816; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3817; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3818; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3819; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3820; CHECK-NEXT: callq fmin 3821; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3822; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3823; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3824; 
CHECK-NEXT: callq fmin 3825; CHECK-NEXT: movaps %xmm0, %xmm1 3826; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 3827; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 3828; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3829; CHECK-NEXT: addq $40, %rsp 3830; CHECK-NEXT: .cfi_def_cfa_offset 8 3831; CHECK-NEXT: retq 3832; 3833; AVX-LABEL: constrained_vector_minnum_v4f64: 3834; AVX: # %bb.0: # %entry 3835; AVX-NEXT: subq $40, %rsp 3836; AVX-NEXT: .cfi_def_cfa_offset 48 3837; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3838; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3839; AVX-NEXT: callq fmin 3840; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3841; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3842; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3843; AVX-NEXT: callq fmin 3844; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3845; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3846; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3847; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3848; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3849; AVX-NEXT: callq fmin 3850; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3851; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3852; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3853; AVX-NEXT: callq fmin 3854; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 3855; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3856; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 3857; AVX-NEXT: addq $40, %rsp 3858; AVX-NEXT: .cfi_def_cfa_offset 8 3859; AVX-NEXT: retq 3860entry: 3861 %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64( 3862 <4 x double> <double 44.0, double 45.0, 3863 double 46.0, double 47.0>, 3864 <4 x double> <double 40.0, double 41.0, 3865 double 42.0, double 43.0>, 3866 metadata !"fpexcept.strict") #0 3867 ret <4 x double> %min 3868} 3869 3870define <1 x i32> @constrained_vector_fptosi_v1i32_v1f32() #0 { 3871; CHECK-LABEL: 
constrained_vector_fptosi_v1i32_v1f32: 3872; CHECK: # %bb.0: # %entry 3873; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax 3874; CHECK-NEXT: retq 3875; 3876; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f32: 3877; AVX: # %bb.0: # %entry 3878; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax 3879; AVX-NEXT: retq 3880entry: 3881 %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32( 3882 <1 x float><float 42.0>, 3883 metadata !"fpexcept.strict") #0 3884 ret <1 x i32> %result 3885} 3886 3887define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() #0 { 3888; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f32: 3889; CHECK: # %bb.0: # %entry 3890; CHECK-NEXT: cvttps2dq {{.*}}(%rip), %xmm0 3891; CHECK-NEXT: retq 3892; 3893; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f32: 3894; AVX: # %bb.0: # %entry 3895; AVX-NEXT: vcvttps2dq {{.*}}(%rip), %xmm0 3896; AVX-NEXT: retq 3897entry: 3898 %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32( 3899 <2 x float><float 42.0, float 43.0>, 3900 metadata !"fpexcept.strict") #0 3901 ret <2 x i32> %result 3902} 3903 3904define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 { 3905; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32: 3906; CHECK: # %bb.0: # %entry 3907; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax 3908; CHECK-NEXT: movd %eax, %xmm1 3909; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax 3910; CHECK-NEXT: movd %eax, %xmm0 3911; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3912; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax 3913; CHECK-NEXT: movd %eax, %xmm1 3914; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3915; CHECK-NEXT: retq 3916; 3917; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32: 3918; AVX: # %bb.0: # %entry 3919; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax 3920; AVX-NEXT: vmovd %eax, %xmm0 3921; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax 3922; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 3923; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax 3924; AVX-NEXT: vpinsrd $2, %eax, 
%xmm0, %xmm0 3925; AVX-NEXT: retq 3926entry: 3927 %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32( 3928 <3 x float><float 42.0, float 43.0, 3929 float 44.0>, 3930 metadata !"fpexcept.strict") #0 3931 ret <3 x i32> %result 3932} 3933 3934define <4 x i32> @constrained_vector_fptosi_v4i32_v4f32() #0 { 3935; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f32: 3936; CHECK: # %bb.0: # %entry 3937; CHECK-NEXT: cvttps2dq {{.*}}(%rip), %xmm0 3938; CHECK-NEXT: retq 3939; 3940; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f32: 3941; AVX: # %bb.0: # %entry 3942; AVX-NEXT: vcvttps2dq {{.*}}(%rip), %xmm0 3943; AVX-NEXT: retq 3944entry: 3945 %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32( 3946 <4 x float><float 42.0, float 43.0, 3947 float 44.0, float 45.0>, 3948 metadata !"fpexcept.strict") #0 3949 ret <4 x i32> %result 3950} 3951 3952define <1 x i64> @constrained_vector_fptosi_v1i64_v1f32() #0 { 3953; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f32: 3954; CHECK: # %bb.0: # %entry 3955; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 3956; CHECK-NEXT: retq 3957; 3958; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f32: 3959; AVX: # %bb.0: # %entry 3960; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax 3961; AVX-NEXT: retq 3962entry: 3963 %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32( 3964 <1 x float><float 42.0>, 3965 metadata !"fpexcept.strict") #0 3966 ret <1 x i64> %result 3967} 3968 3969define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32() #0 { 3970; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f32: 3971; CHECK: # %bb.0: # %entry 3972; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 3973; CHECK-NEXT: movq %rax, %xmm1 3974; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 3975; CHECK-NEXT: movq %rax, %xmm0 3976; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3977; CHECK-NEXT: retq 3978; 3979; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f32: 3980; AVX1: # %bb.0: # %entry 3981; AVX1-NEXT: vcvttss2si 
{{.*}}(%rip), %rax 3982; AVX1-NEXT: vmovq %rax, %xmm0 3983; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 3984; AVX1-NEXT: vmovq %rax, %xmm1 3985; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3986; AVX1-NEXT: retq 3987; 3988; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f32: 3989; AVX512F: # %bb.0: # %entry 3990; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax 3991; AVX512F-NEXT: vmovq %rax, %xmm0 3992; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax 3993; AVX512F-NEXT: vmovq %rax, %xmm1 3994; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3995; AVX512F-NEXT: retq 3996; 3997; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f32: 3998; AVX512DQ: # %bb.0: # %entry 3999; AVX512DQ-NEXT: vcvttps2qq {{.*}}(%rip), %zmm0 4000; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4001; AVX512DQ-NEXT: vzeroupper 4002; AVX512DQ-NEXT: retq 4003entry: 4004 %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32( 4005 <2 x float><float 42.0, float 43.0>, 4006 metadata !"fpexcept.strict") #0 4007 ret <2 x i64> %result 4008} 4009 4010define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 { 4011; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f32: 4012; CHECK: # %bb.0: # %entry 4013; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rcx 4014; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rdx 4015; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4016; CHECK-NEXT: retq 4017; 4018; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f32: 4019; AVX1: # %bb.0: # %entry 4020; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4021; AVX1-NEXT: vmovq %rax, %xmm0 4022; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4023; AVX1-NEXT: vmovq %rax, %xmm1 4024; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4025; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4026; AVX1-NEXT: vmovq %rax, %xmm1 4027; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4028; AVX1-NEXT: retq 4029; 4030; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f32: 4031; AVX512: # %bb.0: # %entry 4032; AVX512-NEXT: vcvttss2si 
{{.*}}(%rip), %rax 4033; AVX512-NEXT: vmovq %rax, %xmm0 4034; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax 4035; AVX512-NEXT: vmovq %rax, %xmm1 4036; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4037; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax 4038; AVX512-NEXT: vmovq %rax, %xmm1 4039; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4040; AVX512-NEXT: retq 4041entry: 4042 %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32( 4043 <3 x float><float 42.0, float 43.0, 4044 float 44.0>, 4045 metadata !"fpexcept.strict") #0 4046 ret <3 x i64> %result 4047} 4048 4049define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32() #0 { 4050; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f32: 4051; CHECK: # %bb.0: # %entry 4052; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4053; CHECK-NEXT: movq %rax, %xmm1 4054; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4055; CHECK-NEXT: movq %rax, %xmm0 4056; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4057; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4058; CHECK-NEXT: movq %rax, %xmm2 4059; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4060; CHECK-NEXT: movq %rax, %xmm1 4061; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4062; CHECK-NEXT: retq 4063; 4064; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f32: 4065; AVX1: # %bb.0: # %entry 4066; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4067; AVX1-NEXT: vmovq %rax, %xmm0 4068; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4069; AVX1-NEXT: vmovq %rax, %xmm1 4070; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4071; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4072; AVX1-NEXT: vmovq %rax, %xmm1 4073; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4074; AVX1-NEXT: vmovq %rax, %xmm2 4075; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4076; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4077; AVX1-NEXT: retq 4078; 4079; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f32: 4080; AVX512F: # %bb.0: # %entry 4081; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax 4082; 
AVX512F-NEXT: vmovq %rax, %xmm0 4083; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax 4084; AVX512F-NEXT: vmovq %rax, %xmm1 4085; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4086; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax 4087; AVX512F-NEXT: vmovq %rax, %xmm1 4088; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax 4089; AVX512F-NEXT: vmovq %rax, %xmm2 4090; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4091; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 4092; AVX512F-NEXT: retq 4093; 4094; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f32: 4095; AVX512DQ: # %bb.0: # %entry 4096; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4097; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 4098; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 4099; AVX512DQ-NEXT: retq 4100entry: 4101 %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32( 4102 <4 x float><float 42.0, float 43.0, 4103 float 44.0, float 45.0>, 4104 metadata !"fpexcept.strict") #0 4105 ret <4 x i64> %result 4106} 4107 4108define <1 x i32> @constrained_vector_fptosi_v1i32_v1f64() #0 { 4109; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f64: 4110; CHECK: # %bb.0: # %entry 4111; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax 4112; CHECK-NEXT: retq 4113; 4114; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f64: 4115; AVX: # %bb.0: # %entry 4116; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax 4117; AVX-NEXT: retq 4118entry: 4119 %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64( 4120 <1 x double><double 42.1>, 4121 metadata !"fpexcept.strict") #0 4122 ret <1 x i32> %result 4123} 4124 4125 4126define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() #0 { 4127; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f64: 4128; CHECK: # %bb.0: # %entry 4129; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm0 4130; CHECK-NEXT: retq 4131; 4132; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f64: 4133; AVX: # %bb.0: # %entry 4134; AVX-NEXT: vcvttpd2dqx 
{{.*}}(%rip), %xmm0 4135; AVX-NEXT: retq 4136entry: 4137 %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64( 4138 <2 x double><double 42.1, double 42.2>, 4139 metadata !"fpexcept.strict") #0 4140 ret <2 x i32> %result 4141} 4142 4143define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 { 4144; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64: 4145; CHECK: # %bb.0: # %entry 4146; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax 4147; CHECK-NEXT: movd %eax, %xmm1 4148; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax 4149; CHECK-NEXT: movd %eax, %xmm0 4150; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4151; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax 4152; CHECK-NEXT: movd %eax, %xmm1 4153; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4154; CHECK-NEXT: retq 4155; 4156; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64: 4157; AVX: # %bb.0: # %entry 4158; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax 4159; AVX-NEXT: vmovd %eax, %xmm0 4160; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax 4161; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4162; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax 4163; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4164; AVX-NEXT: retq 4165entry: 4166 %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64( 4167 <3 x double><double 42.1, double 42.2, 4168 double 42.3>, 4169 metadata !"fpexcept.strict") #0 4170 ret <3 x i32> %result 4171} 4172 4173define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64() #0 { 4174; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f64: 4175; CHECK: # %bb.0: # %entry 4176; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm1 4177; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm0 4178; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4179; CHECK-NEXT: retq 4180; 4181; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f64: 4182; AVX: # %bb.0: # %entry 4183; AVX-NEXT: vcvttpd2dqy {{.*}}(%rip), %xmm0 4184; AVX-NEXT: retq 4185entry: 4186 %result = call <4 x i32> 
@llvm.experimental.constrained.fptosi.v4i32.v4f64( 4187 <4 x double><double 42.1, double 42.2, 4188 double 42.3, double 42.4>, 4189 metadata !"fpexcept.strict") #0 4190 ret <4 x i32> %result 4191} 4192 4193define <1 x i64> @constrained_vector_fptosi_v1i64_v1f64() #0 { 4194; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f64: 4195; CHECK: # %bb.0: # %entry 4196; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4197; CHECK-NEXT: retq 4198; 4199; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f64: 4200; AVX: # %bb.0: # %entry 4201; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4202; AVX-NEXT: retq 4203entry: 4204 %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64( 4205 <1 x double><double 42.1>, 4206 metadata !"fpexcept.strict") #0 4207 ret <1 x i64> %result 4208} 4209 4210define <2 x i64> @constrained_vector_fptosi_v2i64_v2f64() #0 { 4211; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f64: 4212; CHECK: # %bb.0: # %entry 4213; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4214; CHECK-NEXT: movq %rax, %xmm1 4215; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4216; CHECK-NEXT: movq %rax, %xmm0 4217; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4218; CHECK-NEXT: retq 4219; 4220; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f64: 4221; AVX1: # %bb.0: # %entry 4222; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4223; AVX1-NEXT: vmovq %rax, %xmm0 4224; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4225; AVX1-NEXT: vmovq %rax, %xmm1 4226; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4227; AVX1-NEXT: retq 4228; 4229; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f64: 4230; AVX512F: # %bb.0: # %entry 4231; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4232; AVX512F-NEXT: vmovq %rax, %xmm0 4233; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4234; AVX512F-NEXT: vmovq %rax, %xmm1 4235; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4236; AVX512F-NEXT: retq 4237; 4238; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f64: 4239; AVX512DQ: # %bb.0: # %entry 4240; 
AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1] 4241; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 4242; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4243; AVX512DQ-NEXT: vzeroupper 4244; AVX512DQ-NEXT: retq 4245entry: 4246 %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64( 4247 <2 x double><double 42.1, double 42.2>, 4248 metadata !"fpexcept.strict") #0 4249 ret <2 x i64> %result 4250} 4251 4252define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 { 4253; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f64: 4254; CHECK: # %bb.0: # %entry 4255; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rcx 4256; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rdx 4257; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4258; CHECK-NEXT: retq 4259; 4260; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f64: 4261; AVX1: # %bb.0: # %entry 4262; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4263; AVX1-NEXT: vmovq %rax, %xmm0 4264; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4265; AVX1-NEXT: vmovq %rax, %xmm1 4266; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4267; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4268; AVX1-NEXT: vmovq %rax, %xmm1 4269; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4270; AVX1-NEXT: retq 4271; 4272; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f64: 4273; AVX512: # %bb.0: # %entry 4274; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4275; AVX512-NEXT: vmovq %rax, %xmm0 4276; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4277; AVX512-NEXT: vmovq %rax, %xmm1 4278; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4279; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4280; AVX512-NEXT: vmovq %rax, %xmm1 4281; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4282; AVX512-NEXT: retq 4283entry: 4284 %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64( 4285 <3 x double><double 42.1, double 42.2, 4286 double 42.3>, 4287 metadata !"fpexcept.strict") #0 4288 ret <3 x i64> %result 4289} 4290 4291define 
<4 x i64> @constrained_vector_fptosi_v4i64_v4f64() #0 { 4292; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f64: 4293; CHECK: # %bb.0: # %entry 4294; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4295; CHECK-NEXT: movq %rax, %xmm1 4296; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4297; CHECK-NEXT: movq %rax, %xmm0 4298; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4299; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4300; CHECK-NEXT: movq %rax, %xmm2 4301; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4302; CHECK-NEXT: movq %rax, %xmm1 4303; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4304; CHECK-NEXT: retq 4305; 4306; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f64: 4307; AVX1: # %bb.0: # %entry 4308; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4309; AVX1-NEXT: vmovq %rax, %xmm0 4310; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4311; AVX1-NEXT: vmovq %rax, %xmm1 4312; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4313; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4314; AVX1-NEXT: vmovq %rax, %xmm1 4315; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4316; AVX1-NEXT: vmovq %rax, %xmm2 4317; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4318; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4319; AVX1-NEXT: retq 4320; 4321; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f64: 4322; AVX512F: # %bb.0: # %entry 4323; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4324; AVX512F-NEXT: vmovq %rax, %xmm0 4325; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4326; AVX512F-NEXT: vmovq %rax, %xmm1 4327; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4328; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4329; AVX512F-NEXT: vmovq %rax, %xmm1 4330; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4331; AVX512F-NEXT: vmovq %rax, %xmm2 4332; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4333; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 4334; AVX512F-NEXT: retq 4335; 4336; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f64: 4337; AVX512DQ: # %bb.0: # %entry 
4338; AVX512DQ-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] 4339; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 4340; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 4341; AVX512DQ-NEXT: retq 4342entry: 4343 %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64( 4344 <4 x double><double 42.1, double 42.2, 4345 double 42.3, double 42.4>, 4346 metadata !"fpexcept.strict") #0 4347 ret <4 x i64> %result 4348} 4349 4350define <1 x i32> @constrained_vector_fptoui_v1i32_v1f32() #0 { 4351; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f32: 4352; CHECK: # %bb.0: # %entry 4353; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4354; CHECK-NEXT: # kill: def $eax killed $eax killed $rax 4355; CHECK-NEXT: retq 4356; 4357; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f32: 4358; AVX1: # %bb.0: # %entry 4359; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4360; AVX1-NEXT: # kill: def $eax killed $eax killed $rax 4361; AVX1-NEXT: retq 4362; 4363; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f32: 4364; AVX512: # %bb.0: # %entry 4365; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax 4366; AVX512-NEXT: retq 4367entry: 4368 %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32( 4369 <1 x float><float 42.0>, 4370 metadata !"fpexcept.strict") #0 4371 ret <1 x i32> %result 4372} 4373 4374define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() #0 { 4375; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f32: 4376; CHECK: # %bb.0: # %entry 4377; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4378; CHECK-NEXT: movd %eax, %xmm1 4379; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4380; CHECK-NEXT: movd %eax, %xmm0 4381; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4382; CHECK-NEXT: retq 4383; 4384; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f32: 4385; AVX1: # %bb.0: # %entry 4386; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4387; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rcx 4388; 
AVX1-NEXT: vmovd %ecx, %xmm0 4389; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4390; AVX1-NEXT: retq 4391; 4392; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f32: 4393; AVX512: # %bb.0: # %entry 4394; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,0.0E+0,0.0E+0] 4395; AVX512-NEXT: vcvttps2udq %zmm0, %zmm0 4396; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4397; AVX512-NEXT: vzeroupper 4398; AVX512-NEXT: retq 4399entry: 4400 %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32( 4401 <2 x float><float 42.0, float 43.0>, 4402 metadata !"fpexcept.strict") #0 4403 ret <2 x i32> %result 4404} 4405 4406define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 { 4407; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32: 4408; CHECK: # %bb.0: # %entry 4409; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4410; CHECK-NEXT: movd %eax, %xmm1 4411; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4412; CHECK-NEXT: movd %eax, %xmm0 4413; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4414; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax 4415; CHECK-NEXT: movd %eax, %xmm1 4416; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4417; CHECK-NEXT: retq 4418; 4419; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f32: 4420; AVX1: # %bb.0: # %entry 4421; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4422; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rcx 4423; AVX1-NEXT: vmovd %ecx, %xmm0 4424; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4425; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax 4426; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4427; AVX1-NEXT: retq 4428; 4429; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f32: 4430; AVX512: # %bb.0: # %entry 4431; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax 4432; AVX512-NEXT: vmovd %eax, %xmm0 4433; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax 4434; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4435; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %eax 4436; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4437; AVX512-NEXT: retq 
4438entry: 4439 %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32( 4440 <3 x float><float 42.0, float 43.0, 4441 float 44.0>, 4442 metadata !"fpexcept.strict") #0 4443 ret <3 x i32> %result 4444} 4445 4446define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() #0 { 4447; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f32: 4448; CHECK: # %bb.0: # %entry 4449; CHECK-NEXT: movaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 4450; CHECK-NEXT: movaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4451; CHECK-NEXT: movaps %xmm1, %xmm2 4452; CHECK-NEXT: cmpltps %xmm0, %xmm2 4453; CHECK-NEXT: movaps %xmm2, %xmm3 4454; CHECK-NEXT: andnps {{.*}}(%rip), %xmm3 4455; CHECK-NEXT: andnps %xmm0, %xmm2 4456; CHECK-NEXT: subps %xmm2, %xmm1 4457; CHECK-NEXT: cvttps2dq %xmm1, %xmm0 4458; CHECK-NEXT: xorps %xmm3, %xmm0 4459; CHECK-NEXT: retq 4460; 4461; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f32: 4462; AVX1: # %bb.0: # %entry 4463; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 4464; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4465; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm2 4466; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 4467; AVX1-NEXT: vmovaps {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 4468; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm4 4469; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0 4470; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0 4471; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 4472; AVX1-NEXT: vxorps %xmm4, %xmm0, %xmm0 4473; AVX1-NEXT: retq 4474; 4475; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f32: 4476; AVX512: # %bb.0: # %entry 4477; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4478; AVX512-NEXT: vcvttps2udq %zmm0, %zmm0 4479; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4480; AVX512-NEXT: vzeroupper 4481; AVX512-NEXT: retq 4482entry: 4483 %result = call <4 x i32> 
@llvm.experimental.constrained.fptoui.v4i32.v4f32( 4484 <4 x float><float 42.0, float 43.0, 4485 float 44.0, float 45.0>, 4486 metadata !"fpexcept.strict") #0 4487 ret <4 x i32> %result 4488} 4489 4490define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 { 4491; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32: 4492; CHECK: # %bb.0: # %entry 4493; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 4494; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4495; CHECK-NEXT: comiss %xmm0, %xmm2 4496; CHECK-NEXT: xorps %xmm1, %xmm1 4497; CHECK-NEXT: ja .LBB115_2 4498; CHECK-NEXT: # %bb.1: # %entry 4499; CHECK-NEXT: movaps %xmm2, %xmm1 4500; CHECK-NEXT: .LBB115_2: # %entry 4501; CHECK-NEXT: subss %xmm1, %xmm0 4502; CHECK-NEXT: cvttss2si %xmm0, %rcx 4503; CHECK-NEXT: setbe %al 4504; CHECK-NEXT: movzbl %al, %eax 4505; CHECK-NEXT: shlq $63, %rax 4506; CHECK-NEXT: xorq %rcx, %rax 4507; CHECK-NEXT: retq 4508; 4509; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32: 4510; AVX1: # %bb.0: # %entry 4511; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 4512; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 4513; AVX1-NEXT: vcomiss %xmm0, %xmm1 4514; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 4515; AVX1-NEXT: ja .LBB115_2 4516; AVX1-NEXT: # %bb.1: # %entry 4517; AVX1-NEXT: vmovaps %xmm1, %xmm2 4518; AVX1-NEXT: .LBB115_2: # %entry 4519; AVX1-NEXT: vsubss %xmm2, %xmm0, %xmm0 4520; AVX1-NEXT: vcvttss2si %xmm0, %rcx 4521; AVX1-NEXT: setbe %al 4522; AVX1-NEXT: movzbl %al, %eax 4523; AVX1-NEXT: shlq $63, %rax 4524; AVX1-NEXT: xorq %rcx, %rax 4525; AVX1-NEXT: retq 4526; 4527; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32: 4528; AVX512: # %bb.0: # %entry 4529; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4530; AVX512-NEXT: retq 4531entry: 4532 %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32( 4533 <1 x float><float 42.0>, 4534 metadata !"fpexcept.strict") #0 4535 ret <1 x i64> %result 4536} 4537 4538define <2 x i64> 
@constrained_vector_fptoui_v2i64_v2f32() #0 { 4539; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32: 4540; CHECK: # %bb.0: # %entry 4541; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4542; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 4543; CHECK-NEXT: comiss %xmm2, %xmm1 4544; CHECK-NEXT: xorps %xmm0, %xmm0 4545; CHECK-NEXT: xorps %xmm3, %xmm3 4546; CHECK-NEXT: ja .LBB116_2 4547; CHECK-NEXT: # %bb.1: # %entry 4548; CHECK-NEXT: movaps %xmm1, %xmm3 4549; CHECK-NEXT: .LBB116_2: # %entry 4550; CHECK-NEXT: subss %xmm3, %xmm2 4551; CHECK-NEXT: cvttss2si %xmm2, %rax 4552; CHECK-NEXT: setbe %cl 4553; CHECK-NEXT: movzbl %cl, %ecx 4554; CHECK-NEXT: shlq $63, %rcx 4555; CHECK-NEXT: xorq %rax, %rcx 4556; CHECK-NEXT: movq %rcx, %xmm2 4557; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero 4558; CHECK-NEXT: comiss %xmm3, %xmm1 4559; CHECK-NEXT: ja .LBB116_4 4560; CHECK-NEXT: # %bb.3: # %entry 4561; CHECK-NEXT: movaps %xmm1, %xmm0 4562; CHECK-NEXT: .LBB116_4: # %entry 4563; CHECK-NEXT: subss %xmm0, %xmm3 4564; CHECK-NEXT: cvttss2si %xmm3, %rax 4565; CHECK-NEXT: setbe %cl 4566; CHECK-NEXT: movzbl %cl, %ecx 4567; CHECK-NEXT: shlq $63, %rcx 4568; CHECK-NEXT: xorq %rax, %rcx 4569; CHECK-NEXT: movq %rcx, %xmm0 4570; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 4571; CHECK-NEXT: retq 4572; 4573; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f32: 4574; AVX1: # %bb.0: # %entry 4575; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4576; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 4577; AVX1-NEXT: vcomiss %xmm2, %xmm0 4578; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 4579; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 4580; AVX1-NEXT: ja .LBB116_2 4581; AVX1-NEXT: # %bb.1: # %entry 4582; AVX1-NEXT: vmovaps %xmm0, %xmm3 4583; AVX1-NEXT: .LBB116_2: # %entry 4584; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 4585; AVX1-NEXT: vcvttss2si %xmm2, %rax 4586; AVX1-NEXT: setbe %cl 4587; AVX1-NEXT: movzbl %cl, %ecx 4588; AVX1-NEXT: shlq $63, %rcx 4589; 
AVX1-NEXT: xorq %rax, %rcx 4590; AVX1-NEXT: vmovq %rcx, %xmm2 4591; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 4592; AVX1-NEXT: vcomiss %xmm3, %xmm0 4593; AVX1-NEXT: ja .LBB116_4 4594; AVX1-NEXT: # %bb.3: # %entry 4595; AVX1-NEXT: vmovaps %xmm0, %xmm1 4596; AVX1-NEXT: .LBB116_4: # %entry 4597; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 4598; AVX1-NEXT: vcvttss2si %xmm0, %rax 4599; AVX1-NEXT: setbe %cl 4600; AVX1-NEXT: movzbl %cl, %ecx 4601; AVX1-NEXT: shlq $63, %rcx 4602; AVX1-NEXT: xorq %rax, %rcx 4603; AVX1-NEXT: vmovq %rcx, %xmm0 4604; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 4605; AVX1-NEXT: retq 4606; 4607; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f32: 4608; AVX512F: # %bb.0: # %entry 4609; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4610; AVX512F-NEXT: vmovq %rax, %xmm0 4611; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4612; AVX512F-NEXT: vmovq %rax, %xmm1 4613; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4614; AVX512F-NEXT: retq 4615; 4616; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f32: 4617; AVX512DQ: # %bb.0: # %entry 4618; AVX512DQ-NEXT: vcvttps2uqq {{.*}}(%rip), %zmm0 4619; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4620; AVX512DQ-NEXT: vzeroupper 4621; AVX512DQ-NEXT: retq 4622entry: 4623 %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32( 4624 <2 x float><float 42.0, float 43.0>, 4625 metadata !"fpexcept.strict") #0 4626 ret <2 x i64> %result 4627} 4628 4629define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { 4630; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32: 4631; CHECK: # %bb.0: # %entry 4632; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4633; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 4634; CHECK-NEXT: comiss %xmm2, %xmm1 4635; CHECK-NEXT: xorps %xmm0, %xmm0 4636; CHECK-NEXT: xorps %xmm3, %xmm3 4637; CHECK-NEXT: ja .LBB117_2 4638; CHECK-NEXT: # %bb.1: # %entry 4639; CHECK-NEXT: movaps %xmm1, %xmm3 4640; CHECK-NEXT: 
.LBB117_2: # %entry 4641; CHECK-NEXT: subss %xmm3, %xmm2 4642; CHECK-NEXT: cvttss2si %xmm2, %rcx 4643; CHECK-NEXT: setbe %al 4644; CHECK-NEXT: movzbl %al, %eax 4645; CHECK-NEXT: shlq $63, %rax 4646; CHECK-NEXT: xorq %rcx, %rax 4647; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4648; CHECK-NEXT: comiss %xmm2, %xmm1 4649; CHECK-NEXT: xorps %xmm3, %xmm3 4650; CHECK-NEXT: ja .LBB117_4 4651; CHECK-NEXT: # %bb.3: # %entry 4652; CHECK-NEXT: movaps %xmm1, %xmm3 4653; CHECK-NEXT: .LBB117_4: # %entry 4654; CHECK-NEXT: subss %xmm3, %xmm2 4655; CHECK-NEXT: cvttss2si %xmm2, %rcx 4656; CHECK-NEXT: setbe %dl 4657; CHECK-NEXT: movzbl %dl, %edx 4658; CHECK-NEXT: shlq $63, %rdx 4659; CHECK-NEXT: xorq %rcx, %rdx 4660; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4661; CHECK-NEXT: comiss %xmm2, %xmm1 4662; CHECK-NEXT: ja .LBB117_6 4663; CHECK-NEXT: # %bb.5: # %entry 4664; CHECK-NEXT: movaps %xmm1, %xmm0 4665; CHECK-NEXT: .LBB117_6: # %entry 4666; CHECK-NEXT: subss %xmm0, %xmm2 4667; CHECK-NEXT: cvttss2si %xmm2, %rsi 4668; CHECK-NEXT: setbe %cl 4669; CHECK-NEXT: movzbl %cl, %ecx 4670; CHECK-NEXT: shlq $63, %rcx 4671; CHECK-NEXT: xorq %rsi, %rcx 4672; CHECK-NEXT: retq 4673; 4674; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32: 4675; AVX1: # %bb.0: # %entry 4676; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4677; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 4678; AVX1-NEXT: vcomiss %xmm2, %xmm0 4679; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 4680; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 4681; AVX1-NEXT: ja .LBB117_2 4682; AVX1-NEXT: # %bb.1: # %entry 4683; AVX1-NEXT: vmovaps %xmm0, %xmm3 4684; AVX1-NEXT: .LBB117_2: # %entry 4685; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 4686; AVX1-NEXT: vcvttss2si %xmm2, %rax 4687; AVX1-NEXT: setbe %cl 4688; AVX1-NEXT: movzbl %cl, %ecx 4689; AVX1-NEXT: shlq $63, %rcx 4690; AVX1-NEXT: xorq %rax, %rcx 4691; AVX1-NEXT: vmovq %rcx, %xmm2 4692; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 4693; AVX1-NEXT: 
vcomiss %xmm3, %xmm0 4694; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 4695; AVX1-NEXT: ja .LBB117_4 4696; AVX1-NEXT: # %bb.3: # %entry 4697; AVX1-NEXT: vmovaps %xmm0, %xmm4 4698; AVX1-NEXT: .LBB117_4: # %entry 4699; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 4700; AVX1-NEXT: vcvttss2si %xmm3, %rax 4701; AVX1-NEXT: setbe %cl 4702; AVX1-NEXT: movzbl %cl, %ecx 4703; AVX1-NEXT: shlq $63, %rcx 4704; AVX1-NEXT: xorq %rax, %rcx 4705; AVX1-NEXT: vmovq %rcx, %xmm3 4706; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 4707; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 4708; AVX1-NEXT: vcomiss %xmm3, %xmm0 4709; AVX1-NEXT: ja .LBB117_6 4710; AVX1-NEXT: # %bb.5: # %entry 4711; AVX1-NEXT: vmovaps %xmm0, %xmm1 4712; AVX1-NEXT: .LBB117_6: # %entry 4713; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 4714; AVX1-NEXT: vcvttss2si %xmm0, %rax 4715; AVX1-NEXT: setbe %cl 4716; AVX1-NEXT: movzbl %cl, %ecx 4717; AVX1-NEXT: shlq $63, %rcx 4718; AVX1-NEXT: xorq %rax, %rcx 4719; AVX1-NEXT: vmovq %rcx, %xmm0 4720; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 4721; AVX1-NEXT: retq 4722; 4723; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32: 4724; AVX512: # %bb.0: # %entry 4725; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4726; AVX512-NEXT: vmovq %rax, %xmm0 4727; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4728; AVX512-NEXT: vmovq %rax, %xmm1 4729; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4730; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4731; AVX512-NEXT: vmovq %rax, %xmm1 4732; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4733; AVX512-NEXT: retq 4734entry: 4735 %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32( 4736 <3 x float><float 42.0, float 43.0, 4737 float 44.0>, 4738 metadata !"fpexcept.strict") #0 4739 ret <3 x i64> %result 4740} 4741 4742define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { 4743; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32: 4744; CHECK: # %bb.0: # %entry 4745; CHECK-NEXT: movss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero 4746; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4747; CHECK-NEXT: comiss %xmm0, %xmm2 4748; CHECK-NEXT: xorps %xmm1, %xmm1 4749; CHECK-NEXT: xorps %xmm3, %xmm3 4750; CHECK-NEXT: ja .LBB118_2 4751; CHECK-NEXT: # %bb.1: # %entry 4752; CHECK-NEXT: movaps %xmm2, %xmm3 4753; CHECK-NEXT: .LBB118_2: # %entry 4754; CHECK-NEXT: subss %xmm3, %xmm0 4755; CHECK-NEXT: cvttss2si %xmm0, %rcx 4756; CHECK-NEXT: setbe %al 4757; CHECK-NEXT: movzbl %al, %eax 4758; CHECK-NEXT: shlq $63, %rax 4759; CHECK-NEXT: xorq %rcx, %rax 4760; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 4761; CHECK-NEXT: comiss %xmm0, %xmm2 4762; CHECK-NEXT: xorps %xmm4, %xmm4 4763; CHECK-NEXT: ja .LBB118_4 4764; CHECK-NEXT: # %bb.3: # %entry 4765; CHECK-NEXT: movaps %xmm2, %xmm4 4766; CHECK-NEXT: .LBB118_4: # %entry 4767; CHECK-NEXT: movq %rax, %xmm3 4768; CHECK-NEXT: subss %xmm4, %xmm0 4769; CHECK-NEXT: cvttss2si %xmm0, %rax 4770; CHECK-NEXT: setbe %cl 4771; CHECK-NEXT: movzbl %cl, %ecx 4772; CHECK-NEXT: shlq $63, %rcx 4773; CHECK-NEXT: xorq %rax, %rcx 4774; CHECK-NEXT: movq %rcx, %xmm0 4775; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero 4776; CHECK-NEXT: comiss %xmm4, %xmm2 4777; CHECK-NEXT: xorps %xmm5, %xmm5 4778; CHECK-NEXT: ja .LBB118_6 4779; CHECK-NEXT: # %bb.5: # %entry 4780; CHECK-NEXT: movaps %xmm2, %xmm5 4781; CHECK-NEXT: .LBB118_6: # %entry 4782; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 4783; CHECK-NEXT: subss %xmm5, %xmm4 4784; CHECK-NEXT: cvttss2si %xmm4, %rax 4785; CHECK-NEXT: setbe %cl 4786; CHECK-NEXT: movzbl %cl, %ecx 4787; CHECK-NEXT: shlq $63, %rcx 4788; CHECK-NEXT: xorq %rax, %rcx 4789; CHECK-NEXT: movq %rcx, %xmm3 4790; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero 4791; CHECK-NEXT: comiss %xmm4, %xmm2 4792; CHECK-NEXT: ja .LBB118_8 4793; CHECK-NEXT: # %bb.7: # %entry 4794; CHECK-NEXT: movaps %xmm2, %xmm1 4795; CHECK-NEXT: .LBB118_8: # %entry 4796; CHECK-NEXT: subss %xmm1, %xmm4 4797; CHECK-NEXT: cvttss2si 
%xmm4, %rax 4798; CHECK-NEXT: setbe %cl 4799; CHECK-NEXT: movzbl %cl, %ecx 4800; CHECK-NEXT: shlq $63, %rcx 4801; CHECK-NEXT: xorq %rax, %rcx 4802; CHECK-NEXT: movq %rcx, %xmm1 4803; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 4804; CHECK-NEXT: retq 4805; 4806; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32: 4807; AVX1: # %bb.0: # %entry 4808; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 4809; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 4810; AVX1-NEXT: vcomiss %xmm2, %xmm0 4811; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 4812; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 4813; AVX1-NEXT: ja .LBB118_2 4814; AVX1-NEXT: # %bb.1: # %entry 4815; AVX1-NEXT: vmovaps %xmm0, %xmm3 4816; AVX1-NEXT: .LBB118_2: # %entry 4817; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 4818; AVX1-NEXT: vcvttss2si %xmm2, %rcx 4819; AVX1-NEXT: setbe %al 4820; AVX1-NEXT: movzbl %al, %eax 4821; AVX1-NEXT: shlq $63, %rax 4822; AVX1-NEXT: xorq %rcx, %rax 4823; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 4824; AVX1-NEXT: vcomiss %xmm3, %xmm0 4825; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 4826; AVX1-NEXT: ja .LBB118_4 4827; AVX1-NEXT: # %bb.3: # %entry 4828; AVX1-NEXT: vmovaps %xmm0, %xmm4 4829; AVX1-NEXT: .LBB118_4: # %entry 4830; AVX1-NEXT: vmovq %rax, %xmm2 4831; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 4832; AVX1-NEXT: vcvttss2si %xmm3, %rax 4833; AVX1-NEXT: setbe %cl 4834; AVX1-NEXT: movzbl %cl, %ecx 4835; AVX1-NEXT: shlq $63, %rcx 4836; AVX1-NEXT: xorq %rax, %rcx 4837; AVX1-NEXT: vmovq %rcx, %xmm3 4838; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero 4839; AVX1-NEXT: vcomiss %xmm4, %xmm0 4840; AVX1-NEXT: vxorps %xmm5, %xmm5, %xmm5 4841; AVX1-NEXT: ja .LBB118_6 4842; AVX1-NEXT: # %bb.5: # %entry 4843; AVX1-NEXT: vmovaps %xmm0, %xmm5 4844; AVX1-NEXT: .LBB118_6: # %entry 4845; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 4846; AVX1-NEXT: vsubss %xmm5, %xmm4, %xmm3 4847; AVX1-NEXT: vcvttss2si %xmm3, %rax 4848; AVX1-NEXT: setbe %cl 4849; AVX1-NEXT: movzbl 
%cl, %ecx 4850; AVX1-NEXT: shlq $63, %rcx 4851; AVX1-NEXT: xorq %rax, %rcx 4852; AVX1-NEXT: vmovq %rcx, %xmm3 4853; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero 4854; AVX1-NEXT: vcomiss %xmm4, %xmm0 4855; AVX1-NEXT: ja .LBB118_8 4856; AVX1-NEXT: # %bb.7: # %entry 4857; AVX1-NEXT: vmovaps %xmm0, %xmm1 4858; AVX1-NEXT: .LBB118_8: # %entry 4859; AVX1-NEXT: vsubss %xmm1, %xmm4, %xmm0 4860; AVX1-NEXT: vcvttss2si %xmm0, %rax 4861; AVX1-NEXT: setbe %cl 4862; AVX1-NEXT: movzbl %cl, %ecx 4863; AVX1-NEXT: shlq $63, %rcx 4864; AVX1-NEXT: xorq %rax, %rcx 4865; AVX1-NEXT: vmovq %rcx, %xmm0 4866; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 4867; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4868; AVX1-NEXT: retq 4869; 4870; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32: 4871; AVX512F: # %bb.0: # %entry 4872; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4873; AVX512F-NEXT: vmovq %rax, %xmm0 4874; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4875; AVX512F-NEXT: vmovq %rax, %xmm1 4876; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4877; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4878; AVX512F-NEXT: vmovq %rax, %xmm1 4879; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax 4880; AVX512F-NEXT: vmovq %rax, %xmm2 4881; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4882; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 4883; AVX512F-NEXT: retq 4884; 4885; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f32: 4886; AVX512DQ: # %bb.0: # %entry 4887; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4888; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 4889; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 4890; AVX512DQ-NEXT: retq 4891entry: 4892 %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32( 4893 <4 x float><float 42.0, float 43.0, 4894 float 44.0, float 45.0>, 4895 metadata !"fpexcept.strict") #0 4896 ret <4 x i64> %result 4897} 4898 4899define <1 x i32> 
@constrained_vector_fptoui_v1i32_v1f64() #0 { 4900; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f64: 4901; CHECK: # %bb.0: # %entry 4902; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4903; CHECK-NEXT: # kill: def $eax killed $eax killed $rax 4904; CHECK-NEXT: retq 4905; 4906; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f64: 4907; AVX1: # %bb.0: # %entry 4908; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4909; AVX1-NEXT: # kill: def $eax killed $eax killed $rax 4910; AVX1-NEXT: retq 4911; 4912; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f64: 4913; AVX512: # %bb.0: # %entry 4914; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax 4915; AVX512-NEXT: retq 4916entry: 4917 %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64( 4918 <1 x double><double 42.1>, 4919 metadata !"fpexcept.strict") #0 4920 ret <1 x i32> %result 4921} 4922 4923define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() #0 { 4924; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f64: 4925; CHECK: # %bb.0: # %entry 4926; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4927; CHECK-NEXT: movd %eax, %xmm1 4928; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4929; CHECK-NEXT: movd %eax, %xmm0 4930; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4931; CHECK-NEXT: retq 4932; 4933; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f64: 4934; AVX1: # %bb.0: # %entry 4935; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4936; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rcx 4937; AVX1-NEXT: vmovd %ecx, %xmm0 4938; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4939; AVX1-NEXT: retq 4940; 4941; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f64: 4942; AVX512: # %bb.0: # %entry 4943; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,0.0E+0,0.0E+0] 4944; AVX512-NEXT: vcvttpd2udq %zmm0, %ymm0 4945; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 4946; AVX512-NEXT: vzeroupper 4947; AVX512-NEXT: retq 4948entry: 4949 %result = call <2 x i32> 
@llvm.experimental.constrained.fptoui.v2i32.v2f64( 4950 <2 x double><double 42.1, double 42.2>, 4951 metadata !"fpexcept.strict") #0 4952 ret <2 x i32> %result 4953} 4954 4955define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 { 4956; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64: 4957; CHECK: # %bb.0: # %entry 4958; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4959; CHECK-NEXT: movd %eax, %xmm1 4960; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4961; CHECK-NEXT: movd %eax, %xmm0 4962; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4963; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4964; CHECK-NEXT: movd %eax, %xmm1 4965; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4966; CHECK-NEXT: retq 4967; 4968; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f64: 4969; AVX1: # %bb.0: # %entry 4970; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4971; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rcx 4972; AVX1-NEXT: vmovd %ecx, %xmm0 4973; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4974; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax 4975; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4976; AVX1-NEXT: retq 4977; 4978; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f64: 4979; AVX512: # %bb.0: # %entry 4980; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax 4981; AVX512-NEXT: vmovd %eax, %xmm0 4982; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax 4983; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4984; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %eax 4985; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4986; AVX512-NEXT: retq 4987entry: 4988 %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64( 4989 <3 x double><double 42.1, double 42.2, 4990 double 42.3>, 4991 metadata !"fpexcept.strict") #0 4992 ret <3 x i32> %result 4993} 4994 4995define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() #0 { 4996; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f64: 4997; CHECK: # %bb.0: # %entry 4998; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 4999; CHECK-NEXT: movd %eax, %xmm0 
5000; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 5001; CHECK-NEXT: movd %eax, %xmm1 5002; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 5003; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 5004; CHECK-NEXT: movd %eax, %xmm2 5005; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax 5006; CHECK-NEXT: movd %eax, %xmm0 5007; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 5008; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 5009; CHECK-NEXT: retq 5010; 5011; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f64: 5012; AVX1: # %bb.0: # %entry 5013; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 5014; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] 5015; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm2 5016; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 5017; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2] 5018; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 5019; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] 5020; AVX1-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3 5021; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 5022; AVX1-NEXT: vblendvpd %ymm2, %ymm4, %ymm0, %ymm0 5023; AVX1-NEXT: vsubpd %ymm0, %ymm1, %ymm0 5024; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 5025; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm0 5026; AVX1-NEXT: vzeroupper 5027; AVX1-NEXT: retq 5028; 5029; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f64: 5030; AVX512: # %bb.0: # %entry 5031; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] 5032; AVX512-NEXT: vcvttpd2udq %zmm0, %ymm0 5033; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 5034; AVX512-NEXT: vzeroupper 5035; AVX512-NEXT: retq 5036entry: 5037 %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64( 5038 <4 x double><double 42.1, double 42.2, 5039 double 42.3, double 42.4>, 5040 metadata 
!"fpexcept.strict") #0 5041 ret <4 x i32> %result 5042} 5043 5044define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 { 5045; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64: 5046; CHECK: # %bb.0: # %entry 5047; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5048; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 5049; CHECK-NEXT: comisd %xmm0, %xmm2 5050; CHECK-NEXT: xorpd %xmm1, %xmm1 5051; CHECK-NEXT: ja .LBB123_2 5052; CHECK-NEXT: # %bb.1: # %entry 5053; CHECK-NEXT: movapd %xmm2, %xmm1 5054; CHECK-NEXT: .LBB123_2: # %entry 5055; CHECK-NEXT: subsd %xmm1, %xmm0 5056; CHECK-NEXT: cvttsd2si %xmm0, %rcx 5057; CHECK-NEXT: setbe %al 5058; CHECK-NEXT: movzbl %al, %eax 5059; CHECK-NEXT: shlq $63, %rax 5060; CHECK-NEXT: xorq %rcx, %rax 5061; CHECK-NEXT: retq 5062; 5063; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64: 5064; AVX1: # %bb.0: # %entry 5065; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5066; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 5067; AVX1-NEXT: vcomisd %xmm0, %xmm1 5068; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 5069; AVX1-NEXT: ja .LBB123_2 5070; AVX1-NEXT: # %bb.1: # %entry 5071; AVX1-NEXT: vmovapd %xmm1, %xmm2 5072; AVX1-NEXT: .LBB123_2: # %entry 5073; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0 5074; AVX1-NEXT: vcvttsd2si %xmm0, %rcx 5075; AVX1-NEXT: setbe %al 5076; AVX1-NEXT: movzbl %al, %eax 5077; AVX1-NEXT: shlq $63, %rax 5078; AVX1-NEXT: xorq %rcx, %rax 5079; AVX1-NEXT: retq 5080; 5081; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64: 5082; AVX512: # %bb.0: # %entry 5083; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5084; AVX512-NEXT: retq 5085entry: 5086 %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64( 5087 <1 x double><double 42.1>, 5088 metadata !"fpexcept.strict") #0 5089 ret <1 x i64> %result 5090} 5091 5092define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 { 5093; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64: 5094; CHECK: # %bb.0: # %entry 5095; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 
5096; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 5097; CHECK-NEXT: comisd %xmm2, %xmm1 5098; CHECK-NEXT: xorpd %xmm0, %xmm0 5099; CHECK-NEXT: xorpd %xmm3, %xmm3 5100; CHECK-NEXT: ja .LBB124_2 5101; CHECK-NEXT: # %bb.1: # %entry 5102; CHECK-NEXT: movapd %xmm1, %xmm3 5103; CHECK-NEXT: .LBB124_2: # %entry 5104; CHECK-NEXT: subsd %xmm3, %xmm2 5105; CHECK-NEXT: cvttsd2si %xmm2, %rax 5106; CHECK-NEXT: setbe %cl 5107; CHECK-NEXT: movzbl %cl, %ecx 5108; CHECK-NEXT: shlq $63, %rcx 5109; CHECK-NEXT: xorq %rax, %rcx 5110; CHECK-NEXT: movq %rcx, %xmm2 5111; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero 5112; CHECK-NEXT: comisd %xmm3, %xmm1 5113; CHECK-NEXT: ja .LBB124_4 5114; CHECK-NEXT: # %bb.3: # %entry 5115; CHECK-NEXT: movapd %xmm1, %xmm0 5116; CHECK-NEXT: .LBB124_4: # %entry 5117; CHECK-NEXT: subsd %xmm0, %xmm3 5118; CHECK-NEXT: cvttsd2si %xmm3, %rax 5119; CHECK-NEXT: setbe %cl 5120; CHECK-NEXT: movzbl %cl, %ecx 5121; CHECK-NEXT: shlq $63, %rcx 5122; CHECK-NEXT: xorq %rax, %rcx 5123; CHECK-NEXT: movq %rcx, %xmm0 5124; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 5125; CHECK-NEXT: retq 5126; 5127; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64: 5128; AVX1: # %bb.0: # %entry 5129; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero 5130; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5131; AVX1-NEXT: vcomisd %xmm2, %xmm0 5132; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 5133; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 5134; AVX1-NEXT: ja .LBB124_2 5135; AVX1-NEXT: # %bb.1: # %entry 5136; AVX1-NEXT: vmovapd %xmm0, %xmm3 5137; AVX1-NEXT: .LBB124_2: # %entry 5138; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 5139; AVX1-NEXT: vcvttsd2si %xmm2, %rax 5140; AVX1-NEXT: setbe %cl 5141; AVX1-NEXT: movzbl %cl, %ecx 5142; AVX1-NEXT: shlq $63, %rcx 5143; AVX1-NEXT: xorq %rax, %rcx 5144; AVX1-NEXT: vmovq %rcx, %xmm2 5145; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 5146; AVX1-NEXT: vcomisd %xmm3, %xmm0 5147; AVX1-NEXT: ja .LBB124_4 5148; AVX1-NEXT: # %bb.3: # %entry 5149; AVX1-NEXT: vmovapd 
%xmm0, %xmm1 5150; AVX1-NEXT: .LBB124_4: # %entry 5151; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 5152; AVX1-NEXT: vcvttsd2si %xmm0, %rax 5153; AVX1-NEXT: setbe %cl 5154; AVX1-NEXT: movzbl %cl, %ecx 5155; AVX1-NEXT: shlq $63, %rcx 5156; AVX1-NEXT: xorq %rax, %rcx 5157; AVX1-NEXT: vmovq %rcx, %xmm0 5158; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 5159; AVX1-NEXT: retq 5160; 5161; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64: 5162; AVX512F: # %bb.0: # %entry 5163; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5164; AVX512F-NEXT: vmovq %rax, %xmm0 5165; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5166; AVX512F-NEXT: vmovq %rax, %xmm1 5167; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5168; AVX512F-NEXT: retq 5169; 5170; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f64: 5171; AVX512DQ: # %bb.0: # %entry 5172; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1] 5173; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 5174; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 5175; AVX512DQ-NEXT: vzeroupper 5176; AVX512DQ-NEXT: retq 5177entry: 5178 %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64( 5179 <2 x double><double 42.1, double 42.2>, 5180 metadata !"fpexcept.strict") #0 5181 ret <2 x i64> %result 5182} 5183 5184define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { 5185; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64: 5186; CHECK: # %bb.0: # %entry 5187; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 5188; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 5189; CHECK-NEXT: comisd %xmm2, %xmm1 5190; CHECK-NEXT: xorpd %xmm0, %xmm0 5191; CHECK-NEXT: xorpd %xmm3, %xmm3 5192; CHECK-NEXT: ja .LBB125_2 5193; CHECK-NEXT: # %bb.1: # %entry 5194; CHECK-NEXT: movapd %xmm1, %xmm3 5195; CHECK-NEXT: .LBB125_2: # %entry 5196; CHECK-NEXT: subsd %xmm3, %xmm2 5197; CHECK-NEXT: cvttsd2si %xmm2, %rcx 5198; CHECK-NEXT: setbe %al 5199; CHECK-NEXT: movzbl %al, %eax 5200; CHECK-NEXT: shlq $63, 
%rax 5201; CHECK-NEXT: xorq %rcx, %rax 5202; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 5203; CHECK-NEXT: comisd %xmm2, %xmm1 5204; CHECK-NEXT: xorpd %xmm3, %xmm3 5205; CHECK-NEXT: ja .LBB125_4 5206; CHECK-NEXT: # %bb.3: # %entry 5207; CHECK-NEXT: movapd %xmm1, %xmm3 5208; CHECK-NEXT: .LBB125_4: # %entry 5209; CHECK-NEXT: subsd %xmm3, %xmm2 5210; CHECK-NEXT: cvttsd2si %xmm2, %rcx 5211; CHECK-NEXT: setbe %dl 5212; CHECK-NEXT: movzbl %dl, %edx 5213; CHECK-NEXT: shlq $63, %rdx 5214; CHECK-NEXT: xorq %rcx, %rdx 5215; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 5216; CHECK-NEXT: comisd %xmm2, %xmm1 5217; CHECK-NEXT: ja .LBB125_6 5218; CHECK-NEXT: # %bb.5: # %entry 5219; CHECK-NEXT: movapd %xmm1, %xmm0 5220; CHECK-NEXT: .LBB125_6: # %entry 5221; CHECK-NEXT: subsd %xmm0, %xmm2 5222; CHECK-NEXT: cvttsd2si %xmm2, %rsi 5223; CHECK-NEXT: setbe %cl 5224; CHECK-NEXT: movzbl %cl, %ecx 5225; CHECK-NEXT: shlq $63, %rcx 5226; CHECK-NEXT: xorq %rsi, %rcx 5227; CHECK-NEXT: retq 5228; 5229; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64: 5230; AVX1: # %bb.0: # %entry 5231; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero 5232; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5233; AVX1-NEXT: vcomisd %xmm2, %xmm0 5234; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 5235; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 5236; AVX1-NEXT: ja .LBB125_2 5237; AVX1-NEXT: # %bb.1: # %entry 5238; AVX1-NEXT: vmovapd %xmm0, %xmm3 5239; AVX1-NEXT: .LBB125_2: # %entry 5240; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 5241; AVX1-NEXT: vcvttsd2si %xmm2, %rax 5242; AVX1-NEXT: setbe %cl 5243; AVX1-NEXT: movzbl %cl, %ecx 5244; AVX1-NEXT: shlq $63, %rcx 5245; AVX1-NEXT: xorq %rax, %rcx 5246; AVX1-NEXT: vmovq %rcx, %xmm2 5247; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 5248; AVX1-NEXT: vcomisd %xmm3, %xmm0 5249; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4 5250; AVX1-NEXT: ja .LBB125_4 5251; AVX1-NEXT: # %bb.3: # %entry 5252; AVX1-NEXT: vmovapd %xmm0, %xmm4 5253; AVX1-NEXT: .LBB125_4: # %entry 5254; AVX1-NEXT: vsubsd %xmm4, %xmm3, 
%xmm3 5255; AVX1-NEXT: vcvttsd2si %xmm3, %rax 5256; AVX1-NEXT: setbe %cl 5257; AVX1-NEXT: movzbl %cl, %ecx 5258; AVX1-NEXT: shlq $63, %rcx 5259; AVX1-NEXT: xorq %rax, %rcx 5260; AVX1-NEXT: vmovq %rcx, %xmm3 5261; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 5262; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 5263; AVX1-NEXT: vcomisd %xmm3, %xmm0 5264; AVX1-NEXT: ja .LBB125_6 5265; AVX1-NEXT: # %bb.5: # %entry 5266; AVX1-NEXT: vmovapd %xmm0, %xmm1 5267; AVX1-NEXT: .LBB125_6: # %entry 5268; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 5269; AVX1-NEXT: vcvttsd2si %xmm0, %rax 5270; AVX1-NEXT: setbe %cl 5271; AVX1-NEXT: movzbl %cl, %ecx 5272; AVX1-NEXT: shlq $63, %rcx 5273; AVX1-NEXT: xorq %rax, %rcx 5274; AVX1-NEXT: vmovq %rcx, %xmm0 5275; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 5276; AVX1-NEXT: retq 5277; 5278; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64: 5279; AVX512: # %bb.0: # %entry 5280; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5281; AVX512-NEXT: vmovq %rax, %xmm0 5282; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5283; AVX512-NEXT: vmovq %rax, %xmm1 5284; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5285; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5286; AVX512-NEXT: vmovq %rax, %xmm1 5287; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 5288; AVX512-NEXT: retq 5289entry: 5290 %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64( 5291 <3 x double><double 42.1, double 42.2, 5292 double 42.3>, 5293 metadata !"fpexcept.strict") #0 5294 ret <3 x i64> %result 5295} 5296 5297define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { 5298; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64: 5299; CHECK: # %bb.0: # %entry 5300; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5301; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 5302; CHECK-NEXT: comisd %xmm0, %xmm2 5303; CHECK-NEXT: xorpd %xmm1, %xmm1 5304; CHECK-NEXT: xorpd %xmm3, %xmm3 5305; CHECK-NEXT: ja .LBB126_2 5306; CHECK-NEXT: # %bb.1: # %entry 5307; 
CHECK-NEXT: movapd %xmm2, %xmm3 5308; CHECK-NEXT: .LBB126_2: # %entry 5309; CHECK-NEXT: subsd %xmm3, %xmm0 5310; CHECK-NEXT: cvttsd2si %xmm0, %rcx 5311; CHECK-NEXT: setbe %al 5312; CHECK-NEXT: movzbl %al, %eax 5313; CHECK-NEXT: shlq $63, %rax 5314; CHECK-NEXT: xorq %rcx, %rax 5315; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5316; CHECK-NEXT: comisd %xmm0, %xmm2 5317; CHECK-NEXT: xorpd %xmm4, %xmm4 5318; CHECK-NEXT: ja .LBB126_4 5319; CHECK-NEXT: # %bb.3: # %entry 5320; CHECK-NEXT: movapd %xmm2, %xmm4 5321; CHECK-NEXT: .LBB126_4: # %entry 5322; CHECK-NEXT: movq %rax, %xmm3 5323; CHECK-NEXT: subsd %xmm4, %xmm0 5324; CHECK-NEXT: cvttsd2si %xmm0, %rax 5325; CHECK-NEXT: setbe %cl 5326; CHECK-NEXT: movzbl %cl, %ecx 5327; CHECK-NEXT: shlq $63, %rcx 5328; CHECK-NEXT: xorq %rax, %rcx 5329; CHECK-NEXT: movq %rcx, %xmm0 5330; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero 5331; CHECK-NEXT: comisd %xmm4, %xmm2 5332; CHECK-NEXT: xorpd %xmm5, %xmm5 5333; CHECK-NEXT: ja .LBB126_6 5334; CHECK-NEXT: # %bb.5: # %entry 5335; CHECK-NEXT: movapd %xmm2, %xmm5 5336; CHECK-NEXT: .LBB126_6: # %entry 5337; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 5338; CHECK-NEXT: subsd %xmm5, %xmm4 5339; CHECK-NEXT: cvttsd2si %xmm4, %rax 5340; CHECK-NEXT: setbe %cl 5341; CHECK-NEXT: movzbl %cl, %ecx 5342; CHECK-NEXT: shlq $63, %rcx 5343; CHECK-NEXT: xorq %rax, %rcx 5344; CHECK-NEXT: movq %rcx, %xmm3 5345; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero 5346; CHECK-NEXT: comisd %xmm4, %xmm2 5347; CHECK-NEXT: ja .LBB126_8 5348; CHECK-NEXT: # %bb.7: # %entry 5349; CHECK-NEXT: movapd %xmm2, %xmm1 5350; CHECK-NEXT: .LBB126_8: # %entry 5351; CHECK-NEXT: subsd %xmm1, %xmm4 5352; CHECK-NEXT: cvttsd2si %xmm4, %rax 5353; CHECK-NEXT: setbe %cl 5354; CHECK-NEXT: movzbl %cl, %ecx 5355; CHECK-NEXT: shlq $63, %rcx 5356; CHECK-NEXT: xorq %rax, %rcx 5357; CHECK-NEXT: movq %rcx, %xmm1 5358; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 5359; CHECK-NEXT: retq 5360; 5361; AVX1-LABEL: 
constrained_vector_fptoui_v4i64_v4f64: 5362; AVX1: # %bb.0: # %entry 5363; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero 5364; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5365; AVX1-NEXT: vcomisd %xmm2, %xmm0 5366; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 5367; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 5368; AVX1-NEXT: ja .LBB126_2 5369; AVX1-NEXT: # %bb.1: # %entry 5370; AVX1-NEXT: vmovapd %xmm0, %xmm3 5371; AVX1-NEXT: .LBB126_2: # %entry 5372; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 5373; AVX1-NEXT: vcvttsd2si %xmm2, %rcx 5374; AVX1-NEXT: setbe %al 5375; AVX1-NEXT: movzbl %al, %eax 5376; AVX1-NEXT: shlq $63, %rax 5377; AVX1-NEXT: xorq %rcx, %rax 5378; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 5379; AVX1-NEXT: vcomisd %xmm3, %xmm0 5380; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4 5381; AVX1-NEXT: ja .LBB126_4 5382; AVX1-NEXT: # %bb.3: # %entry 5383; AVX1-NEXT: vmovapd %xmm0, %xmm4 5384; AVX1-NEXT: .LBB126_4: # %entry 5385; AVX1-NEXT: vmovq %rax, %xmm2 5386; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3 5387; AVX1-NEXT: vcvttsd2si %xmm3, %rax 5388; AVX1-NEXT: setbe %cl 5389; AVX1-NEXT: movzbl %cl, %ecx 5390; AVX1-NEXT: shlq $63, %rcx 5391; AVX1-NEXT: xorq %rax, %rcx 5392; AVX1-NEXT: vmovq %rcx, %xmm3 5393; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero 5394; AVX1-NEXT: vcomisd %xmm4, %xmm0 5395; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5 5396; AVX1-NEXT: ja .LBB126_6 5397; AVX1-NEXT: # %bb.5: # %entry 5398; AVX1-NEXT: vmovapd %xmm0, %xmm5 5399; AVX1-NEXT: .LBB126_6: # %entry 5400; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 5401; AVX1-NEXT: vsubsd %xmm5, %xmm4, %xmm3 5402; AVX1-NEXT: vcvttsd2si %xmm3, %rax 5403; AVX1-NEXT: setbe %cl 5404; AVX1-NEXT: movzbl %cl, %ecx 5405; AVX1-NEXT: shlq $63, %rcx 5406; AVX1-NEXT: xorq %rax, %rcx 5407; AVX1-NEXT: vmovq %rcx, %xmm3 5408; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero 5409; AVX1-NEXT: vcomisd %xmm4, %xmm0 5410; AVX1-NEXT: ja .LBB126_8 5411; AVX1-NEXT: # %bb.7: # %entry 5412; AVX1-NEXT: vmovapd %xmm0, %xmm1 5413; AVX1-NEXT: 
.LBB126_8: # %entry 5414; AVX1-NEXT: vsubsd %xmm1, %xmm4, %xmm0 5415; AVX1-NEXT: vcvttsd2si %xmm0, %rax 5416; AVX1-NEXT: setbe %cl 5417; AVX1-NEXT: movzbl %cl, %ecx 5418; AVX1-NEXT: shlq $63, %rcx 5419; AVX1-NEXT: xorq %rax, %rcx 5420; AVX1-NEXT: vmovq %rcx, %xmm0 5421; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 5422; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5423; AVX1-NEXT: retq 5424; 5425; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64: 5426; AVX512F: # %bb.0: # %entry 5427; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5428; AVX512F-NEXT: vmovq %rax, %xmm0 5429; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5430; AVX512F-NEXT: vmovq %rax, %xmm1 5431; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5432; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5433; AVX512F-NEXT: vmovq %rax, %xmm1 5434; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax 5435; AVX512F-NEXT: vmovq %rax, %xmm2 5436; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 5437; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 5438; AVX512F-NEXT: retq 5439; 5440; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f64: 5441; AVX512DQ: # %bb.0: # %entry 5442; AVX512DQ-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] 5443; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 5444; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 5445; AVX512DQ-NEXT: retq 5446entry: 5447 %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64( 5448 <4 x double><double 42.1, double 42.2, 5449 double 42.3, double 42.4>, 5450 metadata !"fpexcept.strict") #0 5451 ret <4 x i64> %result 5452} 5453 5454 5455define <1 x float> @constrained_vector_fptrunc_v1f64() #0 { 5456; CHECK-LABEL: constrained_vector_fptrunc_v1f64: 5457; CHECK: # %bb.0: # %entry 5458; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5459; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 5460; CHECK-NEXT: retq 5461; 5462; AVX-LABEL: 
constrained_vector_fptrunc_v1f64: 5463; AVX: # %bb.0: # %entry 5464; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5465; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 5466; AVX-NEXT: retq 5467entry: 5468 %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64( 5469 <1 x double><double 42.1>, 5470 metadata !"round.dynamic", 5471 metadata !"fpexcept.strict") #0 5472 ret <1 x float> %result 5473} 5474 5475define <2 x float> @constrained_vector_fptrunc_v2f64() #0 { 5476; CHECK-LABEL: constrained_vector_fptrunc_v2f64: 5477; CHECK: # %bb.0: # %entry 5478; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm0 5479; CHECK-NEXT: retq 5480; 5481; AVX-LABEL: constrained_vector_fptrunc_v2f64: 5482; AVX: # %bb.0: # %entry 5483; AVX-NEXT: vcvtpd2psx {{.*}}(%rip), %xmm0 5484; AVX-NEXT: retq 5485entry: 5486 %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64( 5487 <2 x double><double 42.1, double 42.2>, 5488 metadata !"round.dynamic", 5489 metadata !"fpexcept.strict") #0 5490 ret <2 x float> %result 5491} 5492 5493define <3 x float> @constrained_vector_fptrunc_v3f64() #0 { 5494; CHECK-LABEL: constrained_vector_fptrunc_v3f64: 5495; CHECK: # %bb.0: # %entry 5496; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5497; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1 5498; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5499; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 5500; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 5501; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 5502; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1 5503; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 5504; CHECK-NEXT: retq 5505; 5506; AVX-LABEL: constrained_vector_fptrunc_v3f64: 5507; AVX: # %bb.0: # %entry 5508; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5509; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 5510; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 5511; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 5512; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 5513; AVX-NEXT: vmovsd {{.*#+}} 
xmm1 = mem[0],zero 5514; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 5515; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 5516; AVX-NEXT: retq 5517entry: 5518 %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64( 5519 <3 x double><double 42.1, double 42.2, 5520 double 42.3>, 5521 metadata !"round.dynamic", 5522 metadata !"fpexcept.strict") #0 5523 ret <3 x float> %result 5524} 5525 5526define <4 x float> @constrained_vector_fptrunc_v4f64() #0 { 5527; CHECK-LABEL: constrained_vector_fptrunc_v4f64: 5528; CHECK: # %bb.0: # %entry 5529; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm1 5530; CHECK-NEXT: cvtpd2ps {{.*}}(%rip), %xmm0 5531; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 5532; CHECK-NEXT: retq 5533; 5534; AVX-LABEL: constrained_vector_fptrunc_v4f64: 5535; AVX: # %bb.0: # %entry 5536; AVX-NEXT: vcvtpd2psy {{.*}}(%rip), %xmm0 5537; AVX-NEXT: retq 5538entry: 5539 %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64( 5540 <4 x double><double 42.1, double 42.2, 5541 double 42.3, double 42.4>, 5542 metadata !"round.dynamic", 5543 metadata !"fpexcept.strict") #0 5544 ret <4 x float> %result 5545} 5546 5547define <1 x double> @constrained_vector_fpext_v1f32() #0 { 5548; CHECK-LABEL: constrained_vector_fpext_v1f32: 5549; CHECK: # %bb.0: # %entry 5550; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5551; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 5552; CHECK-NEXT: retq 5553; 5554; AVX-LABEL: constrained_vector_fpext_v1f32: 5555; AVX: # %bb.0: # %entry 5556; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5557; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 5558; AVX-NEXT: retq 5559entry: 5560 %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32( 5561 <1 x float><float 42.0>, 5562 metadata !"fpexcept.strict") #0 5563 ret <1 x double> %result 5564} 5565 5566define <2 x double> @constrained_vector_fpext_v2f32() #0 { 5567; CHECK-LABEL: constrained_vector_fpext_v2f32: 5568; CHECK: # %bb.0: 
# %entry 5569; CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm0 5570; CHECK-NEXT: retq 5571; 5572; AVX-LABEL: constrained_vector_fpext_v2f32: 5573; AVX: # %bb.0: # %entry 5574; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %xmm0 5575; AVX-NEXT: retq 5576entry: 5577 %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32( 5578 <2 x float><float 42.0, float 43.0>, 5579 metadata !"fpexcept.strict") #0 5580 ret <2 x double> %result 5581} 5582 5583define <3 x double> @constrained_vector_fpext_v3f32() #0 { 5584; CHECK-LABEL: constrained_vector_fpext_v3f32: 5585; CHECK: # %bb.0: # %entry 5586; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5587; CHECK-NEXT: cvtss2sd %xmm0, %xmm1 5588; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5589; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 5590; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 5591; CHECK-NEXT: cvtss2sd %xmm2, %xmm2 5592; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) 5593; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 5594; CHECK-NEXT: wait 5595; CHECK-NEXT: retq 5596; 5597; AVX-LABEL: constrained_vector_fpext_v3f32: 5598; AVX: # %bb.0: # %entry 5599; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5600; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 5601; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 5602; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 5603; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5604; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 5605; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 5606; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5607; AVX-NEXT: retq 5608entry: 5609 %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32( 5610 <3 x float><float 42.0, float 43.0, 5611 float 44.0>, 5612 metadata !"fpexcept.strict") #0 5613 ret <3 x double> %result 5614} 5615 5616define <4 x double> @constrained_vector_fpext_v4f32() #0 { 5617; CHECK-LABEL: constrained_vector_fpext_v4f32: 5618; CHECK: # %bb.0: # %entry 5619; CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm1 5620; 
CHECK-NEXT: cvtps2pd {{.*}}(%rip), %xmm0 5621; CHECK-NEXT: retq 5622; 5623; AVX-LABEL: constrained_vector_fpext_v4f32: 5624; AVX: # %bb.0: # %entry 5625; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %ymm0 5626; AVX-NEXT: retq 5627entry: 5628 %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32( 5629 <4 x float><float 42.0, float 43.0, 5630 float 44.0, float 45.0>, 5631 metadata !"fpexcept.strict") #0 5632 ret <4 x double> %result 5633} 5634 5635define <1 x float> @constrained_vector_ceil_v1f32() #0 { 5636; CHECK-LABEL: constrained_vector_ceil_v1f32: 5637; CHECK: # %bb.0: # %entry 5638; CHECK-NEXT: pushq %rax 5639; CHECK-NEXT: .cfi_def_cfa_offset 16 5640; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5641; CHECK-NEXT: callq ceilf 5642; CHECK-NEXT: popq %rax 5643; CHECK-NEXT: .cfi_def_cfa_offset 8 5644; CHECK-NEXT: retq 5645; 5646; AVX-LABEL: constrained_vector_ceil_v1f32: 5647; AVX: # %bb.0: # %entry 5648; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5649; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 5650; AVX-NEXT: retq 5651entry: 5652 %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32( 5653 <1 x float> <float 1.5>, 5654 metadata !"fpexcept.strict") #0 5655 ret <1 x float> %ceil 5656} 5657 5658define <2 x double> @constrained_vector_ceil_v2f64() #0 { 5659; CHECK-LABEL: constrained_vector_ceil_v2f64: 5660; CHECK: # %bb.0: # %entry 5661; CHECK-NEXT: subq $24, %rsp 5662; CHECK-NEXT: .cfi_def_cfa_offset 32 5663; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5664; CHECK-NEXT: callq ceil 5665; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 5666; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5667; CHECK-NEXT: callq ceil 5668; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 5669; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 5670; CHECK-NEXT: addq $24, %rsp 5671; CHECK-NEXT: .cfi_def_cfa_offset 8 5672; CHECK-NEXT: retq 5673; 5674; AVX-LABEL: constrained_vector_ceil_v2f64: 5675; AVX: # %bb.0: # %entry 5676; AVX-NEXT: 
vroundpd $10, {{.*}}(%rip), %xmm0 5677; AVX-NEXT: retq 5678entry: 5679 %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( 5680 <2 x double> <double 1.1, double 1.9>, 5681 metadata !"fpexcept.strict") #0 5682 ret <2 x double> %ceil 5683} 5684 5685define <3 x float> @constrained_vector_ceil_v3f32() #0 { 5686; CHECK-LABEL: constrained_vector_ceil_v3f32: 5687; CHECK: # %bb.0: # %entry 5688; CHECK-NEXT: subq $40, %rsp 5689; CHECK-NEXT: .cfi_def_cfa_offset 48 5690; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5691; CHECK-NEXT: callq ceilf 5692; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 5693; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5694; CHECK-NEXT: callq ceilf 5695; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 5696; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5697; CHECK-NEXT: callq ceilf 5698; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 5699; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 5700; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 5701; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 5702; CHECK-NEXT: movaps %xmm1, %xmm0 5703; CHECK-NEXT: addq $40, %rsp 5704; CHECK-NEXT: .cfi_def_cfa_offset 8 5705; CHECK-NEXT: retq 5706; 5707; AVX-LABEL: constrained_vector_ceil_v3f32: 5708; AVX: # %bb.0: # %entry 5709; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5710; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 5711; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 5712; AVX-NEXT: vroundss $10, %xmm1, %xmm1, %xmm1 5713; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 5714; AVX-NEXT: vroundss $10, %xmm2, %xmm2, %xmm2 5715; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 5716; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 5717; AVX-NEXT: retq 5718entry: 5719 %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32( 5720 <3 x float> <float 1.5, float 2.5, float 3.5>, 5721 metadata 
!"fpexcept.strict") #0 5722 ret <3 x float> %ceil 5723} 5724 5725define <3 x double> @constrained_vector_ceil_v3f64() #0 { 5726; CHECK-LABEL: constrained_vector_ceil_v3f64: 5727; CHECK: # %bb.0: # %entry 5728; CHECK-NEXT: subq $24, %rsp 5729; CHECK-NEXT: .cfi_def_cfa_offset 32 5730; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5731; CHECK-NEXT: callq ceil 5732; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 5733; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5734; CHECK-NEXT: callq ceil 5735; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 5736; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5737; CHECK-NEXT: callq ceil 5738; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 5739; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 5740; CHECK-NEXT: wait 5741; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 5742; CHECK-NEXT: # xmm0 = mem[0],zero 5743; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 5744; CHECK-NEXT: # xmm1 = mem[0],zero 5745; CHECK-NEXT: addq $24, %rsp 5746; CHECK-NEXT: .cfi_def_cfa_offset 8 5747; CHECK-NEXT: retq 5748; 5749; AVX-LABEL: constrained_vector_ceil_v3f64: 5750; AVX: # %bb.0: # %entry 5751; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5752; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 5753; AVX-NEXT: vroundpd $10, {{.*}}(%rip), %xmm1 5754; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5755; AVX-NEXT: retq 5756entry: 5757 %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64( 5758 <3 x double> <double 1.1, double 1.9, double 1.5>, 5759 metadata !"fpexcept.strict") #0 5760 ret <3 x double> %ceil 5761} 5762 5763define <1 x float> @constrained_vector_floor_v1f32() #0 { 5764; CHECK-LABEL: constrained_vector_floor_v1f32: 5765; CHECK: # %bb.0: # %entry 5766; CHECK-NEXT: pushq %rax 5767; CHECK-NEXT: .cfi_def_cfa_offset 16 5768; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5769; CHECK-NEXT: callq floorf 5770; CHECK-NEXT: popq %rax 5771; CHECK-NEXT: .cfi_def_cfa_offset 8 5772; CHECK-NEXT: retq 5773; 5774; 
AVX-LABEL: constrained_vector_floor_v1f32: 5775; AVX: # %bb.0: # %entry 5776; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5777; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 5778; AVX-NEXT: retq 5779entry: 5780 %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32( 5781 <1 x float> <float 1.5>, 5782 metadata !"fpexcept.strict") #0 5783 ret <1 x float> %floor 5784} 5785 5786 5787define <2 x double> @constrained_vector_floor_v2f64() #0 { 5788; CHECK-LABEL: constrained_vector_floor_v2f64: 5789; CHECK: # %bb.0: # %entry 5790; CHECK-NEXT: subq $24, %rsp 5791; CHECK-NEXT: .cfi_def_cfa_offset 32 5792; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5793; CHECK-NEXT: callq floor 5794; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 5795; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5796; CHECK-NEXT: callq floor 5797; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 5798; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 5799; CHECK-NEXT: addq $24, %rsp 5800; CHECK-NEXT: .cfi_def_cfa_offset 8 5801; CHECK-NEXT: retq 5802; 5803; AVX-LABEL: constrained_vector_floor_v2f64: 5804; AVX: # %bb.0: # %entry 5805; AVX-NEXT: vroundpd $9, {{.*}}(%rip), %xmm0 5806; AVX-NEXT: retq 5807entry: 5808 %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64( 5809 <2 x double> <double 1.1, double 1.9>, 5810 metadata !"fpexcept.strict") #0 5811 ret <2 x double> %floor 5812} 5813 5814define <3 x float> @constrained_vector_floor_v3f32() #0 { 5815; CHECK-LABEL: constrained_vector_floor_v3f32: 5816; CHECK: # %bb.0: # %entry 5817; CHECK-NEXT: subq $40, %rsp 5818; CHECK-NEXT: .cfi_def_cfa_offset 48 5819; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5820; CHECK-NEXT: callq floorf 5821; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 5822; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5823; CHECK-NEXT: callq floorf 5824; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 5825; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 
5826; CHECK-NEXT: callq floorf 5827; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 5828; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 5829; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 5830; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 5831; CHECK-NEXT: movaps %xmm1, %xmm0 5832; CHECK-NEXT: addq $40, %rsp 5833; CHECK-NEXT: .cfi_def_cfa_offset 8 5834; CHECK-NEXT: retq 5835; 5836; AVX-LABEL: constrained_vector_floor_v3f32: 5837; AVX: # %bb.0: # %entry 5838; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5839; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 5840; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 5841; AVX-NEXT: vroundss $9, %xmm1, %xmm1, %xmm1 5842; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 5843; AVX-NEXT: vroundss $9, %xmm2, %xmm2, %xmm2 5844; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 5845; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 5846; AVX-NEXT: retq 5847entry: 5848 %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32( 5849 <3 x float> <float 1.5, float 2.5, float 3.5>, 5850 metadata !"fpexcept.strict") #0 5851 ret <3 x float> %floor 5852} 5853 5854define <3 x double> @constrained_vector_floor_v3f64() #0 { 5855; CHECK-LABEL: constrained_vector_floor_v3f64: 5856; CHECK: # %bb.0: # %entry 5857; CHECK-NEXT: subq $24, %rsp 5858; CHECK-NEXT: .cfi_def_cfa_offset 32 5859; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5860; CHECK-NEXT: callq floor 5861; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 5862; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5863; CHECK-NEXT: callq floor 5864; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 5865; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5866; CHECK-NEXT: callq floor 5867; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 5868; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 5869; CHECK-NEXT: wait 5870; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 5871; CHECK-NEXT: # xmm0 = mem[0],zero 
5872; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 5873; CHECK-NEXT: # xmm1 = mem[0],zero 5874; CHECK-NEXT: addq $24, %rsp 5875; CHECK-NEXT: .cfi_def_cfa_offset 8 5876; CHECK-NEXT: retq 5877; 5878; AVX-LABEL: constrained_vector_floor_v3f64: 5879; AVX: # %bb.0: # %entry 5880; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5881; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 5882; AVX-NEXT: vroundpd $9, {{.*}}(%rip), %xmm1 5883; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5884; AVX-NEXT: retq 5885entry: 5886 %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64( 5887 <3 x double> <double 1.1, double 1.9, double 1.5>, 5888 metadata !"fpexcept.strict") #0 5889 ret <3 x double> %floor 5890} 5891 5892define <1 x float> @constrained_vector_round_v1f32() #0 { 5893; CHECK-LABEL: constrained_vector_round_v1f32: 5894; CHECK: # %bb.0: # %entry 5895; CHECK-NEXT: pushq %rax 5896; CHECK-NEXT: .cfi_def_cfa_offset 16 5897; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5898; CHECK-NEXT: callq roundf 5899; CHECK-NEXT: popq %rax 5900; CHECK-NEXT: .cfi_def_cfa_offset 8 5901; CHECK-NEXT: retq 5902; 5903; AVX-LABEL: constrained_vector_round_v1f32: 5904; AVX: # %bb.0: # %entry 5905; AVX-NEXT: pushq %rax 5906; AVX-NEXT: .cfi_def_cfa_offset 16 5907; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5908; AVX-NEXT: callq roundf 5909; AVX-NEXT: popq %rax 5910; AVX-NEXT: .cfi_def_cfa_offset 8 5911; AVX-NEXT: retq 5912entry: 5913 %round = call <1 x float> @llvm.experimental.constrained.round.v1f32( 5914 <1 x float> <float 1.5>, 5915 metadata !"fpexcept.strict") #0 5916 ret <1 x float> %round 5917} 5918 5919define <2 x double> @constrained_vector_round_v2f64() #0 { 5920; CHECK-LABEL: constrained_vector_round_v2f64: 5921; CHECK: # %bb.0: # %entry 5922; CHECK-NEXT: subq $24, %rsp 5923; CHECK-NEXT: .cfi_def_cfa_offset 32 5924; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5925; CHECK-NEXT: callq round 5926; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte 
Spill 5927; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5928; CHECK-NEXT: callq round 5929; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 5930; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 5931; CHECK-NEXT: addq $24, %rsp 5932; CHECK-NEXT: .cfi_def_cfa_offset 8 5933; CHECK-NEXT: retq 5934; 5935; AVX-LABEL: constrained_vector_round_v2f64: 5936; AVX: # %bb.0: # %entry 5937; AVX-NEXT: subq $24, %rsp 5938; AVX-NEXT: .cfi_def_cfa_offset 32 5939; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5940; AVX-NEXT: callq round 5941; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 5942; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 5943; AVX-NEXT: callq round 5944; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 5945; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 5946; AVX-NEXT: addq $24, %rsp 5947; AVX-NEXT: .cfi_def_cfa_offset 8 5948; AVX-NEXT: retq 5949entry: 5950 %round = call <2 x double> @llvm.experimental.constrained.round.v2f64( 5951 <2 x double> <double 1.1, double 1.9>, 5952 metadata !"fpexcept.strict") #0 5953 ret <2 x double> %round 5954} 5955 5956define <3 x float> @constrained_vector_round_v3f32() #0 { 5957; CHECK-LABEL: constrained_vector_round_v3f32: 5958; CHECK: # %bb.0: # %entry 5959; CHECK-NEXT: subq $40, %rsp 5960; CHECK-NEXT: .cfi_def_cfa_offset 48 5961; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5962; CHECK-NEXT: callq roundf 5963; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 5964; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5965; CHECK-NEXT: callq roundf 5966; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 5967; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5968; CHECK-NEXT: callq roundf 5969; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 5970; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 5971; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 5972; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 5973; CHECK-NEXT: movaps %xmm1, %xmm0 5974; 
CHECK-NEXT: addq $40, %rsp 5975; CHECK-NEXT: .cfi_def_cfa_offset 8 5976; CHECK-NEXT: retq 5977; 5978; AVX-LABEL: constrained_vector_round_v3f32: 5979; AVX: # %bb.0: # %entry 5980; AVX-NEXT: subq $40, %rsp 5981; AVX-NEXT: .cfi_def_cfa_offset 48 5982; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5983; AVX-NEXT: callq roundf 5984; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 5985; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5986; AVX-NEXT: callq roundf 5987; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 5988; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5989; AVX-NEXT: callq roundf 5990; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 5991; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 5992; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 5993; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 5994; AVX-NEXT: addq $40, %rsp 5995; AVX-NEXT: .cfi_def_cfa_offset 8 5996; AVX-NEXT: retq 5997entry: 5998 %round = call <3 x float> @llvm.experimental.constrained.round.v3f32( 5999 <3 x float> <float 1.5, float 2.5, float 3.5>, 6000 metadata !"fpexcept.strict") #0 6001 ret <3 x float> %round 6002} 6003 6004 6005define <3 x double> @constrained_vector_round_v3f64() #0 { 6006; CHECK-LABEL: constrained_vector_round_v3f64: 6007; CHECK: # %bb.0: # %entry 6008; CHECK-NEXT: subq $24, %rsp 6009; CHECK-NEXT: .cfi_def_cfa_offset 32 6010; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6011; CHECK-NEXT: callq round 6012; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6013; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6014; CHECK-NEXT: callq round 6015; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 6016; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6017; CHECK-NEXT: callq round 6018; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 6019; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 6020; CHECK-NEXT: wait 6021; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 6022; CHECK-NEXT: # xmm0 = 
mem[0],zero 6023; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 6024; CHECK-NEXT: # xmm1 = mem[0],zero 6025; CHECK-NEXT: addq $24, %rsp 6026; CHECK-NEXT: .cfi_def_cfa_offset 8 6027; CHECK-NEXT: retq 6028; 6029; AVX-LABEL: constrained_vector_round_v3f64: 6030; AVX: # %bb.0: # %entry 6031; AVX-NEXT: subq $56, %rsp 6032; AVX-NEXT: .cfi_def_cfa_offset 64 6033; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6034; AVX-NEXT: callq round 6035; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 6036; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6037; AVX-NEXT: callq round 6038; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 6039; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 6040; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 6041; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6042; AVX-NEXT: vzeroupper 6043; AVX-NEXT: callq round 6044; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 6045; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6046; AVX-NEXT: addq $56, %rsp 6047; AVX-NEXT: .cfi_def_cfa_offset 8 6048; AVX-NEXT: retq 6049entry: 6050 %round = call <3 x double> @llvm.experimental.constrained.round.v3f64( 6051 <3 x double> <double 1.1, double 1.9, double 1.5>, 6052 metadata !"fpexcept.strict") #0 6053 ret <3 x double> %round 6054} 6055 6056define <1 x float> @constrained_vector_trunc_v1f32() #0 { 6057; CHECK-LABEL: constrained_vector_trunc_v1f32: 6058; CHECK: # %bb.0: # %entry 6059; CHECK-NEXT: pushq %rax 6060; CHECK-NEXT: .cfi_def_cfa_offset 16 6061; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6062; CHECK-NEXT: callq truncf 6063; CHECK-NEXT: popq %rax 6064; CHECK-NEXT: .cfi_def_cfa_offset 8 6065; CHECK-NEXT: retq 6066; 6067; AVX-LABEL: constrained_vector_trunc_v1f32: 6068; AVX: # %bb.0: # %entry 6069; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6070; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 6071; AVX-NEXT: retq 6072entry: 6073 %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32( 6074 <1 x float> <float 
1.5>, 6075 metadata !"fpexcept.strict") #0 6076 ret <1 x float> %trunc 6077} 6078 6079define <2 x double> @constrained_vector_trunc_v2f64() #0 { 6080; CHECK-LABEL: constrained_vector_trunc_v2f64: 6081; CHECK: # %bb.0: # %entry 6082; CHECK-NEXT: subq $24, %rsp 6083; CHECK-NEXT: .cfi_def_cfa_offset 32 6084; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6085; CHECK-NEXT: callq trunc 6086; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6087; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6088; CHECK-NEXT: callq trunc 6089; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 6090; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 6091; CHECK-NEXT: addq $24, %rsp 6092; CHECK-NEXT: .cfi_def_cfa_offset 8 6093; CHECK-NEXT: retq 6094; 6095; AVX-LABEL: constrained_vector_trunc_v2f64: 6096; AVX: # %bb.0: # %entry 6097; AVX-NEXT: vroundpd $11, {{.*}}(%rip), %xmm0 6098; AVX-NEXT: retq 6099entry: 6100 %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( 6101 <2 x double> <double 1.1, double 1.9>, 6102 metadata !"fpexcept.strict") #0 6103 ret <2 x double> %trunc 6104} 6105 6106define <3 x float> @constrained_vector_trunc_v3f32() #0 { 6107; CHECK-LABEL: constrained_vector_trunc_v3f32: 6108; CHECK: # %bb.0: # %entry 6109; CHECK-NEXT: subq $40, %rsp 6110; CHECK-NEXT: .cfi_def_cfa_offset 48 6111; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6112; CHECK-NEXT: callq truncf 6113; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6114; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6115; CHECK-NEXT: callq truncf 6116; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6117; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6118; CHECK-NEXT: callq truncf 6119; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 6120; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6121; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 6122; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 6123; CHECK-NEXT: movaps %xmm1, 
%xmm0 6124; CHECK-NEXT: addq $40, %rsp 6125; CHECK-NEXT: .cfi_def_cfa_offset 8 6126; CHECK-NEXT: retq 6127; 6128; AVX-LABEL: constrained_vector_trunc_v3f32: 6129; AVX: # %bb.0: # %entry 6130; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6131; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 6132; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 6133; AVX-NEXT: vroundss $11, %xmm1, %xmm1, %xmm1 6134; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 6135; AVX-NEXT: vroundss $11, %xmm2, %xmm2, %xmm2 6136; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 6137; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6138; AVX-NEXT: retq 6139entry: 6140 %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32( 6141 <3 x float> <float 1.5, float 2.5, float 3.5>, 6142 metadata !"fpexcept.strict") #0 6143 ret <3 x float> %trunc 6144} 6145 6146define <3 x double> @constrained_vector_trunc_v3f64() #0 { 6147; CHECK-LABEL: constrained_vector_trunc_v3f64: 6148; CHECK: # %bb.0: # %entry 6149; CHECK-NEXT: subq $24, %rsp 6150; CHECK-NEXT: .cfi_def_cfa_offset 32 6151; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6152; CHECK-NEXT: callq trunc 6153; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6154; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6155; CHECK-NEXT: callq trunc 6156; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 6157; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6158; CHECK-NEXT: callq trunc 6159; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 6160; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 6161; CHECK-NEXT: wait 6162; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 6163; CHECK-NEXT: # xmm0 = mem[0],zero 6164; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 6165; CHECK-NEXT: # xmm1 = mem[0],zero 6166; CHECK-NEXT: addq $24, %rsp 6167; CHECK-NEXT: .cfi_def_cfa_offset 8 6168; CHECK-NEXT: retq 6169; 6170; AVX-LABEL: constrained_vector_trunc_v3f64: 6171; AVX: # %bb.0: # %entry 6172; AVX-NEXT: vmovsd 
{{.*#+}} xmm0 = mem[0],zero 6173; AVX-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 6174; AVX-NEXT: vroundpd $11, {{.*}}(%rip), %xmm1 6175; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6176; AVX-NEXT: retq 6177entry: 6178 %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64( 6179 <3 x double> <double 1.1, double 1.9, double 1.5>, 6180 metadata !"fpexcept.strict") #0 6181 ret <3 x double> %trunc 6182} 6183 6184define <1 x double> @constrained_vector_sitofp_v1f64_v1i32(<1 x i32> %x) #0 { 6185; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i32: 6186; CHECK: # %bb.0: # %entry 6187; CHECK-NEXT: cvtsi2sd %edi, %xmm0 6188; CHECK-NEXT: retq 6189; 6190; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i32: 6191; AVX: # %bb.0: # %entry 6192; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 6193; AVX-NEXT: retq 6194entry: 6195 %result = call <1 x double> 6196 @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x, 6197 metadata !"round.dynamic", 6198 metadata !"fpexcept.strict") #0 6199 ret <1 x double> %result 6200} 6201 6202define <1 x float> @constrained_vector_sitofp_v1f32_v1i32(<1 x i32> %x) #0 { 6203; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i32: 6204; CHECK: # %bb.0: # %entry 6205; CHECK-NEXT: cvtsi2ss %edi, %xmm0 6206; CHECK-NEXT: retq 6207; 6208; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i32: 6209; AVX: # %bb.0: # %entry 6210; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 6211; AVX-NEXT: retq 6212entry: 6213 %result = call <1 x float> 6214 @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32> %x, 6215 metadata !"round.dynamic", 6216 metadata !"fpexcept.strict") #0 6217 ret <1 x float> %result 6218} 6219 6220define <1 x double> @constrained_vector_sitofp_v1f64_v1i64(<1 x i64> %x) #0 { 6221; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i64: 6222; CHECK: # %bb.0: # %entry 6223; CHECK-NEXT: cvtsi2sd %rdi, %xmm0 6224; CHECK-NEXT: retq 6225; 6226; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i64: 6227; AVX: # %bb.0: # %entry 6228; AVX-NEXT: vcvtsi2sd %rdi, 
%xmm0, %xmm0 6229; AVX-NEXT: retq 6230entry: 6231 %result = call <1 x double> 6232 @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x, 6233 metadata !"round.dynamic", 6234 metadata !"fpexcept.strict") #0 6235 ret <1 x double> %result 6236} 6237 6238define <1 x float> @constrained_vector_sitofp_v1f32_v1i64(<1 x i64> %x) #0 { 6239; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i64: 6240; CHECK: # %bb.0: # %entry 6241; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 6242; CHECK-NEXT: retq 6243; 6244; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i64: 6245; AVX: # %bb.0: # %entry 6246; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 6247; AVX-NEXT: retq 6248entry: 6249 %result = call <1 x float> 6250 @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64> %x, 6251 metadata !"round.dynamic", 6252 metadata !"fpexcept.strict") #0 6253 ret <1 x float> %result 6254} 6255 6256define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 { 6257; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32: 6258; CHECK: # %bb.0: # %entry 6259; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0 6260; CHECK-NEXT: retq 6261; 6262; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32: 6263; AVX: # %bb.0: # %entry 6264; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 6265; AVX-NEXT: retq 6266entry: 6267 %result = call <2 x double> 6268 @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x, 6269 metadata !"round.dynamic", 6270 metadata !"fpexcept.strict") #0 6271 ret <2 x double> %result 6272} 6273 6274define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 { 6275; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32: 6276; CHECK: # %bb.0: # %entry 6277; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 6278; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0 6279; CHECK-NEXT: retq 6280; 6281; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32: 6282; AVX: # %bb.0: # %entry 6283; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 6284; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 6285; AVX-NEXT: retq 6286entry: 6287 %result = call <2 x 
float> 6288 @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x, 6289 metadata !"round.dynamic", 6290 metadata !"fpexcept.strict") #0 6291 ret <2 x float> %result 6292} 6293 6294define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 { 6295; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i64: 6296; CHECK: # %bb.0: # %entry 6297; CHECK-NEXT: movq %xmm0, %rax 6298; CHECK-NEXT: cvtsi2sd %rax, %xmm1 6299; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6300; CHECK-NEXT: movq %xmm0, %rax 6301; CHECK-NEXT: xorps %xmm0, %xmm0 6302; CHECK-NEXT: cvtsi2sd %rax, %xmm0 6303; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 6304; CHECK-NEXT: movapd %xmm1, %xmm0 6305; CHECK-NEXT: retq 6306; 6307; AVX1-LABEL: constrained_vector_sitofp_v2f64_v2i64: 6308; AVX1: # %bb.0: # %entry 6309; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6310; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 6311; AVX1-NEXT: vmovq %xmm0, %rax 6312; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 6313; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6314; AVX1-NEXT: retq 6315; 6316; AVX512F-LABEL: constrained_vector_sitofp_v2f64_v2i64: 6317; AVX512F: # %bb.0: # %entry 6318; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 6319; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 6320; AVX512F-NEXT: vmovq %xmm0, %rax 6321; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 6322; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6323; AVX512F-NEXT: retq 6324; 6325; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64: 6326; AVX512DQ: # %bb.0: # %entry 6327; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 6328; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 6329; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 6330; AVX512DQ-NEXT: vzeroupper 6331; AVX512DQ-NEXT: retq 6332entry: 6333 %result = call <2 x double> 6334 @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x, 6335 metadata !"round.dynamic", 6336 metadata !"fpexcept.strict") #0 6337 ret <2 x double> %result 6338} 6339 6340define <2 x float> 
@constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 { 6341; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i64: 6342; CHECK: # %bb.0: # %entry 6343; CHECK-NEXT: movq %xmm0, %rax 6344; CHECK-NEXT: cvtsi2ss %rax, %xmm1 6345; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6346; CHECK-NEXT: movq %xmm0, %rax 6347; CHECK-NEXT: xorps %xmm0, %xmm0 6348; CHECK-NEXT: cvtsi2ss %rax, %xmm0 6349; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6350; CHECK-NEXT: movaps %xmm1, %xmm0 6351; CHECK-NEXT: retq 6352; 6353; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i64: 6354; AVX: # %bb.0: # %entry 6355; AVX-NEXT: vpextrq $1, %xmm0, %rax 6356; AVX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6357; AVX-NEXT: vmovq %xmm0, %rax 6358; AVX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 6359; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 6360; AVX-NEXT: retq 6361entry: 6362 %result = call <2 x float> 6363 @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x, 6364 metadata !"round.dynamic", 6365 metadata !"fpexcept.strict") #0 6366 ret <2 x float> %result 6367} 6368 6369define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 { 6370; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i32: 6371; CHECK: # %bb.0: # %entry 6372; CHECK-NEXT: movd %xmm0, %eax 6373; CHECK-NEXT: cvtsi2sd %eax, %xmm2 6374; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 6375; CHECK-NEXT: movd %xmm1, %eax 6376; CHECK-NEXT: xorps %xmm1, %xmm1 6377; CHECK-NEXT: cvtsi2sd %eax, %xmm1 6378; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6379; CHECK-NEXT: movd %xmm0, %eax 6380; CHECK-NEXT: xorps %xmm0, %xmm0 6381; CHECK-NEXT: cvtsi2sd %eax, %xmm0 6382; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) 6383; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 6384; CHECK-NEXT: wait 6385; CHECK-NEXT: movapd %xmm2, %xmm0 6386; CHECK-NEXT: retq 6387; 6388; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32: 6389; AVX: # %bb.0: # %entry 6390; AVX-NEXT: vextractps $1, %xmm0, %eax 6391; AVX-NEXT: vcvtsi2sd 
%eax, %xmm1, %xmm1 6392; AVX-NEXT: vmovd %xmm0, %eax 6393; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm2 6394; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 6395; AVX-NEXT: vpextrd $2, %xmm0, %eax 6396; AVX-NEXT: vcvtsi2sd %eax, %xmm3, %xmm0 6397; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6398; AVX-NEXT: retq 6399entry: 6400 %result = call <3 x double> 6401 @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32> %x, 6402 metadata !"round.dynamic", 6403 metadata !"fpexcept.strict") #0 6404 ret <3 x double> %result 6405} 6406 6407define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 { 6408; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32: 6409; CHECK: # %bb.0: # %entry 6410; CHECK-NEXT: movd %xmm0, %eax 6411; CHECK-NEXT: cvtsi2ss %eax, %xmm1 6412; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 6413; CHECK-NEXT: movd %xmm2, %eax 6414; CHECK-NEXT: xorps %xmm2, %xmm2 6415; CHECK-NEXT: cvtsi2ss %eax, %xmm2 6416; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 6417; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6418; CHECK-NEXT: movd %xmm0, %eax 6419; CHECK-NEXT: xorps %xmm0, %xmm0 6420; CHECK-NEXT: cvtsi2ss %eax, %xmm0 6421; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 6422; CHECK-NEXT: movaps %xmm1, %xmm0 6423; CHECK-NEXT: retq 6424; 6425; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32: 6426; AVX: # %bb.0: # %entry 6427; AVX-NEXT: vextractps $1, %xmm0, %eax 6428; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1 6429; AVX-NEXT: vmovd %xmm0, %eax 6430; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm2 6431; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6432; AVX-NEXT: vpextrd $2, %xmm0, %eax 6433; AVX-NEXT: vcvtsi2ss %eax, %xmm3, %xmm0 6434; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6435; AVX-NEXT: retq 6436entry: 6437 %result = call <3 x float> 6438 @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32> %x, 6439 metadata !"round.dynamic", 6440 metadata !"fpexcept.strict") #0 6441 ret <3 
x float> %result 6442} 6443 6444define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 { 6445; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i64: 6446; CHECK: # %bb.0: # %entry 6447; CHECK-NEXT: cvtsi2sd %rsi, %xmm1 6448; CHECK-NEXT: cvtsi2sd %rdi, %xmm0 6449; CHECK-NEXT: cvtsi2sd %rdx, %xmm2 6450; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) 6451; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 6452; CHECK-NEXT: wait 6453; CHECK-NEXT: retq 6454; 6455; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64: 6456; AVX1: # %bb.0: # %entry 6457; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6458; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 6459; AVX1-NEXT: vmovq %xmm0, %rax 6460; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 6461; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 6462; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 6463; AVX1-NEXT: vmovq %xmm0, %rax 6464; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 6465; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6466; AVX1-NEXT: retq 6467; 6468; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64: 6469; AVX512: # %bb.0: # %entry 6470; AVX512-NEXT: vpextrq $1, %xmm0, %rax 6471; AVX512-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 6472; AVX512-NEXT: vmovq %xmm0, %rax 6473; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 6474; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 6475; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 6476; AVX512-NEXT: vmovq %xmm0, %rax 6477; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 6478; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6479; AVX512-NEXT: retq 6480entry: 6481 %result = call <3 x double> 6482 @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64> %x, 6483 metadata !"round.dynamic", 6484 metadata !"fpexcept.strict") #0 6485 ret <3 x double> %result 6486} 6487 6488define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 { 6489; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i64: 6490; CHECK: # %bb.0: # %entry 6491; CHECK-NEXT: cvtsi2ss %rsi, %xmm1 6492; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 6493; 
CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 6494; CHECK-NEXT: xorps %xmm1, %xmm1 6495; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 6496; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6497; CHECK-NEXT: retq 6498; 6499; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64: 6500; AVX1: # %bb.0: # %entry 6501; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6502; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6503; AVX1-NEXT: vmovq %xmm0, %rax 6504; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 6505; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6506; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 6507; AVX1-NEXT: vmovq %xmm0, %rax 6508; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 6509; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6510; AVX1-NEXT: vzeroupper 6511; AVX1-NEXT: retq 6512; 6513; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64: 6514; AVX512: # %bb.0: # %entry 6515; AVX512-NEXT: vpextrq $1, %xmm0, %rax 6516; AVX512-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6517; AVX512-NEXT: vmovq %xmm0, %rax 6518; AVX512-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 6519; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6520; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 6521; AVX512-NEXT: vmovq %xmm0, %rax 6522; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 6523; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6524; AVX512-NEXT: vzeroupper 6525; AVX512-NEXT: retq 6526entry: 6527 %result = call <3 x float> 6528 @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64> %x, 6529 metadata !"round.dynamic", 6530 metadata !"fpexcept.strict") #0 6531 ret <3 x float> %result 6532} 6533 6534define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 { 6535; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32: 6536; CHECK: # %bb.0: # %entry 6537; CHECK-NEXT: cvtdq2pd %xmm0, %xmm2 6538; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6539; CHECK-NEXT: cvtdq2pd %xmm0, %xmm1 6540; CHECK-NEXT: movaps %xmm2, %xmm0 6541; CHECK-NEXT: retq 6542; 
6543; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32: 6544; AVX: # %bb.0: # %entry 6545; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 6546; AVX-NEXT: retq 6547entry: 6548 %result = call <4 x double> 6549 @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x, 6550 metadata !"round.dynamic", 6551 metadata !"fpexcept.strict") #0 6552 ret <4 x double> %result 6553} 6554 6555define <4 x float> @constrained_vector_sitofp_v4f32_v4i32(<4 x i32> %x) #0 { 6556; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i32: 6557; CHECK: # %bb.0: # %entry 6558; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0 6559; CHECK-NEXT: retq 6560; 6561; AVX-LABEL: constrained_vector_sitofp_v4f32_v4i32: 6562; AVX: # %bb.0: # %entry 6563; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 6564; AVX-NEXT: retq 6565entry: 6566 %result = call <4 x float> 6567 @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x, 6568 metadata !"round.dynamic", 6569 metadata !"fpexcept.strict") #0 6570 ret <4 x float> %result 6571} 6572 6573define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 { 6574; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i64: 6575; CHECK: # %bb.0: # %entry 6576; CHECK-NEXT: movq %xmm0, %rax 6577; CHECK-NEXT: cvtsi2sd %rax, %xmm2 6578; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6579; CHECK-NEXT: movq %xmm0, %rax 6580; CHECK-NEXT: xorps %xmm0, %xmm0 6581; CHECK-NEXT: cvtsi2sd %rax, %xmm0 6582; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] 6583; CHECK-NEXT: movq %xmm1, %rax 6584; CHECK-NEXT: cvtsi2sd %rax, %xmm3 6585; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 6586; CHECK-NEXT: movq %xmm0, %rax 6587; CHECK-NEXT: xorps %xmm0, %xmm0 6588; CHECK-NEXT: cvtsi2sd %rax, %xmm0 6589; CHECK-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0] 6590; CHECK-NEXT: movapd %xmm2, %xmm0 6591; CHECK-NEXT: movapd %xmm3, %xmm1 6592; CHECK-NEXT: retq 6593; 6594; AVX1-LABEL: constrained_vector_sitofp_v4f64_v4i64: 6595; AVX1: # %bb.0: # %entry 6596; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 6597; AVX1-NEXT: 
vpextrq $1, %xmm1, %rax 6598; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 6599; AVX1-NEXT: vmovq %xmm1, %rax 6600; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 6601; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 6602; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6603; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 6604; AVX1-NEXT: vmovq %xmm0, %rax 6605; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 6606; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 6607; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 6608; AVX1-NEXT: retq 6609; 6610; AVX512F-LABEL: constrained_vector_sitofp_v4f64_v4i64: 6611; AVX512F: # %bb.0: # %entry 6612; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6613; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 6614; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 6615; AVX512F-NEXT: vmovq %xmm1, %rax 6616; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 6617; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 6618; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 6619; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 6620; AVX512F-NEXT: vmovq %xmm0, %rax 6621; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 6622; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 6623; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 6624; AVX512F-NEXT: retq 6625; 6626; AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64: 6627; AVX512DQ: # %bb.0: # %entry 6628; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 6629; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 6630; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 6631; AVX512DQ-NEXT: retq 6632entry: 6633 %result = call <4 x double> 6634 @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x, 6635 metadata !"round.dynamic", 6636 metadata !"fpexcept.strict") #0 6637 ret <4 x double> %result 6638} 6639 6640define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 { 6641; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i64: 6642; CHECK: # %bb.0: # %entry 6643; CHECK-NEXT: movq %xmm1, %rax 6644; CHECK-NEXT: cvtsi2ss %rax, %xmm2 6645; CHECK-NEXT: pshufd 
{{.*#+}} xmm1 = xmm1[2,3,2,3] 6646; CHECK-NEXT: movq %xmm1, %rax 6647; CHECK-NEXT: xorps %xmm1, %xmm1 6648; CHECK-NEXT: cvtsi2ss %rax, %xmm1 6649; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 6650; CHECK-NEXT: movq %xmm0, %rax 6651; CHECK-NEXT: xorps %xmm1, %xmm1 6652; CHECK-NEXT: cvtsi2ss %rax, %xmm1 6653; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6654; CHECK-NEXT: movq %xmm0, %rax 6655; CHECK-NEXT: xorps %xmm0, %xmm0 6656; CHECK-NEXT: cvtsi2ss %rax, %xmm0 6657; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6658; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 6659; CHECK-NEXT: movaps %xmm1, %xmm0 6660; CHECK-NEXT: retq 6661; 6662; AVX1-LABEL: constrained_vector_sitofp_v4f32_v4i64: 6663; AVX1: # %bb.0: # %entry 6664; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6665; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6666; AVX1-NEXT: vmovq %xmm0, %rax 6667; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 6668; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6669; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 6670; AVX1-NEXT: vmovq %xmm0, %rax 6671; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 6672; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 6673; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6674; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 6675; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 6676; AVX1-NEXT: vzeroupper 6677; AVX1-NEXT: retq 6678; 6679; AVX512F-LABEL: constrained_vector_sitofp_v4f32_v4i64: 6680; AVX512F: # %bb.0: # %entry 6681; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 6682; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6683; AVX512F-NEXT: vmovq %xmm0, %rax 6684; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 6685; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6686; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 6687; AVX512F-NEXT: vmovq %xmm0, %rax 6688; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 6689; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 6690; AVX512F-NEXT: 
vpextrq $1, %xmm0, %rax 6691; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 6692; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 6693; AVX512F-NEXT: vzeroupper 6694; AVX512F-NEXT: retq 6695; 6696; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64: 6697; AVX512DQ: # %bb.0: # %entry 6698; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 6699; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 6700; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 6701; AVX512DQ-NEXT: vzeroupper 6702; AVX512DQ-NEXT: retq 6703entry: 6704 %result = call <4 x float> 6705 @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, 6706 metadata !"round.dynamic", 6707 metadata !"fpexcept.strict") #0 6708 ret <4 x float> %result 6709} 6710 6711define <1 x double> @constrained_vector_uitofp_v1f64_v1i32(<1 x i32> %x) #0 { 6712; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i32: 6713; CHECK: # %bb.0: # %entry 6714; CHECK-NEXT: movl %edi, %eax 6715; CHECK-NEXT: cvtsi2sd %rax, %xmm0 6716; CHECK-NEXT: retq 6717; 6718; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i32: 6719; AVX1: # %bb.0: # %entry 6720; AVX1-NEXT: movl %edi, %eax 6721; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0 6722; AVX1-NEXT: retq 6723; 6724; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i32: 6725; AVX512: # %bb.0: # %entry 6726; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 6727; AVX512-NEXT: retq 6728entry: 6729 %result = call <1 x double> 6730 @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x, 6731 metadata !"round.dynamic", 6732 metadata !"fpexcept.strict") #0 6733 ret <1 x double> %result 6734} 6735 6736define <1 x float> @constrained_vector_uitofp_v1f32_v1i32(<1 x i32> %x) #0 { 6737; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i32: 6738; CHECK: # %bb.0: # %entry 6739; CHECK-NEXT: movl %edi, %eax 6740; CHECK-NEXT: cvtsi2ss %rax, %xmm0 6741; CHECK-NEXT: retq 6742; 6743; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i32: 6744; AVX1: # %bb.0: # %entry 6745; AVX1-NEXT: movl %edi, %eax 6746; AVX1-NEXT: vcvtsi2ss 
%rax, %xmm0, %xmm0 6747; AVX1-NEXT: retq 6748; 6749; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i32: 6750; AVX512: # %bb.0: # %entry 6751; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 6752; AVX512-NEXT: retq 6753entry: 6754 %result = call <1 x float> 6755 @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32> %x, 6756 metadata !"round.dynamic", 6757 metadata !"fpexcept.strict") #0 6758 ret <1 x float> %result 6759} 6760 6761define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 { 6762; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i64: 6763; CHECK: # %bb.0: # %entry 6764; CHECK-NEXT: movq %rdi, %rax 6765; CHECK-NEXT: shrq %rax 6766; CHECK-NEXT: movl %edi, %ecx 6767; CHECK-NEXT: andl $1, %ecx 6768; CHECK-NEXT: orq %rax, %rcx 6769; CHECK-NEXT: testq %rdi, %rdi 6770; CHECK-NEXT: cmovnsq %rdi, %rcx 6771; CHECK-NEXT: cvtsi2sd %rcx, %xmm0 6772; CHECK-NEXT: jns .LBB169_2 6773; CHECK-NEXT: # %bb.1: 6774; CHECK-NEXT: addsd %xmm0, %xmm0 6775; CHECK-NEXT: .LBB169_2: # %entry 6776; CHECK-NEXT: retq 6777; 6778; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64: 6779; AVX1: # %bb.0: # %entry 6780; AVX1-NEXT: movq %rdi, %rax 6781; AVX1-NEXT: shrq %rax 6782; AVX1-NEXT: movl %edi, %ecx 6783; AVX1-NEXT: andl $1, %ecx 6784; AVX1-NEXT: orq %rax, %rcx 6785; AVX1-NEXT: testq %rdi, %rdi 6786; AVX1-NEXT: cmovnsq %rdi, %rcx 6787; AVX1-NEXT: vcvtsi2sd %rcx, %xmm0, %xmm0 6788; AVX1-NEXT: jns .LBB169_2 6789; AVX1-NEXT: # %bb.1: 6790; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 6791; AVX1-NEXT: .LBB169_2: # %entry 6792; AVX1-NEXT: retq 6793; 6794; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64: 6795; AVX512: # %bb.0: # %entry 6796; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0 6797; AVX512-NEXT: retq 6798entry: 6799 %result = call <1 x double> 6800 @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x, 6801 metadata !"round.dynamic", 6802 metadata !"fpexcept.strict") #0 6803 ret <1 x double> %result 6804} 6805 6806define <1 x float> 
@constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 { 6807; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64: 6808; CHECK: # %bb.0: # %entry 6809; CHECK-NEXT: movq %rdi, %rax 6810; CHECK-NEXT: shrq %rax 6811; CHECK-NEXT: movl %edi, %ecx 6812; CHECK-NEXT: andl $1, %ecx 6813; CHECK-NEXT: orq %rax, %rcx 6814; CHECK-NEXT: testq %rdi, %rdi 6815; CHECK-NEXT: cmovnsq %rdi, %rcx 6816; CHECK-NEXT: cvtsi2ss %rcx, %xmm0 6817; CHECK-NEXT: jns .LBB170_2 6818; CHECK-NEXT: # %bb.1: 6819; CHECK-NEXT: addss %xmm0, %xmm0 6820; CHECK-NEXT: .LBB170_2: # %entry 6821; CHECK-NEXT: retq 6822; 6823; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64: 6824; AVX1: # %bb.0: # %entry 6825; AVX1-NEXT: movq %rdi, %rax 6826; AVX1-NEXT: shrq %rax 6827; AVX1-NEXT: movl %edi, %ecx 6828; AVX1-NEXT: andl $1, %ecx 6829; AVX1-NEXT: orq %rax, %rcx 6830; AVX1-NEXT: testq %rdi, %rdi 6831; AVX1-NEXT: cmovnsq %rdi, %rcx 6832; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0 6833; AVX1-NEXT: jns .LBB170_2 6834; AVX1-NEXT: # %bb.1: 6835; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 6836; AVX1-NEXT: .LBB170_2: # %entry 6837; AVX1-NEXT: retq 6838; 6839; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64: 6840; AVX512: # %bb.0: # %entry 6841; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0 6842; AVX512-NEXT: retq 6843entry: 6844 %result = call <1 x float> 6845 @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64> %x, 6846 metadata !"round.dynamic", 6847 metadata !"fpexcept.strict") #0 6848 ret <1 x float> %result 6849} 6850 6851define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 { 6852; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32: 6853; CHECK: # %bb.0: # %entry 6854; CHECK-NEXT: xorpd %xmm1, %xmm1 6855; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 6856; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 6857; CHECK-NEXT: orpd %xmm1, %xmm0 6858; CHECK-NEXT: subpd %xmm1, %xmm0 6859; CHECK-NEXT: retq 6860; 6861; AVX1-LABEL: 
constrained_vector_uitofp_v2f64_v2i32: 6862; AVX1: # %bb.0: # %entry 6863; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6864; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 6865; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 6866; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 6867; AVX1-NEXT: retq 6868; 6869; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32: 6870; AVX512: # %bb.0: # %entry 6871; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 6872; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0 6873; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 6874; AVX512-NEXT: vzeroupper 6875; AVX512-NEXT: retq 6876entry: 6877 %result = call <2 x double> 6878 @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x, 6879 metadata !"round.dynamic", 6880 metadata !"fpexcept.strict") #0 6881 ret <2 x double> %result 6882} 6883 6884define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 { 6885; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i32: 6886; CHECK: # %bb.0: # %entry 6887; CHECK-NEXT: xorpd %xmm1, %xmm1 6888; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 6889; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 6890; CHECK-NEXT: orpd %xmm1, %xmm0 6891; CHECK-NEXT: subpd %xmm1, %xmm0 6892; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0 6893; CHECK-NEXT: retq 6894; 6895; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32: 6896; AVX1: # %bb.0: # %entry 6897; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 6898; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 6899; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 6900; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 6901; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 6902; AVX1-NEXT: retq 6903; 6904; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32: 6905; AVX512: # %bb.0: # %entry 6906; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 6907; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 6908; AVX512-NEXT: # kill: def $xmm0 
killed $xmm0 killed $zmm0 6909; AVX512-NEXT: vzeroupper 6910; AVX512-NEXT: retq 6911entry: 6912 %result = call <2 x float> 6913 @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x, 6914 metadata !"round.dynamic", 6915 metadata !"fpexcept.strict") #0 6916 ret <2 x float> %result 6917} 6918 6919define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 { 6920; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i64: 6921; CHECK: # %bb.0: # %entry 6922; CHECK-NEXT: movdqa %xmm0, %xmm1 6923; CHECK-NEXT: movq %xmm0, %rax 6924; CHECK-NEXT: movq %rax, %rcx 6925; CHECK-NEXT: shrq %rcx 6926; CHECK-NEXT: movl %eax, %edx 6927; CHECK-NEXT: andl $1, %edx 6928; CHECK-NEXT: orq %rcx, %rdx 6929; CHECK-NEXT: testq %rax, %rax 6930; CHECK-NEXT: cmovnsq %rax, %rdx 6931; CHECK-NEXT: xorps %xmm0, %xmm0 6932; CHECK-NEXT: cvtsi2sd %rdx, %xmm0 6933; CHECK-NEXT: jns .LBB173_2 6934; CHECK-NEXT: # %bb.1: 6935; CHECK-NEXT: addsd %xmm0, %xmm0 6936; CHECK-NEXT: .LBB173_2: # %entry 6937; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 6938; CHECK-NEXT: movq %xmm1, %rax 6939; CHECK-NEXT: movq %rax, %rcx 6940; CHECK-NEXT: shrq %rcx 6941; CHECK-NEXT: movl %eax, %edx 6942; CHECK-NEXT: andl $1, %edx 6943; CHECK-NEXT: orq %rcx, %rdx 6944; CHECK-NEXT: testq %rax, %rax 6945; CHECK-NEXT: cmovnsq %rax, %rdx 6946; CHECK-NEXT: xorps %xmm1, %xmm1 6947; CHECK-NEXT: cvtsi2sd %rdx, %xmm1 6948; CHECK-NEXT: jns .LBB173_4 6949; CHECK-NEXT: # %bb.3: 6950; CHECK-NEXT: addsd %xmm1, %xmm1 6951; CHECK-NEXT: .LBB173_4: # %entry 6952; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6953; CHECK-NEXT: retq 6954; 6955; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i64: 6956; AVX1: # %bb.0: # %entry 6957; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6958; AVX1-NEXT: movq %rax, %rcx 6959; AVX1-NEXT: shrq %rcx 6960; AVX1-NEXT: movl %eax, %edx 6961; AVX1-NEXT: andl $1, %edx 6962; AVX1-NEXT: orq %rcx, %rdx 6963; AVX1-NEXT: testq %rax, %rax 6964; AVX1-NEXT: cmovnsq %rax, %rdx 6965; AVX1-NEXT: vcvtsi2sd %rdx, 
%xmm1, %xmm1 6966; AVX1-NEXT: jns .LBB173_2 6967; AVX1-NEXT: # %bb.1: 6968; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1 6969; AVX1-NEXT: .LBB173_2: # %entry 6970; AVX1-NEXT: vmovq %xmm0, %rax 6971; AVX1-NEXT: movq %rax, %rcx 6972; AVX1-NEXT: shrq %rcx 6973; AVX1-NEXT: movl %eax, %edx 6974; AVX1-NEXT: andl $1, %edx 6975; AVX1-NEXT: orq %rcx, %rdx 6976; AVX1-NEXT: testq %rax, %rax 6977; AVX1-NEXT: cmovnsq %rax, %rdx 6978; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm0 6979; AVX1-NEXT: jns .LBB173_4 6980; AVX1-NEXT: # %bb.3: 6981; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 6982; AVX1-NEXT: .LBB173_4: # %entry 6983; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6984; AVX1-NEXT: retq 6985; 6986; AVX512F-LABEL: constrained_vector_uitofp_v2f64_v2i64: 6987; AVX512F: # %bb.0: # %entry 6988; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 6989; AVX512F-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1 6990; AVX512F-NEXT: vmovq %xmm0, %rax 6991; AVX512F-NEXT: vcvtusi2sd %rax, %xmm2, %xmm0 6992; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6993; AVX512F-NEXT: retq 6994; 6995; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64: 6996; AVX512DQ: # %bb.0: # %entry 6997; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 6998; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 6999; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 7000; AVX512DQ-NEXT: vzeroupper 7001; AVX512DQ-NEXT: retq 7002entry: 7003 %result = call <2 x double> 7004 @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x, 7005 metadata !"round.dynamic", 7006 metadata !"fpexcept.strict") #0 7007 ret <2 x double> %result 7008} 7009 7010define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 { 7011; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i64: 7012; CHECK: # %bb.0: # %entry 7013; CHECK-NEXT: movdqa %xmm0, %xmm1 7014; CHECK-NEXT: movq %xmm0, %rax 7015; CHECK-NEXT: movq %rax, %rcx 7016; CHECK-NEXT: shrq %rcx 7017; CHECK-NEXT: movl %eax, %edx 7018; CHECK-NEXT: andl $1, %edx 7019; CHECK-NEXT: orq %rcx, %rdx 7020; 
CHECK-NEXT: testq %rax, %rax 7021; CHECK-NEXT: cmovnsq %rax, %rdx 7022; CHECK-NEXT: xorps %xmm0, %xmm0 7023; CHECK-NEXT: cvtsi2ss %rdx, %xmm0 7024; CHECK-NEXT: jns .LBB174_2 7025; CHECK-NEXT: # %bb.1: 7026; CHECK-NEXT: addss %xmm0, %xmm0 7027; CHECK-NEXT: .LBB174_2: # %entry 7028; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 7029; CHECK-NEXT: movq %xmm1, %rax 7030; CHECK-NEXT: movq %rax, %rcx 7031; CHECK-NEXT: shrq %rcx 7032; CHECK-NEXT: movl %eax, %edx 7033; CHECK-NEXT: andl $1, %edx 7034; CHECK-NEXT: orq %rcx, %rdx 7035; CHECK-NEXT: testq %rax, %rax 7036; CHECK-NEXT: cmovnsq %rax, %rdx 7037; CHECK-NEXT: xorps %xmm1, %xmm1 7038; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 7039; CHECK-NEXT: jns .LBB174_4 7040; CHECK-NEXT: # %bb.3: 7041; CHECK-NEXT: addss %xmm1, %xmm1 7042; CHECK-NEXT: .LBB174_4: # %entry 7043; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 7044; CHECK-NEXT: retq 7045; 7046; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64: 7047; AVX1: # %bb.0: # %entry 7048; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 7049; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 7050; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1 7051; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 7052; AVX1-NEXT: vpextrq $1, %xmm1, %rax 7053; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 7054; AVX1-NEXT: vmovq %xmm1, %rax 7055; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 7056; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero 7057; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2 7058; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 7059; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 7060; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 7061; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 7062; AVX1-NEXT: retq 7063; 7064; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64: 7065; AVX512: # %bb.0: # %entry 7066; AVX512-NEXT: vpextrq $1, %xmm0, %rax 7067; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 7068; AVX512-NEXT: vmovq %xmm0, %rax 7069; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 7070; AVX512-NEXT: vinsertps 
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 7071; AVX512-NEXT: retq 7072entry: 7073 %result = call <2 x float> 7074 @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x, 7075 metadata !"round.dynamic", 7076 metadata !"fpexcept.strict") #0 7077 ret <2 x float> %result 7078} 7079 7080define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 { 7081; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i32: 7082; CHECK: # %bb.0: # %entry 7083; CHECK-NEXT: movd %xmm0, %eax 7084; CHECK-NEXT: cvtsi2sd %rax, %xmm2 7085; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 7086; CHECK-NEXT: movd %xmm1, %eax 7087; CHECK-NEXT: xorps %xmm1, %xmm1 7088; CHECK-NEXT: cvtsi2sd %rax, %xmm1 7089; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 7090; CHECK-NEXT: movd %xmm0, %eax 7091; CHECK-NEXT: xorps %xmm0, %xmm0 7092; CHECK-NEXT: cvtsi2sd %rax, %xmm0 7093; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) 7094; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 7095; CHECK-NEXT: wait 7096; CHECK-NEXT: movapd %xmm2, %xmm0 7097; CHECK-NEXT: retq 7098; 7099; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32: 7100; AVX1: # %bb.0: # %entry 7101; AVX1-NEXT: vextractps $1, %xmm0, %eax 7102; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 7103; AVX1-NEXT: vmovd %xmm0, %eax 7104; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 7105; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 7106; AVX1-NEXT: vpextrd $2, %xmm0, %eax 7107; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 7108; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 7109; AVX1-NEXT: retq 7110; 7111; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32: 7112; AVX512: # %bb.0: # %entry 7113; AVX512-NEXT: vextractps $1, %xmm0, %eax 7114; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1 7115; AVX512-NEXT: vmovd %xmm0, %eax 7116; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm2 7117; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 7118; AVX512-NEXT: vpextrd $2, %xmm0, %eax 7119; AVX512-NEXT: vcvtusi2sd %eax, %xmm3, %xmm0 7120; AVX512-NEXT: vinsertf128 $1, %xmm0, 
%ymm1, %ymm0 7121; AVX512-NEXT: retq 7122entry: 7123 %result = call <3 x double> 7124 @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32> %x, 7125 metadata !"round.dynamic", 7126 metadata !"fpexcept.strict") #0 7127 ret <3 x double> %result 7128} 7129 7130define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 { 7131; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32: 7132; CHECK: # %bb.0: # %entry 7133; CHECK-NEXT: movd %xmm0, %eax 7134; CHECK-NEXT: cvtsi2ss %rax, %xmm1 7135; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 7136; CHECK-NEXT: movd %xmm2, %eax 7137; CHECK-NEXT: xorps %xmm2, %xmm2 7138; CHECK-NEXT: cvtsi2ss %rax, %xmm2 7139; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 7140; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 7141; CHECK-NEXT: movd %xmm0, %eax 7142; CHECK-NEXT: xorps %xmm0, %xmm0 7143; CHECK-NEXT: cvtsi2ss %rax, %xmm0 7144; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 7145; CHECK-NEXT: movaps %xmm1, %xmm0 7146; CHECK-NEXT: retq 7147; 7148; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32: 7149; AVX1: # %bb.0: # %entry 7150; AVX1-NEXT: vextractps $1, %xmm0, %eax 7151; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 7152; AVX1-NEXT: vmovd %xmm0, %eax 7153; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 7154; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7155; AVX1-NEXT: vpextrd $2, %xmm0, %eax 7156; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 7157; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 7158; AVX1-NEXT: retq 7159; 7160; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32: 7161; AVX512: # %bb.0: # %entry 7162; AVX512-NEXT: vextractps $1, %xmm0, %eax 7163; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1 7164; AVX512-NEXT: vmovd %xmm0, %eax 7165; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm2 7166; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7167; AVX512-NEXT: vpextrd $2, %xmm0, %eax 7168; AVX512-NEXT: vcvtusi2ss %eax, %xmm3, %xmm0 7169; 
AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 7170; AVX512-NEXT: retq 7171entry: 7172 %result = call <3 x float> 7173 @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32> %x, 7174 metadata !"round.dynamic", 7175 metadata !"fpexcept.strict") #0 7176 ret <3 x float> %result 7177} 7178 7179define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { 7180; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i64: 7181; CHECK: # %bb.0: # %entry 7182; CHECK-NEXT: movq %rdi, %rax 7183; CHECK-NEXT: shrq %rax 7184; CHECK-NEXT: movl %edi, %ecx 7185; CHECK-NEXT: andl $1, %ecx 7186; CHECK-NEXT: orq %rax, %rcx 7187; CHECK-NEXT: testq %rdi, %rdi 7188; CHECK-NEXT: cmovnsq %rdi, %rcx 7189; CHECK-NEXT: cvtsi2sd %rcx, %xmm0 7190; CHECK-NEXT: jns .LBB177_2 7191; CHECK-NEXT: # %bb.1: 7192; CHECK-NEXT: addsd %xmm0, %xmm0 7193; CHECK-NEXT: .LBB177_2: # %entry 7194; CHECK-NEXT: movq %rsi, %rax 7195; CHECK-NEXT: shrq %rax 7196; CHECK-NEXT: movl %esi, %ecx 7197; CHECK-NEXT: andl $1, %ecx 7198; CHECK-NEXT: orq %rax, %rcx 7199; CHECK-NEXT: testq %rsi, %rsi 7200; CHECK-NEXT: cmovnsq %rsi, %rcx 7201; CHECK-NEXT: cvtsi2sd %rcx, %xmm1 7202; CHECK-NEXT: jns .LBB177_4 7203; CHECK-NEXT: # %bb.3: 7204; CHECK-NEXT: addsd %xmm1, %xmm1 7205; CHECK-NEXT: .LBB177_4: # %entry 7206; CHECK-NEXT: movq %rdx, %rax 7207; CHECK-NEXT: shrq %rax 7208; CHECK-NEXT: movl %edx, %ecx 7209; CHECK-NEXT: andl $1, %ecx 7210; CHECK-NEXT: orq %rax, %rcx 7211; CHECK-NEXT: testq %rdx, %rdx 7212; CHECK-NEXT: cmovnsq %rdx, %rcx 7213; CHECK-NEXT: cvtsi2sd %rcx, %xmm2 7214; CHECK-NEXT: jns .LBB177_6 7215; CHECK-NEXT: # %bb.5: 7216; CHECK-NEXT: addsd %xmm2, %xmm2 7217; CHECK-NEXT: .LBB177_6: # %entry 7218; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) 7219; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 7220; CHECK-NEXT: wait 7221; CHECK-NEXT: retq 7222; 7223; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64: 7224; AVX1: # %bb.0: # %entry 7225; AVX1-NEXT: vpextrq $1, %xmm0, %rax 7226; AVX1-NEXT: movq %rax, 
%rcx 7227; AVX1-NEXT: shrq %rcx 7228; AVX1-NEXT: movl %eax, %edx 7229; AVX1-NEXT: andl $1, %edx 7230; AVX1-NEXT: orq %rcx, %rdx 7231; AVX1-NEXT: testq %rax, %rax 7232; AVX1-NEXT: cmovnsq %rax, %rdx 7233; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1 7234; AVX1-NEXT: jns .LBB177_2 7235; AVX1-NEXT: # %bb.1: 7236; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1 7237; AVX1-NEXT: .LBB177_2: # %entry 7238; AVX1-NEXT: vmovq %xmm0, %rax 7239; AVX1-NEXT: movq %rax, %rcx 7240; AVX1-NEXT: shrq %rcx 7241; AVX1-NEXT: movl %eax, %edx 7242; AVX1-NEXT: andl $1, %edx 7243; AVX1-NEXT: orq %rcx, %rdx 7244; AVX1-NEXT: testq %rax, %rax 7245; AVX1-NEXT: cmovnsq %rax, %rdx 7246; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm2 7247; AVX1-NEXT: jns .LBB177_4 7248; AVX1-NEXT: # %bb.3: 7249; AVX1-NEXT: vaddsd %xmm2, %xmm2, %xmm2 7250; AVX1-NEXT: .LBB177_4: # %entry 7251; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 7252; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 7253; AVX1-NEXT: vmovq %xmm0, %rax 7254; AVX1-NEXT: movq %rax, %rcx 7255; AVX1-NEXT: shrq %rcx 7256; AVX1-NEXT: movl %eax, %edx 7257; AVX1-NEXT: andl $1, %edx 7258; AVX1-NEXT: orq %rcx, %rdx 7259; AVX1-NEXT: testq %rax, %rax 7260; AVX1-NEXT: cmovnsq %rax, %rdx 7261; AVX1-NEXT: vcvtsi2sd %rdx, %xmm3, %xmm0 7262; AVX1-NEXT: jns .LBB177_6 7263; AVX1-NEXT: # %bb.5: 7264; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 7265; AVX1-NEXT: .LBB177_6: # %entry 7266; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 7267; AVX1-NEXT: retq 7268; 7269; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64: 7270; AVX512: # %bb.0: # %entry 7271; AVX512-NEXT: vpextrq $1, %xmm0, %rax 7272; AVX512-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1 7273; AVX512-NEXT: vmovq %xmm0, %rax 7274; AVX512-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2 7275; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 7276; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 7277; AVX512-NEXT: vmovq %xmm0, %rax 7278; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0 7279; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 7280; AVX512-NEXT: 
retq 7281entry: 7282 %result = call <3 x double> 7283 @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64> %x, 7284 metadata !"round.dynamic", 7285 metadata !"fpexcept.strict") #0 7286 ret <3 x double> %result 7287} 7288 7289define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { 7290; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64: 7291; CHECK: # %bb.0: # %entry 7292; CHECK-NEXT: movq %rsi, %rax 7293; CHECK-NEXT: shrq %rax 7294; CHECK-NEXT: movl %esi, %ecx 7295; CHECK-NEXT: andl $1, %ecx 7296; CHECK-NEXT: orq %rax, %rcx 7297; CHECK-NEXT: testq %rsi, %rsi 7298; CHECK-NEXT: cmovnsq %rsi, %rcx 7299; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 7300; CHECK-NEXT: jns .LBB178_2 7301; CHECK-NEXT: # %bb.1: 7302; CHECK-NEXT: addss %xmm1, %xmm1 7303; CHECK-NEXT: .LBB178_2: # %entry 7304; CHECK-NEXT: movq %rdi, %rax 7305; CHECK-NEXT: shrq %rax 7306; CHECK-NEXT: movl %edi, %ecx 7307; CHECK-NEXT: andl $1, %ecx 7308; CHECK-NEXT: orq %rax, %rcx 7309; CHECK-NEXT: testq %rdi, %rdi 7310; CHECK-NEXT: cmovnsq %rdi, %rcx 7311; CHECK-NEXT: cvtsi2ss %rcx, %xmm0 7312; CHECK-NEXT: jns .LBB178_4 7313; CHECK-NEXT: # %bb.3: 7314; CHECK-NEXT: addss %xmm0, %xmm0 7315; CHECK-NEXT: .LBB178_4: # %entry 7316; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 7317; CHECK-NEXT: movq %rdx, %rax 7318; CHECK-NEXT: shrq %rax 7319; CHECK-NEXT: movl %edx, %ecx 7320; CHECK-NEXT: andl $1, %ecx 7321; CHECK-NEXT: orq %rax, %rcx 7322; CHECK-NEXT: testq %rdx, %rdx 7323; CHECK-NEXT: cmovnsq %rdx, %rcx 7324; CHECK-NEXT: xorps %xmm1, %xmm1 7325; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 7326; CHECK-NEXT: jns .LBB178_6 7327; CHECK-NEXT: # %bb.5: 7328; CHECK-NEXT: addss %xmm1, %xmm1 7329; CHECK-NEXT: .LBB178_6: # %entry 7330; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 7331; CHECK-NEXT: retq 7332; 7333; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64: 7334; AVX1: # %bb.0: # %entry 7335; AVX1-NEXT: vpextrq $1, %xmm0, %rax 7336; AVX1-NEXT: movq %rax, %rcx 7337; AVX1-NEXT: shrq 
%rcx 7338; AVX1-NEXT: movl %eax, %edx 7339; AVX1-NEXT: andl $1, %edx 7340; AVX1-NEXT: orq %rcx, %rdx 7341; AVX1-NEXT: testq %rax, %rax 7342; AVX1-NEXT: cmovnsq %rax, %rdx 7343; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1 7344; AVX1-NEXT: jns .LBB178_2 7345; AVX1-NEXT: # %bb.1: 7346; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 7347; AVX1-NEXT: .LBB178_2: # %entry 7348; AVX1-NEXT: vmovq %xmm0, %rax 7349; AVX1-NEXT: movq %rax, %rcx 7350; AVX1-NEXT: shrq %rcx 7351; AVX1-NEXT: movl %eax, %edx 7352; AVX1-NEXT: andl $1, %edx 7353; AVX1-NEXT: orq %rcx, %rdx 7354; AVX1-NEXT: testq %rax, %rax 7355; AVX1-NEXT: cmovnsq %rax, %rdx 7356; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2 7357; AVX1-NEXT: jns .LBB178_4 7358; AVX1-NEXT: # %bb.3: 7359; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 7360; AVX1-NEXT: .LBB178_4: # %entry 7361; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7362; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 7363; AVX1-NEXT: vmovq %xmm0, %rax 7364; AVX1-NEXT: movq %rax, %rcx 7365; AVX1-NEXT: shrq %rcx 7366; AVX1-NEXT: movl %eax, %edx 7367; AVX1-NEXT: andl $1, %edx 7368; AVX1-NEXT: orq %rcx, %rdx 7369; AVX1-NEXT: testq %rax, %rax 7370; AVX1-NEXT: cmovnsq %rax, %rdx 7371; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0 7372; AVX1-NEXT: jns .LBB178_6 7373; AVX1-NEXT: # %bb.5: 7374; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 7375; AVX1-NEXT: .LBB178_6: # %entry 7376; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 7377; AVX1-NEXT: vzeroupper 7378; AVX1-NEXT: retq 7379; 7380; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64: 7381; AVX512: # %bb.0: # %entry 7382; AVX512-NEXT: vpextrq $1, %xmm0, %rax 7383; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 7384; AVX512-NEXT: vmovq %xmm0, %rax 7385; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 7386; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7387; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 7388; AVX512-NEXT: vmovq %xmm0, %rax 7389; AVX512-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 7390; AVX512-NEXT: vinsertps {{.*#+}} 
xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 7391; AVX512-NEXT: vzeroupper 7392; AVX512-NEXT: retq 7393entry: 7394 %result = call <3 x float> 7395 @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64> %x, 7396 metadata !"round.dynamic", 7397 metadata !"fpexcept.strict") #0 7398 ret <3 x float> %result 7399} 7400 7401define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 { 7402; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i32: 7403; CHECK: # %bb.0: # %entry 7404; CHECK-NEXT: xorpd %xmm2, %xmm2 7405; CHECK-NEXT: movapd %xmm0, %xmm1 7406; CHECK-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 7407; CHECK-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15] 7408; CHECK-NEXT: orpd %xmm3, %xmm1 7409; CHECK-NEXT: subpd %xmm3, %xmm1 7410; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 7411; CHECK-NEXT: orpd %xmm3, %xmm0 7412; CHECK-NEXT: subpd %xmm3, %xmm0 7413; CHECK-NEXT: retq 7414; 7415; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32: 7416; AVX1: # %bb.0: # %entry 7417; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 7418; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7419; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7420; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 7421; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 7422; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 7423; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 7424; AVX1-NEXT: retq 7425; 7426; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32: 7427; AVX512: # %bb.0: # %entry 7428; AVX512-NEXT: vmovaps %xmm0, %xmm0 7429; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0 7430; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 7431; AVX512-NEXT: retq 7432entry: 7433 %result = call <4 x double> 7434 @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x, 7435 metadata !"round.dynamic", 7436 metadata !"fpexcept.strict") #0 7437 ret <4 
x double> %result 7438} 7439 7440define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 { 7441; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32: 7442; CHECK: # %bb.0: # %entry 7443; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] 7444; CHECK-NEXT: pand %xmm0, %xmm1 7445; CHECK-NEXT: por {{.*}}(%rip), %xmm1 7446; CHECK-NEXT: psrld $16, %xmm0 7447; CHECK-NEXT: por {{.*}}(%rip), %xmm0 7448; CHECK-NEXT: subps {{.*}}(%rip), %xmm0 7449; CHECK-NEXT: addps %xmm1, %xmm0 7450; CHECK-NEXT: retq 7451; 7452; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32: 7453; AVX1: # %bb.0: # %entry 7454; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 7455; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 7456; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 7457; AVX1-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 7458; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 7459; AVX1-NEXT: retq 7460; 7461; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32: 7462; AVX512: # %bb.0: # %entry 7463; AVX512-NEXT: vmovaps %xmm0, %xmm0 7464; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 7465; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 7466; AVX512-NEXT: vzeroupper 7467; AVX512-NEXT: retq 7468entry: 7469 %result = call <4 x float> 7470 @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x, 7471 metadata !"round.dynamic", 7472 metadata !"fpexcept.strict") #0 7473 ret <4 x float> %result 7474} 7475 7476define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 { 7477; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i64: 7478; CHECK: # %bb.0: # %entry 7479; CHECK-NEXT: movdqa %xmm0, %xmm2 7480; CHECK-NEXT: movq %xmm0, %rax 7481; CHECK-NEXT: movq %rax, %rcx 7482; CHECK-NEXT: shrq %rcx 7483; CHECK-NEXT: movl %eax, %edx 7484; CHECK-NEXT: andl $1, %edx 7485; CHECK-NEXT: orq %rcx, %rdx 7486; CHECK-NEXT: testq %rax, %rax 7487; CHECK-NEXT: cmovnsq %rax, %rdx 7488; CHECK-NEXT: xorps 
%xmm0, %xmm0 7489; CHECK-NEXT: cvtsi2sd %rdx, %xmm0 7490; CHECK-NEXT: jns .LBB181_2 7491; CHECK-NEXT: # %bb.1: 7492; CHECK-NEXT: addsd %xmm0, %xmm0 7493; CHECK-NEXT: .LBB181_2: # %entry 7494; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 7495; CHECK-NEXT: movq %xmm2, %rax 7496; CHECK-NEXT: movq %rax, %rcx 7497; CHECK-NEXT: shrq %rcx 7498; CHECK-NEXT: movl %eax, %edx 7499; CHECK-NEXT: andl $1, %edx 7500; CHECK-NEXT: orq %rcx, %rdx 7501; CHECK-NEXT: testq %rax, %rax 7502; CHECK-NEXT: cmovnsq %rax, %rdx 7503; CHECK-NEXT: cvtsi2sd %rdx, %xmm3 7504; CHECK-NEXT: jns .LBB181_4 7505; CHECK-NEXT: # %bb.3: 7506; CHECK-NEXT: addsd %xmm3, %xmm3 7507; CHECK-NEXT: .LBB181_4: # %entry 7508; CHECK-NEXT: movq %xmm1, %rax 7509; CHECK-NEXT: movq %rax, %rcx 7510; CHECK-NEXT: shrq %rcx 7511; CHECK-NEXT: movl %eax, %edx 7512; CHECK-NEXT: andl $1, %edx 7513; CHECK-NEXT: orq %rcx, %rdx 7514; CHECK-NEXT: testq %rax, %rax 7515; CHECK-NEXT: cmovnsq %rax, %rdx 7516; CHECK-NEXT: xorps %xmm2, %xmm2 7517; CHECK-NEXT: cvtsi2sd %rdx, %xmm2 7518; CHECK-NEXT: jns .LBB181_6 7519; CHECK-NEXT: # %bb.5: 7520; CHECK-NEXT: addsd %xmm2, %xmm2 7521; CHECK-NEXT: .LBB181_6: # %entry 7522; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] 7523; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 7524; CHECK-NEXT: movq %xmm1, %rax 7525; CHECK-NEXT: movq %rax, %rcx 7526; CHECK-NEXT: shrq %rcx 7527; CHECK-NEXT: movl %eax, %edx 7528; CHECK-NEXT: andl $1, %edx 7529; CHECK-NEXT: orq %rcx, %rdx 7530; CHECK-NEXT: testq %rax, %rax 7531; CHECK-NEXT: cmovnsq %rax, %rdx 7532; CHECK-NEXT: xorps %xmm1, %xmm1 7533; CHECK-NEXT: cvtsi2sd %rdx, %xmm1 7534; CHECK-NEXT: jns .LBB181_8 7535; CHECK-NEXT: # %bb.7: 7536; CHECK-NEXT: addsd %xmm1, %xmm1 7537; CHECK-NEXT: .LBB181_8: # %entry 7538; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] 7539; CHECK-NEXT: movapd %xmm2, %xmm1 7540; CHECK-NEXT: retq 7541; 7542; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i64: 7543; AVX1: # %bb.0: # %entry 7544; AVX1-NEXT: vxorps %xmm1, 
%xmm1, %xmm1 7545; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 7546; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 7547; AVX1-NEXT: vpextrq $1, %xmm2, %rax 7548; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 7549; AVX1-NEXT: vmovq %xmm2, %rax 7550; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 7551; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 7552; AVX1-NEXT: vpextrq $1, %xmm1, %rax 7553; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 7554; AVX1-NEXT: vmovq %xmm1, %rax 7555; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm1 7556; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] 7557; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 7558; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 7559; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2 7560; AVX1-NEXT: vpextrq $1, %xmm2, %rax 7561; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 7562; AVX1-NEXT: vmovq %xmm2, %rax 7563; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 7564; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 7565; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 7566; AVX1-NEXT: vpextrq $1, %xmm0, %rax 7567; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 7568; AVX1-NEXT: vmovq %xmm0, %rax 7569; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0 7570; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] 7571; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 7572; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0 7573; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 7574; AVX1-NEXT: retq 7575; 7576; AVX512F-LABEL: constrained_vector_uitofp_v4f64_v4i64: 7577; AVX512F: # %bb.0: # %entry 7578; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 7579; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 7580; AVX512F-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2 7581; AVX512F-NEXT: vmovq %xmm1, %rax 7582; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1 7583; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 7584; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 7585; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, %xmm2 7586; AVX512F-NEXT: vmovq %xmm0, %rax 7587; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, 
%xmm0
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x double>
           @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
                    metadata !"round.dynamic",
                    metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Strict (constrained) u64 -> f32 vector conversion. With no unsigned
; 64-bit-to-float instruction (pre-AVX512), the expansion visible below halves
; large inputs (shrq + low-bit or, selected by cmovnsq), converts signed
; (cvtsi2ss), then doubles the result on the negative-as-unsigned path (jns /
; addss). AVX512F uses vcvtusi2ss per element; AVX512DQ uses vcvtuqq2ps.
define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm2
; CHECK-NEXT:    jns .LBB182_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addss %xmm2, %xmm2
; CHECK-NEXT:  .LBB182_2: # %entry
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm3
; CHECK-NEXT:    jns .LBB182_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addss %xmm3, %xmm3
; CHECK-NEXT:  .LBB182_4: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
; CHECK-NEXT:    jns .LBB182_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    addss %xmm1, %xmm1
; CHECK-NEXT:  .LBB182_6: # %entry
; CHECK-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm0
; CHECK-NEXT:    jns .LBB182_8
; CHECK-NEXT:  # %bb.7:
; CHECK-NEXT:    addss %xmm0, %xmm0
; CHECK-NEXT:  .LBB182_8: # %entry
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsrlq $1, %xmm2, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm3
; AVX1-NEXT:    vorpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x float>
           @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
                    metadata !"round.dynamic",
                    metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

; Simple test to make sure we don't fuse vselect+strict_fadd into a masked operation.
; The select below must remain a separate blend after the strict fadd: SSE uses
; pcmpeqd + and/andn/or, AVX1 uses vblendvps, AVX512 a masked vmovaps {%k1} —
; never a masked add, which could change which lanes raise FP exceptions.
define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone strictfp {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm10, %xmm10
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm8
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm9
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm11
; CHECK-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm10
; CHECK-NEXT:    addps %xmm3, %xmm7
; CHECK-NEXT:    addps %xmm2, %xmm6
; CHECK-NEXT:    addps %xmm1, %xmm5
; CHECK-NEXT:    addps %xmm0, %xmm4
; CHECK-NEXT:    andps %xmm10, %xmm0
; CHECK-NEXT:    andnps %xmm4, %xmm10
; CHECK-NEXT:    orps %xmm10, %xmm0
; CHECK-NEXT:    andps %xmm11, %xmm1
; CHECK-NEXT:    andnps %xmm5, %xmm11
; CHECK-NEXT:    orps %xmm11, %xmm1
; CHECK-NEXT:    andps %xmm9, %xmm2
; CHECK-NEXT:    andnps %xmm6, %xmm9
; CHECK-NEXT:    orps %xmm9, %xmm2
; CHECK-NEXT:    andps %xmm8, %xmm3
; CHECK-NEXT:    andnps %xmm7, %xmm8
; CHECK-NEXT:    orps %xmm8, %xmm3
; CHECK-NEXT:    retq
;
; AVX1-LABEL: vpaddd_mask_test:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpxor %xmm7, %xmm7, %xmm7
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm3
; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vblendvps %ymm4, %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vblendvps %ymm5, %ymm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: vpaddd_mask_test:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %i, <16 x float> %j, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %i
  ret <16 x float> %r
}
declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)

attributes #0 = { strictfp }

; Single width declarations
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>,
metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata)
declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata)
declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)

; Scalar width declarations
declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata)
declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata)
declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(<1 x float>, metadata)
declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(<1 x float>, metadata)
declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata)
declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata)
declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(<1 x float>, metadata)
declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(<1 x float>, metadata)
declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata)
declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata)
declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata)
declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata)
declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata)
declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata)
declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)

; Illegal width declarations
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x
double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata)
declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(<3 x float>, metadata)
declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(<3 x float>, metadata)
declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(<3 x double>, metadata)
declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(<3 x double>, metadata)
declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(<3 x float>, metadata)
declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(<3 x float>, metadata)
declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(<3 x double>, metadata)
declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(<3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata)
declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)

; Double width declarations
declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata)
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
