; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX1

define float @trunc_unsigned_f32(float %x) #0 {
; SSE2-LABEL: trunc_unsigned_f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttss2si %xmm0, %rax
; SSE2-NEXT:    movl %eax, %eax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui float %x to i32
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_unsigned_f64(double %x) #0 {
; SSE2-LABEL: trunc_unsigned_f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    movapd %xmm0, %xmm2
; SSE2-NEXT:    subsd %xmm1, %xmm2
; SSE2-NEXT:    cvttsd2si %xmm2, %rax
; SSE2-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    ucomisd %xmm1, %xmm0
; SSE2-NEXT:    cmovaeq %rcx, %rax
; SSE2-NEXT:    movq %rax, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE2-NEXT:    subpd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT:    addsd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui double %x to i64
  %r = uitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    cmpltps %xmm2, %xmm1
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm3
; SSE2-NEXT:    subps %xmm2, %xmm0
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    xorps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    andps %xmm1, %xmm3
; SSE2-NEXT:    andnps %xmm0, %xmm1
; SSE2-NEXT:    orps %xmm3, %xmm1
; SSE2-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
; SSE2-NEXT:    andps %xmm1, %xmm0
; SSE2-NEXT:    orps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    por {{.*}}(%rip), %xmm1
; SSE2-NEXT:    subps {{.*}}(%rip), %xmm1
; SSE2-NEXT:    addps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_v4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui <4 x float> %x to <4 x i32>
  %r = uitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}
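
; Note: SSE2 has no unsigned fp<->int conversion instructions, so in the i64
; element cases below fptoui is expanded with a compare/subtract/xor sequence
; and uitofp with magic-constant bit manipulation. With SSE4.1/AVX the whole
; round trip is folded to roundpd/vroundpd.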

define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT:    movapd %xmm0, %xmm1
; SSE2-NEXT:    subsd %xmm2, %xmm1
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE2-NEXT:    xorq %rcx, %rax
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    ucomisd %xmm2, %xmm0
; SSE2-NEXT:    cmovaeq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    movapd %xmm0, %xmm3
; SSE2-NEXT:    subsd %xmm2, %xmm3
; SSE2-NEXT:    cvttsd2si %xmm3, %rax
; SSE2-NEXT:    xorq %rcx, %rax
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    ucomisd %xmm2, %xmm0
; SSE2-NEXT:    cmovaeq %rax, %rcx
; SSE2-NEXT:    movq %rcx, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por {{.*}}(%rip), %xmm1
; SSE2-NEXT:    subpd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    addpd %xmm0, %xmm1
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_v2f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui <2 x double> %x to <2 x i64>
  %r = uitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE2-NEXT:    subsd %xmm3, %xmm1
; SSE2-NEXT:    cvttsd2si %xmm1, %rcx
; SSE2-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm2, %rdx
; SSE2-NEXT:    ucomisd %xmm3, %xmm2
; SSE2-NEXT:    cmovaeq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE2-NEXT:    movapd %xmm2, %xmm4
; SSE2-NEXT:    subsd %xmm3, %xmm4
; SSE2-NEXT:    cvttsd2si %xmm4, %rcx
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm2, %rdx
; SSE2-NEXT:    ucomisd %xmm3, %xmm2
; SSE2-NEXT:    cmovaeq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm2
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    movapd %xmm0, %xmm2
; SSE2-NEXT:    subsd %xmm3, %xmm2
; SSE2-NEXT:    cvttsd2si %xmm2, %rcx
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    ucomisd %xmm3, %xmm0
; SSE2-NEXT:    cmovaeq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm2
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    movapd %xmm0, %xmm4
; SSE2-NEXT:    subsd %xmm3, %xmm4
; SSE2-NEXT:    cvttsd2si %xmm4, %rcx
; SSE2-NEXT:    xorq %rax, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    ucomisd %xmm3, %xmm0
; SSE2-NEXT:    cmovaeq %rcx, %rax
; SSE2-NEXT:    movq %rax, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    psrlq $32, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT:    por %xmm5, %xmm2
; SSE2-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT:    subpd %xmm6, %xmm2
; SSE2-NEXT:    addpd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    subpd %xmm6, %xmm1
; SSE2-NEXT:    addpd %xmm0, %xmm1
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX1-NEXT:    retq
  %i = fptoui <4 x double> %x to <4 x i64>
  %r = uitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}
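
; Note: for in-range values, a signed int round trip matches trunc except that
; it loses the sign of -0.0: trunc(-0.5) is -0.0, but sitofp(fptosi -0.5) is
; +0.0. The fold to round* is therefore only done for the nsz functions below
; (attribute #0, "no-signed-zeros-fp-math").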

define float @trunc_signed_f32_no_fast_math(float %x) {
; SSE-LABEL: trunc_signed_f32_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f32_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

; Without -0.0, it is ok to use roundss if it is available.

define float @trunc_signed_f32_nsz(float %x) #0 {
; SSE2-LABEL: trunc_signed_f32_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_f32_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f32_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed32_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_signed32_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed32_f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed32_f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed32_f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}
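
; Note: in the mixed-type cases below, the source and destination FP types
; differ (f32 -> i32 -> f64 and f64 -> i32 -> f32), so the round trip cannot
; become a single trunc of the input; the cvt sequence is kept even with nsz.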

define double @trunc_f32_signed32_f64_no_fast_math(float %x) {
; SSE-LABEL: trunc_f32_signed32_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_nsz(float %x) #0 {
; SSE-LABEL: trunc_f32_signed32_f64_nsz:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define float @trunc_f64_signed32_f32_no_fast_math(double %x) {
; SSE-LABEL: trunc_f64_signed32_f32_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define float @trunc_f64_signed32_f32_nsz(double %x) #0 {
; SSE-LABEL: trunc_f64_signed32_f32_nsz:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f64_no_fast_math:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define double @trunc_signed_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed_f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_signed_v4f32_nsz(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f32_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v4f32_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_v4f32_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi <4 x float> %x to <4 x i32>
  %r = sitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}
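
; Note: there is no packed f64 <-> i64 conversion before AVX-512, so the SSE2
; output below converts one lane at a time with cvttsd2si/cvtsi2sd; SSE4.1 and
; AVX fold the round trip to roundpd/vroundpd.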

define <2 x double> @trunc_signed_v2f64_nsz(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v2f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    cvtsi2sd %rcx, %xmm1
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v2f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_v2f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi <2 x double> %x to <2 x i64>
  %r = sitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_signed_v4f64_nsz(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE2-NEXT:    cvttsd2si %xmm1, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rsi
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rdx, %xmm0
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2sd %rsi, %xmm1
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
; SSE2-NEXT:    cvtsi2sd %rcx, %xmm2
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v4f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_v4f64_nsz:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX1-NEXT:    retq
  %i = fptosi <4 x double> %x to <4 x i64>
  %r = sitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

; The fold may be guarded to allow existing code to continue
; working based on its assumptions of float->int overflow.

define float @trunc_unsigned_f32_disable_via_attr(float %x) #1 {
; SSE-LABEL: trunc_unsigned_f32_disable_via_attr:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movl %eax, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_unsigned_f32_disable_via_attr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttss2si %xmm0, %rax
; AVX1-NEXT:    movl %eax, %eax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm0
; AVX1-NEXT:    retq
  %i = fptoui float %x to i32
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_disable_via_attr(double %x) #1 {
; SSE-LABEL: trunc_signed_f64_disable_via_attr:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_signed_f64_disable_via_attr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; AVX1-NEXT:    retq
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "strict-float-cast-overflow"="false" }