; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefix=AVX

; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
; We can't combine into 'round' instructions because the behavior is different for out-of-range values.
;
; Every test below converts floating point to integer with an x86 truncating
; conversion intrinsic and converts the integer result back with sitofp. The
; expected assembly keeps both conversions as separate instructions.
; All functions use attribute group #0, which is defined after the last
; function in this file.

; Truncating conversion intrinsics exercised by the tests.
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)

; <4 x float> loaded from %p -> i32 via cvttss2si -> float via sitofp.
define float @float_to_int_to_float_mem_f32_i32(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f32_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si (%rdi), %eax
; SSE-NEXT:    cvtsi2ss %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f32_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si (%rdi), %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
  %sitofp = sitofp i32 %fptosi to float
  ret float %sitofp
}

; Same as above, but the <4 x float> source is a function argument.
define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f32_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ss %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f32_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
  %sitofp = sitofp i32 %fptosi to float
  ret float %sitofp
}

; <4 x float> loaded from %p -> i64 via cvttss2si64 -> float via sitofp.
define float @float_to_int_to_float_mem_f32_i64(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f32_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si (%rdi), %rax
; SSE-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f32_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si (%rdi), %rax
; AVX-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
  %sitofp = sitofp i64 %fptosi to float
  ret float %sitofp
}

; Same as above, but the <4 x float> source is a function argument.
define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f32_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f32_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
  %sitofp = sitofp i64 %fptosi to float
  ret float %sitofp
}

; <2 x double> loaded from %p -> i32 via cvttsd2si -> double via sitofp.
define double @float_to_int_to_float_mem_f64_i32(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f64_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si (%rdi), %eax
; SSE-NEXT:    cvtsi2sd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f64_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si (%rdi), %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
  %sitofp = sitofp i32 %fptosi to double
  ret double %sitofp
}

; Same as above, but the <2 x double> source is a function argument.
define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f64_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f64_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si %xmm0, %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
  %sitofp = sitofp i32 %fptosi to double
  ret double %sitofp
}

; <2 x double> loaded from %p -> i64 via cvttsd2si64 -> double via sitofp.
define double @float_to_int_to_float_mem_f64_i64(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f64_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si (%rdi), %rax
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f64_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si (%rdi), %rax
; AVX-NEXT:    vcvtsi2sd %rax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
  %sitofp = sitofp i64 %fptosi to double
  ret double %sitofp
}

; Same as above, but the <2 x double> source is a function argument.
define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f64_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f64_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
  %sitofp = sitofp i64 %fptosi to double
  ret double %sitofp
}

; <4 x float> loaded from %p -> <4 x i32> via cvttps2dq -> <4 x float> via sitofp.
define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq (%rdi), %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; Same as above, but the <4 x float> source is a function argument.
define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; <2 x double> loaded from %p -> <4 x i32> via cvttpd2dq. The shufflevector
; keeps elements 0 and 1 of that result, which sitofp converts to <2 x double>.
define <2 x double> @float_to_int_to_float_mem_v2f64(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq (%rdi), %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dqx (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
  %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sitofp = sitofp <2 x i32> %concat to <2 x double>
  ret <2 x double> %sitofp
}

; Same as above, but the <2 x double> source is a function argument.
define <2 x double> @float_to_int_to_float_reg_v2f64(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
  %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sitofp = sitofp <2 x i32> %concat to <2 x double>
  ret <2 x double> %sitofp
}

; Attribute group applied to every function in this file.
attributes #0 = { "no-signed-zeros-fp-math"="true" }