; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin9 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin9 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin9 -mattr=+avx | FileCheck %s --check-prefix=X32_AVX --check-prefix=X32_AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin9 -mattr=+avx | FileCheck %s --check-prefix=X64_AVX --check-prefix=X64_AVX1
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin9 -mattr=+avx512f | FileCheck %s --check-prefix=X32_AVX --check-prefix=X32_AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin9 -mattr=+avx512f | FileCheck %s --check-prefix=X64_AVX --check-prefix=X64_AVX512

define i16 @test1(float %f) nounwind {
; X32-LABEL: test1:
; X32:       ## %bb.0:
; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT:    addss LCPI0_0, %xmm0
; X32-NEXT:    mulss LCPI0_1, %xmm0
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32-NEXT:    minss LCPI0_2, %xmm0
; X32-NEXT:    maxss %xmm1, %xmm0
; X32-NEXT:    cvttss2si %xmm0, %eax
; X32-NEXT:    ## kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       ## %bb.0:
; X64-NEXT:    addss {{.*}}(%rip), %xmm0
; X64-NEXT:    mulss {{.*}}(%rip), %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-NEXT:    minss {{.*}}(%rip), %xmm0
; X64-NEXT:    maxss %xmm1, %xmm0
; X64-NEXT:    cvttss2si %xmm0, %eax
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X32_AVX1-LABEL: test1:
; X32_AVX1:       ## %bb.0:
; X32_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT:    vaddss LCPI0_0, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmulss LCPI0_1, %xmm0, %xmm0
; X32_AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32_AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32_AVX1-NEXT:    vminss LCPI0_2, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; X32_AVX1-NEXT:    vcvttss2si %xmm0, %eax
; X32_AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X32_AVX1-NEXT:    retl
;
; X64_AVX1-LABEL: test1:
; X64_AVX1:       ## %bb.0:
; X64_AVX1-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX1-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64_AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64_AVX1-NEXT:    vminss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX1-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; X64_AVX1-NEXT:    vcvttss2si %xmm0, %eax
; X64_AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64_AVX1-NEXT:    retq
;
; X32_AVX512-LABEL: test1:
; X32_AVX512:       ## %bb.0:
; X32_AVX512-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX512-NEXT:    vaddss LCPI0_0, %xmm0, %xmm0
; X32_AVX512-NEXT:    vmulss LCPI0_1, %xmm0, %xmm0
; X32_AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32_AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32_AVX512-NEXT:    vminss LCPI0_2, %xmm0, %xmm0
; X32_AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32_AVX512-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; X32_AVX512-NEXT:    vcvttss2si %xmm0, %eax
; X32_AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X32_AVX512-NEXT:    retl
;
; X64_AVX512-LABEL: test1:
; X64_AVX512:       ## %bb.0:
; X64_AVX512-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64_AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64_AVX512-NEXT:    vminss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64_AVX512-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; X64_AVX512-NEXT:    vcvttss2si %xmm0, %eax
; X64_AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64_AVX512-NEXT:    retq
  %tmp = insertelement <4 x float> undef, float %f, i32 0 ; <<4 x float>> [#uses=1]
  %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
  %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
  %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
  %tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
  %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
  %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1]
  %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]
  %tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1]
  ret i16 %tmp69
}

define i16 @test2(float %f) nounwind {
; X32-LABEL: test2:
; X32:       ## %bb.0:
; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT:    addss LCPI1_0, %xmm0
; X32-NEXT:    mulss LCPI1_1, %xmm0
; X32-NEXT:    minss LCPI1_2, %xmm0
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    maxss %xmm1, %xmm0
; X32-NEXT:    cvttss2si %xmm0, %eax
; X32-NEXT:    ## kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       ## %bb.0:
; X64-NEXT:    addss {{.*}}(%rip), %xmm0
; X64-NEXT:    mulss {{.*}}(%rip), %xmm0
; X64-NEXT:    minss {{.*}}(%rip), %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    maxss %xmm1, %xmm0
; X64-NEXT:    cvttss2si %xmm0, %eax
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X32_AVX-LABEL: test2:
; X32_AVX:       ## %bb.0:
; X32_AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX-NEXT:    vaddss LCPI1_0, %xmm0, %xmm0
; X32_AVX-NEXT:    vmulss LCPI1_1, %xmm0, %xmm0
; X32_AVX-NEXT:    vminss LCPI1_2, %xmm0, %xmm0
; X32_AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32_AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; X32_AVX-NEXT:    vcvttss2si %xmm0, %eax
; X32_AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X32_AVX-NEXT:    retl
;
; X64_AVX-LABEL: test2:
; X64_AVX:       ## %bb.0:
; X64_AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX-NEXT:    vminss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64_AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; X64_AVX-NEXT:    vcvttss2si %xmm0, %eax
; X64_AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64_AVX-NEXT:    retq
  %tmp28 = fsub float %f, 1.000000e+00 ; <float> [#uses=1]
  %tmp37 = fmul float %tmp28, 5.000000e-01 ; <float> [#uses=1]
  %tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1]
  %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
  %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
  %tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]
  %tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
  ret i16 %tmp69
}

declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)

declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)

declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)

declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)

declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)

declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32)

declare <4 x float> @f()

define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
; X32-LABEL: test3:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    roundss $4, (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       ## %bb.0:
; X64-NEXT:    roundss $4, (%rdi), %xmm0
; X64-NEXT:    retq
;
; X32_AVX-LABEL: test3:
; X32_AVX:       ## %bb.0:
; X32_AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32_AVX-NEXT:    vroundss $4, (%eax), %xmm0, %xmm0
; X32_AVX-NEXT:    retl
;
; X64_AVX-LABEL: test3:
; X64_AVX:       ## %bb.0:
; X64_AVX-NEXT:    vroundss $4, (%rdi), %xmm0, %xmm0
; X64_AVX-NEXT:    retq
  %a = load float , float *%b
  %B = insertelement <4 x float> undef, float %a, i32 0
  %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)
  ret <4 x float> %X
}

define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
; X32-LABEL: test4:
; X32:       ## %bb.0:
; X32-NEXT:    subl $28, %esp
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT:    movaps %xmm0, (%esp) ## 16-byte Spill
; X32-NEXT:    calll _f
; X32-NEXT:    roundss $4, (%esp), %xmm0 ## 16-byte Folded Reload
; X32-NEXT:    addl $28, %esp
; X32-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       ## %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
; X64-NEXT:    callq _f
; X64-NEXT:    roundss $4, (%rsp), %xmm0 ## 16-byte Folded Reload
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    retq
;
; X32_AVX-LABEL: test4:
; X32_AVX:       ## %bb.0:
; X32_AVX-NEXT:    subl $28, %esp
; X32_AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32_AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX-NEXT:    vmovaps %xmm0, (%esp) ## 16-byte Spill
; X32_AVX-NEXT:    calll _f
; X32_AVX-NEXT:    vroundss $4, (%esp), %xmm0, %xmm0 ## 16-byte Folded Reload
; X32_AVX-NEXT:    addl $28, %esp
; X32_AVX-NEXT:    retl
;
; X64_AVX-LABEL: test4:
; X64_AVX:       ## %bb.0:
; X64_AVX-NEXT:    subq $24, %rsp
; X64_AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64_AVX-NEXT:    vmovaps %xmm0, (%rsp) ## 16-byte Spill
; X64_AVX-NEXT:    callq _f
; X64_AVX-NEXT:    vroundss $4, (%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload
; X64_AVX-NEXT:    addq $24, %rsp
; X64_AVX-NEXT:    retq
  %a = load float , float *%b
  %B = insertelement <4 x float> undef, float %a, i32 0
  %q = call <4 x float> @f()
  %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4)
  ret <4 x float> %X
}

; PR13576
define <2 x double> @test5() nounwind uwtable readnone noinline {
; X32-LABEL: test5:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movaps {{.*#+}} xmm0 = [1.28E+2,1.23321E+2]
; X32-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    movaps {{.*#+}} xmm0 = [1.28E+2,1.23321E+2]
; X64-NEXT:    retq
;
; X32_AVX-LABEL: test5:
; X32_AVX:       ## %bb.0: ## %entry
; X32_AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1.28E+2,1.23321E+2]
; X32_AVX-NEXT:    retl
;
; X64_AVX-LABEL: test5:
; X64_AVX:       ## %bb.0: ## %entry
; X64_AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1.28E+2,1.23321E+2]
; X64_AVX-NEXT:    retq
entry:
  %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
  ret <2 x double> %0
}

declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define <4 x float> @minss_fold(float* %x, <4 x float> %y) {
; X32-LABEL: minss_fold:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    minss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: minss_fold:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    minss (%rdi), %xmm0
; X64-NEXT:    retq
;
; X32_AVX-LABEL: minss_fold:
; X32_AVX:       ## %bb.0: ## %entry
; X32_AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32_AVX-NEXT:    vminss (%eax), %xmm0, %xmm0
; X32_AVX-NEXT:    retl
;
; X64_AVX-LABEL: minss_fold:
; X64_AVX:       ## %bb.0: ## %entry
; X64_AVX-NEXT:    vminss (%rdi), %xmm0, %xmm0
; X64_AVX-NEXT:    retq
entry:
  %0 = load float, float* %x, align 1
  %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0.000000e+00, i32 1
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float 0.000000e+00, i32 2
  %vecinit4.i = insertelement <4 x float> %vecinit3.i, float 0.000000e+00, i32 3
  %1 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %y, <4 x float> %vecinit4.i)
  ret <4 x float> %1
}

define <4 x float> @maxss_fold(float* %x, <4 x float> %y) {
; X32-LABEL: maxss_fold:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    maxss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: maxss_fold:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    maxss (%rdi), %xmm0
; X64-NEXT:    retq
;
; X32_AVX-LABEL: maxss_fold:
; X32_AVX:       ## %bb.0: ## %entry
; X32_AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32_AVX-NEXT:    vmaxss (%eax), %xmm0, %xmm0
; X32_AVX-NEXT:    retl
;
; X64_AVX-LABEL: maxss_fold:
; X64_AVX:       ## %bb.0: ## %entry
; X64_AVX-NEXT:    vmaxss (%rdi), %xmm0, %xmm0
; X64_AVX-NEXT:    retq
entry:
  %0 = load float, float* %x, align 1
  %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0.000000e+00, i32 1
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float 0.000000e+00, i32 2
  %vecinit4.i = insertelement <4 x float> %vecinit3.i, float 0.000000e+00, i32 3
  %1 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %y, <4 x float> %vecinit4.i)
  ret <4 x float> %1
}

define <4 x float> @cmpss_fold(float* %x, <4 x float> %y) {
; X32-LABEL: cmpss_fold:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    cmpeqss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: cmpss_fold:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    cmpeqss (%rdi), %xmm0
; X64-NEXT:    retq
;
; X32_AVX-LABEL: cmpss_fold:
; X32_AVX:       ## %bb.0: ## %entry
; X32_AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32_AVX-NEXT:    vcmpeqss (%eax), %xmm0, %xmm0
; X32_AVX-NEXT:    retl
;
; X64_AVX-LABEL: cmpss_fold:
; X64_AVX:       ## %bb.0: ## %entry
; X64_AVX-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0
; X64_AVX-NEXT:    retq
entry:
  %0 = load float, float* %x, align 1
  %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0.000000e+00, i32 1
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float 0.000000e+00, i32 2
  %vecinit4.i = insertelement <4 x float> %vecinit3.i, float 0.000000e+00, i32 3
  %1 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %y, <4 x float> %vecinit4.i, i8 0)
  ret <4 x float> %1
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone


define <4 x float> @double_fold(float* %x, <4 x float> %y) {
; X32-LABEL: double_fold:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT:    movaps %xmm0, %xmm2
; X32-NEXT:    minss %xmm1, %xmm2
; X32-NEXT:    maxss %xmm1, %xmm0
; X32-NEXT:    addps %xmm2, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: double_fold:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    movaps %xmm0, %xmm2
; X64-NEXT:    minss %xmm1, %xmm2
; X64-NEXT:    maxss %xmm1, %xmm0
; X64-NEXT:    addps %xmm2, %xmm0
; X64-NEXT:    retq
;
; X32_AVX-LABEL: double_fold:
; X32_AVX:       ## %bb.0: ## %entry
; X32_AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32_AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32_AVX-NEXT:    vminss %xmm1, %xmm0, %xmm2
; X32_AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; X32_AVX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; X32_AVX-NEXT:    retl
;
; X64_AVX-LABEL: double_fold:
; X64_AVX:       ## %bb.0: ## %entry
; X64_AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64_AVX-NEXT:    vminss %xmm1, %xmm0, %xmm2
; X64_AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; X64_AVX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; X64_AVX-NEXT:    retq
entry:
  %0 = load float, float* %x, align 1
  %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
  %1 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %y, <4 x float> %vecinit.i)
  %2 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %y, <4 x float> %vecinit.i)
  %3 = fadd <4 x float> %1, %2
  ret <4 x float> %3
}