; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare float @fminf(float, float)
declare double @fmin(double, double)
declare x86_fp80 @fminl(x86_fp80, x86_fp80)
declare float @llvm.minnum.f32(float, float)
declare double @llvm.minnum.f64(double, double)
declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

define float @test_fminf(float %x, float %y) {
; SSE-LABEL: test_fminf:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    minss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_fminf:
; AVX:       # %bb.0:
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call float @fminf(float %x, float %y) readnone
  ret float %z
}

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

define double @test_fmin(double %x, double %y) {
; SSE-LABEL: test_fmin:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    minsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_fmin:
; AVX:       # %bb.0:
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call double @fmin(double %x, double %y) readnone
  ret double %z
}

define x86_fp80 @test_fminl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_fminl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fminl
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %z = call x86_fp80 @fminl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define float @test_intrinsic_fminf(float %x, float %y) {
; SSE-LABEL: test_intrinsic_fminf:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    minss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fminf:
; AVX:       # %bb.0:
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call float @llvm.minnum.f32(float %x, float %y) readnone
  ret float %z
}

define double @test_intrinsic_fmin(double %x, double %y) {
; SSE-LABEL: test_intrinsic_fmin:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    minsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin:
; AVX:       # %bb.0:
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call double @llvm.minnum.f64(double %x, double %y) readnone
  ret double %z
}

define x86_fp80 @test_intrinsic_fminl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_intrinsic_fminl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fminl
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %z = call x86_fp80 @llvm.minnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define <2 x float> @test_intrinsic_fmin_v2f32(<2 x float> %x, <2 x float> %y) {
; SSE-LABEL: test_intrinsic_fmin_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    minps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}

define <4 x float> @test_intrinsic_fmin_v4f32(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: test_intrinsic_fmin_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    minps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}

define <2 x double> @test_intrinsic_fmin_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: test_intrinsic_fmin_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    minpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}

define <4 x double> @test_intrinsic_fmin_v4f64(<4 x double> %x, <4 x double> %y) {
; SSE-LABEL: test_intrinsic_fmin_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    minpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm2
; SSE-NEXT:    andnpd %xmm4, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm1
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    andnpd %xmm2, %xmm1
; SSE-NEXT:    orpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT:    retq
  %z = call <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

define <8 x double> @test_intrinsic_fmin_v8f64(<8 x double> %x, <8 x double> %y) {
; SSE-LABEL: test_intrinsic_fmin_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm4, %xmm8
; SSE-NEXT:    minpd %xmm0, %xmm8
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm4
; SSE-NEXT:    andnpd %xmm8, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm5, %xmm4
; SSE-NEXT:    minpd %xmm1, %xmm4
; SSE-NEXT:    cmpunordpd %xmm1, %xmm1
; SSE-NEXT:    andpd %xmm1, %xmm5
; SSE-NEXT:    andnpd %xmm4, %xmm1
; SSE-NEXT:    orpd %xmm5, %xmm1
; SSE-NEXT:    movapd %xmm6, %xmm4
; SSE-NEXT:    minpd %xmm2, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm2
; SSE-NEXT:    andpd %xmm2, %xmm6
; SSE-NEXT:    andnpd %xmm4, %xmm2
; SSE-NEXT:    orpd %xmm6, %xmm2
; SSE-NEXT:    movapd %xmm7, %xmm4
; SSE-NEXT:    minpd %xmm3, %xmm4
; SSE-NEXT:    cmpunordpd %xmm3, %xmm3
; SSE-NEXT:    andpd %xmm3, %xmm7
; SSE-NEXT:    andnpd %xmm4, %xmm3
; SSE-NEXT:    orpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm0, %ymm2, %ymm4
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
; AVX-NEXT:    vminpd %ymm1, %ymm3, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
; AVX-NEXT:    retq
  %z = call <8 x double> @llvm.minnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}

; The IR-level FMF propagate to the node. With nnan, there's no need to blend.

define float @minnum_intrinsic_nnan_fmf_f32(float %a, float %b) {
; SSE-LABEL: minnum_intrinsic_nnan_fmf_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    minss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_fmf_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan float @llvm.minnum.f32(float %a, float %b)
  ret float %r
}

; Make sure vectors work too.

define <2 x double> @minnum_intrinsic_nnan_fmf_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: minnum_intrinsic_nnan_fmf_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_fmf_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %r
}

; Current (but legacy someday): a function-level attribute should also enable the fold.

define double @minnum_intrinsic_nnan_attr_f64(double %a, double %b) #0 {
; SSE-LABEL: minnum_intrinsic_nnan_attr_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_attr_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call double @llvm.minnum.f64(double %a, double %b)
  ret double %r
}

; Make sure vectors work too.

define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; SSE-LABEL: minnum_intrinsic_nnan_attr_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    minps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_attr_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}

attributes #0 = { "no-nans-fp-math"="true" }