; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

declare float @fmaxf(float, float)
declare double @fmax(double, double)
declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

define float @test_fmaxf(float %x, float %y) {
; SSE-LABEL: test_fmaxf:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm0, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: test_fmaxf:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: test_fmaxf:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

define float @test_fmaxf_minsize(float %x, float %y) minsize {
; CHECK-LABEL: test_fmaxf_minsize:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp fmaxf@PLT # TAILCALL
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
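; NaN handling is what makes this lowering non-trivial: llvm.maxnum/fmax have
; IEEE-754 maxNum semantics (if exactly one input is NaN, return the other
; input), while x86's max(ss|sd|ps|pd) returns its second source operand
; whenever either input is NaN. An informal sketch of the pattern checked
; above and below (the CHECK lines are authoritative, not this comment):
;   mask   = (x unord x)          ; all-ones iff x is NaN
;   result = mask ? y : max(y, x) ; max's second source is x, so a NaN in y
;                                 ; already falls through to x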

define double @test_fmax(double %x, double %y) {
; SSE-LABEL: test_fmax:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: cmpunordsd %xmm0, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm3
; SSE-NEXT: andpd %xmm1, %xmm3
; SSE-NEXT: maxsd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm1, %xmm2
; SSE-NEXT: orpd %xmm3, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: test_fmax:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: test_fmax:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
  %z = call double @fmax(double %x, double %y) readnone
  ret double %z
}

define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_fmaxl:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt (%rsp)
; CHECK-NEXT: callq fmaxl
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define float @test_intrinsic_fmaxf(float %x, float %y) {
; SSE-LABEL: test_intrinsic_fmaxf:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm0, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: test_intrinsic_fmaxf:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: test_intrinsic_fmaxf:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
  ret float %z
}

define double @test_intrinsic_fmax(double %x, double %y) {
; SSE-LABEL: test_intrinsic_fmax:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: cmpunordsd %xmm0, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm3
; SSE-NEXT: andpd %xmm1, %xmm3
; SSE-NEXT: maxsd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm1, %xmm2
; SSE-NEXT: orpd %xmm3, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: test_intrinsic_fmax:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: test_intrinsic_fmax:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
  ret double %z
}

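; There is no SSE/AVX max instruction for the 80-bit x87 type, so maxnum.f80
; is expected to lower to a libcall to fmaxl, just like the explicit libm
; call above.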
define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_intrinsic_fmaxl:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt (%rsp)
; CHECK-NEXT: callq fmaxl
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
; SSE2-LABEL: test_intrinsic_fmax_v2f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: maxps %xmm0, %xmm2
; SSE2-NEXT: cmpunordps %xmm0, %xmm0
; SSE2-NEXT: andps %xmm0, %xmm1
; SSE2-NEXT: andnps %xmm2, %xmm0
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_intrinsic_fmax_v2f32:
; SSE4: # %bb.0:
; SSE4-NEXT: movaps %xmm1, %xmm2
; SSE4-NEXT: maxps %xmm0, %xmm2
; SSE4-NEXT: cmpunordps %xmm0, %xmm0
; SSE4-NEXT: blendvps %xmm0, %xmm1, %xmm2
; SSE4-NEXT: movaps %xmm2, %xmm0
; SSE4-NEXT: retq
;
; AVX-LABEL: test_intrinsic_fmax_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
  %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}

define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
; SSE2-LABEL: test_intrinsic_fmax_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: maxps %xmm0, %xmm2
; SSE2-NEXT: cmpunordps %xmm0, %xmm0
; SSE2-NEXT: andps %xmm0, %xmm1
; SSE2-NEXT: andnps %xmm2, %xmm0
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_intrinsic_fmax_v4f32:
; SSE4: # %bb.0:
; SSE4-NEXT: movaps %xmm1, %xmm2
; SSE4-NEXT: maxps %xmm0, %xmm2
; SSE4-NEXT: cmpunordps %xmm0, %xmm0
; SSE4-NEXT: blendvps %xmm0, %xmm1, %xmm2
; SSE4-NEXT: movaps %xmm2, %xmm0
; SSE4-NEXT: retq
;
; AVX-LABEL: test_intrinsic_fmax_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
  %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}

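; In the SSE4 runs below, blendv(ps|pd) takes %xmm0 as an implicit mask
; operand, so each compare result must first be staged into %xmm0; that
; fixed-register constraint accounts for most of the extra movaps/movapd
; copies in the multi-register tests.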
define <8 x float> @test_intrinsic_fmax_v8f32(<8 x float> %x, <8 x float> %y) {
; SSE2-LABEL: test_intrinsic_fmax_v8f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm2, %xmm4
; SSE2-NEXT: maxps %xmm0, %xmm4
; SSE2-NEXT: cmpunordps %xmm0, %xmm0
; SSE2-NEXT: andps %xmm0, %xmm2
; SSE2-NEXT: andnps %xmm4, %xmm0
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm2
; SSE2-NEXT: maxps %xmm1, %xmm2
; SSE2-NEXT: cmpunordps %xmm1, %xmm1
; SSE2-NEXT: andps %xmm1, %xmm3
; SSE2-NEXT: andnps %xmm2, %xmm1
; SSE2-NEXT: orps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_intrinsic_fmax_v8f32:
; SSE4: # %bb.0:
; SSE4-NEXT: movaps %xmm1, %xmm5
; SSE4-NEXT: movaps %xmm2, %xmm4
; SSE4-NEXT: maxps %xmm0, %xmm4
; SSE4-NEXT: cmpunordps %xmm0, %xmm0
; SSE4-NEXT: blendvps %xmm0, %xmm2, %xmm4
; SSE4-NEXT: movaps %xmm3, %xmm1
; SSE4-NEXT: maxps %xmm5, %xmm1
; SSE4-NEXT: cmpunordps %xmm5, %xmm5
; SSE4-NEXT: movaps %xmm5, %xmm0
; SSE4-NEXT: blendvps %xmm0, %xmm3, %xmm1
; SSE4-NEXT: movaps %xmm4, %xmm0
; SSE4-NEXT: retq
;
; AVX-LABEL: test_intrinsic_fmax_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpunordps %ymm0, %ymm0, %ymm0
; AVX-NEXT: vblendvps %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT: retq
  %z = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %x, <8 x float> %y) readnone
  ret <8 x float> %z
}

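; For the 512-bit test below: AVX1 has no 512-bit registers, so the operation
; is split into two 256-bit max/cmpunord/blend sequences, while AVX512 keeps a
; single zmm max and swaps the blend for a mask register: vcmpunordps writes
; %k1, and the merge-masked vmovaps copies the %y lanes over the max result
; exactly where %x was NaN.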
define <16 x float> @test_intrinsic_fmax_v16f32(<16 x float> %x, <16 x float> %y) {
; SSE2-LABEL: test_intrinsic_fmax_v16f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm4, %xmm8
; SSE2-NEXT: maxps %xmm0, %xmm8
; SSE2-NEXT: cmpunordps %xmm0, %xmm0
; SSE2-NEXT: andps %xmm0, %xmm4
; SSE2-NEXT: andnps %xmm8, %xmm0
; SSE2-NEXT: orps %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm4
; SSE2-NEXT: maxps %xmm1, %xmm4
; SSE2-NEXT: cmpunordps %xmm1, %xmm1
; SSE2-NEXT: andps %xmm1, %xmm5
; SSE2-NEXT: andnps %xmm4, %xmm1
; SSE2-NEXT: orps %xmm5, %xmm1
; SSE2-NEXT: movaps %xmm6, %xmm4
; SSE2-NEXT: maxps %xmm2, %xmm4
; SSE2-NEXT: cmpunordps %xmm2, %xmm2
; SSE2-NEXT: andps %xmm2, %xmm6
; SSE2-NEXT: andnps %xmm4, %xmm2
; SSE2-NEXT: orps %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm4
; SSE2-NEXT: maxps %xmm3, %xmm4
; SSE2-NEXT: cmpunordps %xmm3, %xmm3
; SSE2-NEXT: andps %xmm3, %xmm7
; SSE2-NEXT: andnps %xmm4, %xmm3
; SSE2-NEXT: orps %xmm7, %xmm3
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_intrinsic_fmax_v16f32:
; SSE4: # %bb.0:
; SSE4-NEXT: movaps %xmm3, %xmm8
; SSE4-NEXT: movaps %xmm2, %xmm9
; SSE4-NEXT: movaps %xmm1, %xmm2
; SSE4-NEXT: movaps %xmm4, %xmm10
; SSE4-NEXT: maxps %xmm0, %xmm10
; SSE4-NEXT: cmpunordps %xmm0, %xmm0
; SSE4-NEXT: blendvps %xmm0, %xmm4, %xmm10
; SSE4-NEXT: movaps %xmm5, %xmm1
; SSE4-NEXT: maxps %xmm2, %xmm1
; SSE4-NEXT: cmpunordps %xmm2, %xmm2
; SSE4-NEXT: movaps %xmm2, %xmm0
; SSE4-NEXT: blendvps %xmm0, %xmm5, %xmm1
; SSE4-NEXT: movaps %xmm6, %xmm2
; SSE4-NEXT: maxps %xmm9, %xmm2
; SSE4-NEXT: cmpunordps %xmm9, %xmm9
; SSE4-NEXT: movaps %xmm9, %xmm0
; SSE4-NEXT: blendvps %xmm0, %xmm6, %xmm2
; SSE4-NEXT: movaps %xmm7, %xmm3
; SSE4-NEXT: maxps %xmm8, %xmm3
; SSE4-NEXT: cmpunordps %xmm8, %xmm8
; SSE4-NEXT: movaps %xmm8, %xmm0
; SSE4-NEXT: blendvps %xmm0, %xmm7, %xmm3
; SSE4-NEXT: movaps %xmm10, %xmm0
; SSE4-NEXT: retq
;
; AVX1-LABEL: test_intrinsic_fmax_v16f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxps %ymm0, %ymm2, %ymm4
; AVX1-NEXT: vcmpunordps %ymm0, %ymm0, %ymm0
; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm4, %ymm0
; AVX1-NEXT: vmaxps %ymm1, %ymm3, %ymm2
; AVX1-NEXT: vcmpunordps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendvps %ymm1, %ymm3, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: test_intrinsic_fmax_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxps %zmm0, %zmm1, %zmm2
; AVX512-NEXT: vcmpunordps %zmm0, %zmm0, %k1
; AVX512-NEXT: vmovaps %zmm1, %zmm2 {%k1}
; AVX512-NEXT: vmovaps %zmm2, %zmm0
; AVX512-NEXT: retq
  %z = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %x, <16 x float> %y) readnone
  ret <16 x float> %z
}

define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: test_intrinsic_fmax_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm1, %xmm2
; SSE2-NEXT: maxpd %xmm0, %xmm2
; SSE2-NEXT: cmpunordpd %xmm0, %xmm0
; SSE2-NEXT: andpd %xmm0, %xmm1
; SSE2-NEXT: andnpd %xmm2, %xmm0
; SSE2-NEXT: orpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_intrinsic_fmax_v2f64:
; SSE4: # %bb.0:
; SSE4-NEXT: movapd %xmm1, %xmm2
; SSE4-NEXT: maxpd %xmm0, %xmm2
; SSE4-NEXT: cmpunordpd %xmm0, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE4-NEXT: movapd %xmm2, %xmm0
; SSE4-NEXT: retq
;
; AVX-LABEL: test_intrinsic_fmax_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vmaxpd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
  %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}

define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
; SSE2-LABEL: test_intrinsic_fmax_v4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm2, %xmm4
; SSE2-NEXT: maxpd %xmm0, %xmm4
; SSE2-NEXT: cmpunordpd %xmm0, %xmm0
; SSE2-NEXT: andpd %xmm0, %xmm2
; SSE2-NEXT: andnpd %xmm4, %xmm0
; SSE2-NEXT: orpd %xmm2, %xmm0
; SSE2-NEXT: movapd %xmm3, %xmm2
; SSE2-NEXT: maxpd %xmm1, %xmm2
; SSE2-NEXT: cmpunordpd %xmm1, %xmm1
; SSE2-NEXT: andpd %xmm1, %xmm3
; SSE2-NEXT: andnpd %xmm2, %xmm1
; SSE2-NEXT: orpd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_intrinsic_fmax_v4f64:
; SSE4: # %bb.0:
; SSE4-NEXT: movapd %xmm1, %xmm5
; SSE4-NEXT: movapd %xmm2, %xmm4
; SSE4-NEXT: maxpd %xmm0, %xmm4
; SSE4-NEXT: cmpunordpd %xmm0, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm4
; SSE4-NEXT: movapd %xmm3, %xmm1
; SSE4-NEXT: maxpd %xmm5, %xmm1
; SSE4-NEXT: cmpunordpd %xmm5, %xmm5
; SSE4-NEXT: movapd %xmm5, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; SSE4-NEXT: movapd %xmm4, %xmm0
; SSE4-NEXT: retq
;
; AVX-LABEL: test_intrinsic_fmax_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmaxpd %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT: retq
  %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
; SSE2-LABEL: test_intrinsic_fmax_v8f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm4, %xmm8
; SSE2-NEXT: maxpd %xmm0, %xmm8
; SSE2-NEXT: cmpunordpd %xmm0, %xmm0
; SSE2-NEXT: andpd %xmm0, %xmm4
; SSE2-NEXT: andnpd %xmm8, %xmm0
; SSE2-NEXT: orpd %xmm4, %xmm0
; SSE2-NEXT: movapd %xmm5, %xmm4
; SSE2-NEXT: maxpd %xmm1, %xmm4
; SSE2-NEXT: cmpunordpd %xmm1, %xmm1
; SSE2-NEXT: andpd %xmm1, %xmm5
; SSE2-NEXT: andnpd %xmm4, %xmm1
; SSE2-NEXT: orpd %xmm5, %xmm1
; SSE2-NEXT: movapd %xmm6, %xmm4
; SSE2-NEXT: maxpd %xmm2, %xmm4
; SSE2-NEXT: cmpunordpd %xmm2, %xmm2
; SSE2-NEXT: andpd %xmm2, %xmm6
; SSE2-NEXT: andnpd %xmm4, %xmm2
; SSE2-NEXT: orpd %xmm6, %xmm2
; SSE2-NEXT: movapd %xmm7, %xmm4
; SSE2-NEXT: maxpd %xmm3, %xmm4
; SSE2-NEXT: cmpunordpd %xmm3, %xmm3
; SSE2-NEXT: andpd %xmm3, %xmm7
; SSE2-NEXT: andnpd %xmm4, %xmm3
; SSE2-NEXT: orpd %xmm7, %xmm3
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_intrinsic_fmax_v8f64:
; SSE4: # %bb.0:
; SSE4-NEXT: movapd %xmm3, %xmm8
; SSE4-NEXT: movapd %xmm2, %xmm9
; SSE4-NEXT: movapd %xmm1, %xmm2
; SSE4-NEXT: movapd %xmm4, %xmm10
; SSE4-NEXT: maxpd %xmm0, %xmm10
; SSE4-NEXT: cmpunordpd %xmm0, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm4, %xmm10
; SSE4-NEXT: movapd %xmm5, %xmm1
; SSE4-NEXT: maxpd %xmm2, %xmm1
; SSE4-NEXT: cmpunordpd %xmm2, %xmm2
; SSE4-NEXT: movapd %xmm2, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm1
; SSE4-NEXT: movapd %xmm6, %xmm2
; SSE4-NEXT: maxpd %xmm9, %xmm2
; SSE4-NEXT: cmpunordpd %xmm9, %xmm9
; SSE4-NEXT: movapd %xmm9, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm2
; SSE4-NEXT: movapd %xmm7, %xmm3
; SSE4-NEXT: maxpd %xmm8, %xmm3
; SSE4-NEXT: cmpunordpd %xmm8, %xmm8
; SSE4-NEXT: movapd %xmm8, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm3
; SSE4-NEXT: movapd %xmm10, %xmm0
; SSE4-NEXT: retq
;
; AVX1-LABEL: test_intrinsic_fmax_v8f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxpd %ymm0, %ymm2, %ymm4
; AVX1-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
; AVX1-NEXT: vmaxpd %ymm1, %ymm3, %ymm2
; AVX1-NEXT: vcmpunordpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: test_intrinsic_fmax_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxpd %zmm0, %zmm1, %zmm2
; AVX512-NEXT: vcmpunordpd %zmm0, %zmm0, %k1
; AVX512-NEXT: vmovapd %zmm1, %zmm2 {%k1}
; AVX512-NEXT: vmovapd %zmm2, %zmm0
; AVX512-NEXT: retq
  %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}

; The IR-level FMF propagate to the node. With nnan, there's no need to blend.
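; (nnan asserts that neither input is NaN, so the unordered compare and blend,
; which exist only for NaN fixup, can be dropped; a bare max is sufficient.)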

define double @maxnum_intrinsic_nnan_fmf_f64(double %a, double %b) {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f64:
; SSE: # %bb.0:
; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f64:
; AVX: # %bb.0:
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %r = tail call nnan double @llvm.maxnum.f64(double %a, double %b)
  ret double %r
}

; Make sure vectors work too.

define <4 x float> @maxnum_intrinsic_nnan_fmf_f432(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f432:
; SSE: # %bb.0:
; SSE-NEXT: maxps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f432:
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %r = tail call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}

; Current (but legacy someday): a function-level attribute should also enable the fold.

define float @maxnum_intrinsic_nnan_attr_f32(float %a, float %b) #0 {
; SSE-LABEL: maxnum_intrinsic_nnan_attr_f32:
; SSE: # %bb.0:
; SSE-NEXT: maxss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_attr_f32:
; AVX: # %bb.0:
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %r = tail call float @llvm.maxnum.f32(float %a, float %b)
  ret float %r
}

; Make sure vectors work too.

define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double> %b) #0 {
; SSE-LABEL: maxnum_intrinsic_nnan_attr_f64:
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_attr_f64:
; AVX: # %bb.0:
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %r = tail call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %r
}

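; Constant-operand tests. A constant 1.0 is known not to be NaN, so only %x
; needs the NaN fixup; emitting max with the constant as the second source
; makes the hardware's "return the second source on NaN" rule coincide with
; maxnum semantics, so the compare/blend disappears (note that both operand
; orders commute to the same instruction). If the constant itself is NaN,
; maxnum(%x, NaN) folds to %x and the whole call vanishes.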
define float @test_maxnum_const_op1(float %x) {
; SSE-LABEL: test_maxnum_const_op1:
; SSE: # %bb.0:
; SSE-NEXT: maxss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_maxnum_const_op1:
; AVX: # %bb.0:
; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %r = call float @llvm.maxnum.f32(float 1.0, float %x)
  ret float %r
}

define float @test_maxnum_const_op2(float %x) {
; SSE-LABEL: test_maxnum_const_op2:
; SSE: # %bb.0:
; SSE-NEXT: maxss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_maxnum_const_op2:
; AVX: # %bb.0:
; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %r = call float @llvm.maxnum.f32(float %x, float 1.0)
  ret float %r
}

define float @test_maxnum_const_nan(float %x) {
; CHECK-LABEL: test_maxnum_const_nan:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %r = call float @llvm.maxnum.f32(float %x, float 0x7fff000000000000)
  ret float %r
}

attributes #0 = { "no-nans-fp-math"="true" }