; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare float @fmaxf(float, float)
declare double @fmax(double, double)
declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)

; CHECK-LABEL: @test_fmaxf
; SSE: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define float @test_fmaxf(float %x, float %y) {
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

; CHECK-LABEL: @test_fmaxf_minsize
; CHECK: jmp fmaxf
define float @test_fmaxf_minsize(float %x, float %y) minsize {
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

; FIXME: Doubles should be inlined similarly to floats.

; CHECK-LABEL: @test_fmax
; CHECK: jmp fmax
define double @test_fmax(double %x, double %y) {
  %z = call double @fmax(double %x, double %y) readnone
  ret double %z
}

; CHECK-LABEL: @test_fmaxl
; CHECK: callq fmaxl
define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

; CHECK-LABEL: @test_intrinsic_fmaxf
; SSE: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define float @test_intrinsic_fmaxf(float %x, float %y) {
  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
  ret float %z
}

; FIXME: Doubles should be inlined similarly to floats.

; CHECK-LABEL: @test_intrinsic_fmax
; CHECK: jmp fmax
define double @test_intrinsic_fmax(double %x, double %y) {
  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
  ret double %z
}

; CHECK-LABEL: @test_intrinsic_fmaxl
; CHECK: callq fmaxl
define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

; FIXME: This should not be doing 4 scalar ops on a 2 element vector.
; FIXME: This should use vector ops (maxps / cmpps).
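;
; A packed lowering of the same NaN-aware select pattern could look like the
; following sketch (hypothetical, for illustration only; it is not what the
; backend currently emits for these tests):
;   movaps     %xmm0, %xmm2
;   cmpunordps %xmm2, %xmm2   ; per-lane mask: lane of %x is NaN
;   movaps     %xmm2, %xmm3
;   andps      %xmm1, %xmm3   ; keep %y in the NaN lanes
;   maxps      %xmm0, %xmm1   ; packed max; on a NaN lane the result is the
;                             ; source operand (%xmm0 here)
;   andnps     %xmm1, %xmm2   ; keep the max result in the ordered lanes
;   orps       %xmm3, %xmm2   ; combine: select(mask, %y, max)
; (A scalar double version would follow the same shape with cmpunordsd /
; maxsd / andnpd / orpd.)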

; CHECK-LABEL: @test_intrinsic_fmax_v2f32
; SSE: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: cmpunordss %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: andps %xmm2, %xmm5
; SSE-NEXT: maxss %xmm3, %xmm2
; SSE-NEXT: andnps %xmm2, %xmm4
; SSE-NEXT: orps %xmm5, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1,2,3]
; SSE-NEXT: movaps %xmm5, %xmm3
; SSE-NEXT: cmpunordss %xmm3, %xmm3
; SSE-NEXT: movaps %xmm3, %xmm6
; SSE-NEXT: andps %xmm2, %xmm6
; SSE-NEXT: maxss %xmm5, %xmm2
; SSE-NEXT: andnps %xmm2, %xmm3
; SSE-NEXT: orps %xmm6, %xmm3
; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm4
; SSE-NEXT: andps %xmm1, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: maxss %xmm0, %xmm5
; SSE-NEXT: andnps %xmm5, %xmm2
; SSE-NEXT: orps %xmm4, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: cmpunordss %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: andps %xmm1, %xmm5
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm4
; SSE-NEXT: orps %xmm5, %xmm4
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT: vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT: vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT: vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT: vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm3
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm3, %xmm0
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; AVX-NEXT: retq
define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
  %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}

; FIXME: This should use vector ops (maxps / cmpps).
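;
; In IR terms, the scalar expansion checked above implements this per-lane
; select (a sketch of the semantics only, not code the backend emits):
;   %isnan = fcmp uno <4 x float> %x, %x   ; lanes where %x is NaN
;   %max = <per-lane max of %y and %x, taking %x when %y is NaN>
;   %r = select <4 x i1> %isnan, <4 x float> %y, <4 x float> %max
; The packed-op shape sketched above @test_intrinsic_fmax_v2f32 would apply
; here as well.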

; CHECK-LABEL: @test_intrinsic_fmax_v4f32
; SSE: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: cmpunordss %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: andps %xmm2, %xmm5
; SSE-NEXT: maxss %xmm3, %xmm2
; SSE-NEXT: andnps %xmm2, %xmm4
; SSE-NEXT: orps %xmm5, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1,2,3]
; SSE-NEXT: movaps %xmm5, %xmm3
; SSE-NEXT: cmpunordss %xmm3, %xmm3
; SSE-NEXT: movaps %xmm3, %xmm6
; SSE-NEXT: andps %xmm2, %xmm6
; SSE-NEXT: maxss %xmm5, %xmm2
; SSE-NEXT: andnps %xmm2, %xmm3
; SSE-NEXT: orps %xmm6, %xmm3
; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm4
; SSE-NEXT: andps %xmm1, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: maxss %xmm0, %xmm5
; SSE-NEXT: andnps %xmm5, %xmm2
; SSE-NEXT: orps %xmm4, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: cmpunordss %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: andps %xmm1, %xmm5
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm4
; SSE-NEXT: orps %xmm5, %xmm4
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT: vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT: vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT: vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT: vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm3
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm3, %xmm0
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; AVX-NEXT: retq
define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
  %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.

; CHECK-LABEL: @test_intrinsic_fmax_v2f64
; CHECK: callq fmax
; CHECK: callq fmax
define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
  %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.
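;
; If vectors of doubles were inlined like vectors of floats, the packed-double
; analogue of the blend pattern could look like this sketch (hypothetical; the
; current lowering makes one fmax libcall per element instead):
;   movapd     %xmm0, %xmm2
;   cmpunordpd %xmm2, %xmm2   ; per-lane mask: lane of %x is NaN
;   movapd     %xmm2, %xmm3
;   andpd      %xmm1, %xmm3   ; keep %y in the NaN lanes
;   maxpd      %xmm0, %xmm1   ; packed max; forwards %xmm0 on a NaN lane
;   andnpd     %xmm1, %xmm2
;   orpd       %xmm3, %xmm2   ; select(mask, %y, max)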

; CHECK-LABEL: @test_intrinsic_fmax_v4f64
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
  %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.

; CHECK-LABEL: @test_intrinsic_fmax_v8f64
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
  %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}