; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512

;
; vXf32
;

define float @test_v1f32(<1 x float> %a0) {
; ALL-LABEL: test_v1f32:
; ALL: # %bb.0:
; ALL-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a0)
  ret float %1
}

define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0)
  ret float %1
}

define float @test_v3f32(<3 x float> %a0) {
; SSE2-LABEL: test_v3f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: minss %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: minss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v3f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: minss %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE41-NEXT: minss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v3f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v3f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a0)
  ret float %1
}

define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: movaps %xmm0, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
; SSE2-NEXT: minss %xmm3, %xmm0
; SSE2-NEXT: minss %xmm2, %xmm0
; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: minss %xmm3, %xmm0
; SSE41-NEXT: minss %xmm2, %xmm0
; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0)
  ret float %1
}

define float @test_v8f32(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32:
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: minss %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: minss %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: minss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32:
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: minss %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: minss %xmm2, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-NEXT: minss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm7, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm6, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm5, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm4, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm2, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm7, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm6, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm5, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm4, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0)
  ret float %1
}

define float @test_v16f32(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32:
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm3, %xmm1
; SSE2-NEXT: minps %xmm2, %xmm0
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: minss %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: minss %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: minss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32:
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm3, %xmm1
; SSE41-NEXT: minps %xmm2, %xmm0
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: minss %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: minss %xmm2, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-NEXT: minss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32:
; AVX: # %bb.0:
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vminss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX-NEXT: vminss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vminss %xmm0, %xmm1, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vminss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm9 = xmm1[1,0]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm10 = xmm1[1,1,3,3]
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm5
; AVX512-NEXT: vpermilps {{.*#+}} xmm11 = xmm5[3,3,3,3]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm12 = xmm5[1,0]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm13 = xmm5[1,1,3,3]
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512-NEXT: vpermilps {{.*#+}} xmm14 = xmm3[3,3,3,3]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm15 = xmm3[1,0]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3]
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm6, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm4, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm7, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm15, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm14, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm5, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm13, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm12, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm11, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm10, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm9, %xmm0, %xmm0
; AVX512-NEXT: vminss %xmm8, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0)
  ret float %1
}

;
; vXf64
;

define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
  ret double %1
}

define double @test_v4f64(<4 x double> %a0) {
; SSE-LABEL: test_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminsd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vminsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0)
  ret double %1
}

define double @test_v8f64(<8 x double> %a0) {
; SSE-LABEL: test_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64:
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm7, %xmm0, %xmm0
; AVX512-NEXT: vminsd %xmm5, %xmm0, %xmm0
; AVX512-NEXT: vminsd %xmm6, %xmm0, %xmm0
; AVX512-NEXT: vminsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT: vminsd %xmm4, %xmm0, %xmm0
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vminsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0)
  ret double %1
}

define double @test_v16f64(<16 x double> %a0) {
; SSE-LABEL: test_v16f64:
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm7, %xmm3
; SSE-NEXT: minpd %xmm5, %xmm1
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm6, %xmm2
; SSE-NEXT: minpd %xmm4, %xmm0
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64:
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm3, %ymm1, %ymm1
; AVX-NEXT: vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vminsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0)
  ret double %1
}

define half @test_v2f16(<2 x half> %a0) nounwind {
; SSE-LABEL: test_v2f16:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbx
; SSE-NEXT: subq $16, %rsp
; SSE-NEXT: movl %edi, %ebx
; SSE-NEXT: movzwl %si, %edi
; SSE-NEXT: callq __gnu_h2f_ieee
; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT: movzwl %bx, %edi
; SSE-NEXT: callq __gnu_h2f_ieee
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: cmpunordss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: movaps (%rsp), %xmm3 # 16-byte Reload
; SSE-NEXT: andps %xmm3, %xmm2
; SSE-NEXT: minss %xmm0, %xmm3
; SSE-NEXT: andnps %xmm3, %xmm1
; SSE-NEXT: orps %xmm2, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: callq __gnu_f2h_ieee
; SSE-NEXT: addq $16, %rsp
; SSE-NEXT: popq %rbx
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f16:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rbx
; AVX-NEXT: subq $16, %rsp
; AVX-NEXT: movl %esi, %ebx
; AVX-NEXT: movzwl %di, %edi
; AVX-NEXT: callq __gnu_h2f_ieee
; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX-NEXT: movzwl %bx, %edi
; AVX-NEXT: callq __gnu_h2f_ieee
; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
; AVX-NEXT: # xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vminss %xmm2, %xmm0, %xmm1
; AVX-NEXT: vcmpunordss %xmm2, %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: callq __gnu_f2h_ieee
; AVX-NEXT: addq $16, %rsp
; AVX-NEXT: popq %rbx
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f16:
; AVX512: # %bb.0:
; AVX512-NEXT: movzwl %di, %eax
; AVX512-NEXT: vmovd %eax, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: movzwl %si, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm2
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
  %1 = call nnan half @llvm.vector.reduce.fmin.v2f16(<2 x half> %a0)
  ret half %1
}

declare float @llvm.vector.reduce.fmin.v1f32(<1 x float>)
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v3f32(<3 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)

declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)

declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)